Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1128]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 614103147, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:28:43.781300: step: 2/466, loss: 13.73521900177002 2023-01-24 01:28:44.615113: step: 4/466, loss: 27.88758087158203 2023-01-24 01:28:45.364151: step: 6/466, loss: 15.083219528198242 2023-01-24 01:28:46.106670: step: 8/466, loss: 9.161754608154297 2023-01-24 01:28:46.846236: step: 10/466, loss: 6.564452171325684 2023-01-24 01:28:47.557767: step: 12/466, loss: 5.852215766906738 2023-01-24 01:28:48.286565: step: 14/466, loss: 18.493228912353516 2023-01-24 01:28:48.989483: step: 16/466, loss: 9.561614990234375 2023-01-24 01:28:49.759204: step: 18/466, loss: 16.085447311401367 2023-01-24 01:28:50.533572: step: 20/466, loss: 16.76175308227539 2023-01-24 01:28:51.243398: step: 22/466, loss: 14.106352806091309 2023-01-24 01:28:52.015550: step: 24/466, loss: 18.976318359375 2023-01-24 01:28:52.815995: step: 26/466, loss: 6.381458759307861 2023-01-24 01:28:53.751837: step: 28/466, loss: 39.91090393066406 2023-01-24 01:28:54.536419: step: 30/466, loss: 15.294017791748047 2023-01-24 01:28:55.300343: step: 32/466, loss: 16.546710968017578 2023-01-24 01:28:56.025403: step: 34/466, loss: 26.49373435974121 2023-01-24 01:28:56.795781: step: 36/466, loss: 9.152485847473145 2023-01-24 01:28:57.542138: step: 38/466, loss: 13.001274108886719 2023-01-24 01:28:58.402566: step: 40/466, loss: 7.560708045959473 2023-01-24 01:28:59.104656: step: 42/466, loss: 12.648591041564941 2023-01-24 01:28:59.812856: step: 44/466, loss: 18.493995666503906 2023-01-24 01:29:00.531569: step: 46/466, loss: 11.046037673950195 2023-01-24 01:29:01.260863: step: 48/466, loss: 13.510764122009277 2023-01-24 01:29:02.085428: step: 50/466, loss: 28.686752319335938 2023-01-24 01:29:02.818636: step: 52/466, loss: 5.125367641448975 2023-01-24 01:29:03.581786: step: 54/466, loss: 22.069759368896484 2023-01-24 01:29:04.250678: step: 56/466, loss: 8.92529296875 2023-01-24 01:29:05.058548: step: 58/466, loss: 16.351173400878906 2023-01-24 01:29:05.765848: step: 60/466, loss: 18.710268020629883 2023-01-24 01:29:06.477265: step: 62/466, loss: 32.937156677246094 2023-01-24 01:29:07.162591: step: 64/466, loss: 5.1975226402282715 2023-01-24 01:29:08.037225: step: 66/466, loss: 11.405698776245117 2023-01-24 01:29:08.757459: step: 68/466, loss: 8.95192813873291 2023-01-24 01:29:09.563713: step: 70/466, loss: 22.55552864074707 2023-01-24 01:29:10.367865: step: 72/466, loss: 9.215524673461914 2023-01-24 01:29:11.145853: step: 74/466, loss: 16.336763381958008 2023-01-24 01:29:11.878887: step: 76/466, loss: 13.137796401977539 2023-01-24 01:29:12.623086: step: 78/466, loss: 10.040982246398926 2023-01-24 01:29:13.363546: step: 80/466, loss: 15.665406227111816 2023-01-24 01:29:14.137642: step: 82/466, loss: 9.019591331481934 2023-01-24 01:29:14.918484: step: 84/466, loss: 5.707324028015137 2023-01-24 01:29:15.557617: step: 86/466, loss: 7.152530670166016 2023-01-24 01:29:16.247127: step: 88/466, loss: 11.222397804260254 2023-01-24 01:29:17.020819: step: 90/466, loss: 23.643239974975586 2023-01-24 01:29:17.817285: step: 92/466, loss: 13.26652717590332 2023-01-24 01:29:18.566630: step: 94/466, loss: 5.377017021179199 2023-01-24 01:29:19.449563: step: 96/466, loss: 20.393341064453125 2023-01-24 01:29:20.252788: step: 98/466, loss: 28.030920028686523 2023-01-24 01:29:21.022027: step: 100/466, loss: 8.011981010437012 2023-01-24 01:29:21.772614: step: 102/466, loss: 22.163223266601562 2023-01-24 01:29:22.526317: step: 104/466, loss: 16.60025978088379 2023-01-24 01:29:23.371905: step: 106/466, loss: 11.448785781860352 2023-01-24 01:29:24.132330: step: 108/466, loss: 5.252057075500488 2023-01-24 01:29:24.883366: step: 110/466, loss: 10.07214641571045 2023-01-24 01:29:25.683271: step: 112/466, loss: 9.368322372436523 2023-01-24 01:29:26.445163: step: 114/466, loss: 17.051410675048828 2023-01-24 01:29:27.239691: step: 116/466, loss: 11.597021102905273 2023-01-24 01:29:28.085024: step: 118/466, loss: 14.312658309936523 2023-01-24 01:29:28.857851: step: 120/466, loss: 19.73943328857422 2023-01-24 01:29:29.674010: step: 122/466, loss: 8.307870864868164 2023-01-24 01:29:30.446798: step: 124/466, loss: 16.454423904418945 2023-01-24 01:29:31.245517: step: 126/466, loss: 10.20925235748291 2023-01-24 01:29:32.005778: step: 128/466, loss: 6.411805152893066 2023-01-24 01:29:32.733489: step: 130/466, loss: 3.820077419281006 2023-01-24 01:29:33.544478: step: 132/466, loss: 11.879013061523438 2023-01-24 01:29:34.286262: step: 134/466, loss: 19.770090103149414 2023-01-24 01:29:35.171366: step: 136/466, loss: 19.46176528930664 2023-01-24 01:29:35.937340: step: 138/466, loss: 12.031578063964844 2023-01-24 01:29:36.678237: step: 140/466, loss: 12.311153411865234 2023-01-24 01:29:37.491492: step: 142/466, loss: 7.866918087005615 2023-01-24 01:29:38.215892: step: 144/466, loss: 13.755508422851562 2023-01-24 01:29:38.961082: step: 146/466, loss: 8.731468200683594 2023-01-24 01:29:39.677412: step: 148/466, loss: 14.674220085144043 2023-01-24 01:29:40.409565: step: 150/466, loss: 11.923317909240723 2023-01-24 01:29:41.071074: step: 152/466, loss: 2.6268796920776367 2023-01-24 01:29:41.855832: step: 154/466, loss: 8.184459686279297 2023-01-24 01:29:42.604123: step: 156/466, loss: 10.321874618530273 2023-01-24 01:29:43.345510: step: 158/466, loss: 12.427175521850586 2023-01-24 01:29:44.089760: step: 160/466, loss: 7.840033531188965 2023-01-24 01:29:44.864287: step: 162/466, loss: 6.400445938110352 2023-01-24 01:29:45.623934: step: 164/466, loss: 13.40277099609375 2023-01-24 01:29:46.393635: step: 166/466, loss: 3.168619155883789 2023-01-24 01:29:47.177562: step: 168/466, loss: 11.076648712158203 2023-01-24 01:29:47.989407: step: 170/466, loss: 3.6342873573303223 2023-01-24 01:29:48.780386: step: 172/466, loss: 8.787984848022461 2023-01-24 01:29:49.504645: step: 174/466, loss: 7.403632640838623 2023-01-24 01:29:50.323509: step: 176/466, loss: 4.207934379577637 2023-01-24 01:29:51.038368: step: 178/466, loss: 3.733793020248413 2023-01-24 01:29:51.759272: step: 180/466, loss: 6.416247367858887 2023-01-24 01:29:52.537815: step: 182/466, loss: 8.14820671081543 2023-01-24 01:29:53.249260: step: 184/466, loss: 7.063889503479004 2023-01-24 01:29:53.974767: step: 186/466, loss: 17.30549430847168 2023-01-24 01:29:54.805862: step: 188/466, loss: 28.624164581298828 2023-01-24 01:29:55.647690: step: 190/466, loss: 3.90836763381958 2023-01-24 01:29:56.401228: step: 192/466, loss: 5.004888534545898 2023-01-24 01:29:57.155867: step: 194/466, loss: 3.8708887100219727 2023-01-24 01:29:57.915402: step: 196/466, loss: 7.665855407714844 2023-01-24 01:29:58.760440: step: 198/466, loss: 13.581562042236328 2023-01-24 01:29:59.508163: step: 200/466, loss: 11.876445770263672 2023-01-24 01:30:00.442955: step: 202/466, loss: 6.107644081115723 2023-01-24 01:30:01.184994: step: 204/466, loss: 5.8673577308654785 2023-01-24 01:30:01.914509: step: 206/466, loss: 9.80533218383789 2023-01-24 01:30:02.632050: step: 208/466, loss: 12.336395263671875 2023-01-24 01:30:03.345831: step: 210/466, loss: 12.121980667114258 2023-01-24 01:30:04.047138: step: 212/466, loss: 2.9984421730041504 2023-01-24 01:30:04.843607: step: 214/466, loss: 9.93939208984375 2023-01-24 01:30:05.684509: step: 216/466, loss: 10.636306762695312 2023-01-24 01:30:06.404911: step: 218/466, loss: 8.842659950256348 2023-01-24 01:30:07.203438: step: 220/466, loss: 11.319211959838867 2023-01-24 01:30:07.997942: step: 222/466, loss: 7.915955066680908 2023-01-24 01:30:08.704890: step: 224/466, loss: 4.226457118988037 2023-01-24 01:30:09.410424: step: 226/466, loss: 7.111764907836914 2023-01-24 01:30:10.085321: step: 228/466, loss: 3.8790555000305176 2023-01-24 01:30:10.927764: step: 230/466, loss: 5.482752799987793 2023-01-24 01:30:11.570871: step: 232/466, loss: 6.5940842628479 2023-01-24 01:30:12.377252: step: 234/466, loss: 4.292502403259277 2023-01-24 01:30:13.132903: step: 236/466, loss: 4.059828758239746 2023-01-24 01:30:13.932337: step: 238/466, loss: 13.06025505065918 2023-01-24 01:30:14.634758: step: 240/466, loss: 3.3042235374450684 2023-01-24 01:30:15.290313: step: 242/466, loss: 6.879997253417969 2023-01-24 01:30:16.085093: step: 244/466, loss: 6.937021732330322 2023-01-24 01:30:16.911896: step: 246/466, loss: 11.337417602539062 2023-01-24 01:30:17.605629: step: 248/466, loss: 1.7524123191833496 2023-01-24 01:30:18.325669: step: 250/466, loss: 2.4118971824645996 2023-01-24 01:30:19.098968: step: 252/466, loss: 9.450803756713867 2023-01-24 01:30:19.883585: step: 254/466, loss: 6.877956867218018 2023-01-24 01:30:20.631926: step: 256/466, loss: 6.180779457092285 2023-01-24 01:30:21.396234: step: 258/466, loss: 4.517580986022949 2023-01-24 01:30:22.249710: step: 260/466, loss: 7.087218761444092 2023-01-24 01:30:23.079596: step: 262/466, loss: 17.341150283813477 2023-01-24 01:30:23.939534: step: 264/466, loss: 4.7916436195373535 2023-01-24 01:30:24.722894: step: 266/466, loss: 1.7789692878723145 2023-01-24 01:30:25.548441: step: 268/466, loss: 10.851947784423828 2023-01-24 01:30:26.304624: step: 270/466, loss: 2.1357803344726562 2023-01-24 01:30:27.051312: step: 272/466, loss: 11.99293327331543 2023-01-24 01:30:27.819647: step: 274/466, loss: 6.715605735778809 2023-01-24 01:30:28.643299: step: 276/466, loss: 9.224824905395508 2023-01-24 01:30:29.425033: step: 278/466, loss: 6.91132926940918 2023-01-24 01:30:30.162936: step: 280/466, loss: 7.182122230529785 2023-01-24 01:30:30.931329: step: 282/466, loss: 3.719740390777588 2023-01-24 01:30:31.726663: step: 284/466, loss: 5.512752532958984 2023-01-24 01:30:32.503420: step: 286/466, loss: 4.903224468231201 2023-01-24 01:30:33.182701: step: 288/466, loss: 8.032452583312988 2023-01-24 01:30:33.935234: step: 290/466, loss: 6.836153507232666 2023-01-24 01:30:34.742039: step: 292/466, loss: 3.4128684997558594 2023-01-24 01:30:35.721040: step: 294/466, loss: 4.554594039916992 2023-01-24 01:30:36.463572: step: 296/466, loss: 3.2199997901916504 2023-01-24 01:30:37.272416: step: 298/466, loss: 5.25563907623291 2023-01-24 01:30:38.081952: step: 300/466, loss: 5.8102850914001465 2023-01-24 01:30:38.986366: step: 302/466, loss: 14.86589241027832 2023-01-24 01:30:39.666334: step: 304/466, loss: 7.137749195098877 2023-01-24 01:30:40.401112: step: 306/466, loss: 9.122910499572754 2023-01-24 01:30:41.215417: step: 308/466, loss: 10.541033744812012 2023-01-24 01:30:41.940853: step: 310/466, loss: 7.755885601043701 2023-01-24 01:30:42.706848: step: 312/466, loss: 4.080110549926758 2023-01-24 01:30:43.364252: step: 314/466, loss: 6.619048118591309 2023-01-24 01:30:44.168172: step: 316/466, loss: 2.064182996749878 2023-01-24 01:30:44.852266: step: 318/466, loss: 1.4975166320800781 2023-01-24 01:30:45.594333: step: 320/466, loss: 8.109560012817383 2023-01-24 01:30:46.358517: step: 322/466, loss: 9.021079063415527 2023-01-24 01:30:47.165007: step: 324/466, loss: 3.7328691482543945 2023-01-24 01:30:47.893319: step: 326/466, loss: 8.172685623168945 2023-01-24 01:30:48.654305: step: 328/466, loss: 3.3030261993408203 2023-01-24 01:30:49.444356: step: 330/466, loss: 4.315179824829102 2023-01-24 01:30:50.159236: step: 332/466, loss: 8.02114200592041 2023-01-24 01:30:50.996179: step: 334/466, loss: 14.793797492980957 2023-01-24 01:30:51.689713: step: 336/466, loss: 3.144601821899414 2023-01-24 01:30:52.486609: step: 338/466, loss: 16.421733856201172 2023-01-24 01:30:53.286150: step: 340/466, loss: 7.852216720581055 2023-01-24 01:30:53.966667: step: 342/466, loss: 5.317742347717285 2023-01-24 01:30:54.703524: step: 344/466, loss: 3.8623104095458984 2023-01-24 01:30:55.458864: step: 346/466, loss: 13.985149383544922 2023-01-24 01:30:56.154246: step: 348/466, loss: 5.1254119873046875 2023-01-24 01:30:56.909068: step: 350/466, loss: 2.3744070529937744 2023-01-24 01:30:57.707493: step: 352/466, loss: 2.442744016647339 2023-01-24 01:30:58.441701: step: 354/466, loss: 9.244939804077148 2023-01-24 01:30:59.172157: step: 356/466, loss: 4.649564266204834 2023-01-24 01:30:59.930445: step: 358/466, loss: 9.64559555053711 2023-01-24 01:31:00.780939: step: 360/466, loss: 8.238361358642578 2023-01-24 01:31:01.462855: step: 362/466, loss: 8.38036823272705 2023-01-24 01:31:02.282424: step: 364/466, loss: 2.4607577323913574 2023-01-24 01:31:03.034328: step: 366/466, loss: 2.487901210784912 2023-01-24 01:31:03.823858: step: 368/466, loss: 9.80903148651123 2023-01-24 01:31:04.595219: step: 370/466, loss: 4.288197040557861 2023-01-24 01:31:05.323944: step: 372/466, loss: 8.693002700805664 2023-01-24 01:31:06.156754: step: 374/466, loss: 7.432981491088867 2023-01-24 01:31:06.962049: step: 376/466, loss: 4.512446403503418 2023-01-24 01:31:07.817706: step: 378/466, loss: 5.332370758056641 2023-01-24 01:31:08.612425: step: 380/466, loss: 2.3127150535583496 2023-01-24 01:31:09.401476: step: 382/466, loss: 3.342420816421509 2023-01-24 01:31:10.194498: step: 384/466, loss: 5.061471939086914 2023-01-24 01:31:10.899021: step: 386/466, loss: 3.820502996444702 2023-01-24 01:31:11.704497: step: 388/466, loss: 7.286428451538086 2023-01-24 01:31:12.494121: step: 390/466, loss: 3.637666702270508 2023-01-24 01:31:13.201363: step: 392/466, loss: 8.565852165222168 2023-01-24 01:31:13.918864: step: 394/466, loss: 3.7479686737060547 2023-01-24 01:31:14.632277: step: 396/466, loss: 2.165762424468994 2023-01-24 01:31:15.400213: step: 398/466, loss: 8.176563262939453 2023-01-24 01:31:16.148279: step: 400/466, loss: 4.044155120849609 2023-01-24 01:31:16.896373: step: 402/466, loss: 4.002263069152832 2023-01-24 01:31:17.593766: step: 404/466, loss: 7.4720869064331055 2023-01-24 01:31:18.328975: step: 406/466, loss: 3.0752673149108887 2023-01-24 01:31:19.069351: step: 408/466, loss: 11.876520156860352 2023-01-24 01:31:19.926435: step: 410/466, loss: 1.0020477771759033 2023-01-24 01:31:20.691941: step: 412/466, loss: 2.2785210609436035 2023-01-24 01:31:21.436239: step: 414/466, loss: 2.8014371395111084 2023-01-24 01:31:22.162736: step: 416/466, loss: 4.996395111083984 2023-01-24 01:31:22.910400: step: 418/466, loss: 3.1675033569335938 2023-01-24 01:31:23.728514: step: 420/466, loss: 2.1907224655151367 2023-01-24 01:31:24.436322: step: 422/466, loss: 3.5931363105773926 2023-01-24 01:31:25.232139: step: 424/466, loss: 1.7587145566940308 2023-01-24 01:31:25.959017: step: 426/466, loss: 1.4456634521484375 2023-01-24 01:31:26.773696: step: 428/466, loss: 0.6929149627685547 2023-01-24 01:31:27.588507: step: 430/466, loss: 1.4032700061798096 2023-01-24 01:31:28.350900: step: 432/466, loss: 2.134281635284424 2023-01-24 01:31:29.103674: step: 434/466, loss: 2.9200680255889893 2023-01-24 01:31:29.839601: step: 436/466, loss: 6.406781196594238 2023-01-24 01:31:30.633274: step: 438/466, loss: 1.358239769935608 2023-01-24 01:31:31.483470: step: 440/466, loss: 2.697464942932129 2023-01-24 01:31:32.315206: step: 442/466, loss: 3.9414467811584473 2023-01-24 01:31:33.034371: step: 444/466, loss: 1.3173900842666626 2023-01-24 01:31:33.718448: step: 446/466, loss: 2.6215689182281494 2023-01-24 01:31:34.513734: step: 448/466, loss: 7.385811805725098 2023-01-24 01:31:35.259461: step: 450/466, loss: 2.411505699157715 2023-01-24 01:31:36.056698: step: 452/466, loss: 1.800626516342163 2023-01-24 01:31:36.813126: step: 454/466, loss: 3.886320114135742 2023-01-24 01:31:37.645796: step: 456/466, loss: 1.4516246318817139 2023-01-24 01:31:38.370910: step: 458/466, loss: 2.2257678508758545 2023-01-24 01:31:39.153253: step: 460/466, loss: 2.3415117263793945 2023-01-24 01:31:39.870783: step: 462/466, loss: 1.9631130695343018 2023-01-24 01:31:40.590971: step: 464/466, loss: 4.780514240264893 2023-01-24 01:31:41.285543: step: 466/466, loss: 3.7143073081970215 2023-01-24 01:31:41.998020: step: 468/466, loss: 4.045224666595459 2023-01-24 01:31:42.723693: step: 470/466, loss: 3.0051512718200684 2023-01-24 01:31:43.610093: step: 472/466, loss: 5.037962913513184 2023-01-24 01:31:44.449914: step: 474/466, loss: 3.1078267097473145 2023-01-24 01:31:45.295815: step: 476/466, loss: 3.585723876953125 2023-01-24 01:31:46.066699: step: 478/466, loss: 1.0921087265014648 2023-01-24 01:31:46.773802: step: 480/466, loss: 6.455872535705566 2023-01-24 01:31:47.526531: step: 482/466, loss: 5.039011001586914 2023-01-24 01:31:48.354800: step: 484/466, loss: 5.26975154876709 2023-01-24 01:31:49.314042: step: 486/466, loss: 2.4059643745422363 2023-01-24 01:31:50.035233: step: 488/466, loss: 5.975546836853027 2023-01-24 01:31:50.879712: step: 490/466, loss: 0.2733404040336609 2023-01-24 01:31:51.646544: step: 492/466, loss: 2.014416217803955 2023-01-24 01:31:52.417746: step: 494/466, loss: 1.8301392793655396 2023-01-24 01:31:53.204894: step: 496/466, loss: 1.2460603713989258 2023-01-24 01:31:53.936407: step: 498/466, loss: 3.2944400310516357 2023-01-24 01:31:54.681819: step: 500/466, loss: 2.0257420539855957 2023-01-24 01:31:55.439913: step: 502/466, loss: 6.985259532928467 2023-01-24 01:31:56.194659: step: 504/466, loss: 1.8203388452529907 2023-01-24 01:31:56.881457: step: 506/466, loss: 1.344951868057251 2023-01-24 01:31:57.684555: step: 508/466, loss: 3.0250954627990723 2023-01-24 01:31:58.422065: step: 510/466, loss: 4.147697448730469 2023-01-24 01:31:59.176289: step: 512/466, loss: 1.4706324338912964 2023-01-24 01:31:59.983318: step: 514/466, loss: 2.5658233165740967 2023-01-24 01:32:00.727801: step: 516/466, loss: 2.607825517654419 2023-01-24 01:32:01.434900: step: 518/466, loss: 1.3773353099822998 2023-01-24 01:32:02.266193: step: 520/466, loss: 12.196202278137207 2023-01-24 01:32:03.100031: step: 522/466, loss: 1.900867223739624 2023-01-24 01:32:03.842584: step: 524/466, loss: 1.8195151090621948 2023-01-24 01:32:04.579719: step: 526/466, loss: 0.8975844383239746 2023-01-24 01:32:05.287768: step: 528/466, loss: 2.051081657409668 2023-01-24 01:32:06.059135: step: 530/466, loss: 7.602580547332764 2023-01-24 01:32:06.814933: step: 532/466, loss: 1.4406651258468628 2023-01-24 01:32:07.762601: step: 534/466, loss: 2.3649492263793945 2023-01-24 01:32:08.500705: step: 536/466, loss: 1.9562591314315796 2023-01-24 01:32:09.258893: step: 538/466, loss: 3.06107497215271 2023-01-24 01:32:10.068757: step: 540/466, loss: 2.2868266105651855 2023-01-24 01:32:10.834278: step: 542/466, loss: 5.655248641967773 2023-01-24 01:32:11.543312: step: 544/466, loss: 3.627987861633301 2023-01-24 01:32:12.424519: step: 546/466, loss: 3.139216423034668 2023-01-24 01:32:13.301354: step: 548/466, loss: 2.9110352993011475 2023-01-24 01:32:14.110935: step: 550/466, loss: 0.9010717868804932 2023-01-24 01:32:14.848682: step: 552/466, loss: 1.010190725326538 2023-01-24 01:32:15.646551: step: 554/466, loss: 17.52189064025879 2023-01-24 01:32:16.383914: step: 556/466, loss: 0.635991096496582 2023-01-24 01:32:17.081592: step: 558/466, loss: 2.4419195652008057 2023-01-24 01:32:17.779725: step: 560/466, loss: 2.1627860069274902 2023-01-24 01:32:18.620281: step: 562/466, loss: 1.6352651119232178 2023-01-24 01:32:19.379091: step: 564/466, loss: 1.4795622825622559 2023-01-24 01:32:20.169940: step: 566/466, loss: 1.9066089391708374 2023-01-24 01:32:20.933664: step: 568/466, loss: 15.391820907592773 2023-01-24 01:32:21.656831: step: 570/466, loss: 6.963404655456543 2023-01-24 01:32:22.415105: step: 572/466, loss: 12.239256858825684 2023-01-24 01:32:23.149436: step: 574/466, loss: 4.118880748748779 2023-01-24 01:32:23.915811: step: 576/466, loss: 5.32997989654541 2023-01-24 01:32:24.697414: step: 578/466, loss: 2.3911309242248535 2023-01-24 01:32:25.529434: step: 580/466, loss: 2.738361120223999 2023-01-24 01:32:26.191267: step: 582/466, loss: 3.573793888092041 2023-01-24 01:32:27.035408: step: 584/466, loss: 0.8956894874572754 2023-01-24 01:32:27.755198: step: 586/466, loss: 1.1735498905181885 2023-01-24 01:32:28.542612: step: 588/466, loss: 1.8540666103363037 2023-01-24 01:32:29.308742: step: 590/466, loss: 3.0030360221862793 2023-01-24 01:32:30.102579: step: 592/466, loss: 0.8821673393249512 2023-01-24 01:32:30.853147: step: 594/466, loss: 1.834218978881836 2023-01-24 01:32:31.610424: step: 596/466, loss: 4.334569931030273 2023-01-24 01:32:32.440660: step: 598/466, loss: 1.5742497444152832 2023-01-24 01:32:33.221726: step: 600/466, loss: 1.7871942520141602 2023-01-24 01:32:33.961171: step: 602/466, loss: 1.2830783128738403 2023-01-24 01:32:34.708076: step: 604/466, loss: 3.2601969242095947 2023-01-24 01:32:35.438088: step: 606/466, loss: 6.348970413208008 2023-01-24 01:32:36.161219: step: 608/466, loss: 4.323052883148193 2023-01-24 01:32:36.988171: step: 610/466, loss: 5.402087211608887 2023-01-24 01:32:37.690183: step: 612/466, loss: 6.45858097076416 2023-01-24 01:32:38.517001: step: 614/466, loss: 2.569106340408325 2023-01-24 01:32:39.301323: step: 616/466, loss: 0.705042839050293 2023-01-24 01:32:40.103877: step: 618/466, loss: 4.169329643249512 2023-01-24 01:32:40.849356: step: 620/466, loss: 1.1026544570922852 2023-01-24 01:32:41.663579: step: 622/466, loss: 2.5629806518554688 2023-01-24 01:32:42.442106: step: 624/466, loss: 1.0137592554092407 2023-01-24 01:32:43.189631: step: 626/466, loss: 0.3781975209712982 2023-01-24 01:32:44.044527: step: 628/466, loss: 4.098159313201904 2023-01-24 01:32:44.778443: step: 630/466, loss: 0.9161981344223022 2023-01-24 01:32:45.530073: step: 632/466, loss: 1.3645033836364746 2023-01-24 01:32:46.256635: step: 634/466, loss: 3.8741254806518555 2023-01-24 01:32:46.995691: step: 636/466, loss: 1.3426021337509155 2023-01-24 01:32:47.742624: step: 638/466, loss: 1.9753289222717285 2023-01-24 01:32:48.520718: step: 640/466, loss: 6.594760894775391 2023-01-24 01:32:49.286634: step: 642/466, loss: 0.7493297457695007 2023-01-24 01:32:50.025630: step: 644/466, loss: 0.8289147615432739 2023-01-24 01:32:50.933437: step: 646/466, loss: 4.53216552734375 2023-01-24 01:32:51.651607: step: 648/466, loss: 2.303760051727295 2023-01-24 01:32:52.352686: step: 650/466, loss: 0.7914081811904907 2023-01-24 01:32:53.086299: step: 652/466, loss: 1.3922420740127563 2023-01-24 01:32:53.771416: step: 654/466, loss: 3.121603488922119 2023-01-24 01:32:54.530691: step: 656/466, loss: 5.185492515563965 2023-01-24 01:32:55.310139: step: 658/466, loss: 1.9905723333358765 2023-01-24 01:32:56.121028: step: 660/466, loss: 4.122713088989258 2023-01-24 01:32:56.855574: step: 662/466, loss: 0.9444369673728943 2023-01-24 01:32:57.710619: step: 664/466, loss: 6.511009693145752 2023-01-24 01:32:58.460877: step: 666/466, loss: 1.7498726844787598 2023-01-24 01:32:59.195985: step: 668/466, loss: 4.1698527336120605 2023-01-24 01:33:00.056211: step: 670/466, loss: 2.089411735534668 2023-01-24 01:33:00.938406: step: 672/466, loss: 3.105118751525879 2023-01-24 01:33:01.709869: step: 674/466, loss: 1.3752529621124268 2023-01-24 01:33:02.481161: step: 676/466, loss: 1.345578670501709 2023-01-24 01:33:03.298957: step: 678/466, loss: 3.6869633197784424 2023-01-24 01:33:04.089595: step: 680/466, loss: 1.509131669998169 2023-01-24 01:33:04.843690: step: 682/466, loss: 1.0781230926513672 2023-01-24 01:33:05.563220: step: 684/466, loss: 1.3234790563583374 2023-01-24 01:33:06.360616: step: 686/466, loss: 2.1727919578552246 2023-01-24 01:33:07.103726: step: 688/466, loss: 4.946380138397217 2023-01-24 01:33:07.892539: step: 690/466, loss: 1.8786697387695312 2023-01-24 01:33:08.623846: step: 692/466, loss: 6.498932838439941 2023-01-24 01:33:09.425061: step: 694/466, loss: 6.784847259521484 2023-01-24 01:33:10.149805: step: 696/466, loss: 2.694711923599243 2023-01-24 01:33:10.924770: step: 698/466, loss: 11.929954528808594 2023-01-24 01:33:11.740471: step: 700/466, loss: 2.7109384536743164 2023-01-24 01:33:12.571008: step: 702/466, loss: 5.202466011047363 2023-01-24 01:33:13.337594: step: 704/466, loss: 3.2965590953826904 2023-01-24 01:33:14.140314: step: 706/466, loss: 1.7430288791656494 2023-01-24 01:33:14.871111: step: 708/466, loss: 3.1917104721069336 2023-01-24 01:33:15.569609: step: 710/466, loss: 0.3418167531490326 2023-01-24 01:33:16.281219: step: 712/466, loss: 1.3266034126281738 2023-01-24 01:33:17.077531: step: 714/466, loss: 2.886962652206421 2023-01-24 01:33:17.730934: step: 716/466, loss: 5.965574741363525 2023-01-24 01:33:18.525652: step: 718/466, loss: 8.379350662231445 2023-01-24 01:33:19.305321: step: 720/466, loss: 2.424729108810425 2023-01-24 01:33:19.966869: step: 722/466, loss: 4.797998428344727 2023-01-24 01:33:20.740480: step: 724/466, loss: 0.6181595325469971 2023-01-24 01:33:21.511951: step: 726/466, loss: 6.5584869384765625 2023-01-24 01:33:22.269617: step: 728/466, loss: 2.542454242706299 2023-01-24 01:33:23.015701: step: 730/466, loss: 4.692009449005127 2023-01-24 01:33:23.737974: step: 732/466, loss: 4.815932750701904 2023-01-24 01:33:24.481675: step: 734/466, loss: 1.2393872737884521 2023-01-24 01:33:25.210289: step: 736/466, loss: 1.4991346597671509 2023-01-24 01:33:25.915032: step: 738/466, loss: 1.4453717470169067 2023-01-24 01:33:26.717872: step: 740/466, loss: 5.386700630187988 2023-01-24 01:33:27.401069: step: 742/466, loss: 1.9336085319519043 2023-01-24 01:33:28.164539: step: 744/466, loss: 0.4682157039642334 2023-01-24 01:33:28.958447: step: 746/466, loss: 0.6913096904754639 2023-01-24 01:33:29.700902: step: 748/466, loss: 1.037575125694275 2023-01-24 01:33:30.510672: step: 750/466, loss: 7.556648254394531 2023-01-24 01:33:31.274813: step: 752/466, loss: 2.2385308742523193 2023-01-24 01:33:32.051492: step: 754/466, loss: 1.7686642408370972 2023-01-24 01:33:32.793028: step: 756/466, loss: 1.0103527307510376 2023-01-24 01:33:33.517631: step: 758/466, loss: 1.3823200464248657 2023-01-24 01:33:34.276614: step: 760/466, loss: 5.599819183349609 2023-01-24 01:33:35.017794: step: 762/466, loss: 3.594778060913086 2023-01-24 01:33:35.786252: step: 764/466, loss: 1.2553966045379639 2023-01-24 01:33:36.569817: step: 766/466, loss: 0.6590578556060791 2023-01-24 01:33:37.369875: step: 768/466, loss: 1.9717657566070557 2023-01-24 01:33:38.082439: step: 770/466, loss: 1.6747913360595703 2023-01-24 01:33:38.856780: step: 772/466, loss: 1.52985680103302 2023-01-24 01:33:39.628671: step: 774/466, loss: 10.834771156311035 2023-01-24 01:33:40.391266: step: 776/466, loss: 0.7611805200576782 2023-01-24 01:33:41.161073: step: 778/466, loss: 1.46249258518219 2023-01-24 01:33:41.897821: step: 780/466, loss: 0.8268005847930908 2023-01-24 01:33:42.709007: step: 782/466, loss: 3.1205601692199707 2023-01-24 01:33:43.425177: step: 784/466, loss: 0.8482595682144165 2023-01-24 01:33:44.274194: step: 786/466, loss: 16.482038497924805 2023-01-24 01:33:45.002982: step: 788/466, loss: 0.9325035214424133 2023-01-24 01:33:45.795494: step: 790/466, loss: 2.5457522869110107 2023-01-24 01:33:46.512819: step: 792/466, loss: 1.3102258443832397 2023-01-24 01:33:47.273888: step: 794/466, loss: 2.163250207901001 2023-01-24 01:33:47.974951: step: 796/466, loss: 1.1271615028381348 2023-01-24 01:33:48.684355: step: 798/466, loss: 3.783787250518799 2023-01-24 01:33:49.549712: step: 800/466, loss: 2.997448205947876 2023-01-24 01:33:50.214151: step: 802/466, loss: 1.2927404642105103 2023-01-24 01:33:51.014325: step: 804/466, loss: 2.083723545074463 2023-01-24 01:33:51.875230: step: 806/466, loss: 1.656034231185913 2023-01-24 01:33:52.641332: step: 808/466, loss: 5.2302069664001465 2023-01-24 01:33:53.387493: step: 810/466, loss: 2.1514246463775635 2023-01-24 01:33:54.125788: step: 812/466, loss: 0.6463648080825806 2023-01-24 01:33:54.842752: step: 814/466, loss: 1.2086327075958252 2023-01-24 01:33:55.592314: step: 816/466, loss: 1.9709173440933228 2023-01-24 01:33:56.402783: step: 818/466, loss: 3.217388153076172 2023-01-24 01:33:57.201676: step: 820/466, loss: 1.2259535789489746 2023-01-24 01:33:57.872991: step: 822/466, loss: 3.5686111450195312 2023-01-24 01:33:58.691756: step: 824/466, loss: 1.0073504447937012 2023-01-24 01:33:59.446944: step: 826/466, loss: 0.7077375054359436 2023-01-24 01:34:00.236319: step: 828/466, loss: 1.840885877609253 2023-01-24 01:34:00.940277: step: 830/466, loss: 2.9541375637054443 2023-01-24 01:34:01.692270: step: 832/466, loss: 0.7959054708480835 2023-01-24 01:34:02.410008: step: 834/466, loss: 1.8715145587921143 2023-01-24 01:34:03.213980: step: 836/466, loss: 4.219241142272949 2023-01-24 01:34:03.990771: step: 838/466, loss: 3.6296257972717285 2023-01-24 01:34:04.769964: step: 840/466, loss: 0.6891568303108215 2023-01-24 01:34:05.471429: step: 842/466, loss: 1.4817897081375122 2023-01-24 01:34:06.221610: step: 844/466, loss: 2.252312421798706 2023-01-24 01:34:06.974615: step: 846/466, loss: 0.2936214804649353 2023-01-24 01:34:07.792920: step: 848/466, loss: 2.450155735015869 2023-01-24 01:34:08.640646: step: 850/466, loss: 0.9076513051986694 2023-01-24 01:34:09.432663: step: 852/466, loss: 1.6361433267593384 2023-01-24 01:34:10.254907: step: 854/466, loss: 1.4986391067504883 2023-01-24 01:34:11.034186: step: 856/466, loss: 0.5485374331474304 2023-01-24 01:34:11.808096: step: 858/466, loss: 1.7036077976226807 2023-01-24 01:34:12.532322: step: 860/466, loss: 4.413128852844238 2023-01-24 01:34:13.253172: step: 862/466, loss: 2.4625015258789062 2023-01-24 01:34:13.975586: step: 864/466, loss: 0.6636428236961365 2023-01-24 01:34:14.697151: step: 866/466, loss: 6.692194938659668 2023-01-24 01:34:15.404514: step: 868/466, loss: 7.346501350402832 2023-01-24 01:34:16.131765: step: 870/466, loss: 0.8011118769645691 2023-01-24 01:34:16.839788: step: 872/466, loss: 1.8357152938842773 2023-01-24 01:34:17.726518: step: 874/466, loss: 1.7394757270812988 2023-01-24 01:34:18.506417: step: 876/466, loss: 2.2443785667419434 2023-01-24 01:34:19.242701: step: 878/466, loss: 0.49604469537734985 2023-01-24 01:34:19.929476: step: 880/466, loss: 5.444522857666016 2023-01-24 01:34:20.713214: step: 882/466, loss: 0.5533512234687805 2023-01-24 01:34:21.538413: step: 884/466, loss: 0.8059678077697754 2023-01-24 01:34:22.277663: step: 886/466, loss: 2.289682626724243 2023-01-24 01:34:23.089117: step: 888/466, loss: 1.675082802772522 2023-01-24 01:34:23.872394: step: 890/466, loss: 1.1696981191635132 2023-01-24 01:34:24.654275: step: 892/466, loss: 2.2912344932556152 2023-01-24 01:34:25.409623: step: 894/466, loss: 5.5102949142456055 2023-01-24 01:34:26.113138: step: 896/466, loss: 1.666417121887207 2023-01-24 01:34:26.817615: step: 898/466, loss: 1.7959926128387451 2023-01-24 01:34:27.539046: step: 900/466, loss: 1.474005937576294 2023-01-24 01:34:28.267238: step: 902/466, loss: 1.0745131969451904 2023-01-24 01:34:29.078282: step: 904/466, loss: 1.2037309408187866 2023-01-24 01:34:29.831668: step: 906/466, loss: 1.165777564048767 2023-01-24 01:34:30.634707: step: 908/466, loss: 1.3042948246002197 2023-01-24 01:34:31.484746: step: 910/466, loss: 1.2583338022232056 2023-01-24 01:34:32.281366: step: 912/466, loss: 4.980291366577148 2023-01-24 01:34:33.082088: step: 914/466, loss: 0.7476434707641602 2023-01-24 01:34:33.809738: step: 916/466, loss: 3.4122965335845947 2023-01-24 01:34:34.592379: step: 918/466, loss: 0.9060400724411011 2023-01-24 01:34:35.380972: step: 920/466, loss: 5.214575290679932 2023-01-24 01:34:36.189377: step: 922/466, loss: 5.202361106872559 2023-01-24 01:34:36.955692: step: 924/466, loss: 2.5000698566436768 2023-01-24 01:34:37.788661: step: 926/466, loss: 3.709892988204956 2023-01-24 01:34:38.533337: step: 928/466, loss: 1.2923870086669922 2023-01-24 01:34:39.225331: step: 930/466, loss: 5.998931407928467 2023-01-24 01:34:40.049103: step: 932/466, loss: 4.255189895629883 ================================================== Loss: 5.868 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.43468915343915343, 'r': 0.10353654694391935, 'f1': 0.16723918575063612}, 'combined': 0.12322887371099503, 'epoch': 0} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.34439900078369906, 'r': 0.06893923052788455, 'f1': 0.11488219724201033}, 'combined': 0.07061052123167465, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.459040346907994, 'r': 0.10152688201934838, 'f1': 0.16627777271899472}, 'combined': 0.12252046410873293, 'epoch': 0} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3387211326979472, 'r': 0.07247872382147619, 'f1': 0.11940702332493376}, 'combined': 0.07339163384849587, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.44752358490566035, 'r': 0.08967391304347826, 'f1': 0.14940944881889762}, 'combined': 0.11009117281392455, 'epoch': 0} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.34508619029275805, 'r': 0.07026776609930191, 'f1': 0.11676034538937766}, 'combined': 0.07211668391696857, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.05714285714285714, 'f1': 0.10526315789473684}, 'combined': 0.07017543859649122, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.43468915343915343, 'r': 0.10353654694391935, 'f1': 0.16723918575063612}, 'combined': 0.12322887371099503, 'epoch': 0} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.34439900078369906, 'r': 0.06893923052788455, 'f1': 0.11488219724201033}, 'combined': 0.07061052123167465, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.05714285714285714, 'f1': 0.10526315789473684}, 'combined': 0.07017543859649122, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.459040346907994, 'r': 0.10152688201934838, 'f1': 0.16627777271899472}, 'combined': 0.12252046410873293, 'epoch': 0} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3387211326979472, 'r': 0.07247872382147619, 'f1': 0.11940702332493376}, 'combined': 0.07339163384849587, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.44752358490566035, 'r': 0.08967391304347826, 'f1': 0.14940944881889762}, 'combined': 0.11009117281392455, 'epoch': 0} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.34508619029275805, 'r': 0.07026776609930191, 'f1': 0.11676034538937766}, 'combined': 0.07211668391696857, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:37:45.398692: step: 2/466, loss: 9.140667915344238 2023-01-24 01:37:46.215094: step: 4/466, loss: 2.162443161010742 2023-01-24 01:37:46.948230: step: 6/466, loss: 2.352289915084839 2023-01-24 01:37:47.775191: step: 8/466, loss: 3.6142661571502686 2023-01-24 01:37:48.809860: step: 10/466, loss: 3.4739856719970703 2023-01-24 01:37:49.635163: step: 12/466, loss: 2.771876096725464 2023-01-24 01:37:50.372484: step: 14/466, loss: 1.1560986042022705 2023-01-24 01:37:51.119796: step: 16/466, loss: 6.923826694488525 2023-01-24 01:37:51.896897: step: 18/466, loss: 7.029289245605469 2023-01-24 01:37:52.659759: step: 20/466, loss: 3.2694921493530273 2023-01-24 01:37:53.473461: step: 22/466, loss: 2.314699649810791 2023-01-24 01:37:54.212836: step: 24/466, loss: 0.4758274555206299 2023-01-24 01:37:54.973749: step: 26/466, loss: 1.258528232574463 2023-01-24 01:37:55.789118: step: 28/466, loss: 1.7560293674468994 2023-01-24 01:37:56.528429: step: 30/466, loss: 0.5054268836975098 2023-01-24 01:37:57.268641: step: 32/466, loss: 1.688248634338379 2023-01-24 01:37:58.042841: step: 34/466, loss: 1.5769336223602295 2023-01-24 01:37:58.714276: step: 36/466, loss: 0.1769465059041977 2023-01-24 01:37:59.459846: step: 38/466, loss: 0.976089596748352 2023-01-24 01:38:00.195250: step: 40/466, loss: 2.1016931533813477 2023-01-24 01:38:00.880175: step: 42/466, loss: 4.128500461578369 2023-01-24 01:38:01.749597: step: 44/466, loss: 1.8174368143081665 2023-01-24 01:38:02.531352: step: 46/466, loss: 0.5061911940574646 2023-01-24 01:38:03.256891: step: 48/466, loss: 2.9876365661621094 2023-01-24 01:38:03.960112: step: 50/466, loss: 0.5157006978988647 2023-01-24 01:38:04.740555: step: 52/466, loss: 1.705984354019165 2023-01-24 01:38:05.522251: step: 54/466, loss: 0.3926768898963928 2023-01-24 01:38:06.240503: step: 56/466, loss: 1.8835524320602417 2023-01-24 01:38:07.047274: step: 58/466, loss: 2.549036979675293 2023-01-24 01:38:07.879864: step: 60/466, loss: 1.1022535562515259 2023-01-24 01:38:08.602872: step: 62/466, loss: 6.8992533683776855 2023-01-24 01:38:09.381097: step: 64/466, loss: 2.8540728092193604 2023-01-24 01:38:10.147043: step: 66/466, loss: 0.3880598545074463 2023-01-24 01:38:10.963096: step: 68/466, loss: 4.757377624511719 2023-01-24 01:38:11.678641: step: 70/466, loss: 1.5769944190979004 2023-01-24 01:38:12.433119: step: 72/466, loss: 2.267717123031616 2023-01-24 01:38:13.260550: step: 74/466, loss: 3.2595067024230957 2023-01-24 01:38:13.959721: step: 76/466, loss: 0.6588865518569946 2023-01-24 01:38:14.774128: step: 78/466, loss: 4.336224555969238 2023-01-24 01:38:15.482878: step: 80/466, loss: 1.129730224609375 2023-01-24 01:38:16.173460: step: 82/466, loss: 0.5617114901542664 2023-01-24 01:38:16.922826: step: 84/466, loss: 2.150784730911255 2023-01-24 01:38:17.656208: step: 86/466, loss: 1.9347360134124756 2023-01-24 01:38:18.398837: step: 88/466, loss: 1.529724359512329 2023-01-24 01:38:19.132628: step: 90/466, loss: 2.5717315673828125 2023-01-24 01:38:19.901630: step: 92/466, loss: 1.7477091550827026 2023-01-24 01:38:20.689057: step: 94/466, loss: 0.5505824089050293 2023-01-24 01:38:21.424900: step: 96/466, loss: 2.1601786613464355 2023-01-24 01:38:22.117675: step: 98/466, loss: 0.8771464228630066 2023-01-24 01:38:22.880272: step: 100/466, loss: 1.4452009201049805 2023-01-24 01:38:23.610274: step: 102/466, loss: 3.2847557067871094 2023-01-24 01:38:24.422019: step: 104/466, loss: 7.115804672241211 2023-01-24 01:38:25.266270: step: 106/466, loss: 1.1448769569396973 2023-01-24 01:38:26.004247: step: 108/466, loss: 1.1002720594406128 2023-01-24 01:38:26.760466: step: 110/466, loss: 1.6691850423812866 2023-01-24 01:38:27.608482: step: 112/466, loss: 4.029344081878662 2023-01-24 01:38:28.403670: step: 114/466, loss: 2.019430637359619 2023-01-24 01:38:29.129978: step: 116/466, loss: 1.26572847366333 2023-01-24 01:38:29.941310: step: 118/466, loss: 1.4695121049880981 2023-01-24 01:38:30.673902: step: 120/466, loss: 1.2214720249176025 2023-01-24 01:38:31.456992: step: 122/466, loss: 1.0011645555496216 2023-01-24 01:38:32.207874: step: 124/466, loss: 5.812806129455566 2023-01-24 01:38:32.912319: step: 126/466, loss: 0.43429237604141235 2023-01-24 01:38:33.602309: step: 128/466, loss: 0.8753689527511597 2023-01-24 01:38:34.343784: step: 130/466, loss: 0.5702555775642395 2023-01-24 01:38:35.084299: step: 132/466, loss: 2.444385528564453 2023-01-24 01:38:35.809896: step: 134/466, loss: 0.8410188555717468 2023-01-24 01:38:36.690053: step: 136/466, loss: 3.110745906829834 2023-01-24 01:38:37.531992: step: 138/466, loss: 4.094484329223633 2023-01-24 01:38:38.314804: step: 140/466, loss: 1.6371426582336426 2023-01-24 01:38:39.117573: step: 142/466, loss: 0.7541942596435547 2023-01-24 01:38:39.934647: step: 144/466, loss: 2.1750974655151367 2023-01-24 01:38:40.759655: step: 146/466, loss: 0.7115434408187866 2023-01-24 01:38:41.557564: step: 148/466, loss: 0.6685702800750732 2023-01-24 01:38:42.352862: step: 150/466, loss: 1.563443660736084 2023-01-24 01:38:43.048570: step: 152/466, loss: 2.2242989540100098 2023-01-24 01:38:43.853812: step: 154/466, loss: 1.2471766471862793 2023-01-24 01:38:44.634297: step: 156/466, loss: 1.1333625316619873 2023-01-24 01:38:45.239324: step: 158/466, loss: 2.8891983032226562 2023-01-24 01:38:46.096318: step: 160/466, loss: 1.4792773723602295 2023-01-24 01:38:46.859272: step: 162/466, loss: 0.8582035303115845 2023-01-24 01:38:47.637928: step: 164/466, loss: 1.1480525732040405 2023-01-24 01:38:48.393604: step: 166/466, loss: 0.9869524836540222 2023-01-24 01:38:49.150304: step: 168/466, loss: 2.1026687622070312 2023-01-24 01:38:49.841894: step: 170/466, loss: 1.9524791240692139 2023-01-24 01:38:50.590039: step: 172/466, loss: 1.8153622150421143 2023-01-24 01:38:51.419885: step: 174/466, loss: 1.347947359085083 2023-01-24 01:38:52.192777: step: 176/466, loss: 7.718930244445801 2023-01-24 01:38:52.900518: step: 178/466, loss: 1.651254415512085 2023-01-24 01:38:53.730359: step: 180/466, loss: 0.29022160172462463 2023-01-24 01:38:54.566821: step: 182/466, loss: 1.8764421939849854 2023-01-24 01:38:55.300180: step: 184/466, loss: 0.9963284730911255 2023-01-24 01:38:56.046068: step: 186/466, loss: 1.812087059020996 2023-01-24 01:38:56.723707: step: 188/466, loss: 2.3929829597473145 2023-01-24 01:38:57.438002: step: 190/466, loss: 1.4361763000488281 2023-01-24 01:38:58.215388: step: 192/466, loss: 0.53630131483078 2023-01-24 01:38:58.907775: step: 194/466, loss: 1.5942753553390503 2023-01-24 01:38:59.648668: step: 196/466, loss: 0.9954477548599243 2023-01-24 01:39:00.372417: step: 198/466, loss: 0.2992696464061737 2023-01-24 01:39:01.310472: step: 200/466, loss: 1.9530024528503418 2023-01-24 01:39:01.988730: step: 202/466, loss: 0.7404540777206421 2023-01-24 01:39:02.765141: step: 204/466, loss: 1.3093249797821045 2023-01-24 01:39:03.546023: step: 206/466, loss: 3.3123230934143066 2023-01-24 01:39:04.337573: step: 208/466, loss: 9.107421875 2023-01-24 01:39:05.108671: step: 210/466, loss: 2.1775870323181152 2023-01-24 01:39:05.864010: step: 212/466, loss: 0.7418627142906189 2023-01-24 01:39:06.639923: step: 214/466, loss: 1.2200261354446411 2023-01-24 01:39:07.403458: step: 216/466, loss: 1.1684129238128662 2023-01-24 01:39:08.175044: step: 218/466, loss: 6.310258865356445 2023-01-24 01:39:08.930282: step: 220/466, loss: 0.6867668032646179 2023-01-24 01:39:09.628212: step: 222/466, loss: 2.495737075805664 2023-01-24 01:39:10.391182: step: 224/466, loss: 1.954410433769226 2023-01-24 01:39:11.104202: step: 226/466, loss: 3.201202630996704 2023-01-24 01:39:11.870552: step: 228/466, loss: 2.4115254878997803 2023-01-24 01:39:12.606568: step: 230/466, loss: 1.5630335807800293 2023-01-24 01:39:13.404273: step: 232/466, loss: 0.6903105974197388 2023-01-24 01:39:14.161570: step: 234/466, loss: 2.7204911708831787 2023-01-24 01:39:14.860968: step: 236/466, loss: 2.1740503311157227 2023-01-24 01:39:15.511805: step: 238/466, loss: 1.7562813758850098 2023-01-24 01:39:16.225407: step: 240/466, loss: 1.4994159936904907 2023-01-24 01:39:16.982820: step: 242/466, loss: 1.1976405382156372 2023-01-24 01:39:17.733764: step: 244/466, loss: 0.43268853425979614 2023-01-24 01:39:18.408037: step: 246/466, loss: 2.2464475631713867 2023-01-24 01:39:19.123220: step: 248/466, loss: 1.5958398580551147 2023-01-24 01:39:19.911225: step: 250/466, loss: 3.0869741439819336 2023-01-24 01:39:20.618992: step: 252/466, loss: 0.9807233214378357 2023-01-24 01:39:21.348512: step: 254/466, loss: 3.9502501487731934 2023-01-24 01:39:22.127469: step: 256/466, loss: 0.6676345467567444 2023-01-24 01:39:22.881448: step: 258/466, loss: 0.45399269461631775 2023-01-24 01:39:23.645089: step: 260/466, loss: 6.352608680725098 2023-01-24 01:39:24.382951: step: 262/466, loss: 1.0999513864517212 2023-01-24 01:39:25.126210: step: 264/466, loss: 2.423794984817505 2023-01-24 01:39:25.875927: step: 266/466, loss: 1.3189613819122314 2023-01-24 01:39:26.656622: step: 268/466, loss: 2.981919050216675 2023-01-24 01:39:27.419271: step: 270/466, loss: 1.505730152130127 2023-01-24 01:39:28.178357: step: 272/466, loss: 1.8038147687911987 2023-01-24 01:39:29.034629: step: 274/466, loss: 3.5392653942108154 2023-01-24 01:39:29.809323: step: 276/466, loss: 0.6619697213172913 2023-01-24 01:39:30.626497: step: 278/466, loss: 0.9468562602996826 2023-01-24 01:39:31.497632: step: 280/466, loss: 3.8605504035949707 2023-01-24 01:39:32.350970: step: 282/466, loss: 1.1731696128845215 2023-01-24 01:39:33.133064: step: 284/466, loss: 1.08231520652771 2023-01-24 01:39:33.897253: step: 286/466, loss: 1.063538670539856 2023-01-24 01:39:34.779210: step: 288/466, loss: 9.27632999420166 2023-01-24 01:39:35.540842: step: 290/466, loss: 1.053228735923767 2023-01-24 01:39:36.419974: step: 292/466, loss: 1.1648130416870117 2023-01-24 01:39:37.227298: step: 294/466, loss: 2.8982105255126953 2023-01-24 01:39:38.081542: step: 296/466, loss: 0.8549784421920776 2023-01-24 01:39:38.900920: step: 298/466, loss: 2.659513235092163 2023-01-24 01:39:39.662916: step: 300/466, loss: 2.1829559803009033 2023-01-24 01:39:40.449304: step: 302/466, loss: 0.779899001121521 2023-01-24 01:39:41.173992: step: 304/466, loss: 0.4097400903701782 2023-01-24 01:39:41.930499: step: 306/466, loss: 1.7801828384399414 2023-01-24 01:39:42.639923: step: 308/466, loss: 0.7560756206512451 2023-01-24 01:39:43.446321: step: 310/466, loss: 0.5453673601150513 2023-01-24 01:39:44.224618: step: 312/466, loss: 0.4129677712917328 2023-01-24 01:39:44.964597: step: 314/466, loss: 4.737407207489014 2023-01-24 01:39:45.729125: step: 316/466, loss: 2.516684055328369 2023-01-24 01:39:46.435112: step: 318/466, loss: 0.49643924832344055 2023-01-24 01:39:47.152548: step: 320/466, loss: 1.9038251638412476 2023-01-24 01:39:47.978357: step: 322/466, loss: 0.6591343283653259 2023-01-24 01:39:48.792585: step: 324/466, loss: 3.3472065925598145 2023-01-24 01:39:49.527523: step: 326/466, loss: 2.4076545238494873 2023-01-24 01:39:50.355863: step: 328/466, loss: 3.0719268321990967 2023-01-24 01:39:51.017861: step: 330/466, loss: 0.3430599570274353 2023-01-24 01:39:51.836978: step: 332/466, loss: 1.3857271671295166 2023-01-24 01:39:52.605664: step: 334/466, loss: 0.30699753761291504 2023-01-24 01:39:53.394299: step: 336/466, loss: 0.47438302636146545 2023-01-24 01:39:54.062609: step: 338/466, loss: 2.707505226135254 2023-01-24 01:39:54.794357: step: 340/466, loss: 1.6239722967147827 2023-01-24 01:39:55.457903: step: 342/466, loss: 0.57518070936203 2023-01-24 01:39:56.245143: step: 344/466, loss: 1.2875721454620361 2023-01-24 01:39:57.071709: step: 346/466, loss: 3.6670823097229004 2023-01-24 01:39:57.797681: step: 348/466, loss: 0.5334588289260864 2023-01-24 01:39:58.480683: step: 350/466, loss: 9.405436515808105 2023-01-24 01:39:59.322516: step: 352/466, loss: 1.1436501741409302 2023-01-24 01:40:00.123411: step: 354/466, loss: 0.8235857486724854 2023-01-24 01:40:00.843311: step: 356/466, loss: 5.213697910308838 2023-01-24 01:40:01.566006: step: 358/466, loss: 1.221557855606079 2023-01-24 01:40:02.264888: step: 360/466, loss: 4.103485584259033 2023-01-24 01:40:03.023674: step: 362/466, loss: 1.9460155963897705 2023-01-24 01:40:03.810060: step: 364/466, loss: 1.0022746324539185 2023-01-24 01:40:04.529307: step: 366/466, loss: 1.3515444993972778 2023-01-24 01:40:05.310210: step: 368/466, loss: 0.4751497209072113 2023-01-24 01:40:06.133035: step: 370/466, loss: 1.04304838180542 2023-01-24 01:40:06.832704: step: 372/466, loss: 0.44798514246940613 2023-01-24 01:40:07.673157: step: 374/466, loss: 0.4274475574493408 2023-01-24 01:40:08.442920: step: 376/466, loss: 2.295330286026001 2023-01-24 01:40:09.196568: step: 378/466, loss: 0.8329123258590698 2023-01-24 01:40:09.978202: step: 380/466, loss: 1.9693539142608643 2023-01-24 01:40:10.744893: step: 382/466, loss: 0.9746490120887756 2023-01-24 01:40:11.532711: step: 384/466, loss: 0.6659336090087891 2023-01-24 01:40:12.333620: step: 386/466, loss: 0.4551994800567627 2023-01-24 01:40:13.119163: step: 388/466, loss: 0.6308054327964783 2023-01-24 01:40:13.943745: step: 390/466, loss: 2.2916059494018555 2023-01-24 01:40:14.773745: step: 392/466, loss: 1.36789071559906 2023-01-24 01:40:15.559272: step: 394/466, loss: 0.6319479942321777 2023-01-24 01:40:16.316541: step: 396/466, loss: 0.5163759589195251 2023-01-24 01:40:17.040936: step: 398/466, loss: 2.276012659072876 2023-01-24 01:40:17.794460: step: 400/466, loss: 1.0691869258880615 2023-01-24 01:40:18.571876: step: 402/466, loss: 2.319650650024414 2023-01-24 01:40:19.305895: step: 404/466, loss: 6.325422286987305 2023-01-24 01:40:19.998329: step: 406/466, loss: 0.4125528931617737 2023-01-24 01:40:20.737856: step: 408/466, loss: 0.8939810395240784 2023-01-24 01:40:21.494923: step: 410/466, loss: 0.45102182030677795 2023-01-24 01:40:22.264859: step: 412/466, loss: 1.2263315916061401 2023-01-24 01:40:23.026350: step: 414/466, loss: 0.9082082509994507 2023-01-24 01:40:23.766350: step: 416/466, loss: 1.0330703258514404 2023-01-24 01:40:24.478020: step: 418/466, loss: 0.951251208782196 2023-01-24 01:40:25.220205: step: 420/466, loss: 1.1964526176452637 2023-01-24 01:40:25.932295: step: 422/466, loss: 1.2323813438415527 2023-01-24 01:40:26.665514: step: 424/466, loss: 2.8224222660064697 2023-01-24 01:40:27.404623: step: 426/466, loss: 1.238973617553711 2023-01-24 01:40:28.295564: step: 428/466, loss: 0.3770541250705719 2023-01-24 01:40:29.060892: step: 430/466, loss: 0.6212438941001892 2023-01-24 01:40:29.872574: step: 432/466, loss: 0.5817193388938904 2023-01-24 01:40:30.728034: step: 434/466, loss: 1.3300286531448364 2023-01-24 01:40:31.508623: step: 436/466, loss: 1.2844303846359253 2023-01-24 01:40:32.290354: step: 438/466, loss: 1.8086202144622803 2023-01-24 01:40:33.024651: step: 440/466, loss: 8.946410179138184 2023-01-24 01:40:33.828706: step: 442/466, loss: 1.169613003730774 2023-01-24 01:40:34.683964: step: 444/466, loss: 1.6764874458312988 2023-01-24 01:40:35.433584: step: 446/466, loss: 4.7825117111206055 2023-01-24 01:40:36.207359: step: 448/466, loss: 12.170503616333008 2023-01-24 01:40:36.983466: step: 450/466, loss: 0.48194020986557007 2023-01-24 01:40:37.688986: step: 452/466, loss: 2.0715715885162354 2023-01-24 01:40:38.423601: step: 454/466, loss: 2.2436349391937256 2023-01-24 01:40:39.274725: step: 456/466, loss: 1.7453405857086182 2023-01-24 01:40:40.032351: step: 458/466, loss: 0.601981520652771 2023-01-24 01:40:40.827851: step: 460/466, loss: 4.245301723480225 2023-01-24 01:40:41.579840: step: 462/466, loss: 1.8211416006088257 2023-01-24 01:40:42.286335: step: 464/466, loss: 1.1473311185836792 2023-01-24 01:40:43.049421: step: 466/466, loss: 3.5540287494659424 2023-01-24 01:40:43.799109: step: 468/466, loss: 0.6876145601272583 2023-01-24 01:40:44.621089: step: 470/466, loss: 4.3995513916015625 2023-01-24 01:40:45.391948: step: 472/466, loss: 1.2393075227737427 2023-01-24 01:40:46.152581: step: 474/466, loss: 0.9958049654960632 2023-01-24 01:40:46.856910: step: 476/466, loss: 0.3819100856781006 2023-01-24 01:40:47.593404: step: 478/466, loss: 0.6085644364356995 2023-01-24 01:40:48.338350: step: 480/466, loss: 8.003620147705078 2023-01-24 01:40:49.130802: step: 482/466, loss: 0.3538343906402588 2023-01-24 01:40:49.991567: step: 484/466, loss: 2.603562116622925 2023-01-24 01:40:50.800033: step: 486/466, loss: 1.1814818382263184 2023-01-24 01:40:51.530315: step: 488/466, loss: 1.0522490739822388 2023-01-24 01:40:52.264838: step: 490/466, loss: 0.9714776277542114 2023-01-24 01:40:53.002294: step: 492/466, loss: 5.451353549957275 2023-01-24 01:40:53.711426: step: 494/466, loss: 1.90500009059906 2023-01-24 01:40:54.456399: step: 496/466, loss: 2.5933101177215576 2023-01-24 01:40:55.187217: step: 498/466, loss: 1.0544312000274658 2023-01-24 01:40:55.946787: step: 500/466, loss: 5.469407081604004 2023-01-24 01:40:56.761270: step: 502/466, loss: 2.7074644565582275 2023-01-24 01:40:57.540767: step: 504/466, loss: 0.4564131200313568 2023-01-24 01:40:58.286633: step: 506/466, loss: 1.8417478799819946 2023-01-24 01:40:59.038920: step: 508/466, loss: 0.9633346199989319 2023-01-24 01:40:59.824097: step: 510/466, loss: 1.4366604089736938 2023-01-24 01:41:00.578160: step: 512/466, loss: 0.8661335110664368 2023-01-24 01:41:01.405969: step: 514/466, loss: 0.5213767290115356 2023-01-24 01:41:02.143663: step: 516/466, loss: 0.5418296456336975 2023-01-24 01:41:02.876165: step: 518/466, loss: 2.071904182434082 2023-01-24 01:41:03.605426: step: 520/466, loss: 5.834861755371094 2023-01-24 01:41:04.388768: step: 522/466, loss: 1.318400263786316 2023-01-24 01:41:05.136078: step: 524/466, loss: 1.7108652591705322 2023-01-24 01:41:05.882056: step: 526/466, loss: 0.3789096772670746 2023-01-24 01:41:06.570213: step: 528/466, loss: 2.4232115745544434 2023-01-24 01:41:07.403381: step: 530/466, loss: 2.5445380210876465 2023-01-24 01:41:08.137437: step: 532/466, loss: 1.4125943183898926 2023-01-24 01:41:08.872018: step: 534/466, loss: 0.5616583228111267 2023-01-24 01:41:09.566994: step: 536/466, loss: 5.609212875366211 2023-01-24 01:41:10.367020: step: 538/466, loss: 7.8469367027282715 2023-01-24 01:41:11.252448: step: 540/466, loss: 1.772813081741333 2023-01-24 01:41:12.032093: step: 542/466, loss: 0.7231072187423706 2023-01-24 01:41:12.839503: step: 544/466, loss: 0.9882017374038696 2023-01-24 01:41:13.559580: step: 546/466, loss: 10.674277305603027 2023-01-24 01:41:14.360650: step: 548/466, loss: 0.8669376373291016 2023-01-24 01:41:15.139102: step: 550/466, loss: 0.8646388053894043 2023-01-24 01:41:15.944168: step: 552/466, loss: 4.222936153411865 2023-01-24 01:41:16.709020: step: 554/466, loss: 3.2410836219787598 2023-01-24 01:41:17.535316: step: 556/466, loss: 1.8248041868209839 2023-01-24 01:41:18.370120: step: 558/466, loss: 3.5040442943573 2023-01-24 01:41:19.089309: step: 560/466, loss: 1.596124291419983 2023-01-24 01:41:19.970111: step: 562/466, loss: 0.30306729674339294 2023-01-24 01:41:20.719129: step: 564/466, loss: 0.9529277682304382 2023-01-24 01:41:21.497511: step: 566/466, loss: 1.5184459686279297 2023-01-24 01:41:22.362577: step: 568/466, loss: 0.9084643721580505 2023-01-24 01:41:23.160872: step: 570/466, loss: 0.4883304834365845 2023-01-24 01:41:23.809909: step: 572/466, loss: 1.4788918495178223 2023-01-24 01:41:24.473704: step: 574/466, loss: 0.7763687968254089 2023-01-24 01:41:25.154394: step: 576/466, loss: 0.799630880355835 2023-01-24 01:41:25.870028: step: 578/466, loss: 2.9817371368408203 2023-01-24 01:41:26.640051: step: 580/466, loss: 1.0672000646591187 2023-01-24 01:41:27.480140: step: 582/466, loss: 1.8494359254837036 2023-01-24 01:41:28.227351: step: 584/466, loss: 1.0946604013442993 2023-01-24 01:41:28.975489: step: 586/466, loss: 2.053010940551758 2023-01-24 01:41:29.698184: step: 588/466, loss: 1.2595796585083008 2023-01-24 01:41:30.547967: step: 590/466, loss: 0.74193274974823 2023-01-24 01:41:31.257038: step: 592/466, loss: 1.6734983921051025 2023-01-24 01:41:31.945542: step: 594/466, loss: 0.6596741080284119 2023-01-24 01:41:32.753889: step: 596/466, loss: 1.8599157333374023 2023-01-24 01:41:33.528968: step: 598/466, loss: 0.7645819187164307 2023-01-24 01:41:34.218378: step: 600/466, loss: 2.639955759048462 2023-01-24 01:41:34.985461: step: 602/466, loss: 0.2884419560432434 2023-01-24 01:41:35.714018: step: 604/466, loss: 1.6108710765838623 2023-01-24 01:41:36.411238: step: 606/466, loss: 6.910267353057861 2023-01-24 01:41:37.154004: step: 608/466, loss: 2.028846263885498 2023-01-24 01:41:37.908739: step: 610/466, loss: 1.0209906101226807 2023-01-24 01:41:38.706563: step: 612/466, loss: 3.6873764991760254 2023-01-24 01:41:39.477127: step: 614/466, loss: 0.35416746139526367 2023-01-24 01:41:40.155343: step: 616/466, loss: 2.9250409603118896 2023-01-24 01:41:40.978856: step: 618/466, loss: 3.456394672393799 2023-01-24 01:41:41.709956: step: 620/466, loss: 2.000549554824829 2023-01-24 01:41:42.477835: step: 622/466, loss: 0.8701221942901611 2023-01-24 01:41:43.267718: step: 624/466, loss: 8.37614631652832 2023-01-24 01:41:43.968909: step: 626/466, loss: 0.4917764365673065 2023-01-24 01:41:44.678143: step: 628/466, loss: 0.5501490831375122 2023-01-24 01:41:45.465995: step: 630/466, loss: 1.4101099967956543 2023-01-24 01:41:46.203517: step: 632/466, loss: 1.3804508447647095 2023-01-24 01:41:46.962918: step: 634/466, loss: 0.34051811695098877 2023-01-24 01:41:47.745267: step: 636/466, loss: 1.925154685974121 2023-01-24 01:41:48.412908: step: 638/466, loss: 0.4945259988307953 2023-01-24 01:41:49.161014: step: 640/466, loss: 3.1828784942626953 2023-01-24 01:41:49.938773: step: 642/466, loss: 0.3875892162322998 2023-01-24 01:41:50.754824: step: 644/466, loss: 2.7635698318481445 2023-01-24 01:41:51.456450: step: 646/466, loss: 1.7485973834991455 2023-01-24 01:41:52.193563: step: 648/466, loss: 0.5556420087814331 2023-01-24 01:41:52.905585: step: 650/466, loss: 1.3510385751724243 2023-01-24 01:41:53.779538: step: 652/466, loss: 0.8903523683547974 2023-01-24 01:41:54.522127: step: 654/466, loss: 3.023341178894043 2023-01-24 01:41:55.253457: step: 656/466, loss: 0.6053643226623535 2023-01-24 01:41:56.031543: step: 658/466, loss: 1.5766246318817139 2023-01-24 01:41:56.828603: step: 660/466, loss: 1.8168840408325195 2023-01-24 01:41:57.627337: step: 662/466, loss: 0.6825411915779114 2023-01-24 01:41:58.388443: step: 664/466, loss: 1.2695629596710205 2023-01-24 01:41:59.133870: step: 666/466, loss: 0.973310112953186 2023-01-24 01:41:59.878255: step: 668/466, loss: 0.8447468280792236 2023-01-24 01:42:00.669163: step: 670/466, loss: 4.895907402038574 2023-01-24 01:42:01.420861: step: 672/466, loss: 0.7715153098106384 2023-01-24 01:42:02.218409: step: 674/466, loss: 3.3655648231506348 2023-01-24 01:42:02.888625: step: 676/466, loss: 0.9991435408592224 2023-01-24 01:42:03.672713: step: 678/466, loss: 3.844038486480713 2023-01-24 01:42:04.384149: step: 680/466, loss: 0.338966429233551 2023-01-24 01:42:05.200559: step: 682/466, loss: 1.0828441381454468 2023-01-24 01:42:05.956281: step: 684/466, loss: 1.0340423583984375 2023-01-24 01:42:06.705797: step: 686/466, loss: 3.864016532897949 2023-01-24 01:42:07.449976: step: 688/466, loss: 0.5012548565864563 2023-01-24 01:42:08.210760: step: 690/466, loss: 1.569545030593872 2023-01-24 01:42:08.968244: step: 692/466, loss: 1.2501015663146973 2023-01-24 01:42:09.702964: step: 694/466, loss: 5.520549774169922 2023-01-24 01:42:10.569920: step: 696/466, loss: 0.9210854172706604 2023-01-24 01:42:11.361713: step: 698/466, loss: 1.2601354122161865 2023-01-24 01:42:12.107998: step: 700/466, loss: 0.6014167070388794 2023-01-24 01:42:12.874363: step: 702/466, loss: 2.394166946411133 2023-01-24 01:42:13.596233: step: 704/466, loss: 0.8901013135910034 2023-01-24 01:42:14.344483: step: 706/466, loss: 3.638638496398926 2023-01-24 01:42:15.127961: step: 708/466, loss: 2.6969895362854004 2023-01-24 01:42:15.904968: step: 710/466, loss: 2.8152315616607666 2023-01-24 01:42:16.668263: step: 712/466, loss: 0.5224518775939941 2023-01-24 01:42:17.537192: step: 714/466, loss: 2.062082052230835 2023-01-24 01:42:18.326132: step: 716/466, loss: 0.602192759513855 2023-01-24 01:42:19.095269: step: 718/466, loss: 1.2655690908432007 2023-01-24 01:42:19.939107: step: 720/466, loss: 1.3014322519302368 2023-01-24 01:42:20.709455: step: 722/466, loss: 0.5693433284759521 2023-01-24 01:42:21.463943: step: 724/466, loss: 0.5791750550270081 2023-01-24 01:42:22.236392: step: 726/466, loss: 0.7184494733810425 2023-01-24 01:42:23.030286: step: 728/466, loss: 0.5201277136802673 2023-01-24 01:42:23.783786: step: 730/466, loss: 0.6421585083007812 2023-01-24 01:42:24.550025: step: 732/466, loss: 4.414198398590088 2023-01-24 01:42:25.312499: step: 734/466, loss: 7.777451992034912 2023-01-24 01:42:26.144404: step: 736/466, loss: 0.8733360767364502 2023-01-24 01:42:26.906912: step: 738/466, loss: 0.46783724427223206 2023-01-24 01:42:27.716107: step: 740/466, loss: 0.41334855556488037 2023-01-24 01:42:28.600154: step: 742/466, loss: 0.6446312069892883 2023-01-24 01:42:29.389175: step: 744/466, loss: 3.32536244392395 2023-01-24 01:42:30.194446: step: 746/466, loss: 0.29394975304603577 2023-01-24 01:42:30.958163: step: 748/466, loss: 6.848193168640137 2023-01-24 01:42:31.759034: step: 750/466, loss: 4.080060005187988 2023-01-24 01:42:32.515158: step: 752/466, loss: 1.6240911483764648 2023-01-24 01:42:33.309819: step: 754/466, loss: 0.9909073114395142 2023-01-24 01:42:34.059674: step: 756/466, loss: 1.2752301692962646 2023-01-24 01:42:34.796234: step: 758/466, loss: 1.7539832592010498 2023-01-24 01:42:35.522325: step: 760/466, loss: 0.8013241291046143 2023-01-24 01:42:36.290810: step: 762/466, loss: 3.9893579483032227 2023-01-24 01:42:37.111214: step: 764/466, loss: 0.5241023898124695 2023-01-24 01:42:37.851578: step: 766/466, loss: 2.236712694168091 2023-01-24 01:42:38.604632: step: 768/466, loss: 4.814619541168213 2023-01-24 01:42:39.430602: step: 770/466, loss: 1.8054533004760742 2023-01-24 01:42:40.264972: step: 772/466, loss: 1.2646602392196655 2023-01-24 01:42:41.055216: step: 774/466, loss: 1.7914949655532837 2023-01-24 01:42:41.834595: step: 776/466, loss: 0.44616395235061646 2023-01-24 01:42:42.674652: step: 778/466, loss: 0.9424298405647278 2023-01-24 01:42:43.495106: step: 780/466, loss: 4.644623279571533 2023-01-24 01:42:44.265397: step: 782/466, loss: 4.011265754699707 2023-01-24 01:42:44.972371: step: 784/466, loss: 0.446866512298584 2023-01-24 01:42:45.799197: step: 786/466, loss: 8.493598937988281 2023-01-24 01:42:46.588937: step: 788/466, loss: 1.4424996376037598 2023-01-24 01:42:47.425244: step: 790/466, loss: 1.4629288911819458 2023-01-24 01:42:48.119906: step: 792/466, loss: 2.3262360095977783 2023-01-24 01:42:48.872427: step: 794/466, loss: 0.5425397753715515 2023-01-24 01:42:49.656817: step: 796/466, loss: 0.9937711954116821 2023-01-24 01:42:50.385858: step: 798/466, loss: 5.378913402557373 2023-01-24 01:42:51.147194: step: 800/466, loss: 0.8644759654998779 2023-01-24 01:42:51.928184: step: 802/466, loss: 1.6354568004608154 2023-01-24 01:42:52.603988: step: 804/466, loss: 0.847054123878479 2023-01-24 01:42:53.331198: step: 806/466, loss: 11.03044319152832 2023-01-24 01:42:54.197634: step: 808/466, loss: 1.2004551887512207 2023-01-24 01:42:55.013271: step: 810/466, loss: 1.6788220405578613 2023-01-24 01:42:55.759658: step: 812/466, loss: 0.9433671832084656 2023-01-24 01:42:56.531918: step: 814/466, loss: 1.2230790853500366 2023-01-24 01:42:57.281295: step: 816/466, loss: 1.431276559829712 2023-01-24 01:42:57.972389: step: 818/466, loss: 0.7971752285957336 2023-01-24 01:42:58.703147: step: 820/466, loss: 1.0101542472839355 2023-01-24 01:42:59.402244: step: 822/466, loss: 1.1521886587142944 2023-01-24 01:43:00.150210: step: 824/466, loss: 1.9171631336212158 2023-01-24 01:43:01.011238: step: 826/466, loss: 0.4679133892059326 2023-01-24 01:43:01.866275: step: 828/466, loss: 3.038625717163086 2023-01-24 01:43:02.713261: step: 830/466, loss: 3.714357852935791 2023-01-24 01:43:03.413844: step: 832/466, loss: 1.6085307598114014 2023-01-24 01:43:04.172273: step: 834/466, loss: 1.6621365547180176 2023-01-24 01:43:04.964253: step: 836/466, loss: 4.6484551429748535 2023-01-24 01:43:05.849956: step: 838/466, loss: 2.3264412879943848 2023-01-24 01:43:06.607969: step: 840/466, loss: 0.3419003486633301 2023-01-24 01:43:07.377255: step: 842/466, loss: 0.6267896890640259 2023-01-24 01:43:08.266490: step: 844/466, loss: 0.9167214632034302 2023-01-24 01:43:08.999747: step: 846/466, loss: 3.188936710357666 2023-01-24 01:43:09.766347: step: 848/466, loss: 0.515191912651062 2023-01-24 01:43:10.530368: step: 850/466, loss: 1.8713839054107666 2023-01-24 01:43:11.399784: step: 852/466, loss: 0.22444993257522583 2023-01-24 01:43:12.116570: step: 854/466, loss: 0.7310824394226074 2023-01-24 01:43:12.864347: step: 856/466, loss: 2.4015276432037354 2023-01-24 01:43:13.581771: step: 858/466, loss: 3.1640782356262207 2023-01-24 01:43:14.379692: step: 860/466, loss: 0.4616748094558716 2023-01-24 01:43:15.139438: step: 862/466, loss: 1.9388368129730225 2023-01-24 01:43:15.825806: step: 864/466, loss: 0.47202399373054504 2023-01-24 01:43:16.572176: step: 866/466, loss: 1.1583646535873413 2023-01-24 01:43:17.311244: step: 868/466, loss: 1.4272856712341309 2023-01-24 01:43:18.087689: step: 870/466, loss: 1.2141351699829102 2023-01-24 01:43:18.911137: step: 872/466, loss: 0.9213106036186218 2023-01-24 01:43:19.685430: step: 874/466, loss: 0.31684863567352295 2023-01-24 01:43:20.450476: step: 876/466, loss: 1.1014714241027832 2023-01-24 01:43:21.164231: step: 878/466, loss: 1.3665329217910767 2023-01-24 01:43:21.910978: step: 880/466, loss: 3.04831862449646 2023-01-24 01:43:22.759700: step: 882/466, loss: 0.9298765659332275 2023-01-24 01:43:23.642804: step: 884/466, loss: 7.784796237945557 2023-01-24 01:43:24.330633: step: 886/466, loss: 0.4797598123550415 2023-01-24 01:43:25.125346: step: 888/466, loss: 4.756036758422852 2023-01-24 01:43:25.807119: step: 890/466, loss: 2.582414388656616 2023-01-24 01:43:26.558549: step: 892/466, loss: 3.721204996109009 2023-01-24 01:43:27.337484: step: 894/466, loss: 1.1353154182434082 2023-01-24 01:43:28.044965: step: 896/466, loss: 0.5402342677116394 2023-01-24 01:43:28.784510: step: 898/466, loss: 1.3996034860610962 2023-01-24 01:43:29.621150: step: 900/466, loss: 0.44790565967559814 2023-01-24 01:43:30.380068: step: 902/466, loss: 2.234361410140991 2023-01-24 01:43:31.071804: step: 904/466, loss: 0.3241014778614044 2023-01-24 01:43:31.760300: step: 906/466, loss: 0.22168482840061188 2023-01-24 01:43:32.603862: step: 908/466, loss: 1.1810328960418701 2023-01-24 01:43:33.396272: step: 910/466, loss: 1.3574362993240356 2023-01-24 01:43:34.241328: step: 912/466, loss: 2.938023567199707 2023-01-24 01:43:34.991743: step: 914/466, loss: 1.8619043827056885 2023-01-24 01:43:35.669791: step: 916/466, loss: 1.254056692123413 2023-01-24 01:43:36.443621: step: 918/466, loss: 0.8026758432388306 2023-01-24 01:43:37.178747: step: 920/466, loss: 1.3725330829620361 2023-01-24 01:43:37.926987: step: 922/466, loss: 1.590737223625183 2023-01-24 01:43:38.707971: step: 924/466, loss: 0.4203554689884186 2023-01-24 01:43:39.459752: step: 926/466, loss: 2.6803925037384033 2023-01-24 01:43:40.193665: step: 928/466, loss: 0.9792072176933289 2023-01-24 01:43:40.968051: step: 930/466, loss: 0.7705917954444885 2023-01-24 01:43:41.681596: step: 932/466, loss: 1.9272147417068481 ================================================== Loss: 1.989 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.300709333818417, 'r': 0.21641959630870922, 'f1': 0.25169503711673674}, 'combined': 0.18545950103338496, 'epoch': 1} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3342482221279307, 'r': 0.17376289267790523, 'f1': 0.2286561701375162}, 'combined': 0.1405398899381807, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3035099735770597, 'r': 0.2226122301472574, 'f1': 0.2568415915984714}, 'combined': 0.18925169907255787, 'epoch': 1} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.31732975768306243, 'r': 0.1747096418704501, 'f1': 0.22535011777492842}, 'combined': 0.13850787726654137, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33181401249024206, 'r': 0.2300074404761905, 'f1': 0.27168664109939283}, 'combined': 0.2001901565995526, 'epoch': 1} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3348279093853188, 'r': 0.16914881432678022, 'f1': 0.2247553952844653}, 'combined': 0.1388195088521698, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.22857142857142856, 'f1': 0.25396825396825395}, 'combined': 0.1693121693121693, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36904761904761907, 'r': 0.33695652173913043, 'f1': 0.3522727272727273}, 'combined': 0.17613636363636365, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5357142857142857, 'r': 0.12931034482758622, 'f1': 0.20833333333333337}, 'combined': 0.1388888888888889, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.300709333818417, 'r': 0.21641959630870922, 'f1': 0.25169503711673674}, 'combined': 0.18545950103338496, 'epoch': 1} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3342482221279307, 'r': 0.17376289267790523, 'f1': 0.2286561701375162}, 'combined': 0.1405398899381807, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.22857142857142856, 'f1': 0.25396825396825395}, 'combined': 0.1693121693121693, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3035099735770597, 'r': 0.2226122301472574, 'f1': 0.2568415915984714}, 'combined': 0.18925169907255787, 'epoch': 1} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.31732975768306243, 'r': 0.1747096418704501, 'f1': 0.22535011777492842}, 'combined': 0.13850787726654137, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36904761904761907, 'r': 0.33695652173913043, 'f1': 0.3522727272727273}, 'combined': 0.17613636363636365, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33181401249024206, 'r': 0.2300074404761905, 'f1': 0.27168664109939283}, 'combined': 0.2001901565995526, 'epoch': 1} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3348279093853188, 'r': 0.16914881432678022, 'f1': 0.2247553952844653}, 'combined': 0.1388195088521698, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5357142857142857, 'r': 0.12931034482758622, 'f1': 0.20833333333333337}, 'combined': 0.1388888888888889, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:46:55.060988: step: 2/466, loss: 0.7782899141311646 2023-01-24 01:46:55.840875: step: 4/466, loss: 1.060962438583374 2023-01-24 01:46:56.572660: step: 6/466, loss: 0.5922555923461914 2023-01-24 01:46:57.338894: step: 8/466, loss: 0.4344215989112854 2023-01-24 01:46:58.156569: step: 10/466, loss: 1.029514193534851 2023-01-24 01:46:58.920893: step: 12/466, loss: 2.223616361618042 2023-01-24 01:46:59.732359: step: 14/466, loss: 1.798203706741333 2023-01-24 01:47:00.478607: step: 16/466, loss: 0.9643449783325195 2023-01-24 01:47:01.224907: step: 18/466, loss: 1.639407992362976 2023-01-24 01:47:02.013625: step: 20/466, loss: 0.7638140320777893 2023-01-24 01:47:02.855177: step: 22/466, loss: 0.561688244342804 2023-01-24 01:47:03.594916: step: 24/466, loss: 6.8926191329956055 2023-01-24 01:47:04.358071: step: 26/466, loss: 1.3338921070098877 2023-01-24 01:47:05.070559: step: 28/466, loss: 0.6786946058273315 2023-01-24 01:47:05.868131: step: 30/466, loss: 3.4620018005371094 2023-01-24 01:47:06.620807: step: 32/466, loss: 0.38275638222694397 2023-01-24 01:47:07.451019: step: 34/466, loss: 1.3385947942733765 2023-01-24 01:47:08.343119: step: 36/466, loss: 1.246219277381897 2023-01-24 01:47:09.110820: step: 38/466, loss: 0.8457755446434021 2023-01-24 01:47:09.897556: step: 40/466, loss: 0.596068263053894 2023-01-24 01:47:10.704420: step: 42/466, loss: 3.452577590942383 2023-01-24 01:47:11.467381: step: 44/466, loss: 0.40949302911758423 2023-01-24 01:47:12.286064: step: 46/466, loss: 1.7453162670135498 2023-01-24 01:47:13.063311: step: 48/466, loss: 4.246631622314453 2023-01-24 01:47:13.813060: step: 50/466, loss: 0.25915151834487915 2023-01-24 01:47:14.536263: step: 52/466, loss: 4.343395233154297 2023-01-24 01:47:15.263656: step: 54/466, loss: 1.3142693042755127 2023-01-24 01:47:16.024964: step: 56/466, loss: 0.6851130723953247 2023-01-24 01:47:16.772893: step: 58/466, loss: 1.0555392503738403 2023-01-24 01:47:17.631922: step: 60/466, loss: 2.663140296936035 2023-01-24 01:47:18.345076: step: 62/466, loss: 3.964977741241455 2023-01-24 01:47:19.076586: step: 64/466, loss: 2.7914316654205322 2023-01-24 01:47:19.882017: step: 66/466, loss: 0.4234890937805176 2023-01-24 01:47:20.635719: step: 68/466, loss: 0.30918264389038086 2023-01-24 01:47:21.471304: step: 70/466, loss: 1.3355333805084229 2023-01-24 01:47:22.247819: step: 72/466, loss: 0.4983653426170349 2023-01-24 01:47:22.961724: step: 74/466, loss: 5.752483367919922 2023-01-24 01:47:23.728422: step: 76/466, loss: 0.2963273823261261 2023-01-24 01:47:24.437140: step: 78/466, loss: 3.8984785079956055 2023-01-24 01:47:25.219771: step: 80/466, loss: 1.214860439300537 2023-01-24 01:47:26.032460: step: 82/466, loss: 4.006836891174316 2023-01-24 01:47:26.823633: step: 84/466, loss: 4.287250995635986 2023-01-24 01:47:27.707065: step: 86/466, loss: 0.7098851203918457 2023-01-24 01:47:28.440325: step: 88/466, loss: 0.141645148396492 2023-01-24 01:47:29.121633: step: 90/466, loss: 1.717045545578003 2023-01-24 01:47:29.890624: step: 92/466, loss: 1.619040608406067 2023-01-24 01:47:30.583791: step: 94/466, loss: 2.095520257949829 2023-01-24 01:47:31.318287: step: 96/466, loss: 1.948998212814331 2023-01-24 01:47:32.031578: step: 98/466, loss: 0.7309465408325195 2023-01-24 01:47:32.760675: step: 100/466, loss: 1.6109943389892578 2023-01-24 01:47:33.481113: step: 102/466, loss: 2.348635196685791 2023-01-24 01:47:34.269207: step: 104/466, loss: 1.4683918952941895 2023-01-24 01:47:35.024624: step: 106/466, loss: 3.8863635063171387 2023-01-24 01:47:35.810343: step: 108/466, loss: 1.4917595386505127 2023-01-24 01:47:36.517223: step: 110/466, loss: 4.646513938903809 2023-01-24 01:47:37.269956: step: 112/466, loss: 0.14392292499542236 2023-01-24 01:47:37.966668: step: 114/466, loss: 1.1361932754516602 2023-01-24 01:47:38.721038: step: 116/466, loss: 0.9295158386230469 2023-01-24 01:47:39.512175: step: 118/466, loss: 1.2146947383880615 2023-01-24 01:47:40.291560: step: 120/466, loss: 1.472395420074463 2023-01-24 01:47:41.049168: step: 122/466, loss: 2.7178173065185547 2023-01-24 01:47:41.954101: step: 124/466, loss: 0.7712768912315369 2023-01-24 01:47:42.727404: step: 126/466, loss: 0.6306812167167664 2023-01-24 01:47:43.439766: step: 128/466, loss: 1.8061941862106323 2023-01-24 01:47:44.212492: step: 130/466, loss: 1.7571167945861816 2023-01-24 01:47:44.923988: step: 132/466, loss: 1.4951488971710205 2023-01-24 01:47:45.763058: step: 134/466, loss: 8.245603561401367 2023-01-24 01:47:46.520849: step: 136/466, loss: 1.6827788352966309 2023-01-24 01:47:47.228652: step: 138/466, loss: 1.1124346256256104 2023-01-24 01:47:47.903982: step: 140/466, loss: 0.7739265561103821 2023-01-24 01:47:48.714115: step: 142/466, loss: 0.3751266896724701 2023-01-24 01:47:49.511351: step: 144/466, loss: 0.5696679353713989 2023-01-24 01:47:50.247033: step: 146/466, loss: 0.279913067817688 2023-01-24 01:47:51.023571: step: 148/466, loss: 0.7016226053237915 2023-01-24 01:47:51.773277: step: 150/466, loss: 0.8330613374710083 2023-01-24 01:47:52.545356: step: 152/466, loss: 0.6634538173675537 2023-01-24 01:47:53.474162: step: 154/466, loss: 0.32156264781951904 2023-01-24 01:47:54.217355: step: 156/466, loss: 1.308510661125183 2023-01-24 01:47:55.006413: step: 158/466, loss: 1.1710666418075562 2023-01-24 01:47:55.713198: step: 160/466, loss: 0.18871276080608368 2023-01-24 01:47:56.516378: step: 162/466, loss: 2.8034167289733887 2023-01-24 01:47:57.266913: step: 164/466, loss: 2.130894422531128 2023-01-24 01:47:58.038199: step: 166/466, loss: 1.1603654623031616 2023-01-24 01:47:58.834993: step: 168/466, loss: 2.9954476356506348 2023-01-24 01:47:59.725276: step: 170/466, loss: 1.5141651630401611 2023-01-24 01:48:00.560395: step: 172/466, loss: 1.787217378616333 2023-01-24 01:48:01.437878: step: 174/466, loss: 1.7773669958114624 2023-01-24 01:48:02.256768: step: 176/466, loss: 1.1324913501739502 2023-01-24 01:48:03.054931: step: 178/466, loss: 2.0455985069274902 2023-01-24 01:48:04.074200: step: 180/466, loss: 1.0055625438690186 2023-01-24 01:48:04.764854: step: 182/466, loss: 0.31716784834861755 2023-01-24 01:48:05.553132: step: 184/466, loss: 2.176579713821411 2023-01-24 01:48:06.318477: step: 186/466, loss: 1.2036975622177124 2023-01-24 01:48:07.166721: step: 188/466, loss: 1.7671464681625366 2023-01-24 01:48:07.982004: step: 190/466, loss: 0.5484171509742737 2023-01-24 01:48:08.689607: step: 192/466, loss: 0.4990229606628418 2023-01-24 01:48:09.416445: step: 194/466, loss: 1.0899813175201416 2023-01-24 01:48:10.182212: step: 196/466, loss: 1.1280584335327148 2023-01-24 01:48:11.010136: step: 198/466, loss: 1.153459072113037 2023-01-24 01:48:11.744285: step: 200/466, loss: 0.8218337297439575 2023-01-24 01:48:12.520474: step: 202/466, loss: 1.1493607759475708 2023-01-24 01:48:13.292316: step: 204/466, loss: 4.972820281982422 2023-01-24 01:48:14.036422: step: 206/466, loss: 0.9070379734039307 2023-01-24 01:48:14.743540: step: 208/466, loss: 0.2831861674785614 2023-01-24 01:48:15.645525: step: 210/466, loss: 0.7909908890724182 2023-01-24 01:48:16.475664: step: 212/466, loss: 2.180725336074829 2023-01-24 01:48:17.199715: step: 214/466, loss: 0.5319482684135437 2023-01-24 01:48:17.910839: step: 216/466, loss: 1.8310894966125488 2023-01-24 01:48:18.680025: step: 218/466, loss: 1.070844292640686 2023-01-24 01:48:19.407363: step: 220/466, loss: 1.9309546947479248 2023-01-24 01:48:20.205052: step: 222/466, loss: 2.1100826263427734 2023-01-24 01:48:20.996018: step: 224/466, loss: 0.9473533630371094 2023-01-24 01:48:21.798536: step: 226/466, loss: 1.6386666297912598 2023-01-24 01:48:22.625345: step: 228/466, loss: 0.47829851508140564 2023-01-24 01:48:23.377337: step: 230/466, loss: 0.7959439158439636 2023-01-24 01:48:24.137382: step: 232/466, loss: 1.6609903573989868 2023-01-24 01:48:24.931594: step: 234/466, loss: 0.3416905701160431 2023-01-24 01:48:25.702537: step: 236/466, loss: 0.921734094619751 2023-01-24 01:48:26.505914: step: 238/466, loss: 1.387770175933838 2023-01-24 01:48:27.411003: step: 240/466, loss: 2.143401861190796 2023-01-24 01:48:28.098078: step: 242/466, loss: 0.17422008514404297 2023-01-24 01:48:28.820300: step: 244/466, loss: 0.9663944840431213 2023-01-24 01:48:29.574624: step: 246/466, loss: 2.27024507522583 2023-01-24 01:48:30.333599: step: 248/466, loss: 1.0249300003051758 2023-01-24 01:48:31.143165: step: 250/466, loss: 0.7960470914840698 2023-01-24 01:48:31.993951: step: 252/466, loss: 0.5432288646697998 2023-01-24 01:48:32.747113: step: 254/466, loss: 0.46682053804397583 2023-01-24 01:48:33.564541: step: 256/466, loss: 2.035393714904785 2023-01-24 01:48:34.251437: step: 258/466, loss: 0.5158565044403076 2023-01-24 01:48:35.087007: step: 260/466, loss: 1.2990877628326416 2023-01-24 01:48:35.889172: step: 262/466, loss: 0.953231692314148 2023-01-24 01:48:36.652224: step: 264/466, loss: 0.5877880454063416 2023-01-24 01:48:37.456766: step: 266/466, loss: 0.7475366592407227 2023-01-24 01:48:38.201036: step: 268/466, loss: 1.8516916036605835 2023-01-24 01:48:38.931621: step: 270/466, loss: 0.42474454641342163 2023-01-24 01:48:39.763106: step: 272/466, loss: 2.250751495361328 2023-01-24 01:48:40.567175: step: 274/466, loss: 2.9599623680114746 2023-01-24 01:48:41.338295: step: 276/466, loss: 3.367034912109375 2023-01-24 01:48:42.082195: step: 278/466, loss: 2.1417951583862305 2023-01-24 01:48:42.899395: step: 280/466, loss: 0.539064347743988 2023-01-24 01:48:43.714359: step: 282/466, loss: 1.0810662508010864 2023-01-24 01:48:44.519923: step: 284/466, loss: 1.3193659782409668 2023-01-24 01:48:45.327308: step: 286/466, loss: 0.47315260767936707 2023-01-24 01:48:46.033072: step: 288/466, loss: 0.982465922832489 2023-01-24 01:48:46.733985: step: 290/466, loss: 1.0310883522033691 2023-01-24 01:48:47.416626: step: 292/466, loss: 0.8889856338500977 2023-01-24 01:48:48.132653: step: 294/466, loss: 1.234919786453247 2023-01-24 01:48:48.856523: step: 296/466, loss: 0.9779350757598877 2023-01-24 01:48:49.611747: step: 298/466, loss: 2.740853786468506 2023-01-24 01:48:50.407061: step: 300/466, loss: 0.8422730565071106 2023-01-24 01:48:51.125503: step: 302/466, loss: 1.5674970149993896 2023-01-24 01:48:51.937658: step: 304/466, loss: 0.2424909919500351 2023-01-24 01:48:52.720204: step: 306/466, loss: 1.2755539417266846 2023-01-24 01:48:53.468816: step: 308/466, loss: 1.3817476034164429 2023-01-24 01:48:54.325454: step: 310/466, loss: 3.8143482208251953 2023-01-24 01:48:55.090589: step: 312/466, loss: 1.4053281545639038 2023-01-24 01:48:55.771550: step: 314/466, loss: 0.4637737572193146 2023-01-24 01:48:56.519872: step: 316/466, loss: 0.3525203466415405 2023-01-24 01:48:57.262302: step: 318/466, loss: 3.6276497840881348 2023-01-24 01:48:58.008048: step: 320/466, loss: 1.2861909866333008 2023-01-24 01:48:58.759764: step: 322/466, loss: 7.183420181274414 2023-01-24 01:48:59.556843: step: 324/466, loss: 1.1658718585968018 2023-01-24 01:49:00.300525: step: 326/466, loss: 1.338639736175537 2023-01-24 01:49:00.976250: step: 328/466, loss: 1.1101205348968506 2023-01-24 01:49:01.698018: step: 330/466, loss: 2.1631531715393066 2023-01-24 01:49:02.457198: step: 332/466, loss: 1.582674503326416 2023-01-24 01:49:03.209564: step: 334/466, loss: 5.041405200958252 2023-01-24 01:49:03.947421: step: 336/466, loss: 2.207145929336548 2023-01-24 01:49:04.759689: step: 338/466, loss: 1.5536359548568726 2023-01-24 01:49:05.497494: step: 340/466, loss: 1.1272821426391602 2023-01-24 01:49:06.201443: step: 342/466, loss: 1.2651877403259277 2023-01-24 01:49:06.926305: step: 344/466, loss: 1.287071943283081 2023-01-24 01:49:07.646209: step: 346/466, loss: 0.9840033054351807 2023-01-24 01:49:08.381535: step: 348/466, loss: 0.834636390209198 2023-01-24 01:49:09.157957: step: 350/466, loss: 4.166690349578857 2023-01-24 01:49:09.897342: step: 352/466, loss: 1.4835872650146484 2023-01-24 01:49:10.611297: step: 354/466, loss: 1.3906885385513306 2023-01-24 01:49:11.382137: step: 356/466, loss: 1.5957164764404297 2023-01-24 01:49:12.117714: step: 358/466, loss: 1.3467870950698853 2023-01-24 01:49:12.822664: step: 360/466, loss: 1.778088927268982 2023-01-24 01:49:13.561425: step: 362/466, loss: 0.4566032290458679 2023-01-24 01:49:14.296444: step: 364/466, loss: 1.277762770652771 2023-01-24 01:49:15.092433: step: 366/466, loss: 0.898509681224823 2023-01-24 01:49:15.812324: step: 368/466, loss: 9.999470710754395 2023-01-24 01:49:16.566600: step: 370/466, loss: 4.15172815322876 2023-01-24 01:49:17.264414: step: 372/466, loss: 0.3042738735675812 2023-01-24 01:49:17.948170: step: 374/466, loss: 1.9609944820404053 2023-01-24 01:49:18.651467: step: 376/466, loss: 1.6575125455856323 2023-01-24 01:49:19.379458: step: 378/466, loss: 1.5117747783660889 2023-01-24 01:49:20.160360: step: 380/466, loss: 1.8306078910827637 2023-01-24 01:49:20.883200: step: 382/466, loss: 0.45823630690574646 2023-01-24 01:49:21.628425: step: 384/466, loss: 0.44007229804992676 2023-01-24 01:49:22.434567: step: 386/466, loss: 1.2810183763504028 2023-01-24 01:49:23.239968: step: 388/466, loss: 0.6912828683853149 2023-01-24 01:49:24.005541: step: 390/466, loss: 0.7221027612686157 2023-01-24 01:49:24.743338: step: 392/466, loss: 0.22919021546840668 2023-01-24 01:49:25.453902: step: 394/466, loss: 4.120431423187256 2023-01-24 01:49:26.226880: step: 396/466, loss: 1.8976693153381348 2023-01-24 01:49:27.014023: step: 398/466, loss: 0.5101234912872314 2023-01-24 01:49:27.671669: step: 400/466, loss: 0.17438340187072754 2023-01-24 01:49:28.463496: step: 402/466, loss: 4.7474684715271 2023-01-24 01:49:29.259272: step: 404/466, loss: 1.2714061737060547 2023-01-24 01:49:30.032666: step: 406/466, loss: 5.490708827972412 2023-01-24 01:49:30.801426: step: 408/466, loss: 1.0497984886169434 2023-01-24 01:49:31.494115: step: 410/466, loss: 1.5065267086029053 2023-01-24 01:49:32.314400: step: 412/466, loss: 0.8111212253570557 2023-01-24 01:49:33.136745: step: 414/466, loss: 1.206809639930725 2023-01-24 01:49:33.874140: step: 416/466, loss: 1.1442924737930298 2023-01-24 01:49:34.606550: step: 418/466, loss: 0.4491981863975525 2023-01-24 01:49:35.384101: step: 420/466, loss: 1.607987403869629 2023-01-24 01:49:36.196070: step: 422/466, loss: 0.7260214686393738 2023-01-24 01:49:37.012024: step: 424/466, loss: 0.8898999691009521 2023-01-24 01:49:37.751082: step: 426/466, loss: 0.27232709527015686 2023-01-24 01:49:38.504313: step: 428/466, loss: 1.1090251207351685 2023-01-24 01:49:39.291583: step: 430/466, loss: 0.3265036642551422 2023-01-24 01:49:39.989062: step: 432/466, loss: 1.7151259183883667 2023-01-24 01:49:40.748767: step: 434/466, loss: 0.5194512605667114 2023-01-24 01:49:41.555821: step: 436/466, loss: 0.7662211656570435 2023-01-24 01:49:42.356333: step: 438/466, loss: 1.1287963390350342 2023-01-24 01:49:43.171273: step: 440/466, loss: 1.9674453735351562 2023-01-24 01:49:43.952828: step: 442/466, loss: 0.32331493496894836 2023-01-24 01:49:44.673511: step: 444/466, loss: 2.2679405212402344 2023-01-24 01:49:45.593104: step: 446/466, loss: 1.2494890689849854 2023-01-24 01:49:46.409831: step: 448/466, loss: 0.2581394910812378 2023-01-24 01:49:47.190386: step: 450/466, loss: 0.34983840584754944 2023-01-24 01:49:47.956378: step: 452/466, loss: 0.41163530945777893 2023-01-24 01:49:48.658950: step: 454/466, loss: 2.9426956176757812 2023-01-24 01:49:49.318852: step: 456/466, loss: 1.1036453247070312 2023-01-24 01:49:50.109873: step: 458/466, loss: 1.412305235862732 2023-01-24 01:49:50.905094: step: 460/466, loss: 1.4345080852508545 2023-01-24 01:49:51.659712: step: 462/466, loss: 1.881574034690857 2023-01-24 01:49:52.432986: step: 464/466, loss: 0.3297499418258667 2023-01-24 01:49:53.202553: step: 466/466, loss: 0.8000501990318298 2023-01-24 01:49:54.080337: step: 468/466, loss: 1.1551730632781982 2023-01-24 01:49:54.878847: step: 470/466, loss: 2.107248306274414 2023-01-24 01:49:55.653939: step: 472/466, loss: 0.9202122092247009 2023-01-24 01:49:56.483460: step: 474/466, loss: 0.5258978009223938 2023-01-24 01:49:57.275129: step: 476/466, loss: 0.8387023210525513 2023-01-24 01:49:57.944507: step: 478/466, loss: 4.732879161834717 2023-01-24 01:49:58.712389: step: 480/466, loss: 3.042332172393799 2023-01-24 01:49:59.447589: step: 482/466, loss: 0.5810714960098267 2023-01-24 01:50:00.255503: step: 484/466, loss: 1.4622222185134888 2023-01-24 01:50:01.034664: step: 486/466, loss: 0.3892802298069 2023-01-24 01:50:01.727116: step: 488/466, loss: 3.730802059173584 2023-01-24 01:50:02.601119: step: 490/466, loss: 0.22228802740573883 2023-01-24 01:50:03.431559: step: 492/466, loss: 2.525942087173462 2023-01-24 01:50:04.231250: step: 494/466, loss: 1.179243564605713 2023-01-24 01:50:05.029519: step: 496/466, loss: 0.464471697807312 2023-01-24 01:50:05.810004: step: 498/466, loss: 1.479258418083191 2023-01-24 01:50:06.455924: step: 500/466, loss: 2.4078640937805176 2023-01-24 01:50:07.302716: step: 502/466, loss: 1.3903347253799438 2023-01-24 01:50:08.119473: step: 504/466, loss: 8.301027297973633 2023-01-24 01:50:08.927829: step: 506/466, loss: 0.9799388647079468 2023-01-24 01:50:09.675621: step: 508/466, loss: 1.767979621887207 2023-01-24 01:50:10.353316: step: 510/466, loss: 1.7241212129592896 2023-01-24 01:50:11.082154: step: 512/466, loss: 3.429478168487549 2023-01-24 01:50:11.892563: step: 514/466, loss: 0.9211028814315796 2023-01-24 01:50:12.638346: step: 516/466, loss: 0.6944395899772644 2023-01-24 01:50:13.410481: step: 518/466, loss: 4.043740749359131 2023-01-24 01:50:14.233526: step: 520/466, loss: 3.6364011764526367 2023-01-24 01:50:14.931867: step: 522/466, loss: 1.1717474460601807 2023-01-24 01:50:15.654410: step: 524/466, loss: 1.412083387374878 2023-01-24 01:50:16.377065: step: 526/466, loss: 1.7653515338897705 2023-01-24 01:50:17.077649: step: 528/466, loss: 8.733113288879395 2023-01-24 01:50:17.763291: step: 530/466, loss: 0.6756694316864014 2023-01-24 01:50:18.516405: step: 532/466, loss: 0.860968291759491 2023-01-24 01:50:19.292657: step: 534/466, loss: 0.4007422626018524 2023-01-24 01:50:20.009217: step: 536/466, loss: 1.1231260299682617 2023-01-24 01:50:20.744080: step: 538/466, loss: 1.2358288764953613 2023-01-24 01:50:21.452879: step: 540/466, loss: 0.16142581403255463 2023-01-24 01:50:22.210899: step: 542/466, loss: 0.8205204606056213 2023-01-24 01:50:23.016750: step: 544/466, loss: 0.4645836651325226 2023-01-24 01:50:23.745700: step: 546/466, loss: 0.9839164018630981 2023-01-24 01:50:24.599338: step: 548/466, loss: 0.6886512637138367 2023-01-24 01:50:25.433834: step: 550/466, loss: 1.2889955043792725 2023-01-24 01:50:26.117933: step: 552/466, loss: 0.3291648328304291 2023-01-24 01:50:26.836884: step: 554/466, loss: 6.707035541534424 2023-01-24 01:50:27.645078: step: 556/466, loss: 0.2883372902870178 2023-01-24 01:50:28.394697: step: 558/466, loss: 0.17781074345111847 2023-01-24 01:50:29.193292: step: 560/466, loss: 0.9045490026473999 2023-01-24 01:50:29.973387: step: 562/466, loss: 1.085221290588379 2023-01-24 01:50:30.676608: step: 564/466, loss: 0.5101253986358643 2023-01-24 01:50:31.463965: step: 566/466, loss: 0.9176614880561829 2023-01-24 01:50:32.275110: step: 568/466, loss: 0.5486728549003601 2023-01-24 01:50:33.036589: step: 570/466, loss: 2.648871898651123 2023-01-24 01:50:33.778991: step: 572/466, loss: 1.380678653717041 2023-01-24 01:50:34.572417: step: 574/466, loss: 0.9654417037963867 2023-01-24 01:50:35.261241: step: 576/466, loss: 1.4313585758209229 2023-01-24 01:50:36.004305: step: 578/466, loss: 0.9260072708129883 2023-01-24 01:50:36.856444: step: 580/466, loss: 1.1708897352218628 2023-01-24 01:50:37.594208: step: 582/466, loss: 2.7310571670532227 2023-01-24 01:50:38.435692: step: 584/466, loss: 0.6548606157302856 2023-01-24 01:50:39.202436: step: 586/466, loss: 0.8693034648895264 2023-01-24 01:50:39.898754: step: 588/466, loss: 0.11078198999166489 2023-01-24 01:50:40.683915: step: 590/466, loss: 1.0300918817520142 2023-01-24 01:50:41.427205: step: 592/466, loss: 0.395668625831604 2023-01-24 01:50:42.186334: step: 594/466, loss: 2.4247560501098633 2023-01-24 01:50:42.957246: step: 596/466, loss: 1.270116925239563 2023-01-24 01:50:43.762533: step: 598/466, loss: 0.23916392028331757 2023-01-24 01:50:44.564275: step: 600/466, loss: 1.0338306427001953 2023-01-24 01:50:45.288940: step: 602/466, loss: 0.6416707634925842 2023-01-24 01:50:46.033088: step: 604/466, loss: 0.8634581565856934 2023-01-24 01:50:46.768093: step: 606/466, loss: 0.5713067650794983 2023-01-24 01:50:47.577502: step: 608/466, loss: 0.630683183670044 2023-01-24 01:50:48.359449: step: 610/466, loss: 0.7142400145530701 2023-01-24 01:50:49.195337: step: 612/466, loss: 6.908291816711426 2023-01-24 01:50:50.123224: step: 614/466, loss: 1.0872340202331543 2023-01-24 01:50:50.938926: step: 616/466, loss: 0.3696516156196594 2023-01-24 01:50:51.717241: step: 618/466, loss: 1.0836267471313477 2023-01-24 01:50:52.498761: step: 620/466, loss: 0.33955225348472595 2023-01-24 01:50:53.251968: step: 622/466, loss: 0.2656218111515045 2023-01-24 01:50:54.016688: step: 624/466, loss: 2.546243190765381 2023-01-24 01:50:54.785577: step: 626/466, loss: 0.2593325972557068 2023-01-24 01:50:55.463292: step: 628/466, loss: 1.3277188539505005 2023-01-24 01:50:56.259670: step: 630/466, loss: 1.4344978332519531 2023-01-24 01:50:57.003945: step: 632/466, loss: 1.8098323345184326 2023-01-24 01:50:57.823355: step: 634/466, loss: 0.2669510841369629 2023-01-24 01:50:58.547428: step: 636/466, loss: 0.3324632942676544 2023-01-24 01:50:59.275076: step: 638/466, loss: 0.35632777214050293 2023-01-24 01:51:00.072698: step: 640/466, loss: 1.4985682964324951 2023-01-24 01:51:00.906687: step: 642/466, loss: 2.8833279609680176 2023-01-24 01:51:01.613951: step: 644/466, loss: 1.559320330619812 2023-01-24 01:51:02.468701: step: 646/466, loss: 1.1288251876831055 2023-01-24 01:51:03.252738: step: 648/466, loss: 1.512923002243042 2023-01-24 01:51:04.048003: step: 650/466, loss: 0.9332684278488159 2023-01-24 01:51:04.807844: step: 652/466, loss: 0.579450249671936 2023-01-24 01:51:05.593979: step: 654/466, loss: 0.6076934933662415 2023-01-24 01:51:06.381677: step: 656/466, loss: 4.72520637512207 2023-01-24 01:51:07.216158: step: 658/466, loss: 0.9739111661911011 2023-01-24 01:51:07.890028: step: 660/466, loss: 0.22468315064907074 2023-01-24 01:51:08.703483: step: 662/466, loss: 4.175695896148682 2023-01-24 01:51:09.553974: step: 664/466, loss: 1.0225563049316406 2023-01-24 01:51:10.297594: step: 666/466, loss: 0.8607184886932373 2023-01-24 01:51:11.136283: step: 668/466, loss: 2.4260406494140625 2023-01-24 01:51:11.828807: step: 670/466, loss: 0.7923364639282227 2023-01-24 01:51:12.552692: step: 672/466, loss: 1.084152340888977 2023-01-24 01:51:13.234561: step: 674/466, loss: 0.5868847370147705 2023-01-24 01:51:14.015737: step: 676/466, loss: 0.8401212692260742 2023-01-24 01:51:14.746702: step: 678/466, loss: 1.404846429824829 2023-01-24 01:51:15.477934: step: 680/466, loss: 0.825631856918335 2023-01-24 01:51:16.255269: step: 682/466, loss: 1.8228082656860352 2023-01-24 01:51:17.122595: step: 684/466, loss: 1.855266809463501 2023-01-24 01:51:17.871054: step: 686/466, loss: 1.5666351318359375 2023-01-24 01:51:18.595778: step: 688/466, loss: 0.6711158752441406 2023-01-24 01:51:19.367362: step: 690/466, loss: 2.033846616744995 2023-01-24 01:51:20.185220: step: 692/466, loss: 0.7143286466598511 2023-01-24 01:51:20.961251: step: 694/466, loss: 0.57305508852005 2023-01-24 01:51:21.706014: step: 696/466, loss: 1.4142602682113647 2023-01-24 01:51:22.452647: step: 698/466, loss: 1.5018336772918701 2023-01-24 01:51:23.230846: step: 700/466, loss: 2.2561750411987305 2023-01-24 01:51:24.029873: step: 702/466, loss: 0.6018629670143127 2023-01-24 01:51:24.808347: step: 704/466, loss: 0.669532299041748 2023-01-24 01:51:25.557967: step: 706/466, loss: 1.3866297006607056 2023-01-24 01:51:26.278857: step: 708/466, loss: 1.3363064527511597 2023-01-24 01:51:27.087373: step: 710/466, loss: 0.15829530358314514 2023-01-24 01:51:27.877972: step: 712/466, loss: 0.6302545070648193 2023-01-24 01:51:28.745160: step: 714/466, loss: 3.307643413543701 2023-01-24 01:51:29.484961: step: 716/466, loss: 1.5970646142959595 2023-01-24 01:51:30.208438: step: 718/466, loss: 5.3095526695251465 2023-01-24 01:51:30.886461: step: 720/466, loss: 2.7509565353393555 2023-01-24 01:51:31.674316: step: 722/466, loss: 1.5113661289215088 2023-01-24 01:51:32.579076: step: 724/466, loss: 1.215741753578186 2023-01-24 01:51:33.369589: step: 726/466, loss: 0.8744622468948364 2023-01-24 01:51:34.120105: step: 728/466, loss: 1.2747092247009277 2023-01-24 01:51:34.875442: step: 730/466, loss: 1.9986467361450195 2023-01-24 01:51:35.710142: step: 732/466, loss: 0.8631834387779236 2023-01-24 01:51:36.422616: step: 734/466, loss: 0.5817731022834778 2023-01-24 01:51:37.229289: step: 736/466, loss: 4.832178115844727 2023-01-24 01:51:38.013871: step: 738/466, loss: 7.882238388061523 2023-01-24 01:51:38.792212: step: 740/466, loss: 0.30403223633766174 2023-01-24 01:51:39.596494: step: 742/466, loss: 1.302971363067627 2023-01-24 01:51:40.349644: step: 744/466, loss: 0.9712323546409607 2023-01-24 01:51:41.068093: step: 746/466, loss: 2.6433591842651367 2023-01-24 01:51:41.800920: step: 748/466, loss: 1.4316619634628296 2023-01-24 01:51:42.577892: step: 750/466, loss: 0.6358616948127747 2023-01-24 01:51:43.305698: step: 752/466, loss: 1.0994406938552856 2023-01-24 01:51:44.101796: step: 754/466, loss: 0.6324729323387146 2023-01-24 01:51:44.849272: step: 756/466, loss: 0.8818514943122864 2023-01-24 01:51:45.642269: step: 758/466, loss: 0.599430501461029 2023-01-24 01:51:46.456680: step: 760/466, loss: 1.258911371231079 2023-01-24 01:51:47.221062: step: 762/466, loss: 0.6301141381263733 2023-01-24 01:51:48.082269: step: 764/466, loss: 1.468433141708374 2023-01-24 01:51:48.821755: step: 766/466, loss: 0.8221463561058044 2023-01-24 01:51:49.609735: step: 768/466, loss: 1.4093496799468994 2023-01-24 01:51:50.402835: step: 770/466, loss: 2.1558051109313965 2023-01-24 01:51:51.236362: step: 772/466, loss: 5.0710296630859375 2023-01-24 01:51:52.037311: step: 774/466, loss: 1.9297935962677002 2023-01-24 01:51:52.823839: step: 776/466, loss: 1.6422162055969238 2023-01-24 01:51:53.575162: step: 778/466, loss: 0.5209250450134277 2023-01-24 01:51:54.349209: step: 780/466, loss: 0.2130260169506073 2023-01-24 01:51:55.097215: step: 782/466, loss: 1.0116814374923706 2023-01-24 01:51:55.842647: step: 784/466, loss: 0.807086169719696 2023-01-24 01:51:56.562213: step: 786/466, loss: 0.7357428073883057 2023-01-24 01:51:57.348766: step: 788/466, loss: 0.8373849391937256 2023-01-24 01:51:58.132798: step: 790/466, loss: 2.5438427925109863 2023-01-24 01:51:58.871346: step: 792/466, loss: 2.2465767860412598 2023-01-24 01:51:59.666371: step: 794/466, loss: 6.289047718048096 2023-01-24 01:52:00.472908: step: 796/466, loss: 1.2244832515716553 2023-01-24 01:52:01.215029: step: 798/466, loss: 0.9194517731666565 2023-01-24 01:52:01.992895: step: 800/466, loss: 0.5414337515830994 2023-01-24 01:52:02.707803: step: 802/466, loss: 0.45343855023384094 2023-01-24 01:52:03.423264: step: 804/466, loss: 0.6903799772262573 2023-01-24 01:52:04.090869: step: 806/466, loss: 0.7730932831764221 2023-01-24 01:52:04.841003: step: 808/466, loss: 1.878928542137146 2023-01-24 01:52:05.487973: step: 810/466, loss: 0.9115766882896423 2023-01-24 01:52:06.358480: step: 812/466, loss: 5.4044294357299805 2023-01-24 01:52:07.150026: step: 814/466, loss: 1.1320093870162964 2023-01-24 01:52:07.846800: step: 816/466, loss: 1.3294376134872437 2023-01-24 01:52:08.622014: step: 818/466, loss: 2.8793349266052246 2023-01-24 01:52:09.441922: step: 820/466, loss: 11.329612731933594 2023-01-24 01:52:10.186590: step: 822/466, loss: 4.094210624694824 2023-01-24 01:52:11.010709: step: 824/466, loss: 2.4406044483184814 2023-01-24 01:52:11.762434: step: 826/466, loss: 1.6032664775848389 2023-01-24 01:52:12.478814: step: 828/466, loss: 0.45030879974365234 2023-01-24 01:52:13.312796: step: 830/466, loss: 1.1435775756835938 2023-01-24 01:52:14.042839: step: 832/466, loss: 0.5184001326560974 2023-01-24 01:52:14.811773: step: 834/466, loss: 1.1396267414093018 2023-01-24 01:52:15.552305: step: 836/466, loss: 2.8804054260253906 2023-01-24 01:52:16.288536: step: 838/466, loss: 12.526484489440918 2023-01-24 01:52:17.016672: step: 840/466, loss: 0.584930956363678 2023-01-24 01:52:17.794321: step: 842/466, loss: 1.3484601974487305 2023-01-24 01:52:18.587333: step: 844/466, loss: 0.5086191892623901 2023-01-24 01:52:19.439227: step: 846/466, loss: 0.2963380515575409 2023-01-24 01:52:20.213393: step: 848/466, loss: 0.8596228361129761 2023-01-24 01:52:20.921991: step: 850/466, loss: 5.564979553222656 2023-01-24 01:52:21.747465: step: 852/466, loss: 0.38722512125968933 2023-01-24 01:52:22.525748: step: 854/466, loss: 1.176393985748291 2023-01-24 01:52:23.273459: step: 856/466, loss: 0.8879210948944092 2023-01-24 01:52:24.018357: step: 858/466, loss: 4.816599369049072 2023-01-24 01:52:24.819411: step: 860/466, loss: 0.23427298665046692 2023-01-24 01:52:25.581580: step: 862/466, loss: 1.898385763168335 2023-01-24 01:52:26.263413: step: 864/466, loss: 0.7958246469497681 2023-01-24 01:52:27.129804: step: 866/466, loss: 1.7949293851852417 2023-01-24 01:52:27.855824: step: 868/466, loss: 1.4004677534103394 2023-01-24 01:52:28.594740: step: 870/466, loss: 0.6244205236434937 2023-01-24 01:52:29.461382: step: 872/466, loss: 0.2835509181022644 2023-01-24 01:52:30.349202: step: 874/466, loss: 1.0040892362594604 2023-01-24 01:52:31.059997: step: 876/466, loss: 2.8210952281951904 2023-01-24 01:52:31.756882: step: 878/466, loss: 0.45503246784210205 2023-01-24 01:52:32.563573: step: 880/466, loss: 0.5614334344863892 2023-01-24 01:52:33.286598: step: 882/466, loss: 2.8984780311584473 2023-01-24 01:52:34.020013: step: 884/466, loss: 0.786919355392456 2023-01-24 01:52:34.779847: step: 886/466, loss: 4.160480499267578 2023-01-24 01:52:35.563545: step: 888/466, loss: 2.23695707321167 2023-01-24 01:52:36.274684: step: 890/466, loss: 3.336459159851074 2023-01-24 01:52:37.083917: step: 892/466, loss: 0.5677067041397095 2023-01-24 01:52:37.856181: step: 894/466, loss: 0.22661782801151276 2023-01-24 01:52:38.604263: step: 896/466, loss: 1.7460044622421265 2023-01-24 01:52:39.394523: step: 898/466, loss: 0.900327205657959 2023-01-24 01:52:40.154687: step: 900/466, loss: 1.9194039106369019 2023-01-24 01:52:40.971381: step: 902/466, loss: 0.6010991334915161 2023-01-24 01:52:41.709296: step: 904/466, loss: 0.4171159863471985 2023-01-24 01:52:42.434500: step: 906/466, loss: 0.975546658039093 2023-01-24 01:52:43.168715: step: 908/466, loss: 0.8966106176376343 2023-01-24 01:52:43.954246: step: 910/466, loss: 1.053739309310913 2023-01-24 01:52:44.775336: step: 912/466, loss: 2.6362953186035156 2023-01-24 01:52:45.506872: step: 914/466, loss: 1.6291375160217285 2023-01-24 01:52:46.324738: step: 916/466, loss: 0.9473370313644409 2023-01-24 01:52:47.075191: step: 918/466, loss: 0.8623666763305664 2023-01-24 01:52:47.865721: step: 920/466, loss: 0.6700409650802612 2023-01-24 01:52:48.595270: step: 922/466, loss: 0.7651454210281372 2023-01-24 01:52:49.346298: step: 924/466, loss: 0.7948426604270935 2023-01-24 01:52:50.052086: step: 926/466, loss: 1.1421416997909546 2023-01-24 01:52:50.851650: step: 928/466, loss: 0.7876683473587036 2023-01-24 01:52:51.604286: step: 930/466, loss: 0.4691295027732849 2023-01-24 01:52:52.375417: step: 932/466, loss: 0.23193420469760895 ================================================== Loss: 1.580 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157035672660673, 'r': 0.21723270034943004, 'f1': 0.2573708812650582}, 'combined': 0.1896417019847797, 'epoch': 2} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3751308493942274, 'r': 0.18416691398215304, 'f1': 0.2470479783626335}, 'combined': 0.15184412328630156, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3119675173760364, 'r': 0.2168410584791768, 'f1': 0.25584822095420195}, 'combined': 0.18851974175572775, 'epoch': 2} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35384086842899887, 'r': 0.18745322969405118, 'f1': 0.24507421682504826}, 'combined': 0.15063098204856626, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3320878561736771, 'r': 0.21030138339920948, 'f1': 0.25752183291245795}, 'combined': 0.1897529295144427, 'epoch': 2} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37199721653916856, 'r': 0.18744294603871825, 'f1': 0.24927868555235164}, 'combined': 0.15396624695880545, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38920454545454547, 'r': 0.24464285714285713, 'f1': 0.3004385964912281}, 'combined': 0.20029239766081874, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.09722222222222222, 'r': 0.07608695652173914, 'f1': 0.08536585365853659}, 'combined': 0.042682926829268296, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.06896551724137931, 'f1': 0.1176470588235294}, 'combined': 0.07843137254901959, 'epoch': 2} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157035672660673, 'r': 0.21723270034943004, 'f1': 0.2573708812650582}, 'combined': 0.1896417019847797, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3751308493942274, 'r': 0.18416691398215304, 'f1': 0.2470479783626335}, 'combined': 0.15184412328630156, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38920454545454547, 'r': 0.24464285714285713, 'f1': 0.3004385964912281}, 'combined': 0.20029239766081874, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3035099735770597, 'r': 0.2226122301472574, 'f1': 0.2568415915984714}, 'combined': 0.18925169907255787, 'epoch': 1} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.31732975768306243, 'r': 0.1747096418704501, 'f1': 0.22535011777492842}, 'combined': 0.13850787726654137, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36904761904761907, 'r': 0.33695652173913043, 'f1': 0.3522727272727273}, 'combined': 0.17613636363636365, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33181401249024206, 'r': 0.2300074404761905, 'f1': 0.27168664109939283}, 'combined': 0.2001901565995526, 'epoch': 1} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3348279093853188, 'r': 0.16914881432678022, 'f1': 0.2247553952844653}, 'combined': 0.1388195088521698, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5357142857142857, 'r': 0.12931034482758622, 'f1': 0.20833333333333337}, 'combined': 0.1388888888888889, 'epoch': 1} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:55:43.138367: step: 2/466, loss: 0.7288981676101685 2023-01-24 01:55:43.980637: step: 4/466, loss: 1.0514813661575317 2023-01-24 01:55:44.777272: step: 6/466, loss: 1.2421069145202637 2023-01-24 01:55:45.496455: step: 8/466, loss: 0.5665553212165833 2023-01-24 01:55:46.259259: step: 10/466, loss: 1.139402151107788 2023-01-24 01:55:47.020092: step: 12/466, loss: 0.49877578020095825 2023-01-24 01:55:47.752408: step: 14/466, loss: 2.224588394165039 2023-01-24 01:55:48.537856: step: 16/466, loss: 0.5720035433769226 2023-01-24 01:55:49.252723: step: 18/466, loss: 1.1438024044036865 2023-01-24 01:55:49.997082: step: 20/466, loss: 1.1782970428466797 2023-01-24 01:55:50.772930: step: 22/466, loss: 2.60244083404541 2023-01-24 01:55:51.520217: step: 24/466, loss: 0.6441414952278137 2023-01-24 01:55:52.393574: step: 26/466, loss: 0.3326947093009949 2023-01-24 01:55:53.186383: step: 28/466, loss: 0.32072970271110535 2023-01-24 01:55:53.983629: step: 30/466, loss: 0.4882255494594574 2023-01-24 01:55:54.748675: step: 32/466, loss: 1.453972578048706 2023-01-24 01:55:55.558835: step: 34/466, loss: 0.5718610286712646 2023-01-24 01:55:56.408801: step: 36/466, loss: 0.7997777462005615 2023-01-24 01:55:57.084270: step: 38/466, loss: 0.3379814326763153 2023-01-24 01:55:57.781278: step: 40/466, loss: 0.7625579237937927 2023-01-24 01:55:58.495355: step: 42/466, loss: 0.29642653465270996 2023-01-24 01:55:59.244005: step: 44/466, loss: 1.0855971574783325 2023-01-24 01:55:59.959374: step: 46/466, loss: 1.693413496017456 2023-01-24 01:56:00.817734: step: 48/466, loss: 0.2988651394844055 2023-01-24 01:56:01.585290: step: 50/466, loss: 1.3019503355026245 2023-01-24 01:56:02.322858: step: 52/466, loss: 1.0249556303024292 2023-01-24 01:56:03.114258: step: 54/466, loss: 1.5618072748184204 2023-01-24 01:56:03.837003: step: 56/466, loss: 0.41827669739723206 2023-01-24 01:56:04.564346: step: 58/466, loss: 0.3538615107536316 2023-01-24 01:56:05.309254: step: 60/466, loss: 1.5650417804718018 2023-01-24 01:56:06.092999: step: 62/466, loss: 0.6613375544548035 2023-01-24 01:56:06.894092: step: 64/466, loss: 0.43019580841064453 2023-01-24 01:56:07.634456: step: 66/466, loss: 4.515617370605469 2023-01-24 01:56:08.384471: step: 68/466, loss: 1.161965250968933 2023-01-24 01:56:09.153678: step: 70/466, loss: 0.6723357439041138 2023-01-24 01:56:09.894063: step: 72/466, loss: 1.1802687644958496 2023-01-24 01:56:10.616116: step: 74/466, loss: 2.829439640045166 2023-01-24 01:56:11.423770: step: 76/466, loss: 0.6573655605316162 2023-01-24 01:56:12.117570: step: 78/466, loss: 0.21155864000320435 2023-01-24 01:56:13.004831: step: 80/466, loss: 1.0598139762878418 2023-01-24 01:56:13.802956: step: 82/466, loss: 0.7500097751617432 2023-01-24 01:56:14.609891: step: 84/466, loss: 1.260082721710205 2023-01-24 01:56:15.374027: step: 86/466, loss: 0.5332298278808594 2023-01-24 01:56:16.207439: step: 88/466, loss: 1.5100605487823486 2023-01-24 01:56:17.025134: step: 90/466, loss: 1.7919092178344727 2023-01-24 01:56:17.774268: step: 92/466, loss: 1.1008827686309814 2023-01-24 01:56:18.543993: step: 94/466, loss: 1.6091139316558838 2023-01-24 01:56:19.348542: step: 96/466, loss: 0.20046083629131317 2023-01-24 01:56:20.198804: step: 98/466, loss: 0.5788496136665344 2023-01-24 01:56:21.035635: step: 100/466, loss: 0.9448338150978088 2023-01-24 01:56:21.840331: step: 102/466, loss: 0.5384277105331421 2023-01-24 01:56:22.673353: step: 104/466, loss: 1.9115006923675537 2023-01-24 01:56:23.407030: step: 106/466, loss: 0.353412002325058 2023-01-24 01:56:24.162665: step: 108/466, loss: 0.2827959656715393 2023-01-24 01:56:24.894154: step: 110/466, loss: 0.7786230444908142 2023-01-24 01:56:25.619067: step: 112/466, loss: 0.6808529496192932 2023-01-24 01:56:26.341036: step: 114/466, loss: 0.3210010230541229 2023-01-24 01:56:27.090681: step: 116/466, loss: 1.32731032371521 2023-01-24 01:56:27.828937: step: 118/466, loss: 1.5391159057617188 2023-01-24 01:56:28.581923: step: 120/466, loss: 0.3658561408519745 2023-01-24 01:56:29.373838: step: 122/466, loss: 0.2683035433292389 2023-01-24 01:56:30.274911: step: 124/466, loss: 0.6769906282424927 2023-01-24 01:56:31.021272: step: 126/466, loss: 0.7051757574081421 2023-01-24 01:56:31.749410: step: 128/466, loss: 1.3545571565628052 2023-01-24 01:56:32.455615: step: 130/466, loss: 1.796260952949524 2023-01-24 01:56:33.216668: step: 132/466, loss: 0.980597198009491 2023-01-24 01:56:34.021189: step: 134/466, loss: 0.6104364395141602 2023-01-24 01:56:34.769229: step: 136/466, loss: 0.17875078320503235 2023-01-24 01:56:35.521796: step: 138/466, loss: 0.6792482137680054 2023-01-24 01:56:36.272484: step: 140/466, loss: 1.7077827453613281 2023-01-24 01:56:36.987378: step: 142/466, loss: 0.34921565651893616 2023-01-24 01:56:37.741590: step: 144/466, loss: 3.0503482818603516 2023-01-24 01:56:38.513057: step: 146/466, loss: 0.8565284013748169 2023-01-24 01:56:39.265790: step: 148/466, loss: 0.40316522121429443 2023-01-24 01:56:40.003019: step: 150/466, loss: 1.3210734128952026 2023-01-24 01:56:40.808396: step: 152/466, loss: 3.314621686935425 2023-01-24 01:56:41.554410: step: 154/466, loss: 3.0272955894470215 2023-01-24 01:56:42.270068: step: 156/466, loss: 3.5029330253601074 2023-01-24 01:56:43.051795: step: 158/466, loss: 0.9041703939437866 2023-01-24 01:56:43.811594: step: 160/466, loss: 0.19255219399929047 2023-01-24 01:56:44.573748: step: 162/466, loss: 1.6574230194091797 2023-01-24 01:56:45.304974: step: 164/466, loss: 1.0237008333206177 2023-01-24 01:56:46.159406: step: 166/466, loss: 1.1813163757324219 2023-01-24 01:56:46.908769: step: 168/466, loss: 3.462944507598877 2023-01-24 01:56:47.587561: step: 170/466, loss: 0.7057815194129944 2023-01-24 01:56:48.360450: step: 172/466, loss: 0.3181644678115845 2023-01-24 01:56:49.147788: step: 174/466, loss: 1.4715278148651123 2023-01-24 01:56:49.861849: step: 176/466, loss: 1.6747549772262573 2023-01-24 01:56:50.612295: step: 178/466, loss: 0.20198678970336914 2023-01-24 01:56:51.410811: step: 180/466, loss: 0.48431089520454407 2023-01-24 01:56:52.212657: step: 182/466, loss: 1.4130982160568237 2023-01-24 01:56:52.955768: step: 184/466, loss: 0.7763614654541016 2023-01-24 01:56:53.682069: step: 186/466, loss: 1.540745496749878 2023-01-24 01:56:54.382007: step: 188/466, loss: 0.9028275609016418 2023-01-24 01:56:55.211311: step: 190/466, loss: 0.4500337243080139 2023-01-24 01:56:55.947629: step: 192/466, loss: 0.5722788572311401 2023-01-24 01:56:56.589771: step: 194/466, loss: 0.26888906955718994 2023-01-24 01:56:57.311371: step: 196/466, loss: 0.9907680749893188 2023-01-24 01:56:58.024223: step: 198/466, loss: 1.654240369796753 2023-01-24 01:56:58.822753: step: 200/466, loss: 0.21867898106575012 2023-01-24 01:56:59.649779: step: 202/466, loss: 3.248518228530884 2023-01-24 01:57:00.414432: step: 204/466, loss: 0.32449525594711304 2023-01-24 01:57:01.155303: step: 206/466, loss: 0.7105669379234314 2023-01-24 01:57:01.904200: step: 208/466, loss: 2.4783077239990234 2023-01-24 01:57:02.638707: step: 210/466, loss: 2.501128673553467 2023-01-24 01:57:03.349992: step: 212/466, loss: 0.6300548315048218 2023-01-24 01:57:04.143876: step: 214/466, loss: 1.6797388792037964 2023-01-24 01:57:04.862375: step: 216/466, loss: 1.4843556880950928 2023-01-24 01:57:05.690570: step: 218/466, loss: 0.28681495785713196 2023-01-24 01:57:06.435261: step: 220/466, loss: 0.5974239110946655 2023-01-24 01:57:07.147403: step: 222/466, loss: 0.27215713262557983 2023-01-24 01:57:07.922042: step: 224/466, loss: 1.0060501098632812 2023-01-24 01:57:08.640677: step: 226/466, loss: 1.3134772777557373 2023-01-24 01:57:09.553373: step: 228/466, loss: 0.22591619193553925 2023-01-24 01:57:10.406254: step: 230/466, loss: 1.7979905605316162 2023-01-24 01:57:11.159443: step: 232/466, loss: 0.454550176858902 2023-01-24 01:57:11.943210: step: 234/466, loss: 0.7232887744903564 2023-01-24 01:57:12.665853: step: 236/466, loss: 2.3125431537628174 2023-01-24 01:57:13.433548: step: 238/466, loss: 1.082403302192688 2023-01-24 01:57:14.176603: step: 240/466, loss: 0.28135067224502563 2023-01-24 01:57:14.878251: step: 242/466, loss: 0.5144278407096863 2023-01-24 01:57:15.763098: step: 244/466, loss: 0.36738258600234985 2023-01-24 01:57:16.557875: step: 246/466, loss: 0.40780818462371826 2023-01-24 01:57:17.305084: step: 248/466, loss: 5.4448699951171875 2023-01-24 01:57:17.990626: step: 250/466, loss: 3.7832436561584473 2023-01-24 01:57:18.749776: step: 252/466, loss: 0.9051271080970764 2023-01-24 01:57:19.477401: step: 254/466, loss: 0.5934857726097107 2023-01-24 01:57:20.320069: step: 256/466, loss: 0.6028462648391724 2023-01-24 01:57:21.178792: step: 258/466, loss: 0.23693428933620453 2023-01-24 01:57:21.952627: step: 260/466, loss: 0.732874870300293 2023-01-24 01:57:22.822238: step: 262/466, loss: 1.4208934307098389 2023-01-24 01:57:23.530243: step: 264/466, loss: 1.9772591590881348 2023-01-24 01:57:24.204208: step: 266/466, loss: 1.3765772581100464 2023-01-24 01:57:24.991298: step: 268/466, loss: 2.3647990226745605 2023-01-24 01:57:25.802579: step: 270/466, loss: 0.6535061001777649 2023-01-24 01:57:26.494685: step: 272/466, loss: 1.8163260221481323 2023-01-24 01:57:27.267810: step: 274/466, loss: 1.1725115776062012 2023-01-24 01:57:28.038131: step: 276/466, loss: 0.42887893319129944 2023-01-24 01:57:28.782021: step: 278/466, loss: 0.5292885899543762 2023-01-24 01:57:29.641298: step: 280/466, loss: 0.70928555727005 2023-01-24 01:57:30.368473: step: 282/466, loss: 0.512312650680542 2023-01-24 01:57:31.090805: step: 284/466, loss: 0.6690589189529419 2023-01-24 01:57:31.859042: step: 286/466, loss: 1.792073130607605 2023-01-24 01:57:32.631227: step: 288/466, loss: 2.455967426300049 2023-01-24 01:57:33.388807: step: 290/466, loss: 0.4556499421596527 2023-01-24 01:57:34.273385: step: 292/466, loss: 1.3838729858398438 2023-01-24 01:57:35.092084: step: 294/466, loss: 0.44971323013305664 2023-01-24 01:57:35.818566: step: 296/466, loss: 3.103240728378296 2023-01-24 01:57:36.503870: step: 298/466, loss: 0.699164867401123 2023-01-24 01:57:37.330452: step: 300/466, loss: 1.1217217445373535 2023-01-24 01:57:38.058063: step: 302/466, loss: 0.5284570455551147 2023-01-24 01:57:38.815926: step: 304/466, loss: 0.3365297317504883 2023-01-24 01:57:39.529723: step: 306/466, loss: 3.434846878051758 2023-01-24 01:57:40.376772: step: 308/466, loss: 1.6268279552459717 2023-01-24 01:57:41.264040: step: 310/466, loss: 0.2957576811313629 2023-01-24 01:57:42.026010: step: 312/466, loss: 0.7529169917106628 2023-01-24 01:57:42.807536: step: 314/466, loss: 1.4831832647323608 2023-01-24 01:57:43.535573: step: 316/466, loss: 0.6919432878494263 2023-01-24 01:57:44.286252: step: 318/466, loss: 1.175722599029541 2023-01-24 01:57:45.085688: step: 320/466, loss: 2.596788167953491 2023-01-24 01:57:45.806028: step: 322/466, loss: 0.6876751780509949 2023-01-24 01:57:46.574962: step: 324/466, loss: 1.394080638885498 2023-01-24 01:57:47.377411: step: 326/466, loss: 1.1835769414901733 2023-01-24 01:57:48.136049: step: 328/466, loss: 0.8824508190155029 2023-01-24 01:57:48.893278: step: 330/466, loss: 0.8156054019927979 2023-01-24 01:57:49.679883: step: 332/466, loss: 0.44112730026245117 2023-01-24 01:57:50.443355: step: 334/466, loss: 3.6188578605651855 2023-01-24 01:57:51.304910: step: 336/466, loss: 8.541708946228027 2023-01-24 01:57:52.057247: step: 338/466, loss: 0.5330957174301147 2023-01-24 01:57:52.928364: step: 340/466, loss: 1.2214475870132446 2023-01-24 01:57:53.725606: step: 342/466, loss: 0.2562291622161865 2023-01-24 01:57:54.484886: step: 344/466, loss: 0.5281143188476562 2023-01-24 01:57:55.282189: step: 346/466, loss: 0.6365292072296143 2023-01-24 01:57:56.067961: step: 348/466, loss: 0.5487217903137207 2023-01-24 01:57:56.844947: step: 350/466, loss: 0.427787184715271 2023-01-24 01:57:57.606774: step: 352/466, loss: 0.3734011650085449 2023-01-24 01:57:58.335560: step: 354/466, loss: 1.6304538249969482 2023-01-24 01:57:59.123252: step: 356/466, loss: 0.672451376914978 2023-01-24 01:57:59.898244: step: 358/466, loss: 0.26452600955963135 2023-01-24 01:58:00.600848: step: 360/466, loss: 1.5289206504821777 2023-01-24 01:58:01.409349: step: 362/466, loss: 0.3662125766277313 2023-01-24 01:58:02.142831: step: 364/466, loss: 1.2627530097961426 2023-01-24 01:58:02.822940: step: 366/466, loss: 0.7326240539550781 2023-01-24 01:58:03.568933: step: 368/466, loss: 2.4031803607940674 2023-01-24 01:58:04.318411: step: 370/466, loss: 0.7940996289253235 2023-01-24 01:58:05.029038: step: 372/466, loss: 0.6374187469482422 2023-01-24 01:58:05.815081: step: 374/466, loss: 0.5218483209609985 2023-01-24 01:58:06.544362: step: 376/466, loss: 0.6916596293449402 2023-01-24 01:58:07.418022: step: 378/466, loss: 2.400697708129883 2023-01-24 01:58:08.176531: step: 380/466, loss: 4.564194679260254 2023-01-24 01:58:08.876923: step: 382/466, loss: 0.934001088142395 2023-01-24 01:58:09.738903: step: 384/466, loss: 1.4111979007720947 2023-01-24 01:58:10.524306: step: 386/466, loss: 0.8816977143287659 2023-01-24 01:58:11.359759: step: 388/466, loss: 0.3229823112487793 2023-01-24 01:58:12.066247: step: 390/466, loss: 1.5400192737579346 2023-01-24 01:58:12.838279: step: 392/466, loss: 0.622016191482544 2023-01-24 01:58:13.624913: step: 394/466, loss: 2.1396517753601074 2023-01-24 01:58:14.395721: step: 396/466, loss: 0.7787021398544312 2023-01-24 01:58:15.109370: step: 398/466, loss: 2.187607526779175 2023-01-24 01:58:15.832384: step: 400/466, loss: 0.5074290037155151 2023-01-24 01:58:16.543995: step: 402/466, loss: 1.3675506114959717 2023-01-24 01:58:17.312405: step: 404/466, loss: 0.653610110282898 2023-01-24 01:58:18.155759: step: 406/466, loss: 1.8830668926239014 2023-01-24 01:58:18.861823: step: 408/466, loss: 1.3166179656982422 2023-01-24 01:58:19.588323: step: 410/466, loss: 1.8001898527145386 2023-01-24 01:58:20.338856: step: 412/466, loss: 2.1397910118103027 2023-01-24 01:58:21.053658: step: 414/466, loss: 1.96988844871521 2023-01-24 01:58:21.836392: step: 416/466, loss: 2.072577476501465 2023-01-24 01:58:22.638676: step: 418/466, loss: 7.2129034996032715 2023-01-24 01:58:23.400931: step: 420/466, loss: 1.306861162185669 2023-01-24 01:58:24.165886: step: 422/466, loss: 1.119315505027771 2023-01-24 01:58:24.977397: step: 424/466, loss: 1.3532315492630005 2023-01-24 01:58:25.742708: step: 426/466, loss: 1.7584383487701416 2023-01-24 01:58:26.550017: step: 428/466, loss: 1.0544145107269287 2023-01-24 01:58:27.317184: step: 430/466, loss: 2.955805540084839 2023-01-24 01:58:28.023820: step: 432/466, loss: 0.6103273034095764 2023-01-24 01:58:28.865829: step: 434/466, loss: 0.7436304092407227 2023-01-24 01:58:29.642034: step: 436/466, loss: 1.3584591150283813 2023-01-24 01:58:30.273705: step: 438/466, loss: 1.8201582431793213 2023-01-24 01:58:31.002248: step: 440/466, loss: 0.9806408882141113 2023-01-24 01:58:31.762444: step: 442/466, loss: 1.0120773315429688 2023-01-24 01:58:32.517132: step: 444/466, loss: 1.465714931488037 2023-01-24 01:58:33.286540: step: 446/466, loss: 1.6572892665863037 2023-01-24 01:58:34.129424: step: 448/466, loss: 1.9525045156478882 2023-01-24 01:58:34.896473: step: 450/466, loss: 1.0817575454711914 2023-01-24 01:58:35.579025: step: 452/466, loss: 1.029548168182373 2023-01-24 01:58:36.306218: step: 454/466, loss: 0.854709267616272 2023-01-24 01:58:37.044619: step: 456/466, loss: 1.9108412265777588 2023-01-24 01:58:37.893800: step: 458/466, loss: 0.26732340455055237 2023-01-24 01:58:38.683546: step: 460/466, loss: 0.5370470285415649 2023-01-24 01:58:39.463833: step: 462/466, loss: 7.576594829559326 2023-01-24 01:58:40.241477: step: 464/466, loss: 2.7582592964172363 2023-01-24 01:58:40.988365: step: 466/466, loss: 1.000118613243103 2023-01-24 01:58:41.853845: step: 468/466, loss: 0.4005153775215149 2023-01-24 01:58:42.600656: step: 470/466, loss: 0.6367888450622559 2023-01-24 01:58:43.373719: step: 472/466, loss: 3.750797748565674 2023-01-24 01:58:44.127381: step: 474/466, loss: 2.3002090454101562 2023-01-24 01:58:44.927220: step: 476/466, loss: 1.1184279918670654 2023-01-24 01:58:45.677202: step: 478/466, loss: 0.9593591690063477 2023-01-24 01:58:46.473398: step: 480/466, loss: 1.2366538047790527 2023-01-24 01:58:47.236026: step: 482/466, loss: 0.5184550881385803 2023-01-24 01:58:48.013379: step: 484/466, loss: 1.9456372261047363 2023-01-24 01:58:48.775853: step: 486/466, loss: 1.1863420009613037 2023-01-24 01:58:49.551494: step: 488/466, loss: 2.484473705291748 2023-01-24 01:58:50.297068: step: 490/466, loss: 4.424013137817383 2023-01-24 01:58:50.997658: step: 492/466, loss: 1.9624214172363281 2023-01-24 01:58:51.742846: step: 494/466, loss: 0.7419807314872742 2023-01-24 01:58:52.515981: step: 496/466, loss: 2.0698354244232178 2023-01-24 01:58:53.242213: step: 498/466, loss: 0.25887054204940796 2023-01-24 01:58:54.080587: step: 500/466, loss: 0.45883864164352417 2023-01-24 01:58:54.869649: step: 502/466, loss: 1.2493901252746582 2023-01-24 01:58:55.566154: step: 504/466, loss: 0.8802942633628845 2023-01-24 01:58:56.412681: step: 506/466, loss: 0.30477413535118103 2023-01-24 01:58:57.186985: step: 508/466, loss: 2.7209062576293945 2023-01-24 01:58:57.875881: step: 510/466, loss: 1.2075614929199219 2023-01-24 01:58:58.642661: step: 512/466, loss: 0.5372397899627686 2023-01-24 01:58:59.656514: step: 514/466, loss: 1.5385627746582031 2023-01-24 01:59:00.394297: step: 516/466, loss: 1.5622609853744507 2023-01-24 01:59:01.077546: step: 518/466, loss: 1.6847238540649414 2023-01-24 01:59:01.895361: step: 520/466, loss: 0.8336131572723389 2023-01-24 01:59:02.639890: step: 522/466, loss: 1.57077956199646 2023-01-24 01:59:03.588593: step: 524/466, loss: 1.0002378225326538 2023-01-24 01:59:04.435150: step: 526/466, loss: 0.6085427403450012 2023-01-24 01:59:05.211973: step: 528/466, loss: 0.9000792503356934 2023-01-24 01:59:06.028892: step: 530/466, loss: 2.0616867542266846 2023-01-24 01:59:06.793444: step: 532/466, loss: 0.26455068588256836 2023-01-24 01:59:07.583645: step: 534/466, loss: 5.376952171325684 2023-01-24 01:59:08.352224: step: 536/466, loss: 1.079664945602417 2023-01-24 01:59:09.079363: step: 538/466, loss: 0.7638109922409058 2023-01-24 01:59:09.807947: step: 540/466, loss: 1.0749095678329468 2023-01-24 01:59:10.564641: step: 542/466, loss: 1.0012367963790894 2023-01-24 01:59:11.414970: step: 544/466, loss: 0.4597180187702179 2023-01-24 01:59:12.110628: step: 546/466, loss: 1.7872309684753418 2023-01-24 01:59:12.908707: step: 548/466, loss: 0.38119691610336304 2023-01-24 01:59:13.654118: step: 550/466, loss: 0.742358922958374 2023-01-24 01:59:14.435669: step: 552/466, loss: 1.5680882930755615 2023-01-24 01:59:15.221009: step: 554/466, loss: 0.574269711971283 2023-01-24 01:59:15.990703: step: 556/466, loss: 0.5157495141029358 2023-01-24 01:59:16.732156: step: 558/466, loss: 0.2620672285556793 2023-01-24 01:59:17.479025: step: 560/466, loss: 0.592666506767273 2023-01-24 01:59:18.285395: step: 562/466, loss: 0.42480170726776123 2023-01-24 01:59:19.148187: step: 564/466, loss: 0.9412723183631897 2023-01-24 01:59:19.960642: step: 566/466, loss: 0.24113863706588745 2023-01-24 01:59:20.713285: step: 568/466, loss: 0.7110223770141602 2023-01-24 01:59:21.480660: step: 570/466, loss: 0.46712177991867065 2023-01-24 01:59:22.235150: step: 572/466, loss: 0.3590967655181885 2023-01-24 01:59:23.001849: step: 574/466, loss: 0.5464659929275513 2023-01-24 01:59:23.748485: step: 576/466, loss: 2.006821632385254 2023-01-24 01:59:24.515369: step: 578/466, loss: 0.6044853925704956 2023-01-24 01:59:25.287803: step: 580/466, loss: 1.7315723896026611 2023-01-24 01:59:26.024700: step: 582/466, loss: 0.8711855411529541 2023-01-24 01:59:26.768499: step: 584/466, loss: 0.8170568346977234 2023-01-24 01:59:27.611865: step: 586/466, loss: 1.6933026313781738 2023-01-24 01:59:28.336033: step: 588/466, loss: 1.312509536743164 2023-01-24 01:59:29.113988: step: 590/466, loss: 1.0134602785110474 2023-01-24 01:59:29.846828: step: 592/466, loss: 0.5736547708511353 2023-01-24 01:59:30.605630: step: 594/466, loss: 1.3567414283752441 2023-01-24 01:59:31.358243: step: 596/466, loss: 1.5852487087249756 2023-01-24 01:59:32.136510: step: 598/466, loss: 0.8714603185653687 2023-01-24 01:59:32.822524: step: 600/466, loss: 0.5756778120994568 2023-01-24 01:59:33.512487: step: 602/466, loss: 1.272370457649231 2023-01-24 01:59:34.300972: step: 604/466, loss: 1.356710433959961 2023-01-24 01:59:35.101979: step: 606/466, loss: 0.39522653818130493 2023-01-24 01:59:35.854784: step: 608/466, loss: 0.75468909740448 2023-01-24 01:59:36.629239: step: 610/466, loss: 0.3983873724937439 2023-01-24 01:59:37.530891: step: 612/466, loss: 1.1683955192565918 2023-01-24 01:59:38.346212: step: 614/466, loss: 1.212808609008789 2023-01-24 01:59:39.136228: step: 616/466, loss: 0.7545867562294006 2023-01-24 01:59:39.870769: step: 618/466, loss: 1.3609846830368042 2023-01-24 01:59:40.616823: step: 620/466, loss: 0.9695680737495422 2023-01-24 01:59:41.348301: step: 622/466, loss: 0.775894045829773 2023-01-24 01:59:42.094281: step: 624/466, loss: 1.3027985095977783 2023-01-24 01:59:42.941462: step: 626/466, loss: 0.5976076126098633 2023-01-24 01:59:43.684355: step: 628/466, loss: 0.483690470457077 2023-01-24 01:59:44.464154: step: 630/466, loss: 1.851898193359375 2023-01-24 01:59:45.199159: step: 632/466, loss: 1.1140954494476318 2023-01-24 01:59:45.978608: step: 634/466, loss: 0.3627002537250519 2023-01-24 01:59:46.750867: step: 636/466, loss: 4.963777542114258 2023-01-24 01:59:47.514291: step: 638/466, loss: 0.44731274247169495 2023-01-24 01:59:48.301849: step: 640/466, loss: 0.9005690813064575 2023-01-24 01:59:49.023749: step: 642/466, loss: 0.2202322781085968 2023-01-24 01:59:49.741670: step: 644/466, loss: 0.3271179795265198 2023-01-24 01:59:50.519246: step: 646/466, loss: 0.7696815133094788 2023-01-24 01:59:51.355244: step: 648/466, loss: 4.6255784034729 2023-01-24 01:59:52.063447: step: 650/466, loss: 0.17914696037769318 2023-01-24 01:59:52.793554: step: 652/466, loss: 0.9905416965484619 2023-01-24 01:59:53.541725: step: 654/466, loss: 0.5770795941352844 2023-01-24 01:59:54.334957: step: 656/466, loss: 2.672057628631592 2023-01-24 01:59:55.113577: step: 658/466, loss: 0.6635293960571289 2023-01-24 01:59:55.834654: step: 660/466, loss: 0.348417729139328 2023-01-24 01:59:56.600865: step: 662/466, loss: 1.1955702304840088 2023-01-24 01:59:57.390619: step: 664/466, loss: 0.9019771814346313 2023-01-24 01:59:58.134293: step: 666/466, loss: 1.5625412464141846 2023-01-24 01:59:58.910361: step: 668/466, loss: 0.8455973267555237 2023-01-24 01:59:59.681334: step: 670/466, loss: 3.6101083755493164 2023-01-24 02:00:00.471644: step: 672/466, loss: 0.7789993286132812 2023-01-24 02:00:01.156273: step: 674/466, loss: 3.386035919189453 2023-01-24 02:00:02.007966: step: 676/466, loss: 0.23694761097431183 2023-01-24 02:00:02.768634: step: 678/466, loss: 0.9110981225967407 2023-01-24 02:00:03.551409: step: 680/466, loss: 1.3185884952545166 2023-01-24 02:00:04.397256: step: 682/466, loss: 3.4931812286376953 2023-01-24 02:00:05.202288: step: 684/466, loss: 0.4314889907836914 2023-01-24 02:00:06.042169: step: 686/466, loss: 0.441946804523468 2023-01-24 02:00:06.781397: step: 688/466, loss: 2.5487897396087646 2023-01-24 02:00:07.491281: step: 690/466, loss: 1.069916844367981 2023-01-24 02:00:08.220452: step: 692/466, loss: 1.4144566059112549 2023-01-24 02:00:09.030628: step: 694/466, loss: 2.8627562522888184 2023-01-24 02:00:09.798725: step: 696/466, loss: 0.24165946245193481 2023-01-24 02:00:10.488119: step: 698/466, loss: 0.5185352563858032 2023-01-24 02:00:11.314828: step: 700/466, loss: 0.8233320713043213 2023-01-24 02:00:12.060705: step: 702/466, loss: 0.9007467031478882 2023-01-24 02:00:12.778359: step: 704/466, loss: 0.9488805532455444 2023-01-24 02:00:13.570652: step: 706/466, loss: 2.8228328227996826 2023-01-24 02:00:14.326776: step: 708/466, loss: 0.9803735017776489 2023-01-24 02:00:15.099219: step: 710/466, loss: 0.8610438108444214 2023-01-24 02:00:15.836417: step: 712/466, loss: 1.4700127840042114 2023-01-24 02:00:16.560338: step: 714/466, loss: 0.8979735374450684 2023-01-24 02:00:17.546453: step: 716/466, loss: 0.7316739559173584 2023-01-24 02:00:18.289757: step: 718/466, loss: 0.3060496747493744 2023-01-24 02:00:19.052047: step: 720/466, loss: 0.6991862058639526 2023-01-24 02:00:19.823900: step: 722/466, loss: 1.6871004104614258 2023-01-24 02:00:20.664324: step: 724/466, loss: 1.376437783241272 2023-01-24 02:00:21.470090: step: 726/466, loss: 0.32766085863113403 2023-01-24 02:00:22.212254: step: 728/466, loss: 1.1724035739898682 2023-01-24 02:00:22.967412: step: 730/466, loss: 1.0425370931625366 2023-01-24 02:00:23.708784: step: 732/466, loss: 1.9704253673553467 2023-01-24 02:00:24.654239: step: 734/466, loss: 1.1451716423034668 2023-01-24 02:00:25.395124: step: 736/466, loss: 0.4858511686325073 2023-01-24 02:00:26.174520: step: 738/466, loss: 0.5664715766906738 2023-01-24 02:00:26.930591: step: 740/466, loss: 0.7027938961982727 2023-01-24 02:00:27.710246: step: 742/466, loss: 2.55523419380188 2023-01-24 02:00:28.503249: step: 744/466, loss: 0.21969053149223328 2023-01-24 02:00:29.382957: step: 746/466, loss: 0.8720443844795227 2023-01-24 02:00:30.184269: step: 748/466, loss: 1.728029727935791 2023-01-24 02:00:30.989232: step: 750/466, loss: 0.46208715438842773 2023-01-24 02:00:31.700506: step: 752/466, loss: 1.1562113761901855 2023-01-24 02:00:32.474709: step: 754/466, loss: 0.8755573034286499 2023-01-24 02:00:33.224117: step: 756/466, loss: 7.363640785217285 2023-01-24 02:00:33.954952: step: 758/466, loss: 1.521296739578247 2023-01-24 02:00:34.878173: step: 760/466, loss: 1.4269208908081055 2023-01-24 02:00:35.642002: step: 762/466, loss: 0.3243265151977539 2023-01-24 02:00:36.413759: step: 764/466, loss: 0.6574571132659912 2023-01-24 02:00:37.187346: step: 766/466, loss: 1.3891175985336304 2023-01-24 02:00:37.988787: step: 768/466, loss: 0.5049048662185669 2023-01-24 02:00:38.696947: step: 770/466, loss: 1.242082118988037 2023-01-24 02:00:39.428428: step: 772/466, loss: 3.593531608581543 2023-01-24 02:00:40.273447: step: 774/466, loss: 0.6223217844963074 2023-01-24 02:00:41.028893: step: 776/466, loss: 0.6413542628288269 2023-01-24 02:00:41.762588: step: 778/466, loss: 0.25577762722969055 2023-01-24 02:00:42.558924: step: 780/466, loss: 0.6415407657623291 2023-01-24 02:00:43.360266: step: 782/466, loss: 1.7713786363601685 2023-01-24 02:00:44.135138: step: 784/466, loss: 0.19394361972808838 2023-01-24 02:00:44.890254: step: 786/466, loss: 1.2882275581359863 2023-01-24 02:00:45.652696: step: 788/466, loss: 4.727072715759277 2023-01-24 02:00:46.438798: step: 790/466, loss: 7.86380672454834 2023-01-24 02:00:47.222140: step: 792/466, loss: 1.1790575981140137 2023-01-24 02:00:47.932786: step: 794/466, loss: 1.2071864604949951 2023-01-24 02:00:48.657940: step: 796/466, loss: 0.38711491227149963 2023-01-24 02:00:49.463731: step: 798/466, loss: 4.495429992675781 2023-01-24 02:00:50.165147: step: 800/466, loss: 0.09176217019557953 2023-01-24 02:00:50.888917: step: 802/466, loss: 0.8673193454742432 2023-01-24 02:00:51.582094: step: 804/466, loss: 0.9799995422363281 2023-01-24 02:00:52.346305: step: 806/466, loss: 1.3106269836425781 2023-01-24 02:00:53.095449: step: 808/466, loss: 2.1999921798706055 2023-01-24 02:00:53.972984: step: 810/466, loss: 4.346255302429199 2023-01-24 02:00:54.767365: step: 812/466, loss: 1.0042102336883545 2023-01-24 02:00:55.603346: step: 814/466, loss: 1.5868946313858032 2023-01-24 02:00:56.422064: step: 816/466, loss: 0.7256672978401184 2023-01-24 02:00:57.138768: step: 818/466, loss: 0.5786141753196716 2023-01-24 02:00:57.866235: step: 820/466, loss: 0.5950932502746582 2023-01-24 02:00:58.670146: step: 822/466, loss: 0.7020928263664246 2023-01-24 02:00:59.430958: step: 824/466, loss: 0.9810025095939636 2023-01-24 02:01:00.143802: step: 826/466, loss: 2.0820469856262207 2023-01-24 02:01:00.886632: step: 828/466, loss: 0.6656241416931152 2023-01-24 02:01:01.606478: step: 830/466, loss: 0.782184362411499 2023-01-24 02:01:02.363503: step: 832/466, loss: 0.6237660050392151 2023-01-24 02:01:03.079196: step: 834/466, loss: 1.8134608268737793 2023-01-24 02:01:03.885149: step: 836/466, loss: 0.722774088382721 2023-01-24 02:01:04.595091: step: 838/466, loss: 0.8320057392120361 2023-01-24 02:01:05.317273: step: 840/466, loss: 9.907206535339355 2023-01-24 02:01:06.133252: step: 842/466, loss: 2.7266483306884766 2023-01-24 02:01:06.893568: step: 844/466, loss: 0.4295651316642761 2023-01-24 02:01:07.651872: step: 846/466, loss: 1.887283205986023 2023-01-24 02:01:08.333341: step: 848/466, loss: 1.4896022081375122 2023-01-24 02:01:09.051366: step: 850/466, loss: 0.31836339831352234 2023-01-24 02:01:09.806128: step: 852/466, loss: 1.7824702262878418 2023-01-24 02:01:10.526868: step: 854/466, loss: 3.638576030731201 2023-01-24 02:01:11.252469: step: 856/466, loss: 0.7391470670700073 2023-01-24 02:01:11.963461: step: 858/466, loss: 1.0898805856704712 2023-01-24 02:01:12.761611: step: 860/466, loss: 0.36856454610824585 2023-01-24 02:01:13.582305: step: 862/466, loss: 2.7287936210632324 2023-01-24 02:01:14.350257: step: 864/466, loss: 0.2533276677131653 2023-01-24 02:01:15.091724: step: 866/466, loss: 0.35639724135398865 2023-01-24 02:01:15.871806: step: 868/466, loss: 0.5558145642280579 2023-01-24 02:01:16.606680: step: 870/466, loss: 0.34684237837791443 2023-01-24 02:01:17.399866: step: 872/466, loss: 1.4609794616699219 2023-01-24 02:01:18.147606: step: 874/466, loss: 0.4918462038040161 2023-01-24 02:01:18.874471: step: 876/466, loss: 0.9814804792404175 2023-01-24 02:01:19.623974: step: 878/466, loss: 0.5223897099494934 2023-01-24 02:01:20.386245: step: 880/466, loss: 1.834208369255066 2023-01-24 02:01:21.232191: step: 882/466, loss: 1.3285759687423706 2023-01-24 02:01:21.994650: step: 884/466, loss: 0.3768533766269684 2023-01-24 02:01:22.743052: step: 886/466, loss: 1.938004970550537 2023-01-24 02:01:23.497130: step: 888/466, loss: 0.7764403820037842 2023-01-24 02:01:24.234618: step: 890/466, loss: 0.6696011424064636 2023-01-24 02:01:25.065024: step: 892/466, loss: 0.8011062145233154 2023-01-24 02:01:25.743113: step: 894/466, loss: 1.0915991067886353 2023-01-24 02:01:26.489894: step: 896/466, loss: 1.013563871383667 2023-01-24 02:01:27.286216: step: 898/466, loss: 1.0057300329208374 2023-01-24 02:01:27.984720: step: 900/466, loss: 4.397829532623291 2023-01-24 02:01:28.700368: step: 902/466, loss: 1.8095632791519165 2023-01-24 02:01:29.441082: step: 904/466, loss: 3.2510788440704346 2023-01-24 02:01:30.281897: step: 906/466, loss: 1.2703807353973389 2023-01-24 02:01:31.021764: step: 908/466, loss: 0.4521823525428772 2023-01-24 02:01:31.732092: step: 910/466, loss: 0.5130724310874939 2023-01-24 02:01:32.480832: step: 912/466, loss: 0.23265279829502106 2023-01-24 02:01:33.317249: step: 914/466, loss: 1.25602388381958 2023-01-24 02:01:34.071359: step: 916/466, loss: 0.9182683229446411 2023-01-24 02:01:34.822955: step: 918/466, loss: 2.160057306289673 2023-01-24 02:01:35.608180: step: 920/466, loss: 0.5430825352668762 2023-01-24 02:01:36.432943: step: 922/466, loss: 1.0246845483779907 2023-01-24 02:01:37.225199: step: 924/466, loss: 0.8522800207138062 2023-01-24 02:01:37.966187: step: 926/466, loss: 1.536653995513916 2023-01-24 02:01:38.703255: step: 928/466, loss: 1.9280500411987305 2023-01-24 02:01:39.537024: step: 930/466, loss: 0.48116955161094666 2023-01-24 02:01:40.261494: step: 932/466, loss: 0.2521522641181946 ================================================== Loss: 1.280 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3266931007767504, 'r': 0.25106395790243624, 'f1': 0.28392855325018}, 'combined': 0.20921051292118525, 'epoch': 3} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3497748049075708, 'r': 0.24729108938149777, 'f1': 0.28973750928039793}, 'combined': 0.17808256667965922, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33271615385966297, 'r': 0.2746328784230615, 'f1': 0.3008971453824395}, 'combined': 0.2217136860712712, 'epoch': 3} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3370225761547367, 'r': 0.25364155817873785, 'f1': 0.2894468324329921}, 'combined': 0.1779039067636927, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34479203776273604, 'r': 0.26235599078341015, 'f1': 0.2979776016009853}, 'combined': 0.21956244328493651, 'epoch': 3} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3495157774949864, 'r': 0.24529542897660236, 'f1': 0.2882750749882467}, 'combined': 0.17805225219862303, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2916666666666667, 'r': 0.25, 'f1': 0.2692307692307692}, 'combined': 0.17948717948717946, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.10344827586206896, 'f1': 0.15384615384615385}, 'combined': 0.10256410256410256, 'epoch': 3} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157035672660673, 'r': 0.21723270034943004, 'f1': 0.2573708812650582}, 'combined': 0.1896417019847797, 'epoch': 2} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3751308493942274, 'r': 0.18416691398215304, 'f1': 0.2470479783626335}, 'combined': 0.15184412328630156, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38920454545454547, 'r': 0.24464285714285713, 'f1': 0.3004385964912281}, 'combined': 0.20029239766081874, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33271615385966297, 'r': 0.2746328784230615, 'f1': 0.3008971453824395}, 'combined': 0.2217136860712712, 'epoch': 3} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3370225761547367, 'r': 0.25364155817873785, 'f1': 0.2894468324329921}, 'combined': 0.1779039067636927, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33181401249024206, 'r': 0.2300074404761905, 'f1': 0.27168664109939283}, 'combined': 0.2001901565995526, 'epoch': 1} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3348279093853188, 'r': 0.16914881432678022, 'f1': 0.2247553952844653}, 'combined': 0.1388195088521698, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5357142857142857, 'r': 0.12931034482758622, 'f1': 0.20833333333333337}, 'combined': 0.1388888888888889, 'epoch': 1} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:04:33.167824: step: 2/466, loss: 1.2397675514221191 2023-01-24 02:04:33.923873: step: 4/466, loss: 1.0649540424346924 2023-01-24 02:04:34.610553: step: 6/466, loss: 0.5727893710136414 2023-01-24 02:04:35.290982: step: 8/466, loss: 0.516141951084137 2023-01-24 02:04:36.025713: step: 10/466, loss: 0.28951722383499146 2023-01-24 02:04:36.821007: step: 12/466, loss: 0.19822120666503906 2023-01-24 02:04:37.748027: step: 14/466, loss: 0.9063112139701843 2023-01-24 02:04:38.397511: step: 16/466, loss: 0.7773525714874268 2023-01-24 02:04:39.176418: step: 18/466, loss: 0.7677595019340515 2023-01-24 02:04:39.899865: step: 20/466, loss: 0.351625919342041 2023-01-24 02:04:40.588609: step: 22/466, loss: 0.45544004440307617 2023-01-24 02:04:41.376302: step: 24/466, loss: 0.7318277359008789 2023-01-24 02:04:42.133625: step: 26/466, loss: 4.886200904846191 2023-01-24 02:04:43.038608: step: 28/466, loss: 0.8121609687805176 2023-01-24 02:04:43.721982: step: 30/466, loss: 0.7934070825576782 2023-01-24 02:04:44.452462: step: 32/466, loss: 0.6122418642044067 2023-01-24 02:04:45.234452: step: 34/466, loss: 1.1905437707901 2023-01-24 02:04:45.995409: step: 36/466, loss: 1.7918663024902344 2023-01-24 02:04:46.670460: step: 38/466, loss: 0.3897348940372467 2023-01-24 02:04:47.426821: step: 40/466, loss: 2.976102828979492 2023-01-24 02:04:48.196641: step: 42/466, loss: 1.1098968982696533 2023-01-24 02:04:48.915850: step: 44/466, loss: 0.8799930810928345 2023-01-24 02:04:49.670775: step: 46/466, loss: 0.32722756266593933 2023-01-24 02:04:50.476080: step: 48/466, loss: 0.7166779637336731 2023-01-24 02:04:51.248533: step: 50/466, loss: 0.5385745763778687 2023-01-24 02:04:52.007582: step: 52/466, loss: 6.414247512817383 2023-01-24 02:04:52.717534: step: 54/466, loss: 0.9391055703163147 2023-01-24 02:04:53.541654: step: 56/466, loss: 1.185863971710205 2023-01-24 02:04:54.324215: step: 58/466, loss: 0.2553609311580658 2023-01-24 02:04:55.073965: step: 60/466, loss: 2.96231746673584 2023-01-24 02:04:55.832891: step: 62/466, loss: 0.5796050429344177 2023-01-24 02:04:56.608389: step: 64/466, loss: 1.1674344539642334 2023-01-24 02:04:57.342760: step: 66/466, loss: 0.18244606256484985 2023-01-24 02:04:58.058054: step: 68/466, loss: 1.3118362426757812 2023-01-24 02:04:58.840833: step: 70/466, loss: 1.1825385093688965 2023-01-24 02:04:59.545936: step: 72/466, loss: 0.4374426603317261 2023-01-24 02:05:00.252802: step: 74/466, loss: 0.6698932647705078 2023-01-24 02:05:00.944094: step: 76/466, loss: 0.5556724071502686 2023-01-24 02:05:01.644259: step: 78/466, loss: 0.7124918699264526 2023-01-24 02:05:02.487123: step: 80/466, loss: 0.8932681083679199 2023-01-24 02:05:03.271965: step: 82/466, loss: 0.7622426748275757 2023-01-24 02:05:04.145971: step: 84/466, loss: 0.9120004177093506 2023-01-24 02:05:04.879406: step: 86/466, loss: 0.16653411090373993 2023-01-24 02:05:05.682186: step: 88/466, loss: 1.4499459266662598 2023-01-24 02:05:06.465417: step: 90/466, loss: 0.7019395232200623 2023-01-24 02:05:07.227082: step: 92/466, loss: 0.962942361831665 2023-01-24 02:05:07.984921: step: 94/466, loss: 0.7560228705406189 2023-01-24 02:05:08.882116: step: 96/466, loss: 0.6426159143447876 2023-01-24 02:05:09.598289: step: 98/466, loss: 0.941290020942688 2023-01-24 02:05:10.440699: step: 100/466, loss: 1.404322624206543 2023-01-24 02:05:11.224747: step: 102/466, loss: 0.30239057540893555 2023-01-24 02:05:11.941997: step: 104/466, loss: 0.9652661681175232 2023-01-24 02:05:12.750491: step: 106/466, loss: 0.870488166809082 2023-01-24 02:05:13.549113: step: 108/466, loss: 1.636003851890564 2023-01-24 02:05:14.329130: step: 110/466, loss: 1.6018774509429932 2023-01-24 02:05:15.081813: step: 112/466, loss: 0.7858424782752991 2023-01-24 02:05:15.853826: step: 114/466, loss: 0.997367262840271 2023-01-24 02:05:16.630522: step: 116/466, loss: 0.9120631217956543 2023-01-24 02:05:17.351991: step: 118/466, loss: 2.7179312705993652 2023-01-24 02:05:18.159757: step: 120/466, loss: 2.083798408508301 2023-01-24 02:05:18.918901: step: 122/466, loss: 0.18698270618915558 2023-01-24 02:05:19.689635: step: 124/466, loss: 0.3793947398662567 2023-01-24 02:05:20.460519: step: 126/466, loss: 0.8133951425552368 2023-01-24 02:05:21.250558: step: 128/466, loss: 0.3822079002857208 2023-01-24 02:05:22.038406: step: 130/466, loss: 0.2132749706506729 2023-01-24 02:05:22.810890: step: 132/466, loss: 1.0254547595977783 2023-01-24 02:05:23.580555: step: 134/466, loss: 2.9349710941314697 2023-01-24 02:05:24.362324: step: 136/466, loss: 0.7198674082756042 2023-01-24 02:05:25.082723: step: 138/466, loss: 0.6288474202156067 2023-01-24 02:05:25.860509: step: 140/466, loss: 0.686730146408081 2023-01-24 02:05:26.653503: step: 142/466, loss: 0.36508676409721375 2023-01-24 02:05:27.337306: step: 144/466, loss: 0.6521004438400269 2023-01-24 02:05:28.045175: step: 146/466, loss: 0.9777683615684509 2023-01-24 02:05:28.795804: step: 148/466, loss: 1.2292046546936035 2023-01-24 02:05:29.586232: step: 150/466, loss: 0.512159526348114 2023-01-24 02:05:30.334448: step: 152/466, loss: 0.32447198033332825 2023-01-24 02:05:31.090128: step: 154/466, loss: 1.7976981401443481 2023-01-24 02:05:31.758186: step: 156/466, loss: 0.8636375069618225 2023-01-24 02:05:32.497412: step: 158/466, loss: 0.34068071842193604 2023-01-24 02:05:33.336222: step: 160/466, loss: 0.8749996423721313 2023-01-24 02:05:34.098257: step: 162/466, loss: 0.7675051689147949 2023-01-24 02:05:34.795928: step: 164/466, loss: 3.0022711753845215 2023-01-24 02:05:35.599256: step: 166/466, loss: 5.4731645584106445 2023-01-24 02:05:36.364785: step: 168/466, loss: 0.25334587693214417 2023-01-24 02:05:37.040882: step: 170/466, loss: 0.17516425251960754 2023-01-24 02:05:37.839980: step: 172/466, loss: 0.7780047059059143 2023-01-24 02:05:38.536283: step: 174/466, loss: 2.9079222679138184 2023-01-24 02:05:39.254776: step: 176/466, loss: 1.5633292198181152 2023-01-24 02:05:40.075178: step: 178/466, loss: 0.47790423035621643 2023-01-24 02:05:40.819485: step: 180/466, loss: 0.3932521343231201 2023-01-24 02:05:41.605901: step: 182/466, loss: 0.40621671080589294 2023-01-24 02:05:42.374269: step: 184/466, loss: 1.5518304109573364 2023-01-24 02:05:43.161966: step: 186/466, loss: 1.8420729637145996 2023-01-24 02:05:43.919123: step: 188/466, loss: 1.1289658546447754 2023-01-24 02:05:44.638806: step: 190/466, loss: 0.9936701059341431 2023-01-24 02:05:45.371595: step: 192/466, loss: 1.6482939720153809 2023-01-24 02:05:46.136246: step: 194/466, loss: 1.3317241668701172 2023-01-24 02:05:46.898373: step: 196/466, loss: 4.684289455413818 2023-01-24 02:05:47.652947: step: 198/466, loss: 0.5308117270469666 2023-01-24 02:05:48.441285: step: 200/466, loss: 0.6839389204978943 2023-01-24 02:05:49.189138: step: 202/466, loss: 0.8625197410583496 2023-01-24 02:05:49.974060: step: 204/466, loss: 0.6347617506980896 2023-01-24 02:05:50.745988: step: 206/466, loss: 0.6131063103675842 2023-01-24 02:05:51.486640: step: 208/466, loss: 0.26425862312316895 2023-01-24 02:05:52.205993: step: 210/466, loss: 0.7124180793762207 2023-01-24 02:05:52.974101: step: 212/466, loss: 3.0077409744262695 2023-01-24 02:05:53.968736: step: 214/466, loss: 1.0376074314117432 2023-01-24 02:05:54.737420: step: 216/466, loss: 1.0748909711837769 2023-01-24 02:05:55.562301: step: 218/466, loss: 0.948563814163208 2023-01-24 02:05:56.282893: step: 220/466, loss: 0.6731117963790894 2023-01-24 02:05:57.098599: step: 222/466, loss: 1.0048935413360596 2023-01-24 02:05:57.889129: step: 224/466, loss: 0.5586294531822205 2023-01-24 02:05:58.642953: step: 226/466, loss: 0.4264393150806427 2023-01-24 02:05:59.435267: step: 228/466, loss: 0.28359612822532654 2023-01-24 02:06:00.171451: step: 230/466, loss: 4.026350021362305 2023-01-24 02:06:00.947026: step: 232/466, loss: 0.7377166152000427 2023-01-24 02:06:01.733978: step: 234/466, loss: 1.4324662685394287 2023-01-24 02:06:02.466043: step: 236/466, loss: 0.6396796703338623 2023-01-24 02:06:03.212767: step: 238/466, loss: 1.875848412513733 2023-01-24 02:06:04.001951: step: 240/466, loss: 2.0574350357055664 2023-01-24 02:06:04.704914: step: 242/466, loss: 0.4546733796596527 2023-01-24 02:06:05.471812: step: 244/466, loss: 0.5061032176017761 2023-01-24 02:06:06.282105: step: 246/466, loss: 3.3162288665771484 2023-01-24 02:06:07.031243: step: 248/466, loss: 0.8115181922912598 2023-01-24 02:06:07.756368: step: 250/466, loss: 3.5903172492980957 2023-01-24 02:06:08.472827: step: 252/466, loss: 0.9342566728591919 2023-01-24 02:06:09.366480: step: 254/466, loss: 3.475551128387451 2023-01-24 02:06:10.159647: step: 256/466, loss: 0.47010546922683716 2023-01-24 02:06:10.926448: step: 258/466, loss: 1.0341893434524536 2023-01-24 02:06:11.697856: step: 260/466, loss: 0.9817947149276733 2023-01-24 02:06:12.400329: step: 262/466, loss: 0.77620929479599 2023-01-24 02:06:13.107858: step: 264/466, loss: 0.812580943107605 2023-01-24 02:06:13.931137: step: 266/466, loss: 1.2853327989578247 2023-01-24 02:06:14.688450: step: 268/466, loss: 0.25905662775039673 2023-01-24 02:06:15.504167: step: 270/466, loss: 1.620441198348999 2023-01-24 02:06:16.298298: step: 272/466, loss: 0.26145508885383606 2023-01-24 02:06:17.069651: step: 274/466, loss: 0.42695140838623047 2023-01-24 02:06:17.901583: step: 276/466, loss: 1.4670782089233398 2023-01-24 02:06:18.679124: step: 278/466, loss: 0.37129366397857666 2023-01-24 02:06:19.482788: step: 280/466, loss: 1.6939526796340942 2023-01-24 02:06:20.278521: step: 282/466, loss: 0.25103333592414856 2023-01-24 02:06:21.018963: step: 284/466, loss: 0.5259407758712769 2023-01-24 02:06:21.756428: step: 286/466, loss: 0.353412002325058 2023-01-24 02:06:22.538113: step: 288/466, loss: 1.4371587038040161 2023-01-24 02:06:23.291204: step: 290/466, loss: 1.5338389873504639 2023-01-24 02:06:24.054550: step: 292/466, loss: 3.5602529048919678 2023-01-24 02:06:24.853635: step: 294/466, loss: 3.602296829223633 2023-01-24 02:06:25.605414: step: 296/466, loss: 0.17957167327404022 2023-01-24 02:06:26.353621: step: 298/466, loss: 1.5521713495254517 2023-01-24 02:06:27.169535: step: 300/466, loss: 0.30981093645095825 2023-01-24 02:06:27.965921: step: 302/466, loss: 0.8085811734199524 2023-01-24 02:06:28.730999: step: 304/466, loss: 1.3801547288894653 2023-01-24 02:06:29.433234: step: 306/466, loss: 0.9548262357711792 2023-01-24 02:06:30.308945: step: 308/466, loss: 0.33303385972976685 2023-01-24 02:06:31.144301: step: 310/466, loss: 0.19228722155094147 2023-01-24 02:06:31.999000: step: 312/466, loss: 0.8639482855796814 2023-01-24 02:06:32.757446: step: 314/466, loss: 0.6498980522155762 2023-01-24 02:06:33.451792: step: 316/466, loss: 0.5746176242828369 2023-01-24 02:06:34.285284: step: 318/466, loss: 1.6305537223815918 2023-01-24 02:06:35.036134: step: 320/466, loss: 0.8195745944976807 2023-01-24 02:06:35.777645: step: 322/466, loss: 1.6166954040527344 2023-01-24 02:06:36.558648: step: 324/466, loss: 2.4278018474578857 2023-01-24 02:06:37.263190: step: 326/466, loss: 0.8675681948661804 2023-01-24 02:06:37.973452: step: 328/466, loss: 0.37489765882492065 2023-01-24 02:06:38.689496: step: 330/466, loss: 2.4730303287506104 2023-01-24 02:06:39.459076: step: 332/466, loss: 0.8059287071228027 2023-01-24 02:06:40.219881: step: 334/466, loss: 0.3558503985404968 2023-01-24 02:06:40.978447: step: 336/466, loss: 0.8252885937690735 2023-01-24 02:06:41.690345: step: 338/466, loss: 0.5280299782752991 2023-01-24 02:06:42.420509: step: 340/466, loss: 0.7340704202651978 2023-01-24 02:06:43.205244: step: 342/466, loss: 0.8384386897087097 2023-01-24 02:06:44.002075: step: 344/466, loss: 1.6607086658477783 2023-01-24 02:06:44.749138: step: 346/466, loss: 0.3305199444293976 2023-01-24 02:06:45.483249: step: 348/466, loss: 0.8351885676383972 2023-01-24 02:06:46.247466: step: 350/466, loss: 1.1301592588424683 2023-01-24 02:06:47.048563: step: 352/466, loss: 1.4941134452819824 2023-01-24 02:06:47.796844: step: 354/466, loss: 0.6128787994384766 2023-01-24 02:06:48.539376: step: 356/466, loss: 0.5210601687431335 2023-01-24 02:06:49.314164: step: 358/466, loss: 1.4006214141845703 2023-01-24 02:06:50.034628: step: 360/466, loss: 0.4968298077583313 2023-01-24 02:06:50.750452: step: 362/466, loss: 0.39130163192749023 2023-01-24 02:06:51.530687: step: 364/466, loss: 0.5103227496147156 2023-01-24 02:06:52.288125: step: 366/466, loss: 0.204085111618042 2023-01-24 02:06:53.025277: step: 368/466, loss: 0.20469868183135986 2023-01-24 02:06:53.774498: step: 370/466, loss: 0.20196063816547394 2023-01-24 02:06:54.502925: step: 372/466, loss: 3.2092690467834473 2023-01-24 02:06:55.333935: step: 374/466, loss: 0.4920191168785095 2023-01-24 02:06:55.988377: step: 376/466, loss: 1.18999445438385 2023-01-24 02:06:56.645478: step: 378/466, loss: 3.111128568649292 2023-01-24 02:06:57.377445: step: 380/466, loss: 0.5756344795227051 2023-01-24 02:06:58.193911: step: 382/466, loss: 0.1449517458677292 2023-01-24 02:06:58.918834: step: 384/466, loss: 0.24137958884239197 2023-01-24 02:06:59.717266: step: 386/466, loss: 0.8756279945373535 2023-01-24 02:07:00.483694: step: 388/466, loss: 0.8774224519729614 2023-01-24 02:07:01.279332: step: 390/466, loss: 1.375420331954956 2023-01-24 02:07:02.072589: step: 392/466, loss: 0.25797238945961 2023-01-24 02:07:02.868860: step: 394/466, loss: 0.740105390548706 2023-01-24 02:07:03.760385: step: 396/466, loss: 0.4930828809738159 2023-01-24 02:07:04.591261: step: 398/466, loss: 0.40567928552627563 2023-01-24 02:07:05.328136: step: 400/466, loss: 0.37840887904167175 2023-01-24 02:07:06.163020: step: 402/466, loss: 4.936130046844482 2023-01-24 02:07:06.933893: step: 404/466, loss: 0.722000777721405 2023-01-24 02:07:07.651238: step: 406/466, loss: 0.5378820300102234 2023-01-24 02:07:08.477568: step: 408/466, loss: 3.1902055740356445 2023-01-24 02:07:09.298403: step: 410/466, loss: 1.2864125967025757 2023-01-24 02:07:10.075679: step: 412/466, loss: 1.8357758522033691 2023-01-24 02:07:10.874159: step: 414/466, loss: 0.48244792222976685 2023-01-24 02:07:11.667420: step: 416/466, loss: 0.43493497371673584 2023-01-24 02:07:12.458219: step: 418/466, loss: 1.562419056892395 2023-01-24 02:07:13.300819: step: 420/466, loss: 1.8326852321624756 2023-01-24 02:07:14.053460: step: 422/466, loss: 1.6586694717407227 2023-01-24 02:07:14.840197: step: 424/466, loss: 1.4311715364456177 2023-01-24 02:07:15.561251: step: 426/466, loss: 1.946704387664795 2023-01-24 02:07:16.414464: step: 428/466, loss: 2.4207661151885986 2023-01-24 02:07:17.143060: step: 430/466, loss: 1.1361252069473267 2023-01-24 02:07:17.836971: step: 432/466, loss: 2.117689371109009 2023-01-24 02:07:18.610045: step: 434/466, loss: 0.7453824877738953 2023-01-24 02:07:19.371105: step: 436/466, loss: 3.592252016067505 2023-01-24 02:07:20.221040: step: 438/466, loss: 0.7220326066017151 2023-01-24 02:07:20.978409: step: 440/466, loss: 0.30927208065986633 2023-01-24 02:07:21.751401: step: 442/466, loss: 0.6411184668540955 2023-01-24 02:07:22.473736: step: 444/466, loss: 0.5024139881134033 2023-01-24 02:07:23.190567: step: 446/466, loss: 0.4438614845275879 2023-01-24 02:07:23.993575: step: 448/466, loss: 0.2669348418712616 2023-01-24 02:07:24.684783: step: 450/466, loss: 0.9279356002807617 2023-01-24 02:07:25.440019: step: 452/466, loss: 0.6318209767341614 2023-01-24 02:07:26.176067: step: 454/466, loss: 2.680860757827759 2023-01-24 02:07:26.962692: step: 456/466, loss: 1.1922527551651 2023-01-24 02:07:27.733484: step: 458/466, loss: 0.10912270098924637 2023-01-24 02:07:28.613936: step: 460/466, loss: 0.18991920351982117 2023-01-24 02:07:29.329595: step: 462/466, loss: 1.3336913585662842 2023-01-24 02:07:30.126450: step: 464/466, loss: 2.4666285514831543 2023-01-24 02:07:30.920132: step: 466/466, loss: 0.3230827748775482 2023-01-24 02:07:31.658592: step: 468/466, loss: 0.6548050045967102 2023-01-24 02:07:32.477168: step: 470/466, loss: 0.5973828434944153 2023-01-24 02:07:33.219039: step: 472/466, loss: 0.7570876479148865 2023-01-24 02:07:33.999212: step: 474/466, loss: 1.6824928522109985 2023-01-24 02:07:34.842704: step: 476/466, loss: 0.658035397529602 2023-01-24 02:07:35.578245: step: 478/466, loss: 0.2836608290672302 2023-01-24 02:07:36.329341: step: 480/466, loss: 1.383255124092102 2023-01-24 02:07:37.084209: step: 482/466, loss: 1.4585496187210083 2023-01-24 02:07:37.829215: step: 484/466, loss: 3.030864953994751 2023-01-24 02:07:38.593561: step: 486/466, loss: 0.32928135991096497 2023-01-24 02:07:39.381733: step: 488/466, loss: 2.591329574584961 2023-01-24 02:07:40.128730: step: 490/466, loss: 1.8271920680999756 2023-01-24 02:07:40.872530: step: 492/466, loss: 0.5807033181190491 2023-01-24 02:07:41.656852: step: 494/466, loss: 0.2876582741737366 2023-01-24 02:07:42.538873: step: 496/466, loss: 0.8153465986251831 2023-01-24 02:07:43.373375: step: 498/466, loss: 0.9287121891975403 2023-01-24 02:07:44.129019: step: 500/466, loss: 1.139840006828308 2023-01-24 02:07:44.891138: step: 502/466, loss: 0.5210357308387756 2023-01-24 02:07:45.695283: step: 504/466, loss: 1.055281162261963 2023-01-24 02:07:46.495384: step: 506/466, loss: 0.5654680132865906 2023-01-24 02:07:47.301832: step: 508/466, loss: 0.5230228304862976 2023-01-24 02:07:48.111918: step: 510/466, loss: 1.373519778251648 2023-01-24 02:07:48.865182: step: 512/466, loss: 1.0065639019012451 2023-01-24 02:07:49.753547: step: 514/466, loss: 2.237562656402588 2023-01-24 02:07:50.547641: step: 516/466, loss: 1.6026716232299805 2023-01-24 02:07:51.218208: step: 518/466, loss: 0.37853115797042847 2023-01-24 02:07:51.931871: step: 520/466, loss: 0.3763364255428314 2023-01-24 02:07:52.738281: step: 522/466, loss: 0.8508153557777405 2023-01-24 02:07:53.540505: step: 524/466, loss: 0.9764190316200256 2023-01-24 02:07:54.362307: step: 526/466, loss: 0.9147853255271912 2023-01-24 02:07:55.159373: step: 528/466, loss: 5.266207218170166 2023-01-24 02:07:56.007669: step: 530/466, loss: 0.34616580605506897 2023-01-24 02:07:56.703308: step: 532/466, loss: 0.5940641760826111 2023-01-24 02:07:57.440354: step: 534/466, loss: 1.3932961225509644 2023-01-24 02:07:58.151385: step: 536/466, loss: 0.3049767017364502 2023-01-24 02:07:58.832591: step: 538/466, loss: 0.5108581185340881 2023-01-24 02:07:59.591859: step: 540/466, loss: 0.6778570413589478 2023-01-24 02:08:00.386054: step: 542/466, loss: 0.8873158693313599 2023-01-24 02:08:01.195678: step: 544/466, loss: 2.1264898777008057 2023-01-24 02:08:01.992673: step: 546/466, loss: 1.411435842514038 2023-01-24 02:08:02.711229: step: 548/466, loss: 0.3980066180229187 2023-01-24 02:08:03.450658: step: 550/466, loss: 1.2385305166244507 2023-01-24 02:08:04.275959: step: 552/466, loss: 1.7239978313446045 2023-01-24 02:08:05.082871: step: 554/466, loss: 0.9625904560089111 2023-01-24 02:08:05.898274: step: 556/466, loss: 0.47288978099823 2023-01-24 02:08:06.714604: step: 558/466, loss: 1.9926071166992188 2023-01-24 02:08:07.458205: step: 560/466, loss: 2.027879238128662 2023-01-24 02:08:08.209228: step: 562/466, loss: 0.6236366033554077 2023-01-24 02:08:08.954492: step: 564/466, loss: 0.5292195081710815 2023-01-24 02:08:09.723443: step: 566/466, loss: 1.7229830026626587 2023-01-24 02:08:10.499985: step: 568/466, loss: 1.3079602718353271 2023-01-24 02:08:11.282600: step: 570/466, loss: 0.18612471222877502 2023-01-24 02:08:12.082269: step: 572/466, loss: 0.966667890548706 2023-01-24 02:08:12.839330: step: 574/466, loss: 1.0524870157241821 2023-01-24 02:08:13.632683: step: 576/466, loss: 0.3565841019153595 2023-01-24 02:08:14.396292: step: 578/466, loss: 1.3353606462478638 2023-01-24 02:08:15.110657: step: 580/466, loss: 0.52093505859375 2023-01-24 02:08:15.903849: step: 582/466, loss: 0.28593239188194275 2023-01-24 02:08:16.660046: step: 584/466, loss: 1.649446725845337 2023-01-24 02:08:17.370900: step: 586/466, loss: 0.9959136843681335 2023-01-24 02:08:18.131936: step: 588/466, loss: 0.5500845313072205 2023-01-24 02:08:18.936710: step: 590/466, loss: 0.5372470021247864 2023-01-24 02:08:19.642696: step: 592/466, loss: 4.194912433624268 2023-01-24 02:08:20.458886: step: 594/466, loss: 0.7411658763885498 2023-01-24 02:08:21.120576: step: 596/466, loss: 0.494291216135025 2023-01-24 02:08:21.827324: step: 598/466, loss: 0.4416239559650421 2023-01-24 02:08:22.616178: step: 600/466, loss: 0.610022783279419 2023-01-24 02:08:23.422034: step: 602/466, loss: 0.22815054655075073 2023-01-24 02:08:24.190584: step: 604/466, loss: 2.0596587657928467 2023-01-24 02:08:24.960494: step: 606/466, loss: 2.188324451446533 2023-01-24 02:08:25.678801: step: 608/466, loss: 0.4296473264694214 2023-01-24 02:08:26.511768: step: 610/466, loss: 0.8137211799621582 2023-01-24 02:08:27.285264: step: 612/466, loss: 0.8927656412124634 2023-01-24 02:08:28.096707: step: 614/466, loss: 1.6651499271392822 2023-01-24 02:08:28.987516: step: 616/466, loss: 0.8619599938392639 2023-01-24 02:08:29.813461: step: 618/466, loss: 0.9984352588653564 2023-01-24 02:08:30.610732: step: 620/466, loss: 0.28391364216804504 2023-01-24 02:08:31.402959: step: 622/466, loss: 0.7742931246757507 2023-01-24 02:08:32.260569: step: 624/466, loss: 1.2217891216278076 2023-01-24 02:08:33.013415: step: 626/466, loss: 0.5850755572319031 2023-01-24 02:08:33.760717: step: 628/466, loss: 1.6307834386825562 2023-01-24 02:08:34.559066: step: 630/466, loss: 0.39652201533317566 2023-01-24 02:08:35.368671: step: 632/466, loss: 0.7861143350601196 2023-01-24 02:08:35.994502: step: 634/466, loss: 0.38449627161026 2023-01-24 02:08:36.812497: step: 636/466, loss: 2.7891414165496826 2023-01-24 02:08:37.605267: step: 638/466, loss: 0.6628226041793823 2023-01-24 02:08:38.338253: step: 640/466, loss: 0.41520482301712036 2023-01-24 02:08:39.199478: step: 642/466, loss: 0.2679263949394226 2023-01-24 02:08:40.050336: step: 644/466, loss: 1.1348047256469727 2023-01-24 02:08:40.872942: step: 646/466, loss: 0.46422290802001953 2023-01-24 02:08:41.672835: step: 648/466, loss: 0.2260451465845108 2023-01-24 02:08:42.429307: step: 650/466, loss: 1.5560388565063477 2023-01-24 02:08:43.248495: step: 652/466, loss: 2.7813427448272705 2023-01-24 02:08:44.109543: step: 654/466, loss: 0.5180941224098206 2023-01-24 02:08:44.830327: step: 656/466, loss: 1.9066956043243408 2023-01-24 02:08:45.565031: step: 658/466, loss: 0.7163376808166504 2023-01-24 02:08:46.317195: step: 660/466, loss: 0.8216008543968201 2023-01-24 02:08:47.113367: step: 662/466, loss: 0.6733957529067993 2023-01-24 02:08:47.883269: step: 664/466, loss: 0.9260329008102417 2023-01-24 02:08:48.639916: step: 666/466, loss: 0.31455352902412415 2023-01-24 02:08:49.418825: step: 668/466, loss: 1.2638105154037476 2023-01-24 02:08:50.267773: step: 670/466, loss: 1.3063114881515503 2023-01-24 02:08:51.052073: step: 672/466, loss: 0.7214428186416626 2023-01-24 02:08:51.823592: step: 674/466, loss: 1.3825799226760864 2023-01-24 02:08:52.561005: step: 676/466, loss: 0.8210784196853638 2023-01-24 02:08:53.282292: step: 678/466, loss: 4.343418598175049 2023-01-24 02:08:54.138778: step: 680/466, loss: 0.6992369294166565 2023-01-24 02:08:54.846775: step: 682/466, loss: 0.6307631731033325 2023-01-24 02:08:55.612970: step: 684/466, loss: 0.2862870991230011 2023-01-24 02:08:56.428772: step: 686/466, loss: 0.795841634273529 2023-01-24 02:08:57.205379: step: 688/466, loss: 0.35162660479545593 2023-01-24 02:08:57.904748: step: 690/466, loss: 0.24319665133953094 2023-01-24 02:08:58.674717: step: 692/466, loss: 0.5327470898628235 2023-01-24 02:08:59.395466: step: 694/466, loss: 0.3367350399494171 2023-01-24 02:09:00.192816: step: 696/466, loss: 0.2241695076227188 2023-01-24 02:09:00.927133: step: 698/466, loss: 0.36978334188461304 2023-01-24 02:09:01.766201: step: 700/466, loss: 1.1117911338806152 2023-01-24 02:09:02.533807: step: 702/466, loss: 0.6542451977729797 2023-01-24 02:09:03.353152: step: 704/466, loss: 0.7758954763412476 2023-01-24 02:09:04.111248: step: 706/466, loss: 1.0059378147125244 2023-01-24 02:09:04.916125: step: 708/466, loss: 1.8209892511367798 2023-01-24 02:09:05.676761: step: 710/466, loss: 1.159255027770996 2023-01-24 02:09:06.493061: step: 712/466, loss: 3.3368239402770996 2023-01-24 02:09:07.241182: step: 714/466, loss: 0.5656745433807373 2023-01-24 02:09:07.991372: step: 716/466, loss: 2.0161354541778564 2023-01-24 02:09:08.718665: step: 718/466, loss: 0.24916690587997437 2023-01-24 02:09:09.646834: step: 720/466, loss: 2.7012593746185303 2023-01-24 02:09:10.398686: step: 722/466, loss: 0.9164566993713379 2023-01-24 02:09:11.132498: step: 724/466, loss: 0.8179494738578796 2023-01-24 02:09:11.774240: step: 726/466, loss: 0.8097164630889893 2023-01-24 02:09:12.593435: step: 728/466, loss: 0.7410258054733276 2023-01-24 02:09:13.322504: step: 730/466, loss: 7.917463302612305 2023-01-24 02:09:14.052640: step: 732/466, loss: 7.717822074890137 2023-01-24 02:09:14.869654: step: 734/466, loss: 1.1576268672943115 2023-01-24 02:09:15.583116: step: 736/466, loss: 0.34166157245635986 2023-01-24 02:09:16.366300: step: 738/466, loss: 1.1671950817108154 2023-01-24 02:09:17.148185: step: 740/466, loss: 0.6236756443977356 2023-01-24 02:09:17.924821: step: 742/466, loss: 0.6424854397773743 2023-01-24 02:09:18.694218: step: 744/466, loss: 3.8019018173217773 2023-01-24 02:09:19.513600: step: 746/466, loss: 0.8778786063194275 2023-01-24 02:09:20.242075: step: 748/466, loss: 0.9644218683242798 2023-01-24 02:09:21.049502: step: 750/466, loss: 0.9258589744567871 2023-01-24 02:09:21.838212: step: 752/466, loss: 1.182026743888855 2023-01-24 02:09:22.582733: step: 754/466, loss: 0.4859839975833893 2023-01-24 02:09:23.310005: step: 756/466, loss: 0.42185384035110474 2023-01-24 02:09:24.074493: step: 758/466, loss: 1.056136965751648 2023-01-24 02:09:24.771556: step: 760/466, loss: 1.8052279949188232 2023-01-24 02:09:25.549249: step: 762/466, loss: 0.25396454334259033 2023-01-24 02:09:26.184862: step: 764/466, loss: 0.984928548336029 2023-01-24 02:09:26.899281: step: 766/466, loss: 0.32059386372566223 2023-01-24 02:09:27.677620: step: 768/466, loss: 1.5475854873657227 2023-01-24 02:09:28.440108: step: 770/466, loss: 0.6480833292007446 2023-01-24 02:09:29.151485: step: 772/466, loss: 0.7749090790748596 2023-01-24 02:09:29.931929: step: 774/466, loss: 1.937896490097046 2023-01-24 02:09:30.671421: step: 776/466, loss: 1.2467749118804932 2023-01-24 02:09:31.368935: step: 778/466, loss: 0.29985183477401733 2023-01-24 02:09:32.151964: step: 780/466, loss: 0.7874993085861206 2023-01-24 02:09:33.021056: step: 782/466, loss: 0.5243517756462097 2023-01-24 02:09:33.788106: step: 784/466, loss: 1.106619119644165 2023-01-24 02:09:34.613005: step: 786/466, loss: 0.26240459084510803 2023-01-24 02:09:35.346495: step: 788/466, loss: 3.3374693393707275 2023-01-24 02:09:36.102260: step: 790/466, loss: 1.4162318706512451 2023-01-24 02:09:36.876093: step: 792/466, loss: 1.6213815212249756 2023-01-24 02:09:37.690863: step: 794/466, loss: 3.1641407012939453 2023-01-24 02:09:38.510241: step: 796/466, loss: 1.010169267654419 2023-01-24 02:09:39.324309: step: 798/466, loss: 0.5032852292060852 2023-01-24 02:09:40.008840: step: 800/466, loss: 0.5835109949111938 2023-01-24 02:09:40.771567: step: 802/466, loss: 2.25563907623291 2023-01-24 02:09:41.439447: step: 804/466, loss: 1.6692702770233154 2023-01-24 02:09:42.238193: step: 806/466, loss: 1.2163097858428955 2023-01-24 02:09:42.944931: step: 808/466, loss: 0.15825329720973969 2023-01-24 02:09:43.707062: step: 810/466, loss: 2.151369571685791 2023-01-24 02:09:44.379646: step: 812/466, loss: 1.0254435539245605 2023-01-24 02:09:45.108398: step: 814/466, loss: 1.6680785417556763 2023-01-24 02:09:45.858985: step: 816/466, loss: 1.0526785850524902 2023-01-24 02:09:46.611667: step: 818/466, loss: 0.9085757732391357 2023-01-24 02:09:47.334748: step: 820/466, loss: 0.41828882694244385 2023-01-24 02:09:48.000643: step: 822/466, loss: 0.7217870354652405 2023-01-24 02:09:48.752107: step: 824/466, loss: 0.6949070692062378 2023-01-24 02:09:49.573505: step: 826/466, loss: 1.796603798866272 2023-01-24 02:09:50.300768: step: 828/466, loss: 1.1403634548187256 2023-01-24 02:09:51.021185: step: 830/466, loss: 6.5800652503967285 2023-01-24 02:09:51.852377: step: 832/466, loss: 1.1636403799057007 2023-01-24 02:09:52.624296: step: 834/466, loss: 0.7278444766998291 2023-01-24 02:09:53.424426: step: 836/466, loss: 0.711738646030426 2023-01-24 02:09:54.206833: step: 838/466, loss: 0.42984867095947266 2023-01-24 02:09:54.910566: step: 840/466, loss: 1.1451420783996582 2023-01-24 02:09:55.660434: step: 842/466, loss: 0.5262464284896851 2023-01-24 02:09:56.398080: step: 844/466, loss: 0.3005110025405884 2023-01-24 02:09:57.111873: step: 846/466, loss: 1.6486259698867798 2023-01-24 02:09:57.855065: step: 848/466, loss: 2.0297067165374756 2023-01-24 02:09:58.663674: step: 850/466, loss: 1.0829218626022339 2023-01-24 02:09:59.411070: step: 852/466, loss: 0.3283446431159973 2023-01-24 02:10:00.181138: step: 854/466, loss: 0.4144214391708374 2023-01-24 02:10:00.985533: step: 856/466, loss: 1.1768455505371094 2023-01-24 02:10:01.688123: step: 858/466, loss: 0.47637295722961426 2023-01-24 02:10:02.491370: step: 860/466, loss: 1.35926353931427 2023-01-24 02:10:03.267975: step: 862/466, loss: 0.79765385389328 2023-01-24 02:10:04.024764: step: 864/466, loss: 2.7926888465881348 2023-01-24 02:10:04.752852: step: 866/466, loss: 1.0313583612442017 2023-01-24 02:10:05.530675: step: 868/466, loss: 0.9380786418914795 2023-01-24 02:10:06.324283: step: 870/466, loss: 1.7788538932800293 2023-01-24 02:10:07.104137: step: 872/466, loss: 12.130573272705078 2023-01-24 02:10:07.893578: step: 874/466, loss: 0.48626357316970825 2023-01-24 02:10:08.672132: step: 876/466, loss: 0.8553528189659119 2023-01-24 02:10:09.463465: step: 878/466, loss: 0.5449331998825073 2023-01-24 02:10:10.247499: step: 880/466, loss: 4.440307140350342 2023-01-24 02:10:11.012895: step: 882/466, loss: 2.0392606258392334 2023-01-24 02:10:11.821328: step: 884/466, loss: 0.5503759384155273 2023-01-24 02:10:12.618776: step: 886/466, loss: 0.6343894600868225 2023-01-24 02:10:13.352739: step: 888/466, loss: 0.6469244360923767 2023-01-24 02:10:14.141238: step: 890/466, loss: 1.1977115869522095 2023-01-24 02:10:14.976602: step: 892/466, loss: 0.896173357963562 2023-01-24 02:10:15.921913: step: 894/466, loss: 0.7121672630310059 2023-01-24 02:10:16.715373: step: 896/466, loss: 0.4969857633113861 2023-01-24 02:10:17.460960: step: 898/466, loss: 0.2636564075946808 2023-01-24 02:10:18.178179: step: 900/466, loss: 0.18109741806983948 2023-01-24 02:10:18.958160: step: 902/466, loss: 0.609916090965271 2023-01-24 02:10:19.759918: step: 904/466, loss: 1.431706428527832 2023-01-24 02:10:20.552701: step: 906/466, loss: 1.133394479751587 2023-01-24 02:10:21.264160: step: 908/466, loss: 0.38408052921295166 2023-01-24 02:10:22.013506: step: 910/466, loss: 0.21119973063468933 2023-01-24 02:10:22.714029: step: 912/466, loss: 4.096024036407471 2023-01-24 02:10:23.448039: step: 914/466, loss: 0.4555894732475281 2023-01-24 02:10:24.324641: step: 916/466, loss: 0.49550244212150574 2023-01-24 02:10:25.106069: step: 918/466, loss: 0.8274804949760437 2023-01-24 02:10:25.849192: step: 920/466, loss: 0.9337276220321655 2023-01-24 02:10:26.654248: step: 922/466, loss: 0.5701102614402771 2023-01-24 02:10:27.469452: step: 924/466, loss: 1.468475103378296 2023-01-24 02:10:28.327991: step: 926/466, loss: 0.5688779354095459 2023-01-24 02:10:29.073883: step: 928/466, loss: 0.829647958278656 2023-01-24 02:10:29.907142: step: 930/466, loss: 1.1818006038665771 2023-01-24 02:10:30.674393: step: 932/466, loss: 0.8422772884368896 ================================================== Loss: 1.159 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.335154981737013, 'r': 0.2849135328618251, 'f1': 0.30799883449883453}, 'combined': 0.22694650963072016, 'epoch': 4} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.34747237279256127, 'r': 0.218033658295073, 'f1': 0.2679393973950074}, 'combined': 0.16468470278912647, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32945151673584927, 'r': 0.28319077245036, 'f1': 0.30457456547212186}, 'combined': 0.2244233640320898, 'epoch': 4} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3467335894455655, 'r': 0.22026723270742493, 'f1': 0.2693966049074954}, 'combined': 0.16639202067815895, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30714285714285716, 'r': 0.30714285714285716, 'f1': 0.30714285714285716}, 'combined': 0.20476190476190476, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 4} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.335154981737013, 'r': 0.2849135328618251, 'f1': 0.30799883449883453}, 'combined': 0.22694650963072016, 'epoch': 4} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.34747237279256127, 'r': 0.218033658295073, 'f1': 0.2679393973950074}, 'combined': 0.16468470278912647, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30714285714285716, 'r': 0.30714285714285716, 'f1': 0.30714285714285716}, 'combined': 0.20476190476190476, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32945151673584927, 'r': 0.28319077245036, 'f1': 0.30457456547212186}, 'combined': 0.2244233640320898, 'epoch': 4} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3467335894455655, 'r': 0.22026723270742493, 'f1': 0.2693966049074954}, 'combined': 0.16639202067815895, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:13:30.161792: step: 2/466, loss: 0.9809449911117554 2023-01-24 02:13:30.911795: step: 4/466, loss: 0.9840978980064392 2023-01-24 02:13:31.703748: step: 6/466, loss: 0.43336671590805054 2023-01-24 02:13:32.514765: step: 8/466, loss: 1.2338881492614746 2023-01-24 02:13:33.252775: step: 10/466, loss: 0.5673670172691345 2023-01-24 02:13:34.077351: step: 12/466, loss: 0.8279459476470947 2023-01-24 02:13:34.837113: step: 14/466, loss: 0.7558691501617432 2023-01-24 02:13:35.621395: step: 16/466, loss: 0.34479042887687683 2023-01-24 02:13:36.291845: step: 18/466, loss: 2.8193399906158447 2023-01-24 02:13:37.195000: step: 20/466, loss: 0.6758497953414917 2023-01-24 02:13:37.934880: step: 22/466, loss: 1.0843076705932617 2023-01-24 02:13:38.795671: step: 24/466, loss: 0.7437761425971985 2023-01-24 02:13:39.465671: step: 26/466, loss: 0.8486707806587219 2023-01-24 02:13:40.276566: step: 28/466, loss: 0.17709881067276 2023-01-24 02:13:41.044788: step: 30/466, loss: 0.9467667937278748 2023-01-24 02:13:41.821557: step: 32/466, loss: 0.27025699615478516 2023-01-24 02:13:42.546630: step: 34/466, loss: 0.20740818977355957 2023-01-24 02:13:43.294744: step: 36/466, loss: 0.583893358707428 2023-01-24 02:13:44.055955: step: 38/466, loss: 0.26786521077156067 2023-01-24 02:13:44.830943: step: 40/466, loss: 0.38853883743286133 2023-01-24 02:13:45.600375: step: 42/466, loss: 0.2629576325416565 2023-01-24 02:13:46.390666: step: 44/466, loss: 3.393561840057373 2023-01-24 02:13:47.123951: step: 46/466, loss: 0.9243699312210083 2023-01-24 02:13:47.862955: step: 48/466, loss: 0.3238946497440338 2023-01-24 02:13:48.618348: step: 50/466, loss: 0.34954404830932617 2023-01-24 02:13:49.421864: step: 52/466, loss: 1.080365538597107 2023-01-24 02:13:50.174559: step: 54/466, loss: 0.6035301089286804 2023-01-24 02:13:50.897366: step: 56/466, loss: 0.1855875700712204 2023-01-24 02:13:51.668514: step: 58/466, loss: 0.17876757681369781 2023-01-24 02:13:52.462714: step: 60/466, loss: 0.3543803095817566 2023-01-24 02:13:53.205710: step: 62/466, loss: 1.1884087324142456 2023-01-24 02:13:53.904322: step: 64/466, loss: 0.3980889320373535 2023-01-24 02:13:54.755464: step: 66/466, loss: 0.3353211283683777 2023-01-24 02:13:55.564294: step: 68/466, loss: 0.8751363754272461 2023-01-24 02:13:56.341808: step: 70/466, loss: 0.6269444227218628 2023-01-24 02:13:57.185411: step: 72/466, loss: 0.6170547604560852 2023-01-24 02:13:58.031686: step: 74/466, loss: 0.9494127631187439 2023-01-24 02:13:58.804595: step: 76/466, loss: 0.4018596112728119 2023-01-24 02:13:59.567147: step: 78/466, loss: 4.271050453186035 2023-01-24 02:14:00.323216: step: 80/466, loss: 0.2668708860874176 2023-01-24 02:14:01.127433: step: 82/466, loss: 0.1476602405309677 2023-01-24 02:14:01.894675: step: 84/466, loss: 0.5470862984657288 2023-01-24 02:14:02.658578: step: 86/466, loss: 0.18264155089855194 2023-01-24 02:14:03.412919: step: 88/466, loss: 0.8571751117706299 2023-01-24 02:14:04.255704: step: 90/466, loss: 0.19562962651252747 2023-01-24 02:14:05.006931: step: 92/466, loss: 0.5648656487464905 2023-01-24 02:14:05.712442: step: 94/466, loss: 0.09153655171394348 2023-01-24 02:14:06.432746: step: 96/466, loss: 0.2601352334022522 2023-01-24 02:14:07.144252: step: 98/466, loss: 0.8876150250434875 2023-01-24 02:14:07.895449: step: 100/466, loss: 0.6579775214195251 2023-01-24 02:14:08.668357: step: 102/466, loss: 0.4219387173652649 2023-01-24 02:14:09.369237: step: 104/466, loss: 1.411496639251709 2023-01-24 02:14:10.127142: step: 106/466, loss: 0.7639531493186951 2023-01-24 02:14:10.949627: step: 108/466, loss: 0.375456303358078 2023-01-24 02:14:11.672383: step: 110/466, loss: 0.47441768646240234 2023-01-24 02:14:12.452460: step: 112/466, loss: 0.8059460520744324 2023-01-24 02:14:13.194752: step: 114/466, loss: 0.5893727540969849 2023-01-24 02:14:13.905959: step: 116/466, loss: 0.29101523756980896 2023-01-24 02:14:14.733587: step: 118/466, loss: 2.5849218368530273 2023-01-24 02:14:15.526936: step: 120/466, loss: 0.2512795925140381 2023-01-24 02:14:16.354643: step: 122/466, loss: 0.6373374462127686 2023-01-24 02:14:17.147566: step: 124/466, loss: 0.3568679988384247 2023-01-24 02:14:17.947908: step: 126/466, loss: 0.5779368281364441 2023-01-24 02:14:18.690799: step: 128/466, loss: 0.26844117045402527 2023-01-24 02:14:19.497532: step: 130/466, loss: 0.29416975378990173 2023-01-24 02:14:20.224931: step: 132/466, loss: 1.4655174016952515 2023-01-24 02:14:20.984687: step: 134/466, loss: 1.2914477586746216 2023-01-24 02:14:21.741526: step: 136/466, loss: 3.8348920345306396 2023-01-24 02:14:22.479861: step: 138/466, loss: 0.15896697342395782 2023-01-24 02:14:23.156583: step: 140/466, loss: 0.8044726848602295 2023-01-24 02:14:23.855658: step: 142/466, loss: 0.2973330616950989 2023-01-24 02:14:24.647272: step: 144/466, loss: 1.2005521059036255 2023-01-24 02:14:25.361401: step: 146/466, loss: 0.52372145652771 2023-01-24 02:14:26.116117: step: 148/466, loss: 0.33128097653388977 2023-01-24 02:14:26.893380: step: 150/466, loss: 0.3678876459598541 2023-01-24 02:14:27.653547: step: 152/466, loss: 0.6063940525054932 2023-01-24 02:14:28.391072: step: 154/466, loss: 1.0063389539718628 2023-01-24 02:14:29.216440: step: 156/466, loss: 2.022432327270508 2023-01-24 02:14:29.983801: step: 158/466, loss: 0.3305632174015045 2023-01-24 02:14:30.726246: step: 160/466, loss: 0.5475694537162781 2023-01-24 02:14:31.503676: step: 162/466, loss: 0.7513313293457031 2023-01-24 02:14:32.287188: step: 164/466, loss: 3.8323545455932617 2023-01-24 02:14:33.005342: step: 166/466, loss: 1.0958454608917236 2023-01-24 02:14:33.818766: step: 168/466, loss: 3.309696912765503 2023-01-24 02:14:34.566936: step: 170/466, loss: 0.9720345735549927 2023-01-24 02:14:35.301510: step: 172/466, loss: 1.1154394149780273 2023-01-24 02:14:36.074358: step: 174/466, loss: 0.9783151149749756 2023-01-24 02:14:36.876023: step: 176/466, loss: 0.3279179632663727 2023-01-24 02:14:37.608064: step: 178/466, loss: 1.1613959074020386 2023-01-24 02:14:38.461677: step: 180/466, loss: 0.2454126924276352 2023-01-24 02:14:39.243332: step: 182/466, loss: 0.409664124250412 2023-01-24 02:14:39.983614: step: 184/466, loss: 0.8884005546569824 2023-01-24 02:14:40.734675: step: 186/466, loss: 0.6842936277389526 2023-01-24 02:14:41.506222: step: 188/466, loss: 0.8189715147018433 2023-01-24 02:14:42.284955: step: 190/466, loss: 0.2442971169948578 2023-01-24 02:14:43.090056: step: 192/466, loss: 0.9736469984054565 2023-01-24 02:14:43.906590: step: 194/466, loss: 0.9577223062515259 2023-01-24 02:14:44.646412: step: 196/466, loss: 0.4329565763473511 2023-01-24 02:14:45.378385: step: 198/466, loss: 0.7579882144927979 2023-01-24 02:14:46.099227: step: 200/466, loss: 1.7942726612091064 2023-01-24 02:14:46.897922: step: 202/466, loss: 0.906710684299469 2023-01-24 02:14:47.636898: step: 204/466, loss: 1.0168603658676147 2023-01-24 02:14:48.359603: step: 206/466, loss: 0.24140344560146332 2023-01-24 02:14:49.140039: step: 208/466, loss: 0.17037667334079742 2023-01-24 02:14:49.881383: step: 210/466, loss: 2.784151554107666 2023-01-24 02:14:50.687126: step: 212/466, loss: 0.17526978254318237 2023-01-24 02:14:51.389376: step: 214/466, loss: 0.5849495530128479 2023-01-24 02:14:52.175250: step: 216/466, loss: 0.19523198902606964 2023-01-24 02:14:52.917492: step: 218/466, loss: 0.44503164291381836 2023-01-24 02:14:53.769565: step: 220/466, loss: 0.33854418992996216 2023-01-24 02:14:54.510958: step: 222/466, loss: 0.9513136744499207 2023-01-24 02:14:55.314383: step: 224/466, loss: 1.576082706451416 2023-01-24 02:14:56.006374: step: 226/466, loss: 0.6007593870162964 2023-01-24 02:14:56.822967: step: 228/466, loss: 0.38531842827796936 2023-01-24 02:14:57.555222: step: 230/466, loss: 0.6435443758964539 2023-01-24 02:14:58.411285: step: 232/466, loss: 0.3349244296550751 2023-01-24 02:14:59.144112: step: 234/466, loss: 0.4726037383079529 2023-01-24 02:14:59.835696: step: 236/466, loss: 1.0641672611236572 2023-01-24 02:15:00.512425: step: 238/466, loss: 0.6876202821731567 2023-01-24 02:15:01.420521: step: 240/466, loss: 0.9148756265640259 2023-01-24 02:15:02.130977: step: 242/466, loss: 0.7800842523574829 2023-01-24 02:15:02.888084: step: 244/466, loss: 0.427781879901886 2023-01-24 02:15:03.658545: step: 246/466, loss: 1.4870994091033936 2023-01-24 02:15:04.586180: step: 248/466, loss: 0.8990872502326965 2023-01-24 02:15:05.462836: step: 250/466, loss: 1.415848970413208 2023-01-24 02:15:06.264085: step: 252/466, loss: 0.5700762867927551 2023-01-24 02:15:07.034198: step: 254/466, loss: 7.834717750549316 2023-01-24 02:15:07.814209: step: 256/466, loss: 1.9557559490203857 2023-01-24 02:15:08.532795: step: 258/466, loss: 1.1228989362716675 2023-01-24 02:15:09.288159: step: 260/466, loss: 1.1477344036102295 2023-01-24 02:15:09.947827: step: 262/466, loss: 0.5251627564430237 2023-01-24 02:15:10.821652: step: 264/466, loss: 0.4231213927268982 2023-01-24 02:15:11.647091: step: 266/466, loss: 1.6289584636688232 2023-01-24 02:15:12.397675: step: 268/466, loss: 1.3237013816833496 2023-01-24 02:15:13.155622: step: 270/466, loss: 1.6523653268814087 2023-01-24 02:15:13.966959: step: 272/466, loss: 1.1716986894607544 2023-01-24 02:15:14.692881: step: 274/466, loss: 0.25267747044563293 2023-01-24 02:15:15.383106: step: 276/466, loss: 0.6228837370872498 2023-01-24 02:15:16.174190: step: 278/466, loss: 1.0324732065200806 2023-01-24 02:15:16.967635: step: 280/466, loss: 0.3614393472671509 2023-01-24 02:15:17.770914: step: 282/466, loss: 0.6208564639091492 2023-01-24 02:15:18.479309: step: 284/466, loss: 0.27702635526657104 2023-01-24 02:15:19.304164: step: 286/466, loss: 0.5526717901229858 2023-01-24 02:15:20.124330: step: 288/466, loss: 0.6723749041557312 2023-01-24 02:15:21.002412: step: 290/466, loss: 0.4218622148036957 2023-01-24 02:15:21.883945: step: 292/466, loss: 0.776480495929718 2023-01-24 02:15:22.755324: step: 294/466, loss: 0.6810406446456909 2023-01-24 02:15:23.478327: step: 296/466, loss: 1.0859265327453613 2023-01-24 02:15:24.224702: step: 298/466, loss: 0.3822017312049866 2023-01-24 02:15:25.000716: step: 300/466, loss: 1.7835806608200073 2023-01-24 02:15:25.735844: step: 302/466, loss: 0.3045477569103241 2023-01-24 02:15:26.557267: step: 304/466, loss: 0.27006521821022034 2023-01-24 02:15:27.239243: step: 306/466, loss: 0.22388720512390137 2023-01-24 02:15:27.995115: step: 308/466, loss: 1.2394423484802246 2023-01-24 02:15:28.694040: step: 310/466, loss: 0.9624764323234558 2023-01-24 02:15:29.362098: step: 312/466, loss: 2.5301806926727295 2023-01-24 02:15:30.170591: step: 314/466, loss: 1.4377882480621338 2023-01-24 02:15:30.934347: step: 316/466, loss: 1.201191782951355 2023-01-24 02:15:31.710690: step: 318/466, loss: 0.6634922027587891 2023-01-24 02:15:32.569433: step: 320/466, loss: 1.0930447578430176 2023-01-24 02:15:33.274356: step: 322/466, loss: 0.34051230549812317 2023-01-24 02:15:34.049701: step: 324/466, loss: 0.29256054759025574 2023-01-24 02:15:34.879118: step: 326/466, loss: 2.6791293621063232 2023-01-24 02:15:35.619744: step: 328/466, loss: 0.7095626592636108 2023-01-24 02:15:36.285143: step: 330/466, loss: 1.0395307540893555 2023-01-24 02:15:37.118574: step: 332/466, loss: 0.4586993455886841 2023-01-24 02:15:37.849007: step: 334/466, loss: 0.5928367972373962 2023-01-24 02:15:38.544918: step: 336/466, loss: 0.4381711483001709 2023-01-24 02:15:39.386135: step: 338/466, loss: 6.912117958068848 2023-01-24 02:15:40.216576: step: 340/466, loss: 0.6078882217407227 2023-01-24 02:15:40.930702: step: 342/466, loss: 1.0685304403305054 2023-01-24 02:15:41.756290: step: 344/466, loss: 0.21866194903850555 2023-01-24 02:15:42.607181: step: 346/466, loss: 2.0231590270996094 2023-01-24 02:15:43.371575: step: 348/466, loss: 0.6598676443099976 2023-01-24 02:15:44.084849: step: 350/466, loss: 0.28906044363975525 2023-01-24 02:15:44.843169: step: 352/466, loss: 1.618789792060852 2023-01-24 02:15:45.571297: step: 354/466, loss: 1.2375966310501099 2023-01-24 02:15:46.278424: step: 356/466, loss: 0.8457720875740051 2023-01-24 02:15:47.033732: step: 358/466, loss: 0.2892751395702362 2023-01-24 02:15:47.820782: step: 360/466, loss: 0.2970247268676758 2023-01-24 02:15:48.585056: step: 362/466, loss: 0.39713749289512634 2023-01-24 02:15:49.450055: step: 364/466, loss: 0.9225302338600159 2023-01-24 02:15:50.218413: step: 366/466, loss: 0.5669893026351929 2023-01-24 02:15:50.966363: step: 368/466, loss: 1.1761888265609741 2023-01-24 02:15:51.733348: step: 370/466, loss: 1.6809601783752441 2023-01-24 02:15:52.455249: step: 372/466, loss: 0.9083019495010376 2023-01-24 02:15:53.190930: step: 374/466, loss: 1.4838683605194092 2023-01-24 02:15:54.045200: step: 376/466, loss: 1.4812084436416626 2023-01-24 02:15:54.804177: step: 378/466, loss: 0.3336334228515625 2023-01-24 02:15:55.526999: step: 380/466, loss: 0.2552716135978699 2023-01-24 02:15:56.305830: step: 382/466, loss: 0.4424442946910858 2023-01-24 02:15:57.198380: step: 384/466, loss: 0.5649930238723755 2023-01-24 02:15:57.955929: step: 386/466, loss: 0.9104715585708618 2023-01-24 02:15:58.727212: step: 388/466, loss: 0.6600866913795471 2023-01-24 02:15:59.504091: step: 390/466, loss: 0.5454346537590027 2023-01-24 02:16:00.139873: step: 392/466, loss: 0.26466286182403564 2023-01-24 02:16:00.913664: step: 394/466, loss: 0.40282976627349854 2023-01-24 02:16:01.614430: step: 396/466, loss: 0.1367294043302536 2023-01-24 02:16:02.352706: step: 398/466, loss: 2.8048770427703857 2023-01-24 02:16:03.019960: step: 400/466, loss: 0.240428626537323 2023-01-24 02:16:03.746679: step: 402/466, loss: 0.4536705017089844 2023-01-24 02:16:04.544721: step: 404/466, loss: 0.6393134593963623 2023-01-24 02:16:05.304239: step: 406/466, loss: 0.4184322953224182 2023-01-24 02:16:06.064080: step: 408/466, loss: 3.7611472606658936 2023-01-24 02:16:07.031445: step: 410/466, loss: 0.3797491490840912 2023-01-24 02:16:07.756715: step: 412/466, loss: 0.8127273321151733 2023-01-24 02:16:08.476853: step: 414/466, loss: 0.5759282112121582 2023-01-24 02:16:09.236210: step: 416/466, loss: 0.8959246277809143 2023-01-24 02:16:09.985888: step: 418/466, loss: 0.698711633682251 2023-01-24 02:16:10.712215: step: 420/466, loss: 1.1171255111694336 2023-01-24 02:16:11.436388: step: 422/466, loss: 1.4768218994140625 2023-01-24 02:16:12.164400: step: 424/466, loss: 1.5017131567001343 2023-01-24 02:16:12.895387: step: 426/466, loss: 0.3037756681442261 2023-01-24 02:16:13.798840: step: 428/466, loss: 1.1463508605957031 2023-01-24 02:16:14.580095: step: 430/466, loss: 0.897317111492157 2023-01-24 02:16:15.327896: step: 432/466, loss: 1.296699047088623 2023-01-24 02:16:16.097373: step: 434/466, loss: 0.975659191608429 2023-01-24 02:16:16.899430: step: 436/466, loss: 0.4669412076473236 2023-01-24 02:16:17.637559: step: 438/466, loss: 2.3514862060546875 2023-01-24 02:16:18.360723: step: 440/466, loss: 0.5584709048271179 2023-01-24 02:16:19.083420: step: 442/466, loss: 1.1041259765625 2023-01-24 02:16:19.855572: step: 444/466, loss: 0.24737270176410675 2023-01-24 02:16:20.677693: step: 446/466, loss: 0.5634731650352478 2023-01-24 02:16:21.392993: step: 448/466, loss: 0.5878589153289795 2023-01-24 02:16:22.100229: step: 450/466, loss: 0.5585016012191772 2023-01-24 02:16:22.873673: step: 452/466, loss: 0.5468478202819824 2023-01-24 02:16:23.681396: step: 454/466, loss: 0.30728453397750854 2023-01-24 02:16:24.413791: step: 456/466, loss: 0.6326717734336853 2023-01-24 02:16:25.176789: step: 458/466, loss: 2.432973861694336 2023-01-24 02:16:25.998964: step: 460/466, loss: 1.4347832202911377 2023-01-24 02:16:26.751240: step: 462/466, loss: 1.02238929271698 2023-01-24 02:16:27.551224: step: 464/466, loss: 0.8291895389556885 2023-01-24 02:16:28.367562: step: 466/466, loss: 0.7763910889625549 2023-01-24 02:16:29.271505: step: 468/466, loss: 1.2875326871871948 2023-01-24 02:16:30.089382: step: 470/466, loss: 0.9274290800094604 2023-01-24 02:16:30.885955: step: 472/466, loss: 0.7256883978843689 2023-01-24 02:16:31.805223: step: 474/466, loss: 0.2669588029384613 2023-01-24 02:16:32.562212: step: 476/466, loss: 0.35543161630630493 2023-01-24 02:16:33.313247: step: 478/466, loss: 0.21915604174137115 2023-01-24 02:16:34.049467: step: 480/466, loss: 1.8925755023956299 2023-01-24 02:16:34.861359: step: 482/466, loss: 0.35683223605155945 2023-01-24 02:16:35.652823: step: 484/466, loss: 1.3415595293045044 2023-01-24 02:16:36.421478: step: 486/466, loss: 1.1984599828720093 2023-01-24 02:16:37.194525: step: 488/466, loss: 5.187197208404541 2023-01-24 02:16:37.959454: step: 490/466, loss: 0.2753686308860779 2023-01-24 02:16:38.695497: step: 492/466, loss: 0.2652628421783447 2023-01-24 02:16:39.396577: step: 494/466, loss: 1.2016327381134033 2023-01-24 02:16:40.191035: step: 496/466, loss: 0.6046364903450012 2023-01-24 02:16:40.970870: step: 498/466, loss: 0.8817658424377441 2023-01-24 02:16:41.778629: step: 500/466, loss: 0.38747915625572205 2023-01-24 02:16:42.634949: step: 502/466, loss: 0.7070812582969666 2023-01-24 02:16:43.354375: step: 504/466, loss: 0.3789001703262329 2023-01-24 02:16:44.060745: step: 506/466, loss: 0.9974977374076843 2023-01-24 02:16:44.805698: step: 508/466, loss: 0.4238806366920471 2023-01-24 02:16:45.541685: step: 510/466, loss: 0.3909887373447418 2023-01-24 02:16:46.274060: step: 512/466, loss: 0.32775312662124634 2023-01-24 02:16:47.027311: step: 514/466, loss: 0.47133827209472656 2023-01-24 02:16:47.759577: step: 516/466, loss: 0.8088892102241516 2023-01-24 02:16:48.545665: step: 518/466, loss: 0.7348328232765198 2023-01-24 02:16:49.310787: step: 520/466, loss: 2.851290225982666 2023-01-24 02:16:50.034353: step: 522/466, loss: 0.6305214166641235 2023-01-24 02:16:50.776161: step: 524/466, loss: 0.8245713710784912 2023-01-24 02:16:51.604618: step: 526/466, loss: 0.8373036980628967 2023-01-24 02:16:52.327737: step: 528/466, loss: 1.6329104900360107 2023-01-24 02:16:53.148068: step: 530/466, loss: 0.21834726631641388 2023-01-24 02:16:53.860441: step: 532/466, loss: 0.2542460262775421 2023-01-24 02:16:54.542354: step: 534/466, loss: 0.741761326789856 2023-01-24 02:16:55.291158: step: 536/466, loss: 0.15138760209083557 2023-01-24 02:16:56.063550: step: 538/466, loss: 1.3336399793624878 2023-01-24 02:16:56.846778: step: 540/466, loss: 3.1425106525421143 2023-01-24 02:16:57.660011: step: 542/466, loss: 0.24177254736423492 2023-01-24 02:16:58.499250: step: 544/466, loss: 18.67690086364746 2023-01-24 02:16:59.173500: step: 546/466, loss: 0.42344048619270325 2023-01-24 02:16:59.933063: step: 548/466, loss: 1.8342130184173584 2023-01-24 02:17:00.735981: step: 550/466, loss: 0.5240026116371155 2023-01-24 02:17:01.498087: step: 552/466, loss: 0.2808763086795807 2023-01-24 02:17:02.391165: step: 554/466, loss: 1.0640990734100342 2023-01-24 02:17:03.166837: step: 556/466, loss: 0.8659460544586182 2023-01-24 02:17:03.908138: step: 558/466, loss: 0.5987705588340759 2023-01-24 02:17:04.652209: step: 560/466, loss: 1.1994832754135132 2023-01-24 02:17:05.485858: step: 562/466, loss: 0.2609291076660156 2023-01-24 02:17:06.316176: step: 564/466, loss: 0.9732968807220459 2023-01-24 02:17:07.073519: step: 566/466, loss: 1.5298984050750732 2023-01-24 02:17:07.837499: step: 568/466, loss: 0.4171451926231384 2023-01-24 02:17:08.593843: step: 570/466, loss: 0.2139699012041092 2023-01-24 02:17:09.345905: step: 572/466, loss: 0.9007517099380493 2023-01-24 02:17:10.068834: step: 574/466, loss: 0.17563261091709137 2023-01-24 02:17:10.829535: step: 576/466, loss: 1.719172716140747 2023-01-24 02:17:11.626062: step: 578/466, loss: 1.16365385055542 2023-01-24 02:17:12.315502: step: 580/466, loss: 0.7707199454307556 2023-01-24 02:17:13.017712: step: 582/466, loss: 0.9323350191116333 2023-01-24 02:17:13.706905: step: 584/466, loss: 0.7965724468231201 2023-01-24 02:17:14.431589: step: 586/466, loss: 1.6608428955078125 2023-01-24 02:17:15.165891: step: 588/466, loss: 1.4693870544433594 2023-01-24 02:17:16.012036: step: 590/466, loss: 1.5551859140396118 2023-01-24 02:17:16.774858: step: 592/466, loss: 0.7750275135040283 2023-01-24 02:17:17.539900: step: 594/466, loss: 0.2587835192680359 2023-01-24 02:17:18.246839: step: 596/466, loss: 0.2051980197429657 2023-01-24 02:17:19.056165: step: 598/466, loss: 0.41535842418670654 2023-01-24 02:17:19.894501: step: 600/466, loss: 0.62410968542099 2023-01-24 02:17:20.624128: step: 602/466, loss: 0.3837363123893738 2023-01-24 02:17:21.313244: step: 604/466, loss: 0.22830593585968018 2023-01-24 02:17:22.119524: step: 606/466, loss: 0.7377241253852844 2023-01-24 02:17:22.925936: step: 608/466, loss: 0.8769369125366211 2023-01-24 02:17:23.728004: step: 610/466, loss: 0.6371053457260132 2023-01-24 02:17:24.451587: step: 612/466, loss: 0.5381723046302795 2023-01-24 02:17:25.234553: step: 614/466, loss: 0.21260666847229004 2023-01-24 02:17:26.015032: step: 616/466, loss: 0.7615000009536743 2023-01-24 02:17:26.795058: step: 618/466, loss: 0.9144954085350037 2023-01-24 02:17:27.520853: step: 620/466, loss: 0.6950018405914307 2023-01-24 02:17:28.191992: step: 622/466, loss: 1.596514105796814 2023-01-24 02:17:29.022893: step: 624/466, loss: 1.3464326858520508 2023-01-24 02:17:29.760934: step: 626/466, loss: 0.23497076332569122 2023-01-24 02:17:30.557770: step: 628/466, loss: 0.43337422609329224 2023-01-24 02:17:31.262462: step: 630/466, loss: 0.8914546966552734 2023-01-24 02:17:31.988120: step: 632/466, loss: 4.810173511505127 2023-01-24 02:17:32.846528: step: 634/466, loss: 1.0508265495300293 2023-01-24 02:17:33.572209: step: 636/466, loss: 0.9275634288787842 2023-01-24 02:17:34.302184: step: 638/466, loss: 1.0370503664016724 2023-01-24 02:17:35.104608: step: 640/466, loss: 0.1253279745578766 2023-01-24 02:17:35.892632: step: 642/466, loss: 0.9695085883140564 2023-01-24 02:17:36.713878: step: 644/466, loss: 0.6428408026695251 2023-01-24 02:17:37.434846: step: 646/466, loss: 0.29084670543670654 2023-01-24 02:17:38.225493: step: 648/466, loss: 0.23787152767181396 2023-01-24 02:17:38.979263: step: 650/466, loss: 0.9056107401847839 2023-01-24 02:17:39.675770: step: 652/466, loss: 0.588067889213562 2023-01-24 02:17:40.439876: step: 654/466, loss: 1.6399379968643188 2023-01-24 02:17:41.305448: step: 656/466, loss: 0.40053674578666687 2023-01-24 02:17:42.070951: step: 658/466, loss: 0.2362254559993744 2023-01-24 02:17:42.871767: step: 660/466, loss: 0.42605677247047424 2023-01-24 02:17:43.595363: step: 662/466, loss: 0.5030765533447266 2023-01-24 02:17:44.332387: step: 664/466, loss: 0.21947017312049866 2023-01-24 02:17:45.076185: step: 666/466, loss: 0.6463884115219116 2023-01-24 02:17:45.836204: step: 668/466, loss: 2.0798840522766113 2023-01-24 02:17:46.559587: step: 670/466, loss: 0.25600212812423706 2023-01-24 02:17:47.339843: step: 672/466, loss: 1.530958890914917 2023-01-24 02:17:48.106445: step: 674/466, loss: 0.5654168725013733 2023-01-24 02:17:48.921504: step: 676/466, loss: 0.47959205508232117 2023-01-24 02:17:49.655179: step: 678/466, loss: 0.8467852473258972 2023-01-24 02:17:50.412997: step: 680/466, loss: 0.7995301485061646 2023-01-24 02:17:51.140760: step: 682/466, loss: 0.8343058228492737 2023-01-24 02:17:51.873995: step: 684/466, loss: 0.2655002474784851 2023-01-24 02:17:52.572949: step: 686/466, loss: 0.5379340648651123 2023-01-24 02:17:53.274986: step: 688/466, loss: 0.17376302182674408 2023-01-24 02:17:54.057703: step: 690/466, loss: 0.7422475814819336 2023-01-24 02:17:54.889208: step: 692/466, loss: 0.5891343355178833 2023-01-24 02:17:55.600158: step: 694/466, loss: 0.6532750129699707 2023-01-24 02:17:56.448600: step: 696/466, loss: 0.570507287979126 2023-01-24 02:17:57.204134: step: 698/466, loss: 1.0127125978469849 2023-01-24 02:17:57.951206: step: 700/466, loss: 0.2279486060142517 2023-01-24 02:17:58.689421: step: 702/466, loss: 0.6048024892807007 2023-01-24 02:17:59.473783: step: 704/466, loss: 1.9379174709320068 2023-01-24 02:18:00.273092: step: 706/466, loss: 0.3877120614051819 2023-01-24 02:18:01.045972: step: 708/466, loss: 0.3327438235282898 2023-01-24 02:18:01.807599: step: 710/466, loss: 0.12455499172210693 2023-01-24 02:18:02.573060: step: 712/466, loss: 1.2360749244689941 2023-01-24 02:18:03.293527: step: 714/466, loss: 7.34765100479126 2023-01-24 02:18:04.096500: step: 716/466, loss: 0.48359206318855286 2023-01-24 02:18:04.911437: step: 718/466, loss: 0.2707675099372864 2023-01-24 02:18:05.695314: step: 720/466, loss: 0.4693552851676941 2023-01-24 02:18:06.407038: step: 722/466, loss: 0.7341639995574951 2023-01-24 02:18:07.169400: step: 724/466, loss: 0.7864471077919006 2023-01-24 02:18:07.839057: step: 726/466, loss: 0.5092653632164001 2023-01-24 02:18:08.624069: step: 728/466, loss: 0.47015196084976196 2023-01-24 02:18:09.460420: step: 730/466, loss: 0.7482075095176697 2023-01-24 02:18:10.172153: step: 732/466, loss: 0.14449910819530487 2023-01-24 02:18:10.909823: step: 734/466, loss: 0.42004555463790894 2023-01-24 02:18:11.748670: step: 736/466, loss: 1.316415548324585 2023-01-24 02:18:12.631129: step: 738/466, loss: 1.8354363441467285 2023-01-24 02:18:13.482211: step: 740/466, loss: 0.6618088483810425 2023-01-24 02:18:14.258327: step: 742/466, loss: 0.41975152492523193 2023-01-24 02:18:15.068315: step: 744/466, loss: 1.0764285326004028 2023-01-24 02:18:15.808037: step: 746/466, loss: 0.16238969564437866 2023-01-24 02:18:16.598849: step: 748/466, loss: 1.6173679828643799 2023-01-24 02:18:17.470088: step: 750/466, loss: 2.1954853534698486 2023-01-24 02:18:18.288533: step: 752/466, loss: 0.3339827358722687 2023-01-24 02:18:19.021539: step: 754/466, loss: 0.7078976035118103 2023-01-24 02:18:19.770259: step: 756/466, loss: 0.5993287563323975 2023-01-24 02:18:20.568902: step: 758/466, loss: 0.6090127825737 2023-01-24 02:18:21.379715: step: 760/466, loss: 0.8635631799697876 2023-01-24 02:18:22.105284: step: 762/466, loss: 0.2963520288467407 2023-01-24 02:18:22.912299: step: 764/466, loss: 0.6128653883934021 2023-01-24 02:18:23.637235: step: 766/466, loss: 0.7596311569213867 2023-01-24 02:18:24.409958: step: 768/466, loss: 0.6596415638923645 2023-01-24 02:18:25.233343: step: 770/466, loss: 0.3747796416282654 2023-01-24 02:18:25.903009: step: 772/466, loss: 0.8948156237602234 2023-01-24 02:18:26.661133: step: 774/466, loss: 0.7336122989654541 2023-01-24 02:18:27.465525: step: 776/466, loss: 0.9042572379112244 2023-01-24 02:18:28.259994: step: 778/466, loss: 1.0166397094726562 2023-01-24 02:18:28.929001: step: 780/466, loss: 0.44153064489364624 2023-01-24 02:18:29.629263: step: 782/466, loss: 0.4000193178653717 2023-01-24 02:18:30.519364: step: 784/466, loss: 0.23604170978069305 2023-01-24 02:18:31.354261: step: 786/466, loss: 0.6637749671936035 2023-01-24 02:18:32.188111: step: 788/466, loss: 1.5385891199111938 2023-01-24 02:18:33.025518: step: 790/466, loss: 0.2822076082229614 2023-01-24 02:18:33.766333: step: 792/466, loss: 0.3108176589012146 2023-01-24 02:18:34.436418: step: 794/466, loss: 0.38376277685165405 2023-01-24 02:18:35.203866: step: 796/466, loss: 0.9600449800491333 2023-01-24 02:18:35.930788: step: 798/466, loss: 0.49588266015052795 2023-01-24 02:18:36.805185: step: 800/466, loss: 0.7010972499847412 2023-01-24 02:18:37.547620: step: 802/466, loss: 0.604422926902771 2023-01-24 02:18:38.332614: step: 804/466, loss: 0.1080489307641983 2023-01-24 02:18:39.067470: step: 806/466, loss: 0.25231894850730896 2023-01-24 02:18:39.836288: step: 808/466, loss: 0.7223091721534729 2023-01-24 02:18:40.630262: step: 810/466, loss: 1.1986238956451416 2023-01-24 02:18:41.403662: step: 812/466, loss: 0.7812969088554382 2023-01-24 02:18:42.137190: step: 814/466, loss: 0.8877654671669006 2023-01-24 02:18:42.871132: step: 816/466, loss: 0.5675227642059326 2023-01-24 02:18:43.642577: step: 818/466, loss: 1.0517579317092896 2023-01-24 02:18:44.504060: step: 820/466, loss: 0.8483049273490906 2023-01-24 02:18:45.231747: step: 822/466, loss: 1.1111209392547607 2023-01-24 02:18:45.946979: step: 824/466, loss: 0.7873216271400452 2023-01-24 02:18:46.600494: step: 826/466, loss: 0.2162623107433319 2023-01-24 02:18:47.456126: step: 828/466, loss: 0.521052360534668 2023-01-24 02:18:48.268980: step: 830/466, loss: 0.7919299602508545 2023-01-24 02:18:49.008764: step: 832/466, loss: 1.0302027463912964 2023-01-24 02:18:49.714610: step: 834/466, loss: 0.3500521183013916 2023-01-24 02:18:50.459513: step: 836/466, loss: 0.7324089407920837 2023-01-24 02:18:51.219709: step: 838/466, loss: 0.26163384318351746 2023-01-24 02:18:51.895183: step: 840/466, loss: 0.4300655126571655 2023-01-24 02:18:52.660620: step: 842/466, loss: 0.7690464854240417 2023-01-24 02:18:53.472891: step: 844/466, loss: 0.5059322118759155 2023-01-24 02:18:54.279130: step: 846/466, loss: 0.29137077927589417 2023-01-24 02:18:55.045380: step: 848/466, loss: 0.3070172965526581 2023-01-24 02:18:55.786540: step: 850/466, loss: 0.19828540086746216 2023-01-24 02:18:56.511962: step: 852/466, loss: 0.4829852283000946 2023-01-24 02:18:57.338603: step: 854/466, loss: 0.6273636817932129 2023-01-24 02:18:58.073697: step: 856/466, loss: 0.22892563045024872 2023-01-24 02:18:58.782414: step: 858/466, loss: 0.9977620244026184 2023-01-24 02:18:59.553152: step: 860/466, loss: 0.20873820781707764 2023-01-24 02:19:00.346874: step: 862/466, loss: 0.6448942422866821 2023-01-24 02:19:01.090374: step: 864/466, loss: 1.3498508930206299 2023-01-24 02:19:01.869319: step: 866/466, loss: 0.2051345258951187 2023-01-24 02:19:02.705776: step: 868/466, loss: 0.6440752744674683 2023-01-24 02:19:03.439688: step: 870/466, loss: 0.261584609746933 2023-01-24 02:19:04.283262: step: 872/466, loss: 0.2732155919075012 2023-01-24 02:19:05.067399: step: 874/466, loss: 0.8551644086837769 2023-01-24 02:19:05.860257: step: 876/466, loss: 1.28314208984375 2023-01-24 02:19:06.651625: step: 878/466, loss: 0.34755074977874756 2023-01-24 02:19:07.347496: step: 880/466, loss: 0.30099064111709595 2023-01-24 02:19:08.112955: step: 882/466, loss: 0.32799744606018066 2023-01-24 02:19:08.881361: step: 884/466, loss: 0.9017066955566406 2023-01-24 02:19:09.611306: step: 886/466, loss: 0.8086469173431396 2023-01-24 02:19:10.360935: step: 888/466, loss: 1.2836970090866089 2023-01-24 02:19:11.101461: step: 890/466, loss: 1.206702470779419 2023-01-24 02:19:11.868206: step: 892/466, loss: 0.3659389615058899 2023-01-24 02:19:12.698000: step: 894/466, loss: 2.131016731262207 2023-01-24 02:19:13.530111: step: 896/466, loss: 1.3239178657531738 2023-01-24 02:19:14.327395: step: 898/466, loss: 7.509538650512695 2023-01-24 02:19:15.103187: step: 900/466, loss: 0.4374963939189911 2023-01-24 02:19:15.837404: step: 902/466, loss: 0.9535677433013916 2023-01-24 02:19:16.521047: step: 904/466, loss: 0.19704625010490417 2023-01-24 02:19:17.274534: step: 906/466, loss: 0.3004119396209717 2023-01-24 02:19:18.019503: step: 908/466, loss: 0.8080199956893921 2023-01-24 02:19:18.788782: step: 910/466, loss: 1.2489067316055298 2023-01-24 02:19:19.558458: step: 912/466, loss: 1.3066067695617676 2023-01-24 02:19:20.333207: step: 914/466, loss: 0.8604760766029358 2023-01-24 02:19:21.078821: step: 916/466, loss: 3.206796407699585 2023-01-24 02:19:21.707569: step: 918/466, loss: 0.5037591457366943 2023-01-24 02:19:22.416506: step: 920/466, loss: 0.28978392481803894 2023-01-24 02:19:23.135674: step: 922/466, loss: 1.1058472394943237 2023-01-24 02:19:23.912201: step: 924/466, loss: 0.2016083300113678 2023-01-24 02:19:24.612196: step: 926/466, loss: 1.3228625059127808 2023-01-24 02:19:25.343773: step: 928/466, loss: 1.0719577074050903 2023-01-24 02:19:26.104353: step: 930/466, loss: 2.4533298015594482 2023-01-24 02:19:26.896714: step: 932/466, loss: 10.044556617736816 ================================================== Loss: 0.938 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34251131548536207, 'r': 0.30618435778236913, 'f1': 0.3233306818181818}, 'combined': 0.23824366028708133, 'epoch': 5} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3810191288111564, 'r': 0.22412889930068022, 'f1': 0.2822363917119677}, 'combined': 0.17347212368638013, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31755675042294146, 'r': 0.3037237859158815, 'f1': 0.3104862709846765}, 'combined': 0.22877935756765636, 'epoch': 5} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.36610352580066574, 'r': 0.21915539779763038, 'f1': 0.27418142841348564}, 'combined': 0.1685212681956058, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35633182648907596, 'r': 0.3171150410310752, 'f1': 0.33558157956501333}, 'combined': 0.24727063757422033, 'epoch': 5} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38903978397030203, 'r': 0.22009690200395982, 'f1': 0.28114035217301386}, 'combined': 0.17364551163627331, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2661290322580645, 'r': 0.2357142857142857, 'f1': 0.24999999999999994}, 'combined': 0.16666666666666663, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2403846153846154, 'r': 0.2717391304347826, 'f1': 0.25510204081632654}, 'combined': 0.12755102040816327, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 5} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.335154981737013, 'r': 0.2849135328618251, 'f1': 0.30799883449883453}, 'combined': 0.22694650963072016, 'epoch': 4} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.34747237279256127, 'r': 0.218033658295073, 'f1': 0.2679393973950074}, 'combined': 0.16468470278912647, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30714285714285716, 'r': 0.30714285714285716, 'f1': 0.30714285714285716}, 'combined': 0.20476190476190476, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35633182648907596, 'r': 0.3171150410310752, 'f1': 0.33558157956501333}, 'combined': 0.24727063757422033, 'epoch': 5} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38903978397030203, 'r': 0.22009690200395982, 'f1': 0.28114035217301386}, 'combined': 0.17364551163627331, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 5} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:22:16.654407: step: 2/466, loss: 0.32218968868255615 2023-01-24 02:22:17.417783: step: 4/466, loss: 0.5556846857070923 2023-01-24 02:22:18.207943: step: 6/466, loss: 1.0528106689453125 2023-01-24 02:22:18.998713: step: 8/466, loss: 0.4053214192390442 2023-01-24 02:22:19.797263: step: 10/466, loss: 1.505388617515564 2023-01-24 02:22:20.546429: step: 12/466, loss: 0.5031132102012634 2023-01-24 02:22:21.318202: step: 14/466, loss: 0.7867810726165771 2023-01-24 02:22:22.044054: step: 16/466, loss: 0.9841118454933167 2023-01-24 02:22:22.808187: step: 18/466, loss: 0.7677281498908997 2023-01-24 02:22:23.530473: step: 20/466, loss: 0.23764349520206451 2023-01-24 02:22:24.275539: step: 22/466, loss: 0.7144334316253662 2023-01-24 02:22:24.970998: step: 24/466, loss: 0.38185957074165344 2023-01-24 02:22:25.791020: step: 26/466, loss: 2.423734188079834 2023-01-24 02:22:26.525544: step: 28/466, loss: 0.43167775869369507 2023-01-24 02:22:27.228548: step: 30/466, loss: 0.0954960510134697 2023-01-24 02:22:27.903767: step: 32/466, loss: 0.5020628571510315 2023-01-24 02:22:28.709971: step: 34/466, loss: 0.41035231947898865 2023-01-24 02:22:29.416456: step: 36/466, loss: 0.7318971753120422 2023-01-24 02:22:30.177648: step: 38/466, loss: 0.7467445135116577 2023-01-24 02:22:30.958436: step: 40/466, loss: 0.21532611548900604 2023-01-24 02:22:31.689800: step: 42/466, loss: 0.3341991901397705 2023-01-24 02:22:32.464448: step: 44/466, loss: 0.5254302620887756 2023-01-24 02:22:33.187622: step: 46/466, loss: 0.40966877341270447 2023-01-24 02:22:34.009716: step: 48/466, loss: 0.35710608959198 2023-01-24 02:22:34.749160: step: 50/466, loss: 0.3491014242172241 2023-01-24 02:22:35.481917: step: 52/466, loss: 0.5511988997459412 2023-01-24 02:22:36.262358: step: 54/466, loss: 1.2386209964752197 2023-01-24 02:22:37.025981: step: 56/466, loss: 1.3478890657424927 2023-01-24 02:22:37.792298: step: 58/466, loss: 0.8968897461891174 2023-01-24 02:22:38.493757: step: 60/466, loss: 0.2680509388446808 2023-01-24 02:22:39.167336: step: 62/466, loss: 1.1116560697555542 2023-01-24 02:22:39.940718: step: 64/466, loss: 0.18260887265205383 2023-01-24 02:22:40.701418: step: 66/466, loss: 0.3070507347583771 2023-01-24 02:22:41.490772: step: 68/466, loss: 1.1425074338912964 2023-01-24 02:22:42.265007: step: 70/466, loss: 0.8123894333839417 2023-01-24 02:22:43.016522: step: 72/466, loss: 0.27354997396469116 2023-01-24 02:22:43.760919: step: 74/466, loss: 0.36695006489753723 2023-01-24 02:22:44.500753: step: 76/466, loss: 0.33018723130226135 2023-01-24 02:22:45.243509: step: 78/466, loss: 0.5599246621131897 2023-01-24 02:22:46.033293: step: 80/466, loss: 0.7603071928024292 2023-01-24 02:22:46.760139: step: 82/466, loss: 0.3653887212276459 2023-01-24 02:22:47.488833: step: 84/466, loss: 1.2102289199829102 2023-01-24 02:22:48.335364: step: 86/466, loss: 0.4492781162261963 2023-01-24 02:22:49.098015: step: 88/466, loss: 0.325231671333313 2023-01-24 02:22:49.775571: step: 90/466, loss: 0.2342669665813446 2023-01-24 02:22:50.486976: step: 92/466, loss: 0.4624057412147522 2023-01-24 02:22:51.389098: step: 94/466, loss: 0.43966928124427795 2023-01-24 02:22:52.176172: step: 96/466, loss: 0.6490041613578796 2023-01-24 02:22:52.928778: step: 98/466, loss: 0.9651503562927246 2023-01-24 02:22:53.677115: step: 100/466, loss: 0.7601068615913391 2023-01-24 02:22:54.451471: step: 102/466, loss: 0.22763891518115997 2023-01-24 02:22:55.247067: step: 104/466, loss: 0.3821936249732971 2023-01-24 02:22:55.969642: step: 106/466, loss: 0.581498384475708 2023-01-24 02:22:56.722710: step: 108/466, loss: 0.23628266155719757 2023-01-24 02:22:57.415976: step: 110/466, loss: 0.5058366656303406 2023-01-24 02:22:58.149626: step: 112/466, loss: 0.2841312289237976 2023-01-24 02:22:58.867849: step: 114/466, loss: 0.5994552373886108 2023-01-24 02:22:59.642610: step: 116/466, loss: 0.16569878160953522 2023-01-24 02:23:00.383257: step: 118/466, loss: 0.16361653804779053 2023-01-24 02:23:01.156722: step: 120/466, loss: 0.3208037316799164 2023-01-24 02:23:01.937863: step: 122/466, loss: 0.606379508972168 2023-01-24 02:23:02.712512: step: 124/466, loss: 1.2331677675247192 2023-01-24 02:23:03.436438: step: 126/466, loss: 0.25890928506851196 2023-01-24 02:23:04.145204: step: 128/466, loss: 0.2537651062011719 2023-01-24 02:23:04.872777: step: 130/466, loss: 0.29682645201683044 2023-01-24 02:23:05.704132: step: 132/466, loss: 2.0517148971557617 2023-01-24 02:23:06.453309: step: 134/466, loss: 0.28343483805656433 2023-01-24 02:23:07.198305: step: 136/466, loss: 0.7250392436981201 2023-01-24 02:23:07.969247: step: 138/466, loss: 0.1914062350988388 2023-01-24 02:23:08.732306: step: 140/466, loss: 0.3541911840438843 2023-01-24 02:23:09.520829: step: 142/466, loss: 0.8470289707183838 2023-01-24 02:23:10.269437: step: 144/466, loss: 0.49261319637298584 2023-01-24 02:23:11.009146: step: 146/466, loss: 0.34677648544311523 2023-01-24 02:23:11.773599: step: 148/466, loss: 0.2643190920352936 2023-01-24 02:23:12.563184: step: 150/466, loss: 0.1552414745092392 2023-01-24 02:23:13.296702: step: 152/466, loss: 0.20740438997745514 2023-01-24 02:23:14.123039: step: 154/466, loss: 0.21313226222991943 2023-01-24 02:23:14.959095: step: 156/466, loss: 0.33110979199409485 2023-01-24 02:23:15.751764: step: 158/466, loss: 0.2816070318222046 2023-01-24 02:23:16.498020: step: 160/466, loss: 0.2219778299331665 2023-01-24 02:23:17.239764: step: 162/466, loss: 0.404222309589386 2023-01-24 02:23:18.033549: step: 164/466, loss: 0.22453781962394714 2023-01-24 02:23:18.738736: step: 166/466, loss: 0.3454417884349823 2023-01-24 02:23:19.504673: step: 168/466, loss: 0.5886520743370056 2023-01-24 02:23:20.205540: step: 170/466, loss: 0.6885159015655518 2023-01-24 02:23:21.009515: step: 172/466, loss: 0.8449385166168213 2023-01-24 02:23:21.827097: step: 174/466, loss: 0.41053617000579834 2023-01-24 02:23:22.609037: step: 176/466, loss: 0.25612226128578186 2023-01-24 02:23:23.396214: step: 178/466, loss: 1.0309232473373413 2023-01-24 02:23:24.075983: step: 180/466, loss: 1.5213203430175781 2023-01-24 02:23:24.873203: step: 182/466, loss: 0.6834684610366821 2023-01-24 02:23:25.629507: step: 184/466, loss: 0.25764113664627075 2023-01-24 02:23:26.360472: step: 186/466, loss: 1.1139030456542969 2023-01-24 02:23:27.213030: step: 188/466, loss: 1.328330636024475 2023-01-24 02:23:27.980907: step: 190/466, loss: 0.6299300789833069 2023-01-24 02:23:28.733510: step: 192/466, loss: 0.47483086585998535 2023-01-24 02:23:29.674538: step: 194/466, loss: 0.7948506474494934 2023-01-24 02:23:30.334910: step: 196/466, loss: 0.7495373487472534 2023-01-24 02:23:31.109958: step: 198/466, loss: 0.3398696780204773 2023-01-24 02:23:31.860485: step: 200/466, loss: 0.25219038128852844 2023-01-24 02:23:32.656124: step: 202/466, loss: 0.2839111089706421 2023-01-24 02:23:33.414184: step: 204/466, loss: 0.2768838703632355 2023-01-24 02:23:34.220147: step: 206/466, loss: 0.4428100883960724 2023-01-24 02:23:34.978749: step: 208/466, loss: 0.7311434149742126 2023-01-24 02:23:35.785709: step: 210/466, loss: 0.8593393564224243 2023-01-24 02:23:36.563652: step: 212/466, loss: 0.7541153430938721 2023-01-24 02:23:37.319187: step: 214/466, loss: 0.6181634664535522 2023-01-24 02:23:38.055623: step: 216/466, loss: 0.88047856092453 2023-01-24 02:23:38.810127: step: 218/466, loss: 1.1687943935394287 2023-01-24 02:23:39.609124: step: 220/466, loss: 0.5406815409660339 2023-01-24 02:23:40.523852: step: 222/466, loss: 0.42107611894607544 2023-01-24 02:23:41.294926: step: 224/466, loss: 0.17114980518817902 2023-01-24 02:23:42.074977: step: 226/466, loss: 0.4720715880393982 2023-01-24 02:23:42.804862: step: 228/466, loss: 0.37465938925743103 2023-01-24 02:23:43.522511: step: 230/466, loss: 0.20296700298786163 2023-01-24 02:23:44.374389: step: 232/466, loss: 0.747587263584137 2023-01-24 02:23:45.125051: step: 234/466, loss: 0.333487868309021 2023-01-24 02:23:45.931894: step: 236/466, loss: 1.3917906284332275 2023-01-24 02:23:46.752115: step: 238/466, loss: 0.7705444097518921 2023-01-24 02:23:47.502352: step: 240/466, loss: 0.5334435701370239 2023-01-24 02:23:48.223794: step: 242/466, loss: 0.6047327518463135 2023-01-24 02:23:49.140552: step: 244/466, loss: 0.7313522100448608 2023-01-24 02:23:49.878244: step: 246/466, loss: 0.6601555347442627 2023-01-24 02:23:50.607058: step: 248/466, loss: 0.19155901670455933 2023-01-24 02:23:51.432625: step: 250/466, loss: 0.16767148673534393 2023-01-24 02:23:52.141172: step: 252/466, loss: 0.27993425726890564 2023-01-24 02:23:52.941993: step: 254/466, loss: 1.27365243434906 2023-01-24 02:23:53.812464: step: 256/466, loss: 1.5138299465179443 2023-01-24 02:23:54.556086: step: 258/466, loss: 0.5258921980857849 2023-01-24 02:23:55.294387: step: 260/466, loss: 0.25220438838005066 2023-01-24 02:23:56.089670: step: 262/466, loss: 0.5644699335098267 2023-01-24 02:23:56.876288: step: 264/466, loss: 0.2249457985162735 2023-01-24 02:23:57.634991: step: 266/466, loss: 0.38131022453308105 2023-01-24 02:23:58.465793: step: 268/466, loss: 0.445722758769989 2023-01-24 02:23:59.141590: step: 270/466, loss: 1.9734113216400146 2023-01-24 02:23:59.873439: step: 272/466, loss: 0.46760833263397217 2023-01-24 02:24:00.605180: step: 274/466, loss: 0.5852957367897034 2023-01-24 02:24:01.456522: step: 276/466, loss: 1.8949702978134155 2023-01-24 02:24:02.165573: step: 278/466, loss: 1.1939055919647217 2023-01-24 02:24:02.932148: step: 280/466, loss: 1.2649636268615723 2023-01-24 02:24:03.725235: step: 282/466, loss: 0.39441004395484924 2023-01-24 02:24:04.507102: step: 284/466, loss: 0.6390337944030762 2023-01-24 02:24:05.293147: step: 286/466, loss: 0.5137861371040344 2023-01-24 02:24:06.043785: step: 288/466, loss: 0.4395367205142975 2023-01-24 02:24:06.966597: step: 290/466, loss: 0.8883119225502014 2023-01-24 02:24:07.689922: step: 292/466, loss: 0.552345335483551 2023-01-24 02:24:08.485114: step: 294/466, loss: 0.723929762840271 2023-01-24 02:24:09.248706: step: 296/466, loss: 0.530340313911438 2023-01-24 02:24:10.073522: step: 298/466, loss: 0.6502615809440613 2023-01-24 02:24:10.809821: step: 300/466, loss: 2.418755054473877 2023-01-24 02:24:11.652724: step: 302/466, loss: 0.22726400196552277 2023-01-24 02:24:12.490422: step: 304/466, loss: 0.49434563517570496 2023-01-24 02:24:13.285184: step: 306/466, loss: 0.28831496834754944 2023-01-24 02:24:14.089987: step: 308/466, loss: 0.3543457090854645 2023-01-24 02:24:14.814308: step: 310/466, loss: 0.9062546491622925 2023-01-24 02:24:15.575742: step: 312/466, loss: 0.2703440189361572 2023-01-24 02:24:16.388838: step: 314/466, loss: 0.5241992473602295 2023-01-24 02:24:17.091155: step: 316/466, loss: 0.4552978575229645 2023-01-24 02:24:17.793223: step: 318/466, loss: 0.39680472016334534 2023-01-24 02:24:18.502073: step: 320/466, loss: 0.09683714807033539 2023-01-24 02:24:19.276853: step: 322/466, loss: 0.8749290108680725 2023-01-24 02:24:20.008580: step: 324/466, loss: 0.8422608971595764 2023-01-24 02:24:20.826771: step: 326/466, loss: 0.7888513803482056 2023-01-24 02:24:21.591074: step: 328/466, loss: 0.3374817967414856 2023-01-24 02:24:22.274668: step: 330/466, loss: 0.6404813528060913 2023-01-24 02:24:23.036048: step: 332/466, loss: 0.5628029704093933 2023-01-24 02:24:23.910521: step: 334/466, loss: 0.623656153678894 2023-01-24 02:24:24.696107: step: 336/466, loss: 0.7959246039390564 2023-01-24 02:24:25.413830: step: 338/466, loss: 0.8810027837753296 2023-01-24 02:24:26.174390: step: 340/466, loss: 0.5927742719650269 2023-01-24 02:24:26.908839: step: 342/466, loss: 0.6585927605628967 2023-01-24 02:24:27.751931: step: 344/466, loss: 0.45125773549079895 2023-01-24 02:24:28.502070: step: 346/466, loss: 0.8539192080497742 2023-01-24 02:24:29.251761: step: 348/466, loss: 1.160231590270996 2023-01-24 02:24:29.957001: step: 350/466, loss: 1.3324898481369019 2023-01-24 02:24:30.711620: step: 352/466, loss: 0.45158857107162476 2023-01-24 02:24:31.500085: step: 354/466, loss: 0.3662046492099762 2023-01-24 02:24:32.343305: step: 356/466, loss: 0.6877017617225647 2023-01-24 02:24:33.089873: step: 358/466, loss: 0.5057063102722168 2023-01-24 02:24:33.886504: step: 360/466, loss: 0.7695874571800232 2023-01-24 02:24:34.661946: step: 362/466, loss: 0.49749425053596497 2023-01-24 02:24:35.507926: step: 364/466, loss: 0.14780224859714508 2023-01-24 02:24:36.306733: step: 366/466, loss: 0.7745498418807983 2023-01-24 02:24:37.116776: step: 368/466, loss: 1.1423521041870117 2023-01-24 02:24:37.922969: step: 370/466, loss: 0.5440044403076172 2023-01-24 02:24:38.597767: step: 372/466, loss: 3.054959297180176 2023-01-24 02:24:39.463312: step: 374/466, loss: 1.0194264650344849 2023-01-24 02:24:40.231508: step: 376/466, loss: 0.3306431174278259 2023-01-24 02:24:41.012907: step: 378/466, loss: 0.9671432971954346 2023-01-24 02:24:41.844619: step: 380/466, loss: 0.4413227438926697 2023-01-24 02:24:42.657947: step: 382/466, loss: 0.3477259576320648 2023-01-24 02:24:43.486783: step: 384/466, loss: 1.126652479171753 2023-01-24 02:24:44.287358: step: 386/466, loss: 0.8670724630355835 2023-01-24 02:24:45.089521: step: 388/466, loss: 1.109525442123413 2023-01-24 02:24:46.018718: step: 390/466, loss: 0.8821362257003784 2023-01-24 02:24:46.767415: step: 392/466, loss: 0.3569082021713257 2023-01-24 02:24:47.582791: step: 394/466, loss: 0.9964345693588257 2023-01-24 02:24:48.368670: step: 396/466, loss: 0.8517532348632812 2023-01-24 02:24:49.109396: step: 398/466, loss: 1.4055027961730957 2023-01-24 02:24:49.880730: step: 400/466, loss: 0.6798644065856934 2023-01-24 02:24:50.562875: step: 402/466, loss: 0.5541855692863464 2023-01-24 02:24:51.371057: step: 404/466, loss: 0.7029849290847778 2023-01-24 02:24:52.056573: step: 406/466, loss: 0.3102688789367676 2023-01-24 02:24:52.897792: step: 408/466, loss: 1.1614198684692383 2023-01-24 02:24:53.676976: step: 410/466, loss: 1.6590417623519897 2023-01-24 02:24:54.449637: step: 412/466, loss: 1.6184443235397339 2023-01-24 02:24:55.307002: step: 414/466, loss: 0.819237470626831 2023-01-24 02:24:56.145205: step: 416/466, loss: 0.5732754468917847 2023-01-24 02:24:56.857902: step: 418/466, loss: 0.37938907742500305 2023-01-24 02:24:57.595153: step: 420/466, loss: 0.46679314970970154 2023-01-24 02:24:58.337314: step: 422/466, loss: 0.9363054633140564 2023-01-24 02:24:59.060847: step: 424/466, loss: 0.8060967326164246 2023-01-24 02:24:59.835314: step: 426/466, loss: 1.919229507446289 2023-01-24 02:25:00.607343: step: 428/466, loss: 0.39510074257850647 2023-01-24 02:25:01.396041: step: 430/466, loss: 0.3275643587112427 2023-01-24 02:25:02.177036: step: 432/466, loss: 0.41811782121658325 2023-01-24 02:25:02.937732: step: 434/466, loss: 1.048869013786316 2023-01-24 02:25:03.641474: step: 436/466, loss: 0.5930517315864563 2023-01-24 02:25:04.438652: step: 438/466, loss: 0.37204745411872864 2023-01-24 02:25:05.150049: step: 440/466, loss: 0.9435140490531921 2023-01-24 02:25:05.859337: step: 442/466, loss: 0.6689704656600952 2023-01-24 02:25:06.694980: step: 444/466, loss: 1.39583158493042 2023-01-24 02:25:07.450220: step: 446/466, loss: 0.33848732709884644 2023-01-24 02:25:08.143458: step: 448/466, loss: 0.5037708282470703 2023-01-24 02:25:08.879004: step: 450/466, loss: 0.2862124741077423 2023-01-24 02:25:09.622346: step: 452/466, loss: 0.3021622598171234 2023-01-24 02:25:10.464042: step: 454/466, loss: 0.35256338119506836 2023-01-24 02:25:11.315897: step: 456/466, loss: 0.30306318402290344 2023-01-24 02:25:12.021410: step: 458/466, loss: 0.4096969962120056 2023-01-24 02:25:12.833072: step: 460/466, loss: 0.19336767494678497 2023-01-24 02:25:13.595385: step: 462/466, loss: 0.715903103351593 2023-01-24 02:25:14.350939: step: 464/466, loss: 0.6890691518783569 2023-01-24 02:25:15.194525: step: 466/466, loss: 0.416951060295105 2023-01-24 02:25:16.009178: step: 468/466, loss: 1.248590350151062 2023-01-24 02:25:16.739581: step: 470/466, loss: 0.6275112628936768 2023-01-24 02:25:17.527188: step: 472/466, loss: 0.7352412939071655 2023-01-24 02:25:18.354183: step: 474/466, loss: 0.2669612467288971 2023-01-24 02:25:19.127589: step: 476/466, loss: 2.033390998840332 2023-01-24 02:25:19.881276: step: 478/466, loss: 0.29588741064071655 2023-01-24 02:25:20.710457: step: 480/466, loss: 1.0971732139587402 2023-01-24 02:25:21.476850: step: 482/466, loss: 0.32912346720695496 2023-01-24 02:25:22.227374: step: 484/466, loss: 0.2324758917093277 2023-01-24 02:25:23.029810: step: 486/466, loss: 0.8204798102378845 2023-01-24 02:25:23.804848: step: 488/466, loss: 0.3005208671092987 2023-01-24 02:25:24.565186: step: 490/466, loss: 0.3320615589618683 2023-01-24 02:25:25.287766: step: 492/466, loss: 0.21812233328819275 2023-01-24 02:25:26.055483: step: 494/466, loss: 0.347537100315094 2023-01-24 02:25:26.864562: step: 496/466, loss: 0.3185756504535675 2023-01-24 02:25:27.637466: step: 498/466, loss: 0.8476412296295166 2023-01-24 02:25:28.385352: step: 500/466, loss: 3.0157365798950195 2023-01-24 02:25:29.211360: step: 502/466, loss: 0.45933496952056885 2023-01-24 02:25:29.998592: step: 504/466, loss: 0.8938225507736206 2023-01-24 02:25:30.698055: step: 506/466, loss: 0.782000720500946 2023-01-24 02:25:31.506895: step: 508/466, loss: 0.18423041701316833 2023-01-24 02:25:32.328095: step: 510/466, loss: 0.361537903547287 2023-01-24 02:25:33.112134: step: 512/466, loss: 0.2748829424381256 2023-01-24 02:25:33.895584: step: 514/466, loss: 0.6222298741340637 2023-01-24 02:25:34.609856: step: 516/466, loss: 0.24053962528705597 2023-01-24 02:25:35.525306: step: 518/466, loss: 0.37778693437576294 2023-01-24 02:25:36.314604: step: 520/466, loss: 0.8710047006607056 2023-01-24 02:25:37.063161: step: 522/466, loss: 0.853071928024292 2023-01-24 02:25:37.798835: step: 524/466, loss: 1.2134851217269897 2023-01-24 02:25:38.562975: step: 526/466, loss: 1.0508193969726562 2023-01-24 02:25:39.340309: step: 528/466, loss: 0.34795981645584106 2023-01-24 02:25:40.112753: step: 530/466, loss: 1.2121317386627197 2023-01-24 02:25:40.944340: step: 532/466, loss: 0.8486328721046448 2023-01-24 02:25:41.774245: step: 534/466, loss: 1.7330005168914795 2023-01-24 02:25:42.516405: step: 536/466, loss: 0.3037912845611572 2023-01-24 02:25:43.273566: step: 538/466, loss: 0.37141942977905273 2023-01-24 02:25:44.097087: step: 540/466, loss: 1.067850947380066 2023-01-24 02:25:44.888044: step: 542/466, loss: 0.240159809589386 2023-01-24 02:25:45.633912: step: 544/466, loss: 0.6632150411605835 2023-01-24 02:25:46.386434: step: 546/466, loss: 0.42939281463623047 2023-01-24 02:25:47.889249: step: 548/466, loss: 0.5655322670936584 2023-01-24 02:25:48.616286: step: 550/466, loss: 1.8335810899734497 2023-01-24 02:25:49.364267: step: 552/466, loss: 0.17235557734966278 2023-01-24 02:25:50.080171: step: 554/466, loss: 0.987075686454773 2023-01-24 02:25:50.997783: step: 556/466, loss: 0.2663685977458954 2023-01-24 02:25:51.755749: step: 558/466, loss: 0.42914122343063354 2023-01-24 02:25:52.561458: step: 560/466, loss: 0.4898962676525116 2023-01-24 02:25:53.318150: step: 562/466, loss: 0.20535391569137573 2023-01-24 02:25:54.072173: step: 564/466, loss: 0.27649348974227905 2023-01-24 02:25:54.809313: step: 566/466, loss: 0.2878555655479431 2023-01-24 02:25:55.587684: step: 568/466, loss: 0.5329242944717407 2023-01-24 02:25:56.360735: step: 570/466, loss: 1.488804578781128 2023-01-24 02:25:57.091995: step: 572/466, loss: 1.3856981992721558 2023-01-24 02:25:57.829248: step: 574/466, loss: 0.5450658798217773 2023-01-24 02:25:58.712948: step: 576/466, loss: 0.3929985463619232 2023-01-24 02:25:59.415571: step: 578/466, loss: 1.7792253494262695 2023-01-24 02:26:00.034424: step: 580/466, loss: 0.5900392532348633 2023-01-24 02:26:00.839003: step: 582/466, loss: 1.271080493927002 2023-01-24 02:26:01.577272: step: 584/466, loss: 0.14637401700019836 2023-01-24 02:26:02.352839: step: 586/466, loss: 1.0173259973526 2023-01-24 02:26:03.060518: step: 588/466, loss: 0.28128859400749207 2023-01-24 02:26:03.857603: step: 590/466, loss: 0.8834716081619263 2023-01-24 02:26:04.615134: step: 592/466, loss: 0.4259241223335266 2023-01-24 02:26:05.393042: step: 594/466, loss: 0.5690405964851379 2023-01-24 02:26:06.074709: step: 596/466, loss: 1.0316557884216309 2023-01-24 02:26:06.828633: step: 598/466, loss: 0.29038065671920776 2023-01-24 02:26:07.692096: step: 600/466, loss: 0.8943952322006226 2023-01-24 02:26:08.441906: step: 602/466, loss: 0.619583785533905 2023-01-24 02:26:09.184251: step: 604/466, loss: 0.2445245087146759 2023-01-24 02:26:09.938233: step: 606/466, loss: 0.8926266431808472 2023-01-24 02:26:10.780530: step: 608/466, loss: 0.18985801935195923 2023-01-24 02:26:11.452026: step: 610/466, loss: 0.8654029369354248 2023-01-24 02:26:12.246945: step: 612/466, loss: 1.043053388595581 2023-01-24 02:26:12.980217: step: 614/466, loss: 0.35355132818222046 2023-01-24 02:26:13.803543: step: 616/466, loss: 1.2893407344818115 2023-01-24 02:26:14.562841: step: 618/466, loss: 0.8885557055473328 2023-01-24 02:26:15.364573: step: 620/466, loss: 1.7367770671844482 2023-01-24 02:26:16.150525: step: 622/466, loss: 1.368610143661499 2023-01-24 02:26:16.958046: step: 624/466, loss: 0.21645484864711761 2023-01-24 02:26:17.690058: step: 626/466, loss: 2.2032814025878906 2023-01-24 02:26:18.431584: step: 628/466, loss: 2.669538974761963 2023-01-24 02:26:19.188591: step: 630/466, loss: 0.5482318997383118 2023-01-24 02:26:20.047953: step: 632/466, loss: 0.754304051399231 2023-01-24 02:26:20.830771: step: 634/466, loss: 0.38112232089042664 2023-01-24 02:26:21.580969: step: 636/466, loss: 1.5455775260925293 2023-01-24 02:26:22.286975: step: 638/466, loss: 0.33433040976524353 2023-01-24 02:26:23.083144: step: 640/466, loss: 0.6735621094703674 2023-01-24 02:26:23.880935: step: 642/466, loss: 0.19982242584228516 2023-01-24 02:26:24.688782: step: 644/466, loss: 0.4279138445854187 2023-01-24 02:26:25.427841: step: 646/466, loss: 0.5210850238800049 2023-01-24 02:26:26.116183: step: 648/466, loss: 1.5244331359863281 2023-01-24 02:26:26.847812: step: 650/466, loss: 0.7855544090270996 2023-01-24 02:26:27.647231: step: 652/466, loss: 0.25553497672080994 2023-01-24 02:26:28.389533: step: 654/466, loss: 0.19856083393096924 2023-01-24 02:26:29.191292: step: 656/466, loss: 0.2223929613828659 2023-01-24 02:26:29.990745: step: 658/466, loss: 0.5313467383384705 2023-01-24 02:26:30.703641: step: 660/466, loss: 0.24553091824054718 2023-01-24 02:26:31.460029: step: 662/466, loss: 1.6716569662094116 2023-01-24 02:26:32.245779: step: 664/466, loss: 0.6177428960800171 2023-01-24 02:26:33.051605: step: 666/466, loss: 0.20127610862255096 2023-01-24 02:26:33.766273: step: 668/466, loss: 2.8933916091918945 2023-01-24 02:26:34.555399: step: 670/466, loss: 0.5551666617393494 2023-01-24 02:26:35.374881: step: 672/466, loss: 0.2786451578140259 2023-01-24 02:26:36.186996: step: 674/466, loss: 0.5241547226905823 2023-01-24 02:26:36.910650: step: 676/466, loss: 0.46960535645484924 2023-01-24 02:26:37.632506: step: 678/466, loss: 0.9260277152061462 2023-01-24 02:26:38.445061: step: 680/466, loss: 0.505979597568512 2023-01-24 02:26:39.099699: step: 682/466, loss: 0.16141249239444733 2023-01-24 02:26:39.863440: step: 684/466, loss: 0.3264252543449402 2023-01-24 02:26:40.596472: step: 686/466, loss: 4.854494571685791 2023-01-24 02:26:41.359411: step: 688/466, loss: 0.3939049243927002 2023-01-24 02:26:42.196744: step: 690/466, loss: 4.195629596710205 2023-01-24 02:26:42.934681: step: 692/466, loss: 1.5494621992111206 2023-01-24 02:26:43.673003: step: 694/466, loss: 1.9979078769683838 2023-01-24 02:26:44.436723: step: 696/466, loss: 0.22917333245277405 2023-01-24 02:26:45.162450: step: 698/466, loss: 0.1751905381679535 2023-01-24 02:26:45.914013: step: 700/466, loss: 0.24713370203971863 2023-01-24 02:26:46.701871: step: 702/466, loss: 0.4419696033000946 2023-01-24 02:26:47.410769: step: 704/466, loss: 0.25651171803474426 2023-01-24 02:26:48.186164: step: 706/466, loss: 1.2192009687423706 2023-01-24 02:26:48.920512: step: 708/466, loss: 0.5099461674690247 2023-01-24 02:26:49.677006: step: 710/466, loss: 1.9569056034088135 2023-01-24 02:26:50.426226: step: 712/466, loss: 0.6427992582321167 2023-01-24 02:26:51.161616: step: 714/466, loss: 0.9513710141181946 2023-01-24 02:26:51.875047: step: 716/466, loss: 0.1762647181749344 2023-01-24 02:26:52.604039: step: 718/466, loss: 0.33705297112464905 2023-01-24 02:26:53.437321: step: 720/466, loss: 0.40447694063186646 2023-01-24 02:26:54.192288: step: 722/466, loss: 0.25762277841567993 2023-01-24 02:26:54.945407: step: 724/466, loss: 0.14812979102134705 2023-01-24 02:26:55.671244: step: 726/466, loss: 0.8303240537643433 2023-01-24 02:26:56.400541: step: 728/466, loss: 3.1632981300354004 2023-01-24 02:26:57.140288: step: 730/466, loss: 0.8790704011917114 2023-01-24 02:26:57.898845: step: 732/466, loss: 0.6939700841903687 2023-01-24 02:26:58.661265: step: 734/466, loss: 4.361475944519043 2023-01-24 02:26:59.474758: step: 736/466, loss: 1.0758943557739258 2023-01-24 02:27:00.272508: step: 738/466, loss: 0.6111956834793091 2023-01-24 02:27:01.031680: step: 740/466, loss: 0.5156033039093018 2023-01-24 02:27:01.727531: step: 742/466, loss: 0.5572746992111206 2023-01-24 02:27:02.463380: step: 744/466, loss: 0.4452946186065674 2023-01-24 02:27:03.143710: step: 746/466, loss: 1.1491637229919434 2023-01-24 02:27:03.868250: step: 748/466, loss: 0.3944651186466217 2023-01-24 02:27:04.609845: step: 750/466, loss: 0.3764747977256775 2023-01-24 02:27:05.391362: step: 752/466, loss: 0.48217853903770447 2023-01-24 02:27:06.200399: step: 754/466, loss: 0.41872501373291016 2023-01-24 02:27:06.953517: step: 756/466, loss: 0.6103464961051941 2023-01-24 02:27:07.686422: step: 758/466, loss: 0.599940836429596 2023-01-24 02:27:08.466917: step: 760/466, loss: 1.9976304769515991 2023-01-24 02:27:09.251692: step: 762/466, loss: 0.5040253400802612 2023-01-24 02:27:10.041987: step: 764/466, loss: 0.5511229038238525 2023-01-24 02:27:10.776344: step: 766/466, loss: 1.2951661348342896 2023-01-24 02:27:11.595363: step: 768/466, loss: 0.9320796728134155 2023-01-24 02:27:12.332891: step: 770/466, loss: 1.6455953121185303 2023-01-24 02:27:13.046465: step: 772/466, loss: 0.5544575452804565 2023-01-24 02:27:13.790021: step: 774/466, loss: 0.3264284133911133 2023-01-24 02:27:14.495306: step: 776/466, loss: 2.721331834793091 2023-01-24 02:27:15.258572: step: 778/466, loss: 0.29829540848731995 2023-01-24 02:27:16.039777: step: 780/466, loss: 0.3227888345718384 2023-01-24 02:27:16.818752: step: 782/466, loss: 0.20262525975704193 2023-01-24 02:27:17.547511: step: 784/466, loss: 0.2872578799724579 2023-01-24 02:27:18.390269: step: 786/466, loss: 0.34970372915267944 2023-01-24 02:27:19.223400: step: 788/466, loss: 3.8941845893859863 2023-01-24 02:27:19.965460: step: 790/466, loss: 1.0660080909729004 2023-01-24 02:27:20.710912: step: 792/466, loss: 0.4929357171058655 2023-01-24 02:27:21.490531: step: 794/466, loss: 0.7868713140487671 2023-01-24 02:27:22.258464: step: 796/466, loss: 0.8327652215957642 2023-01-24 02:27:22.958928: step: 798/466, loss: 1.2728098630905151 2023-01-24 02:27:23.661167: step: 800/466, loss: 0.6656249761581421 2023-01-24 02:27:24.320137: step: 802/466, loss: 0.7639499306678772 2023-01-24 02:27:25.086535: step: 804/466, loss: 0.8927834033966064 2023-01-24 02:27:25.832008: step: 806/466, loss: 0.2732463479042053 2023-01-24 02:27:26.638260: step: 808/466, loss: 0.4828619658946991 2023-01-24 02:27:27.392827: step: 810/466, loss: 1.1914703845977783 2023-01-24 02:27:28.118801: step: 812/466, loss: 0.23179252445697784 2023-01-24 02:27:28.958915: step: 814/466, loss: 1.8329510688781738 2023-01-24 02:27:29.748793: step: 816/466, loss: 0.2556149661540985 2023-01-24 02:27:30.536710: step: 818/466, loss: 0.8473445773124695 2023-01-24 02:27:31.317698: step: 820/466, loss: 0.5069705247879028 2023-01-24 02:27:32.092472: step: 822/466, loss: 0.1641732007265091 2023-01-24 02:27:32.870489: step: 824/466, loss: 0.8144305348396301 2023-01-24 02:27:33.618700: step: 826/466, loss: 0.1993732899427414 2023-01-24 02:27:34.461071: step: 828/466, loss: 0.16626204550266266 2023-01-24 02:27:35.214213: step: 830/466, loss: 0.8103485107421875 2023-01-24 02:27:35.979114: step: 832/466, loss: 1.944689154624939 2023-01-24 02:27:36.796314: step: 834/466, loss: 0.39095911383628845 2023-01-24 02:27:37.554845: step: 836/466, loss: 0.46140947937965393 2023-01-24 02:27:38.318507: step: 838/466, loss: 0.3880549371242523 2023-01-24 02:27:39.070426: step: 840/466, loss: 1.5692269802093506 2023-01-24 02:27:39.784894: step: 842/466, loss: 0.4883279800415039 2023-01-24 02:27:40.525336: step: 844/466, loss: 0.4998663365840912 2023-01-24 02:27:41.307311: step: 846/466, loss: 0.20900923013687134 2023-01-24 02:27:42.129934: step: 848/466, loss: 0.28893211483955383 2023-01-24 02:27:42.938542: step: 850/466, loss: 0.2505047917366028 2023-01-24 02:27:43.679541: step: 852/466, loss: 0.21978043019771576 2023-01-24 02:27:44.448516: step: 854/466, loss: 0.24482445418834686 2023-01-24 02:27:45.248831: step: 856/466, loss: 0.6268995404243469 2023-01-24 02:27:46.002573: step: 858/466, loss: 2.1591267585754395 2023-01-24 02:27:46.783976: step: 860/466, loss: 0.3672623634338379 2023-01-24 02:27:47.520783: step: 862/466, loss: 0.5924174189567566 2023-01-24 02:27:48.331544: step: 864/466, loss: 0.4663420021533966 2023-01-24 02:27:49.247508: step: 866/466, loss: 2.4700162410736084 2023-01-24 02:27:49.989120: step: 868/466, loss: 0.6600844860076904 2023-01-24 02:27:50.747210: step: 870/466, loss: 0.3065979778766632 2023-01-24 02:27:51.493629: step: 872/466, loss: 0.6033220291137695 2023-01-24 02:27:52.202152: step: 874/466, loss: 0.41608288884162903 2023-01-24 02:27:53.013394: step: 876/466, loss: 1.080448865890503 2023-01-24 02:27:53.802998: step: 878/466, loss: 0.5031520128250122 2023-01-24 02:27:54.582309: step: 880/466, loss: 0.2834298312664032 2023-01-24 02:27:55.418795: step: 882/466, loss: 0.28815382719039917 2023-01-24 02:27:56.214710: step: 884/466, loss: 0.6634982824325562 2023-01-24 02:27:56.973027: step: 886/466, loss: 0.40648210048675537 2023-01-24 02:27:57.737084: step: 888/466, loss: 0.31952717900276184 2023-01-24 02:27:58.571447: step: 890/466, loss: 19.538501739501953 2023-01-24 02:27:59.396730: step: 892/466, loss: 0.6794193387031555 2023-01-24 02:28:00.128560: step: 894/466, loss: 0.6911655068397522 2023-01-24 02:28:00.870274: step: 896/466, loss: 0.740145742893219 2023-01-24 02:28:01.633726: step: 898/466, loss: 0.2256525754928589 2023-01-24 02:28:02.507564: step: 900/466, loss: 0.22152604162693024 2023-01-24 02:28:03.301847: step: 902/466, loss: 0.4137997031211853 2023-01-24 02:28:03.962142: step: 904/466, loss: 0.3823583722114563 2023-01-24 02:28:04.712988: step: 906/466, loss: 0.3872143626213074 2023-01-24 02:28:05.434299: step: 908/466, loss: 2.1703059673309326 2023-01-24 02:28:06.188225: step: 910/466, loss: 0.579026460647583 2023-01-24 02:28:06.891065: step: 912/466, loss: 2.8523318767547607 2023-01-24 02:28:07.780568: step: 914/466, loss: 0.2878245711326599 2023-01-24 02:28:08.494740: step: 916/466, loss: 0.6961103677749634 2023-01-24 02:28:09.275584: step: 918/466, loss: 0.396592378616333 2023-01-24 02:28:10.055885: step: 920/466, loss: 0.8142573833465576 2023-01-24 02:28:10.893424: step: 922/466, loss: 0.35109513998031616 2023-01-24 02:28:11.725838: step: 924/466, loss: 0.20952901244163513 2023-01-24 02:28:12.526783: step: 926/466, loss: 1.3562030792236328 2023-01-24 02:28:13.361523: step: 928/466, loss: 0.8991647958755493 2023-01-24 02:28:14.111739: step: 930/466, loss: 0.5274887084960938 2023-01-24 02:28:14.765314: step: 932/466, loss: 0.24364523589611053 ================================================== Loss: 0.759 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34321716589861756, 'r': 0.28264943074003795, 'f1': 0.31000260145681585}, 'combined': 0.22842296949449586, 'epoch': 6} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3400251231205871, 'r': 0.24846857481712165, 'f1': 0.28712476629692396}, 'combined': 0.17647668562640204, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3209916904769846, 'r': 0.2929734404543256, 'f1': 0.3063432601575984}, 'combined': 0.22572661274770406, 'epoch': 6} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3287134738122114, 'r': 0.2539283149777723, 'f1': 0.2865213588231379}, 'combined': 0.17610581078885548, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3444333712984055, 'r': 0.286918880455408, 'f1': 0.31305641821946173}, 'combined': 0.2306731502669718, 'epoch': 6} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3396889512658375, 'r': 0.2500651362692281, 'f1': 0.2880670628074018}, 'combined': 0.17792377408692467, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3371212121212121, 'r': 0.31785714285714284, 'f1': 0.3272058823529411}, 'combined': 0.21813725490196073, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3076923076923077, 'r': 0.34782608695652173, 'f1': 0.32653061224489793}, 'combined': 0.16326530612244897, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.13793103448275862, 'f1': 0.2162162162162162}, 'combined': 0.14414414414414412, 'epoch': 6} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34321716589861756, 'r': 0.28264943074003795, 'f1': 0.31000260145681585}, 'combined': 0.22842296949449586, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3400251231205871, 'r': 0.24846857481712165, 'f1': 0.28712476629692396}, 'combined': 0.17647668562640204, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3371212121212121, 'r': 0.31785714285714284, 'f1': 0.3272058823529411}, 'combined': 0.21813725490196073, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35633182648907596, 'r': 0.3171150410310752, 'f1': 0.33558157956501333}, 'combined': 0.24727063757422033, 'epoch': 5} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38903978397030203, 'r': 0.22009690200395982, 'f1': 0.28114035217301386}, 'combined': 0.17364551163627331, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 5} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:31:06.831074: step: 2/466, loss: 0.29334014654159546 2023-01-24 02:31:07.539177: step: 4/466, loss: 1.4716475009918213 2023-01-24 02:31:08.331178: step: 6/466, loss: 0.577203631401062 2023-01-24 02:31:09.151280: step: 8/466, loss: 0.7299621105194092 2023-01-24 02:31:09.888524: step: 10/466, loss: 0.1240634173154831 2023-01-24 02:31:10.673588: step: 12/466, loss: 0.44954681396484375 2023-01-24 02:31:11.344713: step: 14/466, loss: 0.3550652265548706 2023-01-24 02:31:12.064701: step: 16/466, loss: 1.110145092010498 2023-01-24 02:31:12.835240: step: 18/466, loss: 3.7168407440185547 2023-01-24 02:31:13.644519: step: 20/466, loss: 0.1842595785856247 2023-01-24 02:31:14.375629: step: 22/466, loss: 0.29075542092323303 2023-01-24 02:31:15.145190: step: 24/466, loss: 1.1452717781066895 2023-01-24 02:31:15.868296: step: 26/466, loss: 2.3476266860961914 2023-01-24 02:31:16.695260: step: 28/466, loss: 0.23660674691200256 2023-01-24 02:31:17.436307: step: 30/466, loss: 0.48752743005752563 2023-01-24 02:31:18.182231: step: 32/466, loss: 0.33039748668670654 2023-01-24 02:31:18.912124: step: 34/466, loss: 0.5670379400253296 2023-01-24 02:31:19.659199: step: 36/466, loss: 0.4027182459831238 2023-01-24 02:31:20.438811: step: 38/466, loss: 0.24127821624279022 2023-01-24 02:31:21.119797: step: 40/466, loss: 1.246384859085083 2023-01-24 02:31:21.906716: step: 42/466, loss: 0.19768026471138 2023-01-24 02:31:22.767038: step: 44/466, loss: 0.3364740014076233 2023-01-24 02:31:23.510418: step: 46/466, loss: 1.4924445152282715 2023-01-24 02:31:24.307582: step: 48/466, loss: 0.6510562896728516 2023-01-24 02:31:25.123666: step: 50/466, loss: 0.5539807081222534 2023-01-24 02:31:25.912181: step: 52/466, loss: 0.6930699348449707 2023-01-24 02:31:26.860514: step: 54/466, loss: 1.2030017375946045 2023-01-24 02:31:27.790836: step: 56/466, loss: 0.20161156356334686 2023-01-24 02:31:28.474986: step: 58/466, loss: 0.37259748578071594 2023-01-24 02:31:29.238748: step: 60/466, loss: 0.4586893320083618 2023-01-24 02:31:29.927270: step: 62/466, loss: 0.4073813855648041 2023-01-24 02:31:30.609281: step: 64/466, loss: 0.2164788842201233 2023-01-24 02:31:31.398472: step: 66/466, loss: 0.8396023511886597 2023-01-24 02:31:32.176301: step: 68/466, loss: 0.37763679027557373 2023-01-24 02:31:32.944977: step: 70/466, loss: 1.0030779838562012 2023-01-24 02:31:33.773258: step: 72/466, loss: 0.2680083215236664 2023-01-24 02:31:34.566613: step: 74/466, loss: 0.2280387282371521 2023-01-24 02:31:35.265198: step: 76/466, loss: 0.3881959915161133 2023-01-24 02:31:36.088791: step: 78/466, loss: 0.7834279537200928 2023-01-24 02:31:36.842787: step: 80/466, loss: 0.10552302747964859 2023-01-24 02:31:37.709350: step: 82/466, loss: 0.2874581813812256 2023-01-24 02:31:38.450072: step: 84/466, loss: 0.22320303320884705 2023-01-24 02:31:39.193171: step: 86/466, loss: 0.22684840857982635 2023-01-24 02:31:39.947714: step: 88/466, loss: 0.15201908349990845 2023-01-24 02:31:40.689679: step: 90/466, loss: 0.4113709628582001 2023-01-24 02:31:41.531443: step: 92/466, loss: 0.35881781578063965 2023-01-24 02:31:42.231673: step: 94/466, loss: 0.1826227754354477 2023-01-24 02:31:43.145261: step: 96/466, loss: 0.48937031626701355 2023-01-24 02:31:43.882404: step: 98/466, loss: 0.4062560498714447 2023-01-24 02:31:44.599350: step: 100/466, loss: 0.4596622884273529 2023-01-24 02:31:45.386528: step: 102/466, loss: 0.2600345015525818 2023-01-24 02:31:46.158282: step: 104/466, loss: 0.36462166905403137 2023-01-24 02:31:46.885466: step: 106/466, loss: 0.24826651811599731 2023-01-24 02:31:47.546295: step: 108/466, loss: 0.27323004603385925 2023-01-24 02:31:48.307862: step: 110/466, loss: 0.9805687069892883 2023-01-24 02:31:48.978595: step: 112/466, loss: 0.12850280106067657 2023-01-24 02:31:49.717991: step: 114/466, loss: 0.16463781893253326 2023-01-24 02:31:50.441165: step: 116/466, loss: 0.338055282831192 2023-01-24 02:31:51.152712: step: 118/466, loss: 0.4716726839542389 2023-01-24 02:31:51.861118: step: 120/466, loss: 0.1965942233800888 2023-01-24 02:31:52.593516: step: 122/466, loss: 0.2254495918750763 2023-01-24 02:31:53.358204: step: 124/466, loss: 0.5136730670928955 2023-01-24 02:31:54.197790: step: 126/466, loss: 0.1527450531721115 2023-01-24 02:31:54.980029: step: 128/466, loss: 0.8943018913269043 2023-01-24 02:31:55.732555: step: 130/466, loss: 0.545354962348938 2023-01-24 02:31:56.542458: step: 132/466, loss: 0.6113411784172058 2023-01-24 02:31:57.250730: step: 134/466, loss: 0.13481000065803528 2023-01-24 02:31:58.014306: step: 136/466, loss: 0.6991126537322998 2023-01-24 02:31:58.755551: step: 138/466, loss: 0.19909624755382538 2023-01-24 02:31:59.570914: step: 140/466, loss: 1.2885040044784546 2023-01-24 02:32:00.269729: step: 142/466, loss: 0.0988985151052475 2023-01-24 02:32:01.049687: step: 144/466, loss: 0.1780286580324173 2023-01-24 02:32:01.821036: step: 146/466, loss: 0.08792361617088318 2023-01-24 02:32:02.553654: step: 148/466, loss: 0.6058153510093689 2023-01-24 02:32:03.261329: step: 150/466, loss: 0.31548625230789185 2023-01-24 02:32:04.101139: step: 152/466, loss: 0.8992829918861389 2023-01-24 02:32:04.860128: step: 154/466, loss: 0.1858549416065216 2023-01-24 02:32:05.609096: step: 156/466, loss: 0.2229134887456894 2023-01-24 02:32:06.289382: step: 158/466, loss: 0.4137539565563202 2023-01-24 02:32:07.069529: step: 160/466, loss: 0.47563108801841736 2023-01-24 02:32:07.740027: step: 162/466, loss: 0.1360701024532318 2023-01-24 02:32:08.460134: step: 164/466, loss: 0.13130144774913788 2023-01-24 02:32:09.235035: step: 166/466, loss: 0.6715490818023682 2023-01-24 02:32:09.968060: step: 168/466, loss: 0.28572914004325867 2023-01-24 02:32:10.669858: step: 170/466, loss: 0.2678864598274231 2023-01-24 02:32:11.460360: step: 172/466, loss: 0.509308397769928 2023-01-24 02:32:12.114208: step: 174/466, loss: 0.3618375062942505 2023-01-24 02:32:12.911264: step: 176/466, loss: 0.5390142798423767 2023-01-24 02:32:13.748479: step: 178/466, loss: 0.45017820596694946 2023-01-24 02:32:14.460512: step: 180/466, loss: 0.37983590364456177 2023-01-24 02:32:15.222065: step: 182/466, loss: 0.6454115509986877 2023-01-24 02:32:16.042955: step: 184/466, loss: 0.8248163461685181 2023-01-24 02:32:16.997366: step: 186/466, loss: 0.26074397563934326 2023-01-24 02:32:17.741472: step: 188/466, loss: 0.22187507152557373 2023-01-24 02:32:18.484085: step: 190/466, loss: 0.311760276556015 2023-01-24 02:32:19.218718: step: 192/466, loss: 0.3976534903049469 2023-01-24 02:32:19.953350: step: 194/466, loss: 0.8048707842826843 2023-01-24 02:32:20.707879: step: 196/466, loss: 1.1141060590744019 2023-01-24 02:32:21.535343: step: 198/466, loss: 0.9119713306427002 2023-01-24 02:32:22.381416: step: 200/466, loss: 0.4953900873661041 2023-01-24 02:32:23.071778: step: 202/466, loss: 0.08808280527591705 2023-01-24 02:32:23.747266: step: 204/466, loss: 0.44729456305503845 2023-01-24 02:32:24.596721: step: 206/466, loss: 1.191168189048767 2023-01-24 02:32:25.370715: step: 208/466, loss: 0.2583087384700775 2023-01-24 02:32:26.110587: step: 210/466, loss: 0.38636747002601624 2023-01-24 02:32:26.786454: step: 212/466, loss: 0.09954768419265747 2023-01-24 02:32:27.552745: step: 214/466, loss: 0.20439481735229492 2023-01-24 02:32:28.295871: step: 216/466, loss: 0.6137973070144653 2023-01-24 02:32:29.083568: step: 218/466, loss: 0.5198429226875305 2023-01-24 02:32:29.891194: step: 220/466, loss: 0.39407676458358765 2023-01-24 02:32:30.653782: step: 222/466, loss: 0.9188137650489807 2023-01-24 02:32:31.377322: step: 224/466, loss: 0.1870497763156891 2023-01-24 02:32:32.159584: step: 226/466, loss: 0.20046013593673706 2023-01-24 02:32:32.993375: step: 228/466, loss: 0.7507703304290771 2023-01-24 02:32:33.832698: step: 230/466, loss: 0.23928441107273102 2023-01-24 02:32:34.581711: step: 232/466, loss: 1.1374417543411255 2023-01-24 02:32:35.316136: step: 234/466, loss: 0.1974034458398819 2023-01-24 02:32:36.108283: step: 236/466, loss: 0.793383002281189 2023-01-24 02:32:36.897430: step: 238/466, loss: 0.32755231857299805 2023-01-24 02:32:37.698822: step: 240/466, loss: 0.9163374304771423 2023-01-24 02:32:38.456547: step: 242/466, loss: 0.3684721887111664 2023-01-24 02:32:39.178392: step: 244/466, loss: 0.9938925504684448 2023-01-24 02:32:39.977287: step: 246/466, loss: 0.2630435824394226 2023-01-24 02:32:40.870084: step: 248/466, loss: 0.9959964752197266 2023-01-24 02:32:41.647584: step: 250/466, loss: 0.26044443249702454 2023-01-24 02:32:42.340248: step: 252/466, loss: 0.3744111955165863 2023-01-24 02:32:43.193317: step: 254/466, loss: 0.2715119421482086 2023-01-24 02:32:44.028373: step: 256/466, loss: 0.31871679425239563 2023-01-24 02:32:44.791483: step: 258/466, loss: 0.2549704313278198 2023-01-24 02:32:45.553379: step: 260/466, loss: 0.1475781798362732 2023-01-24 02:32:46.272248: step: 262/466, loss: 0.29128512740135193 2023-01-24 02:32:47.221062: step: 264/466, loss: 0.18899090588092804 2023-01-24 02:32:47.993018: step: 266/466, loss: 0.8161706924438477 2023-01-24 02:32:48.773245: step: 268/466, loss: 0.6499884724617004 2023-01-24 02:32:49.558633: step: 270/466, loss: 0.6960606575012207 2023-01-24 02:32:50.240521: step: 272/466, loss: 0.5593203902244568 2023-01-24 02:32:50.982137: step: 274/466, loss: 0.27781498432159424 2023-01-24 02:32:51.764483: step: 276/466, loss: 0.3534137010574341 2023-01-24 02:32:52.540364: step: 278/466, loss: 0.29205551743507385 2023-01-24 02:32:53.313633: step: 280/466, loss: 0.846316397190094 2023-01-24 02:32:54.070488: step: 282/466, loss: 0.14797724783420563 2023-01-24 02:32:54.788781: step: 284/466, loss: 1.0052825212478638 2023-01-24 02:32:55.497023: step: 286/466, loss: 1.1280808448791504 2023-01-24 02:32:56.223254: step: 288/466, loss: 0.1440826803445816 2023-01-24 02:32:56.983195: step: 290/466, loss: 0.1783849447965622 2023-01-24 02:32:57.791218: step: 292/466, loss: 0.4393688142299652 2023-01-24 02:32:58.525541: step: 294/466, loss: 0.6713282465934753 2023-01-24 02:32:59.255959: step: 296/466, loss: 0.7822484374046326 2023-01-24 02:32:59.976574: step: 298/466, loss: 0.3282606899738312 2023-01-24 02:33:00.774639: step: 300/466, loss: 0.6223496794700623 2023-01-24 02:33:01.489117: step: 302/466, loss: 1.1325042247772217 2023-01-24 02:33:02.274017: step: 304/466, loss: 1.7857834100723267 2023-01-24 02:33:03.070325: step: 306/466, loss: 0.8472108840942383 2023-01-24 02:33:03.837450: step: 308/466, loss: 0.7359709739685059 2023-01-24 02:33:04.559830: step: 310/466, loss: 0.7195465564727783 2023-01-24 02:33:05.270532: step: 312/466, loss: 2.230241060256958 2023-01-24 02:33:06.168465: step: 314/466, loss: 0.25023576617240906 2023-01-24 02:33:06.933478: step: 316/466, loss: 0.30060118436813354 2023-01-24 02:33:07.694063: step: 318/466, loss: 0.2879800498485565 2023-01-24 02:33:08.533854: step: 320/466, loss: 0.44610944390296936 2023-01-24 02:33:09.339357: step: 322/466, loss: 0.23804304003715515 2023-01-24 02:33:10.102120: step: 324/466, loss: 0.3681897222995758 2023-01-24 02:33:10.892439: step: 326/466, loss: 2.706002712249756 2023-01-24 02:33:11.740465: step: 328/466, loss: 4.632607460021973 2023-01-24 02:33:12.473039: step: 330/466, loss: 0.1979474425315857 2023-01-24 02:33:13.278977: step: 332/466, loss: 0.9591643810272217 2023-01-24 02:33:14.036226: step: 334/466, loss: 0.2160029262304306 2023-01-24 02:33:14.854543: step: 336/466, loss: 0.7066240310668945 2023-01-24 02:33:15.688469: step: 338/466, loss: 0.17320756614208221 2023-01-24 02:33:16.421972: step: 340/466, loss: 0.7314639091491699 2023-01-24 02:33:17.211701: step: 342/466, loss: 0.7041606307029724 2023-01-24 02:33:17.916414: step: 344/466, loss: 0.2489834874868393 2023-01-24 02:33:18.662589: step: 346/466, loss: 0.16589593887329102 2023-01-24 02:33:19.407650: step: 348/466, loss: 0.14014838635921478 2023-01-24 02:33:20.251458: step: 350/466, loss: 0.6268644332885742 2023-01-24 02:33:21.036706: step: 352/466, loss: 0.42653533816337585 2023-01-24 02:33:21.770975: step: 354/466, loss: 0.14306007325649261 2023-01-24 02:33:22.517316: step: 356/466, loss: 0.6861312985420227 2023-01-24 02:33:23.445754: step: 358/466, loss: 0.8640709519386292 2023-01-24 02:33:24.258235: step: 360/466, loss: 0.08862889558076859 2023-01-24 02:33:25.000490: step: 362/466, loss: 0.3018539249897003 2023-01-24 02:33:25.714825: step: 364/466, loss: 0.8258170485496521 2023-01-24 02:33:26.487289: step: 366/466, loss: 0.47512322664260864 2023-01-24 02:33:27.193053: step: 368/466, loss: 1.041671872138977 2023-01-24 02:33:28.070422: step: 370/466, loss: 3.5983853340148926 2023-01-24 02:33:28.898783: step: 372/466, loss: 0.19122132658958435 2023-01-24 02:33:29.602414: step: 374/466, loss: 0.1812903732061386 2023-01-24 02:33:30.321702: step: 376/466, loss: 1.144923210144043 2023-01-24 02:33:31.081755: step: 378/466, loss: 0.2440568506717682 2023-01-24 02:33:31.808508: step: 380/466, loss: 0.5372747182846069 2023-01-24 02:33:32.649020: step: 382/466, loss: 0.4147646427154541 2023-01-24 02:33:33.434508: step: 384/466, loss: 0.43357139825820923 2023-01-24 02:33:34.217751: step: 386/466, loss: 0.37777167558670044 2023-01-24 02:33:34.963151: step: 388/466, loss: 0.2658584415912628 2023-01-24 02:33:35.770047: step: 390/466, loss: 1.4009718894958496 2023-01-24 02:33:36.506235: step: 392/466, loss: 1.075201392173767 2023-01-24 02:33:37.294851: step: 394/466, loss: 0.4344013035297394 2023-01-24 02:33:38.059332: step: 396/466, loss: 0.5745485424995422 2023-01-24 02:33:38.892961: step: 398/466, loss: 0.6046426296234131 2023-01-24 02:33:39.729067: step: 400/466, loss: 0.25382694602012634 2023-01-24 02:33:40.520621: step: 402/466, loss: 0.6577667593955994 2023-01-24 02:33:41.273163: step: 404/466, loss: 1.6412444114685059 2023-01-24 02:33:42.076002: step: 406/466, loss: 0.5959732532501221 2023-01-24 02:33:42.821002: step: 408/466, loss: 0.2520572245121002 2023-01-24 02:33:43.660831: step: 410/466, loss: 0.5502501130104065 2023-01-24 02:33:44.387505: step: 412/466, loss: 0.24335679411888123 2023-01-24 02:33:45.187843: step: 414/466, loss: 0.6526586413383484 2023-01-24 02:33:46.008335: step: 416/466, loss: 0.5386345386505127 2023-01-24 02:33:46.828812: step: 418/466, loss: 0.12898360192775726 2023-01-24 02:33:47.612413: step: 420/466, loss: 0.43213316798210144 2023-01-24 02:33:48.317724: step: 422/466, loss: 0.22789394855499268 2023-01-24 02:33:49.014128: step: 424/466, loss: 0.1911413073539734 2023-01-24 02:33:49.804055: step: 426/466, loss: 0.479383260011673 2023-01-24 02:33:50.576163: step: 428/466, loss: 0.24068160355091095 2023-01-24 02:33:51.298888: step: 430/466, loss: 0.36104321479797363 2023-01-24 02:33:52.028673: step: 432/466, loss: 0.24652445316314697 2023-01-24 02:33:52.772337: step: 434/466, loss: 0.4063998758792877 2023-01-24 02:33:53.571658: step: 436/466, loss: 0.4729330241680145 2023-01-24 02:33:54.352145: step: 438/466, loss: 0.48919734358787537 2023-01-24 02:33:55.060265: step: 440/466, loss: 0.23226898908615112 2023-01-24 02:33:55.824986: step: 442/466, loss: 0.22790449857711792 2023-01-24 02:33:56.597585: step: 444/466, loss: 1.0022603273391724 2023-01-24 02:33:57.386024: step: 446/466, loss: 0.36382633447647095 2023-01-24 02:33:58.186707: step: 448/466, loss: 0.5031176209449768 2023-01-24 02:33:59.011531: step: 450/466, loss: 0.6753062009811401 2023-01-24 02:33:59.741923: step: 452/466, loss: 0.3563656806945801 2023-01-24 02:34:00.542888: step: 454/466, loss: 0.2503567039966583 2023-01-24 02:34:01.359917: step: 456/466, loss: 0.28261706233024597 2023-01-24 02:34:02.125658: step: 458/466, loss: 0.3007924258708954 2023-01-24 02:34:02.834098: step: 460/466, loss: 0.6131289601325989 2023-01-24 02:34:03.686954: step: 462/466, loss: 0.8440264463424683 2023-01-24 02:34:04.461061: step: 464/466, loss: 0.46553340554237366 2023-01-24 02:34:05.198107: step: 466/466, loss: 0.6256039142608643 2023-01-24 02:34:05.964744: step: 468/466, loss: 0.37587013840675354 2023-01-24 02:34:06.772086: step: 470/466, loss: 0.39992663264274597 2023-01-24 02:34:07.455349: step: 472/466, loss: 0.1894497126340866 2023-01-24 02:34:08.232205: step: 474/466, loss: 0.34886282682418823 2023-01-24 02:34:08.999696: step: 476/466, loss: 0.12024272233247757 2023-01-24 02:34:09.727695: step: 478/466, loss: 0.23778888583183289 2023-01-24 02:34:10.470636: step: 480/466, loss: 0.42834538221359253 2023-01-24 02:34:11.187327: step: 482/466, loss: 0.1702658087015152 2023-01-24 02:34:11.879156: step: 484/466, loss: 0.16342906653881073 2023-01-24 02:34:12.633990: step: 486/466, loss: 0.27307936549186707 2023-01-24 02:34:13.454231: step: 488/466, loss: 0.5578143000602722 2023-01-24 02:34:14.297223: step: 490/466, loss: 1.0116239786148071 2023-01-24 02:34:15.084238: step: 492/466, loss: 0.7227616906166077 2023-01-24 02:34:15.845865: step: 494/466, loss: 0.46264591813087463 2023-01-24 02:34:16.532697: step: 496/466, loss: 0.33290180563926697 2023-01-24 02:34:17.330358: step: 498/466, loss: 0.3694120943546295 2023-01-24 02:34:18.090434: step: 500/466, loss: 1.8301973342895508 2023-01-24 02:34:18.811690: step: 502/466, loss: 0.5415647029876709 2023-01-24 02:34:19.555003: step: 504/466, loss: 0.2798643708229065 2023-01-24 02:34:20.248859: step: 506/466, loss: 0.2467707097530365 2023-01-24 02:34:21.012521: step: 508/466, loss: 0.6890528798103333 2023-01-24 02:34:21.742263: step: 510/466, loss: 1.0509618520736694 2023-01-24 02:34:22.475374: step: 512/466, loss: 0.878806471824646 2023-01-24 02:34:23.211087: step: 514/466, loss: 0.39082100987434387 2023-01-24 02:34:23.887090: step: 516/466, loss: 0.4234006404876709 2023-01-24 02:34:24.627331: step: 518/466, loss: 0.4060421288013458 2023-01-24 02:34:25.477070: step: 520/466, loss: 0.2576528489589691 2023-01-24 02:34:26.264840: step: 522/466, loss: 1.0605449676513672 2023-01-24 02:34:27.009642: step: 524/466, loss: 0.0857195034623146 2023-01-24 02:34:27.720543: step: 526/466, loss: 0.44455015659332275 2023-01-24 02:34:28.493010: step: 528/466, loss: 0.4080895483493805 2023-01-24 02:34:29.159717: step: 530/466, loss: 0.3870290219783783 2023-01-24 02:34:29.947956: step: 532/466, loss: 0.4184207320213318 2023-01-24 02:34:30.589447: step: 534/466, loss: 0.150475412607193 2023-01-24 02:34:31.362068: step: 536/466, loss: 0.482689768075943 2023-01-24 02:34:32.170883: step: 538/466, loss: 0.2870560884475708 2023-01-24 02:34:32.961073: step: 540/466, loss: 0.12939991056919098 2023-01-24 02:34:33.790159: step: 542/466, loss: 0.459214448928833 2023-01-24 02:34:34.565599: step: 544/466, loss: 0.36503612995147705 2023-01-24 02:34:35.305464: step: 546/466, loss: 0.24366234242916107 2023-01-24 02:34:36.081029: step: 548/466, loss: 0.9944776296615601 2023-01-24 02:34:36.844150: step: 550/466, loss: 0.49947139620780945 2023-01-24 02:34:37.593674: step: 552/466, loss: 0.5132686495780945 2023-01-24 02:34:38.326312: step: 554/466, loss: 0.8956375122070312 2023-01-24 02:34:39.135837: step: 556/466, loss: 2.3283026218414307 2023-01-24 02:34:39.952771: step: 558/466, loss: 0.12661069631576538 2023-01-24 02:34:40.683231: step: 560/466, loss: 0.18945397436618805 2023-01-24 02:34:41.458307: step: 562/466, loss: 0.8392725586891174 2023-01-24 02:34:42.242206: step: 564/466, loss: 0.1644848883152008 2023-01-24 02:34:43.002895: step: 566/466, loss: 0.18167704343795776 2023-01-24 02:34:43.805946: step: 568/466, loss: 0.17718948423862457 2023-01-24 02:34:44.585852: step: 570/466, loss: 0.673308253288269 2023-01-24 02:34:45.325574: step: 572/466, loss: 0.12600082159042358 2023-01-24 02:34:46.157390: step: 574/466, loss: 0.592653751373291 2023-01-24 02:34:46.874946: step: 576/466, loss: 0.603567361831665 2023-01-24 02:34:47.674442: step: 578/466, loss: 0.3641091287136078 2023-01-24 02:34:48.394693: step: 580/466, loss: 0.2526349425315857 2023-01-24 02:34:49.102862: step: 582/466, loss: 0.4343501329421997 2023-01-24 02:34:49.895309: step: 584/466, loss: 0.2535267770290375 2023-01-24 02:34:50.660523: step: 586/466, loss: 0.3185502886772156 2023-01-24 02:34:51.446024: step: 588/466, loss: 0.13986481726169586 2023-01-24 02:34:52.202271: step: 590/466, loss: 0.9744532108306885 2023-01-24 02:34:53.011711: step: 592/466, loss: 0.2984525263309479 2023-01-24 02:34:53.804333: step: 594/466, loss: 0.25249266624450684 2023-01-24 02:34:54.522512: step: 596/466, loss: 0.13529881834983826 2023-01-24 02:34:55.257738: step: 598/466, loss: 1.1212987899780273 2023-01-24 02:34:55.949743: step: 600/466, loss: 0.8201851844787598 2023-01-24 02:34:56.800040: step: 602/466, loss: 0.25969186425209045 2023-01-24 02:34:57.494113: step: 604/466, loss: 0.19115287065505981 2023-01-24 02:34:58.242269: step: 606/466, loss: 0.11920665949583054 2023-01-24 02:34:58.922244: step: 608/466, loss: 0.09032613784074783 2023-01-24 02:34:59.726185: step: 610/466, loss: 0.7254239320755005 2023-01-24 02:35:00.401453: step: 612/466, loss: 0.2879403829574585 2023-01-24 02:35:01.159048: step: 614/466, loss: 0.5039555430412292 2023-01-24 02:35:01.887473: step: 616/466, loss: 0.3236883580684662 2023-01-24 02:35:02.618291: step: 618/466, loss: 0.45721375942230225 2023-01-24 02:35:03.406155: step: 620/466, loss: 0.883962869644165 2023-01-24 02:35:04.204884: step: 622/466, loss: 1.2909221649169922 2023-01-24 02:35:04.944235: step: 624/466, loss: 0.27289527654647827 2023-01-24 02:35:05.700237: step: 626/466, loss: 0.45594915747642517 2023-01-24 02:35:06.427086: step: 628/466, loss: 0.2602394223213196 2023-01-24 02:35:07.206571: step: 630/466, loss: 0.2512398064136505 2023-01-24 02:35:07.967382: step: 632/466, loss: 0.1512545347213745 2023-01-24 02:35:08.734463: step: 634/466, loss: 0.2594332695007324 2023-01-24 02:35:09.521580: step: 636/466, loss: 0.26782238483428955 2023-01-24 02:35:10.199696: step: 638/466, loss: 0.28907614946365356 2023-01-24 02:35:10.982537: step: 640/466, loss: 0.3315509855747223 2023-01-24 02:35:11.778513: step: 642/466, loss: 0.6611424088478088 2023-01-24 02:35:12.679787: step: 644/466, loss: 0.27497512102127075 2023-01-24 02:35:13.398231: step: 646/466, loss: 0.729381799697876 2023-01-24 02:35:14.215940: step: 648/466, loss: 1.2339369058609009 2023-01-24 02:35:15.000635: step: 650/466, loss: 0.3203907012939453 2023-01-24 02:35:15.787023: step: 652/466, loss: 0.9287397861480713 2023-01-24 02:35:16.610263: step: 654/466, loss: 1.2124075889587402 2023-01-24 02:35:17.379398: step: 656/466, loss: 0.11122621595859528 2023-01-24 02:35:18.179899: step: 658/466, loss: 0.24004901945590973 2023-01-24 02:35:18.889484: step: 660/466, loss: 0.6141506433486938 2023-01-24 02:35:19.686841: step: 662/466, loss: 1.6464180946350098 2023-01-24 02:35:20.411692: step: 664/466, loss: 0.20204073190689087 2023-01-24 02:35:21.190054: step: 666/466, loss: 1.0337920188903809 2023-01-24 02:35:22.021910: step: 668/466, loss: 0.8603740930557251 2023-01-24 02:35:22.858390: step: 670/466, loss: 0.35860171914100647 2023-01-24 02:35:23.613529: step: 672/466, loss: 0.19825685024261475 2023-01-24 02:35:24.382134: step: 674/466, loss: 0.9691605567932129 2023-01-24 02:35:25.140552: step: 676/466, loss: 0.7861379981040955 2023-01-24 02:35:25.841145: step: 678/466, loss: 0.6022818088531494 2023-01-24 02:35:26.602059: step: 680/466, loss: 0.32521283626556396 2023-01-24 02:35:27.322200: step: 682/466, loss: 0.5121086835861206 2023-01-24 02:35:28.077372: step: 684/466, loss: 0.5897647142410278 2023-01-24 02:35:28.789886: step: 686/466, loss: 0.3449326753616333 2023-01-24 02:35:29.543517: step: 688/466, loss: 0.1923578381538391 2023-01-24 02:35:30.245234: step: 690/466, loss: 0.21271264553070068 2023-01-24 02:35:31.007387: step: 692/466, loss: 0.6504039168357849 2023-01-24 02:35:31.810526: step: 694/466, loss: 1.4073481559753418 2023-01-24 02:35:32.611357: step: 696/466, loss: 0.41430070996284485 2023-01-24 02:35:33.359930: step: 698/466, loss: 0.16300787031650543 2023-01-24 02:35:34.154583: step: 700/466, loss: 0.3126220405101776 2023-01-24 02:35:34.974810: step: 702/466, loss: 4.565176963806152 2023-01-24 02:35:35.708418: step: 704/466, loss: 1.1175744533538818 2023-01-24 02:35:36.589309: step: 706/466, loss: 0.32288840413093567 2023-01-24 02:35:37.299336: step: 708/466, loss: 0.24756182730197906 2023-01-24 02:35:38.073707: step: 710/466, loss: 0.26558712124824524 2023-01-24 02:35:38.854064: step: 712/466, loss: 0.2713622748851776 2023-01-24 02:35:39.675139: step: 714/466, loss: 0.3009144067764282 2023-01-24 02:35:40.389966: step: 716/466, loss: 0.9786598682403564 2023-01-24 02:35:41.268985: step: 718/466, loss: 0.5153631567955017 2023-01-24 02:35:42.040736: step: 720/466, loss: 0.2356444001197815 2023-01-24 02:35:42.739385: step: 722/466, loss: 1.4491140842437744 2023-01-24 02:35:43.498981: step: 724/466, loss: 0.16535405814647675 2023-01-24 02:35:44.248848: step: 726/466, loss: 0.4651423692703247 2023-01-24 02:35:45.010632: step: 728/466, loss: 0.1597919464111328 2023-01-24 02:35:45.768706: step: 730/466, loss: 0.18197688460350037 2023-01-24 02:35:46.529522: step: 732/466, loss: 0.11129917204380035 2023-01-24 02:35:47.262836: step: 734/466, loss: 1.0428858995437622 2023-01-24 02:35:47.946679: step: 736/466, loss: 0.7729167938232422 2023-01-24 02:35:48.732965: step: 738/466, loss: 0.12490782141685486 2023-01-24 02:35:49.568575: step: 740/466, loss: 0.971163809299469 2023-01-24 02:35:50.277234: step: 742/466, loss: 0.688123345375061 2023-01-24 02:35:51.079482: step: 744/466, loss: 1.1513824462890625 2023-01-24 02:35:51.885343: step: 746/466, loss: 0.2143803834915161 2023-01-24 02:35:52.600830: step: 748/466, loss: 0.5400157570838928 2023-01-24 02:35:53.288137: step: 750/466, loss: 0.19938690960407257 2023-01-24 02:35:54.026695: step: 752/466, loss: 1.1824733018875122 2023-01-24 02:35:54.736913: step: 754/466, loss: 0.8320244550704956 2023-01-24 02:35:55.578607: step: 756/466, loss: 0.6107889413833618 2023-01-24 02:35:56.287487: step: 758/466, loss: 0.11197170615196228 2023-01-24 02:35:57.087068: step: 760/466, loss: 0.34392377734184265 2023-01-24 02:35:57.936555: step: 762/466, loss: 0.3480488955974579 2023-01-24 02:35:58.784858: step: 764/466, loss: 0.20735707879066467 2023-01-24 02:35:59.567629: step: 766/466, loss: 0.49595823884010315 2023-01-24 02:36:00.345053: step: 768/466, loss: 1.4405468702316284 2023-01-24 02:36:01.102013: step: 770/466, loss: 0.26072534918785095 2023-01-24 02:36:01.881781: step: 772/466, loss: 0.48735663294792175 2023-01-24 02:36:02.675692: step: 774/466, loss: 0.4617172181606293 2023-01-24 02:36:03.419020: step: 776/466, loss: 0.6967835426330566 2023-01-24 02:36:04.265238: step: 778/466, loss: 1.1913807392120361 2023-01-24 02:36:04.975439: step: 780/466, loss: 0.21358001232147217 2023-01-24 02:36:05.684936: step: 782/466, loss: 0.9233065843582153 2023-01-24 02:36:06.440167: step: 784/466, loss: 0.71363365650177 2023-01-24 02:36:07.268231: step: 786/466, loss: 0.7549607753753662 2023-01-24 02:36:08.023725: step: 788/466, loss: 0.4810609817504883 2023-01-24 02:36:08.746830: step: 790/466, loss: 0.2233993262052536 2023-01-24 02:36:09.486072: step: 792/466, loss: 0.9135105609893799 2023-01-24 02:36:10.225121: step: 794/466, loss: 0.6643213033676147 2023-01-24 02:36:10.962827: step: 796/466, loss: 0.26859939098358154 2023-01-24 02:36:11.895740: step: 798/466, loss: 0.36576661467552185 2023-01-24 02:36:12.778637: step: 800/466, loss: 0.09290008246898651 2023-01-24 02:36:13.548641: step: 802/466, loss: 0.8580803871154785 2023-01-24 02:36:14.305104: step: 804/466, loss: 0.3385680019855499 2023-01-24 02:36:15.047795: step: 806/466, loss: 0.715552806854248 2023-01-24 02:36:15.872039: step: 808/466, loss: 0.46964597702026367 2023-01-24 02:36:16.635188: step: 810/466, loss: 0.6051387786865234 2023-01-24 02:36:17.386435: step: 812/466, loss: 0.25436586141586304 2023-01-24 02:36:18.118997: step: 814/466, loss: 1.0981717109680176 2023-01-24 02:36:18.928775: step: 816/466, loss: 0.5780366063117981 2023-01-24 02:36:19.696557: step: 818/466, loss: 0.2242840677499771 2023-01-24 02:36:20.460854: step: 820/466, loss: 0.9424983263015747 2023-01-24 02:36:21.236965: step: 822/466, loss: 0.30817434191703796 2023-01-24 02:36:22.089660: step: 824/466, loss: 0.4366517663002014 2023-01-24 02:36:22.789562: step: 826/466, loss: 0.1335681676864624 2023-01-24 02:36:23.622335: step: 828/466, loss: 0.43123510479927063 2023-01-24 02:36:24.413395: step: 830/466, loss: 0.29615893959999084 2023-01-24 02:36:25.113098: step: 832/466, loss: 0.7336601614952087 2023-01-24 02:36:25.837018: step: 834/466, loss: 0.17370028793811798 2023-01-24 02:36:26.635363: step: 836/466, loss: 0.6856794357299805 2023-01-24 02:36:27.428973: step: 838/466, loss: 0.2515942454338074 2023-01-24 02:36:28.122058: step: 840/466, loss: 0.38089174032211304 2023-01-24 02:36:28.885080: step: 842/466, loss: 0.6962507963180542 2023-01-24 02:36:29.656789: step: 844/466, loss: 0.1630067229270935 2023-01-24 02:36:30.482714: step: 846/466, loss: 0.7469332218170166 2023-01-24 02:36:31.214437: step: 848/466, loss: 0.47416603565216064 2023-01-24 02:36:32.035963: step: 850/466, loss: 0.5374373197555542 2023-01-24 02:36:32.934750: step: 852/466, loss: 0.44624650478363037 2023-01-24 02:36:33.720356: step: 854/466, loss: 0.2969151735305786 2023-01-24 02:36:34.530937: step: 856/466, loss: 0.18856649100780487 2023-01-24 02:36:35.338341: step: 858/466, loss: 8.759953498840332 2023-01-24 02:36:36.174659: step: 860/466, loss: 0.5982837677001953 2023-01-24 02:36:36.947261: step: 862/466, loss: 0.2622735798358917 2023-01-24 02:36:37.804081: step: 864/466, loss: 0.6516578197479248 2023-01-24 02:36:38.659525: step: 866/466, loss: 0.2897380590438843 2023-01-24 02:36:39.411977: step: 868/466, loss: 0.5647562742233276 2023-01-24 02:36:40.149472: step: 870/466, loss: 0.9220472574234009 2023-01-24 02:36:40.885679: step: 872/466, loss: 0.560003936290741 2023-01-24 02:36:41.673108: step: 874/466, loss: 0.5144690871238708 2023-01-24 02:36:42.505579: step: 876/466, loss: 1.207797884941101 2023-01-24 02:36:43.373103: step: 878/466, loss: 0.11351679265499115 2023-01-24 02:36:44.106195: step: 880/466, loss: 0.46011728048324585 2023-01-24 02:36:44.849993: step: 882/466, loss: 0.7599865198135376 2023-01-24 02:36:45.684947: step: 884/466, loss: 0.4350064694881439 2023-01-24 02:36:46.522528: step: 886/466, loss: 0.5173510909080505 2023-01-24 02:36:47.319562: step: 888/466, loss: 1.4208377599716187 2023-01-24 02:36:48.107845: step: 890/466, loss: 0.41764670610427856 2023-01-24 02:36:48.931679: step: 892/466, loss: 0.2334553599357605 2023-01-24 02:36:49.677847: step: 894/466, loss: 0.15015123784542084 2023-01-24 02:36:50.401042: step: 896/466, loss: 0.8183199167251587 2023-01-24 02:36:51.087554: step: 898/466, loss: 0.7500245571136475 2023-01-24 02:36:51.795368: step: 900/466, loss: 0.2415359616279602 2023-01-24 02:36:52.568656: step: 902/466, loss: 0.4030883312225342 2023-01-24 02:36:53.344127: step: 904/466, loss: 0.23466992378234863 2023-01-24 02:36:54.051774: step: 906/466, loss: 0.3660128116607666 2023-01-24 02:36:54.796605: step: 908/466, loss: 0.32721370458602905 2023-01-24 02:36:55.678997: step: 910/466, loss: 0.2336495816707611 2023-01-24 02:36:56.396804: step: 912/466, loss: 0.4234265983104706 2023-01-24 02:36:57.218562: step: 914/466, loss: 0.6359653472900391 2023-01-24 02:36:57.912046: step: 916/466, loss: 0.39385494589805603 2023-01-24 02:36:58.638573: step: 918/466, loss: 0.32778307795524597 2023-01-24 02:36:59.473847: step: 920/466, loss: 0.3014666736125946 2023-01-24 02:37:00.255164: step: 922/466, loss: 0.2647492587566376 2023-01-24 02:37:00.969050: step: 924/466, loss: 0.47217094898223877 2023-01-24 02:37:01.726812: step: 926/466, loss: 1.2372679710388184 2023-01-24 02:37:02.532472: step: 928/466, loss: 0.43278536200523376 2023-01-24 02:37:03.375422: step: 930/466, loss: 0.4891684353351593 2023-01-24 02:37:04.128633: step: 932/466, loss: 0.6238055229187012 ================================================== Loss: 0.560 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33597363002850805, 'r': 0.28752202493900786, 'f1': 0.30986524978089397}, 'combined': 0.22832176299644819, 'epoch': 7} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3623154260121604, 'r': 0.24917021079556015, 'f1': 0.2952750012092953}, 'combined': 0.18148609830424978, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3206331294894195, 'r': 0.30724806526025966, 'f1': 0.31379792711658305}, 'combined': 0.23121952524379802, 'epoch': 7} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3478420582497567, 'r': 0.25349668196537734, 'f1': 0.2932683418426521}, 'combined': 0.18025273693743496, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3445229095958038, 'r': 0.3026833152615885, 'f1': 0.32225072150072154}, 'combined': 0.2374479000531632, 'epoch': 7} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.35895466052748853, 'r': 0.2488420523587442, 'f1': 0.29392397996109604}, 'combined': 0.18154128174067702, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36875, 'r': 0.21071428571428572, 'f1': 0.2681818181818182}, 'combined': 0.17878787878787877, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26785714285714285, 'r': 0.32608695652173914, 'f1': 0.29411764705882354}, 'combined': 0.14705882352941177, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.06896551724137931, 'f1': 0.1081081081081081}, 'combined': 0.07207207207207206, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34321716589861756, 'r': 0.28264943074003795, 'f1': 0.31000260145681585}, 'combined': 0.22842296949449586, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3400251231205871, 'r': 0.24846857481712165, 'f1': 0.28712476629692396}, 'combined': 0.17647668562640204, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3371212121212121, 'r': 0.31785714285714284, 'f1': 0.3272058823529411}, 'combined': 0.21813725490196073, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35633182648907596, 'r': 0.3171150410310752, 'f1': 0.33558157956501333}, 'combined': 0.24727063757422033, 'epoch': 5} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38903978397030203, 'r': 0.22009690200395982, 'f1': 0.28114035217301386}, 'combined': 0.17364551163627331, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 5} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:39:47.018016: step: 2/466, loss: 0.22759246826171875 2023-01-24 02:39:47.886428: step: 4/466, loss: 0.1430056244134903 2023-01-24 02:39:48.701425: step: 6/466, loss: 0.15150728821754456 2023-01-24 02:39:49.398007: step: 8/466, loss: 0.6790808439254761 2023-01-24 02:39:50.153407: step: 10/466, loss: 0.24653606116771698 2023-01-24 02:39:50.891732: step: 12/466, loss: 0.2990144193172455 2023-01-24 02:39:51.679960: step: 14/466, loss: 0.810178279876709 2023-01-24 02:39:52.464943: step: 16/466, loss: 0.4249630272388458 2023-01-24 02:39:53.167330: step: 18/466, loss: 0.3089500963687897 2023-01-24 02:39:53.956583: step: 20/466, loss: 0.5554283857345581 2023-01-24 02:39:54.728228: step: 22/466, loss: 0.2011716067790985 2023-01-24 02:39:55.494783: step: 24/466, loss: 0.1238151267170906 2023-01-24 02:39:56.333165: step: 26/466, loss: 0.14290007948875427 2023-01-24 02:39:57.030693: step: 28/466, loss: 0.538735568523407 2023-01-24 02:39:57.767414: step: 30/466, loss: 0.13576442003250122 2023-01-24 02:39:58.494409: step: 32/466, loss: 0.461008220911026 2023-01-24 02:39:59.345542: step: 34/466, loss: 0.1841767132282257 2023-01-24 02:40:00.110015: step: 36/466, loss: 0.3797663450241089 2023-01-24 02:40:00.889203: step: 38/466, loss: 0.25566935539245605 2023-01-24 02:40:01.608070: step: 40/466, loss: 0.2620272636413574 2023-01-24 02:40:02.409696: step: 42/466, loss: 0.262869656085968 2023-01-24 02:40:03.129705: step: 44/466, loss: 0.325731486082077 2023-01-24 02:40:03.919583: step: 46/466, loss: 0.2767471671104431 2023-01-24 02:40:04.605675: step: 48/466, loss: 1.0482711791992188 2023-01-24 02:40:05.308693: step: 50/466, loss: 0.19650810956954956 2023-01-24 02:40:06.155556: step: 52/466, loss: 1.0581046342849731 2023-01-24 02:40:06.848443: step: 54/466, loss: 0.21606041491031647 2023-01-24 02:40:07.583876: step: 56/466, loss: 0.3756955564022064 2023-01-24 02:40:08.366371: step: 58/466, loss: 0.3466208279132843 2023-01-24 02:40:09.045137: step: 60/466, loss: 0.43687790632247925 2023-01-24 02:40:09.768997: step: 62/466, loss: 0.22791868448257446 2023-01-24 02:40:10.561986: step: 64/466, loss: 0.30758219957351685 2023-01-24 02:40:11.368095: step: 66/466, loss: 0.10821656882762909 2023-01-24 02:40:12.017752: step: 68/466, loss: 0.34948158264160156 2023-01-24 02:40:12.771151: step: 70/466, loss: 0.6015602350234985 2023-01-24 02:40:13.572663: step: 72/466, loss: 0.42821982502937317 2023-01-24 02:40:14.368992: step: 74/466, loss: 0.22986894845962524 2023-01-24 02:40:15.147518: step: 76/466, loss: 0.15815268456935883 2023-01-24 02:40:15.948920: step: 78/466, loss: 0.245316281914711 2023-01-24 02:40:16.808432: step: 80/466, loss: 0.23755723237991333 2023-01-24 02:40:17.549506: step: 82/466, loss: 0.08479206264019012 2023-01-24 02:40:18.255193: step: 84/466, loss: 0.0937538594007492 2023-01-24 02:40:19.021870: step: 86/466, loss: 0.35855525732040405 2023-01-24 02:40:19.796048: step: 88/466, loss: 0.6756817102432251 2023-01-24 02:40:20.641520: step: 90/466, loss: 0.3196064233779907 2023-01-24 02:40:21.419408: step: 92/466, loss: 0.3062879145145416 2023-01-24 02:40:22.136464: step: 94/466, loss: 0.21980153024196625 2023-01-24 02:40:22.953339: step: 96/466, loss: 0.735496461391449 2023-01-24 02:40:23.738173: step: 98/466, loss: 0.3766806423664093 2023-01-24 02:40:24.518348: step: 100/466, loss: 0.39500221610069275 2023-01-24 02:40:25.261622: step: 102/466, loss: 0.05726594477891922 2023-01-24 02:40:26.098969: step: 104/466, loss: 0.28027504682540894 2023-01-24 02:40:26.808659: step: 106/466, loss: 0.20551623404026031 2023-01-24 02:40:27.614083: step: 108/466, loss: 0.6117175817489624 2023-01-24 02:40:28.390369: step: 110/466, loss: 0.8166786432266235 2023-01-24 02:40:29.134028: step: 112/466, loss: 0.64291912317276 2023-01-24 02:40:29.819411: step: 114/466, loss: 0.6314736008644104 2023-01-24 02:40:30.597425: step: 116/466, loss: 0.6598122119903564 2023-01-24 02:40:31.397039: step: 118/466, loss: 0.2853725254535675 2023-01-24 02:40:32.173544: step: 120/466, loss: 0.14809046685695648 2023-01-24 02:40:32.945933: step: 122/466, loss: 0.48133033514022827 2023-01-24 02:40:33.734709: step: 124/466, loss: 0.43819597363471985 2023-01-24 02:40:34.414434: step: 126/466, loss: 0.4271838068962097 2023-01-24 02:40:35.333587: step: 128/466, loss: 0.28445273637771606 2023-01-24 02:40:36.082156: step: 130/466, loss: 0.6659205555915833 2023-01-24 02:40:36.819340: step: 132/466, loss: 0.6517955660820007 2023-01-24 02:40:37.715506: step: 134/466, loss: 0.5717197060585022 2023-01-24 02:40:38.490296: step: 136/466, loss: 0.3504186272621155 2023-01-24 02:40:39.200495: step: 138/466, loss: 0.6698485016822815 2023-01-24 02:40:39.896283: step: 140/466, loss: 0.17458480596542358 2023-01-24 02:40:40.671952: step: 142/466, loss: 0.18910260498523712 2023-01-24 02:40:41.476589: step: 144/466, loss: 0.5898134112358093 2023-01-24 02:40:42.358194: step: 146/466, loss: 2.988463878631592 2023-01-24 02:40:43.102640: step: 148/466, loss: 0.6820144057273865 2023-01-24 02:40:43.875614: step: 150/466, loss: 0.2095578908920288 2023-01-24 02:40:44.624607: step: 152/466, loss: 0.24290573596954346 2023-01-24 02:40:45.407100: step: 154/466, loss: 0.9977115392684937 2023-01-24 02:40:46.114877: step: 156/466, loss: 0.12446726858615875 2023-01-24 02:40:46.836241: step: 158/466, loss: 0.4961407780647278 2023-01-24 02:40:47.640735: step: 160/466, loss: 0.2016962319612503 2023-01-24 02:40:48.373222: step: 162/466, loss: 0.1211315244436264 2023-01-24 02:40:49.142621: step: 164/466, loss: 0.5762068629264832 2023-01-24 02:40:49.857699: step: 166/466, loss: 0.16279837489128113 2023-01-24 02:40:50.609385: step: 168/466, loss: 0.3130417764186859 2023-01-24 02:40:51.311313: step: 170/466, loss: 0.2116745114326477 2023-01-24 02:40:52.077838: step: 172/466, loss: 0.16255971789360046 2023-01-24 02:40:52.846985: step: 174/466, loss: 1.197072982788086 2023-01-24 02:40:53.621919: step: 176/466, loss: 0.5906725525856018 2023-01-24 02:40:54.473768: step: 178/466, loss: 0.38303136825561523 2023-01-24 02:40:55.238413: step: 180/466, loss: 1.2655754089355469 2023-01-24 02:40:56.026208: step: 182/466, loss: 0.44630518555641174 2023-01-24 02:40:56.909084: step: 184/466, loss: 0.31110793352127075 2023-01-24 02:40:57.610559: step: 186/466, loss: 0.3698071539402008 2023-01-24 02:40:58.369765: step: 188/466, loss: 0.3049424886703491 2023-01-24 02:40:59.123968: step: 190/466, loss: 0.13894884288311005 2023-01-24 02:40:59.950800: step: 192/466, loss: 0.29648882150650024 2023-01-24 02:41:00.828333: step: 194/466, loss: 0.1692948341369629 2023-01-24 02:41:01.543084: step: 196/466, loss: 0.155476376414299 2023-01-24 02:41:02.288420: step: 198/466, loss: 0.24735870957374573 2023-01-24 02:41:03.097165: step: 200/466, loss: 0.45558783411979675 2023-01-24 02:41:03.851495: step: 202/466, loss: 0.3680471181869507 2023-01-24 02:41:04.583663: step: 204/466, loss: 0.19752237200737 2023-01-24 02:41:05.384718: step: 206/466, loss: 0.3782448172569275 2023-01-24 02:41:06.187499: step: 208/466, loss: 0.2489340454339981 2023-01-24 02:41:06.900588: step: 210/466, loss: 0.23533755540847778 2023-01-24 02:41:07.671021: step: 212/466, loss: 0.10373549908399582 2023-01-24 02:41:08.403094: step: 214/466, loss: 0.053315721452236176 2023-01-24 02:41:09.184659: step: 216/466, loss: 0.08812931180000305 2023-01-24 02:41:09.936020: step: 218/466, loss: 2.715826988220215 2023-01-24 02:41:10.747120: step: 220/466, loss: 0.1123843863606453 2023-01-24 02:41:11.427505: step: 222/466, loss: 0.5836272239685059 2023-01-24 02:41:12.184185: step: 224/466, loss: 0.13191816210746765 2023-01-24 02:41:13.029145: step: 226/466, loss: 0.3843967318534851 2023-01-24 02:41:13.716033: step: 228/466, loss: 0.1885620653629303 2023-01-24 02:41:14.444995: step: 230/466, loss: 0.1312829703092575 2023-01-24 02:41:15.214410: step: 232/466, loss: 0.7238270044326782 2023-01-24 02:41:16.016388: step: 234/466, loss: 0.049187153577804565 2023-01-24 02:41:16.726410: step: 236/466, loss: 0.09891631454229355 2023-01-24 02:41:17.548480: step: 238/466, loss: 0.6556817293167114 2023-01-24 02:41:18.266233: step: 240/466, loss: 0.2603532373905182 2023-01-24 02:41:19.037161: step: 242/466, loss: 0.8729942440986633 2023-01-24 02:41:19.713709: step: 244/466, loss: 0.3330722451210022 2023-01-24 02:41:20.492902: step: 246/466, loss: 0.11522451788187027 2023-01-24 02:41:21.185034: step: 248/466, loss: 0.24133557081222534 2023-01-24 02:41:21.895656: step: 250/466, loss: 1.1795192956924438 2023-01-24 02:41:22.696929: step: 252/466, loss: 0.5877870917320251 2023-01-24 02:41:23.402871: step: 254/466, loss: 1.9018089771270752 2023-01-24 02:41:24.173280: step: 256/466, loss: 0.2621552646160126 2023-01-24 02:41:24.896266: step: 258/466, loss: 0.20106241106987 2023-01-24 02:41:25.665566: step: 260/466, loss: 0.5996320247650146 2023-01-24 02:41:26.450545: step: 262/466, loss: 0.2576277554035187 2023-01-24 02:41:27.174177: step: 264/466, loss: 0.6318485736846924 2023-01-24 02:41:27.920550: step: 266/466, loss: 0.32296547293663025 2023-01-24 02:41:28.645405: step: 268/466, loss: 0.5395624041557312 2023-01-24 02:41:29.423688: step: 270/466, loss: 0.23238903284072876 2023-01-24 02:41:30.173985: step: 272/466, loss: 0.20087341964244843 2023-01-24 02:41:30.951835: step: 274/466, loss: 0.5104784965515137 2023-01-24 02:41:31.690997: step: 276/466, loss: 0.4179069995880127 2023-01-24 02:41:32.476463: step: 278/466, loss: 0.4076002836227417 2023-01-24 02:41:33.296983: step: 280/466, loss: 0.23009905219078064 2023-01-24 02:41:34.129032: step: 282/466, loss: 0.29668739438056946 2023-01-24 02:41:34.855982: step: 284/466, loss: 0.3764442503452301 2023-01-24 02:41:35.660705: step: 286/466, loss: 0.18076901137828827 2023-01-24 02:41:36.385978: step: 288/466, loss: 0.8756890296936035 2023-01-24 02:41:37.158536: step: 290/466, loss: 0.6863442659378052 2023-01-24 02:41:37.934505: step: 292/466, loss: 0.44005173444747925 2023-01-24 02:41:38.695912: step: 294/466, loss: 0.25520214438438416 2023-01-24 02:41:39.395012: step: 296/466, loss: 0.1529461294412613 2023-01-24 02:41:40.105082: step: 298/466, loss: 0.5102216601371765 2023-01-24 02:41:40.864627: step: 300/466, loss: 1.0082459449768066 2023-01-24 02:41:41.630637: step: 302/466, loss: 0.3374558687210083 2023-01-24 02:41:42.369627: step: 304/466, loss: 0.586357057094574 2023-01-24 02:41:43.182415: step: 306/466, loss: 0.3253418207168579 2023-01-24 02:41:43.956039: step: 308/466, loss: 0.5261020660400391 2023-01-24 02:41:44.745617: step: 310/466, loss: 0.12631230056285858 2023-01-24 02:41:45.500376: step: 312/466, loss: 0.21314901113510132 2023-01-24 02:41:46.255005: step: 314/466, loss: 0.26143577694892883 2023-01-24 02:41:47.010490: step: 316/466, loss: 0.24304695427417755 2023-01-24 02:41:47.753821: step: 318/466, loss: 0.9620099663734436 2023-01-24 02:41:48.483305: step: 320/466, loss: 0.1309376209974289 2023-01-24 02:41:49.270640: step: 322/466, loss: 0.4396485686302185 2023-01-24 02:41:50.122244: step: 324/466, loss: 0.43423640727996826 2023-01-24 02:41:50.935060: step: 326/466, loss: 0.18821410834789276 2023-01-24 02:41:51.664144: step: 328/466, loss: 0.09666875749826431 2023-01-24 02:41:52.489738: step: 330/466, loss: 0.23626506328582764 2023-01-24 02:41:53.269885: step: 332/466, loss: 0.9110296964645386 2023-01-24 02:41:54.093529: step: 334/466, loss: 2.471574306488037 2023-01-24 02:41:54.757282: step: 336/466, loss: 0.2877243161201477 2023-01-24 02:41:55.551855: step: 338/466, loss: 0.6529865860939026 2023-01-24 02:41:56.234412: step: 340/466, loss: 0.9429927468299866 2023-01-24 02:41:57.029844: step: 342/466, loss: 0.32690343260765076 2023-01-24 02:41:57.699769: step: 344/466, loss: 0.10178147256374359 2023-01-24 02:41:58.522101: step: 346/466, loss: 0.2624507248401642 2023-01-24 02:41:59.277125: step: 348/466, loss: 0.27885669469833374 2023-01-24 02:41:59.969725: step: 350/466, loss: 0.15760451555252075 2023-01-24 02:42:00.722776: step: 352/466, loss: 0.46658897399902344 2023-01-24 02:42:01.476996: step: 354/466, loss: 0.7008988857269287 2023-01-24 02:42:02.302485: step: 356/466, loss: 0.30459436774253845 2023-01-24 02:42:03.033168: step: 358/466, loss: 0.6526221632957458 2023-01-24 02:42:03.755747: step: 360/466, loss: 0.608924925327301 2023-01-24 02:42:04.603022: step: 362/466, loss: 0.21239249408245087 2023-01-24 02:42:05.458136: step: 364/466, loss: 0.15510523319244385 2023-01-24 02:42:06.315142: step: 366/466, loss: 0.2997853755950928 2023-01-24 02:42:07.128892: step: 368/466, loss: 0.2863451838493347 2023-01-24 02:42:07.909894: step: 370/466, loss: 0.3021564483642578 2023-01-24 02:42:08.639985: step: 372/466, loss: 0.1663222461938858 2023-01-24 02:42:09.383606: step: 374/466, loss: 0.60276198387146 2023-01-24 02:42:10.071894: step: 376/466, loss: 0.2979196012020111 2023-01-24 02:42:10.753444: step: 378/466, loss: 0.11225948482751846 2023-01-24 02:42:11.519236: step: 380/466, loss: 0.5652484893798828 2023-01-24 02:42:12.222373: step: 382/466, loss: 0.4527016580104828 2023-01-24 02:42:12.989433: step: 384/466, loss: 2.857930898666382 2023-01-24 02:42:13.782938: step: 386/466, loss: 0.14337778091430664 2023-01-24 02:42:14.585518: step: 388/466, loss: 0.6897900700569153 2023-01-24 02:42:15.246855: step: 390/466, loss: 0.21116462349891663 2023-01-24 02:42:15.960437: step: 392/466, loss: 0.6865518689155579 2023-01-24 02:42:16.672696: step: 394/466, loss: 0.34384459257125854 2023-01-24 02:42:17.643460: step: 396/466, loss: 1.022497296333313 2023-01-24 02:42:18.318673: step: 398/466, loss: 0.2426058053970337 2023-01-24 02:42:19.049009: step: 400/466, loss: 0.13261792063713074 2023-01-24 02:42:19.759333: step: 402/466, loss: 0.6296444535255432 2023-01-24 02:42:20.501876: step: 404/466, loss: 0.42095842957496643 2023-01-24 02:42:21.264026: step: 406/466, loss: 0.2147996574640274 2023-01-24 02:42:21.991409: step: 408/466, loss: 0.2778421938419342 2023-01-24 02:42:22.773517: step: 410/466, loss: 0.13006922602653503 2023-01-24 02:42:23.506570: step: 412/466, loss: 0.6652418971061707 2023-01-24 02:42:24.278649: step: 414/466, loss: 0.5734185576438904 2023-01-24 02:42:25.004404: step: 416/466, loss: 0.2978045344352722 2023-01-24 02:42:25.735331: step: 418/466, loss: 0.3669905662536621 2023-01-24 02:42:26.465314: step: 420/466, loss: 0.3904774487018585 2023-01-24 02:42:27.244756: step: 422/466, loss: 1.5295345783233643 2023-01-24 02:42:27.973900: step: 424/466, loss: 0.2734212577342987 2023-01-24 02:42:28.735808: step: 426/466, loss: 0.708541989326477 2023-01-24 02:42:29.561064: step: 428/466, loss: 0.5671656131744385 2023-01-24 02:42:30.314422: step: 430/466, loss: 0.22312410175800323 2023-01-24 02:42:31.068592: step: 432/466, loss: 0.3741439878940582 2023-01-24 02:42:31.787479: step: 434/466, loss: 0.4038388431072235 2023-01-24 02:42:32.593775: step: 436/466, loss: 0.18085996806621552 2023-01-24 02:42:33.297939: step: 438/466, loss: 0.14758652448654175 2023-01-24 02:42:34.160168: step: 440/466, loss: 0.18641526997089386 2023-01-24 02:42:34.858720: step: 442/466, loss: 0.2503521740436554 2023-01-24 02:42:35.566272: step: 444/466, loss: 0.31462979316711426 2023-01-24 02:42:36.333338: step: 446/466, loss: 1.0632514953613281 2023-01-24 02:42:37.166723: step: 448/466, loss: 0.3526846766471863 2023-01-24 02:42:37.926030: step: 450/466, loss: 0.40511059761047363 2023-01-24 02:42:38.644159: step: 452/466, loss: 0.9780833125114441 2023-01-24 02:42:39.356843: step: 454/466, loss: 0.6404275894165039 2023-01-24 02:42:40.114505: step: 456/466, loss: 0.6844438314437866 2023-01-24 02:42:40.839432: step: 458/466, loss: 0.4939207434654236 2023-01-24 02:42:41.579979: step: 460/466, loss: 0.9836992621421814 2023-01-24 02:42:42.360373: step: 462/466, loss: 1.2867162227630615 2023-01-24 02:42:43.255634: step: 464/466, loss: 0.14138145744800568 2023-01-24 02:42:44.034140: step: 466/466, loss: 0.5794249176979065 2023-01-24 02:42:44.854983: step: 468/466, loss: 0.9879024624824524 2023-01-24 02:42:45.608755: step: 470/466, loss: 0.4761776030063629 2023-01-24 02:42:46.376862: step: 472/466, loss: 0.43164268136024475 2023-01-24 02:42:47.263537: step: 474/466, loss: 0.5020178556442261 2023-01-24 02:42:48.030629: step: 476/466, loss: 0.28537100553512573 2023-01-24 02:42:48.822057: step: 478/466, loss: 0.5118150115013123 2023-01-24 02:42:49.708212: step: 480/466, loss: 0.15906476974487305 2023-01-24 02:42:50.502810: step: 482/466, loss: 0.6295519471168518 2023-01-24 02:42:51.286011: step: 484/466, loss: 0.49415600299835205 2023-01-24 02:42:51.988990: step: 486/466, loss: 0.17992109060287476 2023-01-24 02:42:52.741269: step: 488/466, loss: 0.07363350689411163 2023-01-24 02:42:53.479965: step: 490/466, loss: 0.33977508544921875 2023-01-24 02:42:54.266586: step: 492/466, loss: 0.3130842447280884 2023-01-24 02:42:55.064027: step: 494/466, loss: 1.355231523513794 2023-01-24 02:42:55.857710: step: 496/466, loss: 0.14459706842899323 2023-01-24 02:42:56.667649: step: 498/466, loss: 0.5662770867347717 2023-01-24 02:42:57.386748: step: 500/466, loss: 0.4519438147544861 2023-01-24 02:42:58.175614: step: 502/466, loss: 0.21593526005744934 2023-01-24 02:42:58.980609: step: 504/466, loss: 0.5674519538879395 2023-01-24 02:42:59.728021: step: 506/466, loss: 0.18442323803901672 2023-01-24 02:43:00.487502: step: 508/466, loss: 0.5759652853012085 2023-01-24 02:43:01.226902: step: 510/466, loss: 0.3328949809074402 2023-01-24 02:43:01.996689: step: 512/466, loss: 1.5621113777160645 2023-01-24 02:43:02.736098: step: 514/466, loss: 0.3188912272453308 2023-01-24 02:43:03.459753: step: 516/466, loss: 0.2803928554058075 2023-01-24 02:43:04.195363: step: 518/466, loss: 0.1506417989730835 2023-01-24 02:43:05.119037: step: 520/466, loss: 0.3449791371822357 2023-01-24 02:43:05.848288: step: 522/466, loss: 0.21323703229427338 2023-01-24 02:43:06.590331: step: 524/466, loss: 0.2827951908111572 2023-01-24 02:43:07.308262: step: 526/466, loss: 0.2961046099662781 2023-01-24 02:43:08.113579: step: 528/466, loss: 0.4124855101108551 2023-01-24 02:43:09.007127: step: 530/466, loss: 0.31052014231681824 2023-01-24 02:43:09.810841: step: 532/466, loss: 0.7250269651412964 2023-01-24 02:43:10.558044: step: 534/466, loss: 0.502644956111908 2023-01-24 02:43:11.312930: step: 536/466, loss: 0.3006429374217987 2023-01-24 02:43:12.136782: step: 538/466, loss: 1.0074026584625244 2023-01-24 02:43:13.055681: step: 540/466, loss: 0.3936672806739807 2023-01-24 02:43:13.799759: step: 542/466, loss: 0.4782834053039551 2023-01-24 02:43:14.605051: step: 544/466, loss: 0.17096778750419617 2023-01-24 02:43:15.446980: step: 546/466, loss: 0.3335103690624237 2023-01-24 02:43:16.137702: step: 548/466, loss: 0.195723295211792 2023-01-24 02:43:16.890906: step: 550/466, loss: 0.42337173223495483 2023-01-24 02:43:17.603241: step: 552/466, loss: 0.3888344466686249 2023-01-24 02:43:18.434064: step: 554/466, loss: 1.6903038024902344 2023-01-24 02:43:19.164696: step: 556/466, loss: 0.21383579075336456 2023-01-24 02:43:19.842905: step: 558/466, loss: 0.23822146654129028 2023-01-24 02:43:20.601994: step: 560/466, loss: 1.131753921508789 2023-01-24 02:43:21.393871: step: 562/466, loss: 0.2799283564090729 2023-01-24 02:43:22.114009: step: 564/466, loss: 0.43949148058891296 2023-01-24 02:43:22.853177: step: 566/466, loss: 0.23566792905330658 2023-01-24 02:43:23.580074: step: 568/466, loss: 0.7373307347297668 2023-01-24 02:43:24.365166: step: 570/466, loss: 1.1472512483596802 2023-01-24 02:43:25.150753: step: 572/466, loss: 0.5465192198753357 2023-01-24 02:43:25.948845: step: 574/466, loss: 0.3646984398365021 2023-01-24 02:43:26.720080: step: 576/466, loss: 0.1426057070493698 2023-01-24 02:43:27.476927: step: 578/466, loss: 0.3378341495990753 2023-01-24 02:43:28.266695: step: 580/466, loss: 0.14365246891975403 2023-01-24 02:43:29.039827: step: 582/466, loss: 0.37120455503463745 2023-01-24 02:43:29.840792: step: 584/466, loss: 0.8782157897949219 2023-01-24 02:43:30.644819: step: 586/466, loss: 1.1974824666976929 2023-01-24 02:43:31.439340: step: 588/466, loss: 0.3319639265537262 2023-01-24 02:43:32.259226: step: 590/466, loss: 0.37469062209129333 2023-01-24 02:43:33.011680: step: 592/466, loss: 0.21519650518894196 2023-01-24 02:43:33.802910: step: 594/466, loss: 0.3425082564353943 2023-01-24 02:43:34.540101: step: 596/466, loss: 0.3874545991420746 2023-01-24 02:43:35.325836: step: 598/466, loss: 0.5452808737754822 2023-01-24 02:43:36.088422: step: 600/466, loss: 0.14452871680259705 2023-01-24 02:43:36.828667: step: 602/466, loss: 0.23176631331443787 2023-01-24 02:43:37.603987: step: 604/466, loss: 0.39068472385406494 2023-01-24 02:43:38.414028: step: 606/466, loss: 0.4605758488178253 2023-01-24 02:43:39.145995: step: 608/466, loss: 0.21772919595241547 2023-01-24 02:43:39.949661: step: 610/466, loss: 0.4970461428165436 2023-01-24 02:43:40.754030: step: 612/466, loss: 0.46533507108688354 2023-01-24 02:43:41.510968: step: 614/466, loss: 0.7836638689041138 2023-01-24 02:43:42.268772: step: 616/466, loss: 0.7685657143592834 2023-01-24 02:43:43.138505: step: 618/466, loss: 0.08030344545841217 2023-01-24 02:43:43.848200: step: 620/466, loss: 0.4773416519165039 2023-01-24 02:43:44.635434: step: 622/466, loss: 0.5082002282142639 2023-01-24 02:43:45.374599: step: 624/466, loss: 0.7404825091362 2023-01-24 02:43:46.114276: step: 626/466, loss: 0.2041405588388443 2023-01-24 02:43:46.818616: step: 628/466, loss: 0.2953926920890808 2023-01-24 02:43:47.532767: step: 630/466, loss: 0.10666435211896896 2023-01-24 02:43:48.437518: step: 632/466, loss: 0.16296805441379547 2023-01-24 02:43:49.224570: step: 634/466, loss: 0.7506895065307617 2023-01-24 02:43:49.888714: step: 636/466, loss: 0.4533143937587738 2023-01-24 02:43:50.609411: step: 638/466, loss: 2.788522481918335 2023-01-24 02:43:51.355389: step: 640/466, loss: 1.4357651472091675 2023-01-24 02:43:52.136399: step: 642/466, loss: 1.3311514854431152 2023-01-24 02:43:52.947174: step: 644/466, loss: 0.21705827116966248 2023-01-24 02:43:53.728363: step: 646/466, loss: 0.6914721131324768 2023-01-24 02:43:54.452639: step: 648/466, loss: 0.20655307173728943 2023-01-24 02:43:55.134509: step: 650/466, loss: 0.4660431742668152 2023-01-24 02:43:56.024919: step: 652/466, loss: 0.7570342421531677 2023-01-24 02:43:56.765937: step: 654/466, loss: 0.1994781494140625 2023-01-24 02:43:57.573782: step: 656/466, loss: 0.298532098531723 2023-01-24 02:43:58.353815: step: 658/466, loss: 0.6627851128578186 2023-01-24 02:43:59.070480: step: 660/466, loss: 0.330418199300766 2023-01-24 02:43:59.808334: step: 662/466, loss: 0.4699534773826599 2023-01-24 02:44:00.602290: step: 664/466, loss: 0.30207765102386475 2023-01-24 02:44:01.369644: step: 666/466, loss: 0.23757143318653107 2023-01-24 02:44:02.213149: step: 668/466, loss: 0.6265518069267273 2023-01-24 02:44:02.906106: step: 670/466, loss: 0.19305653870105743 2023-01-24 02:44:03.710320: step: 672/466, loss: 0.06330909579992294 2023-01-24 02:44:04.420312: step: 674/466, loss: 0.2550206184387207 2023-01-24 02:44:05.253252: step: 676/466, loss: 0.7556263208389282 2023-01-24 02:44:05.988641: step: 678/466, loss: 0.1781693398952484 2023-01-24 02:44:06.835603: step: 680/466, loss: 0.521965503692627 2023-01-24 02:44:07.600246: step: 682/466, loss: 0.21839545667171478 2023-01-24 02:44:08.334849: step: 684/466, loss: 0.1758604794740677 2023-01-24 02:44:09.080156: step: 686/466, loss: 0.5475415587425232 2023-01-24 02:44:09.888982: step: 688/466, loss: 0.21490980684757233 2023-01-24 02:44:10.638799: step: 690/466, loss: 0.39477869868278503 2023-01-24 02:44:11.412147: step: 692/466, loss: 0.5012360215187073 2023-01-24 02:44:12.298749: step: 694/466, loss: 0.6030199527740479 2023-01-24 02:44:13.113617: step: 696/466, loss: 0.48296019434928894 2023-01-24 02:44:13.968554: step: 698/466, loss: 0.5750011205673218 2023-01-24 02:44:14.612041: step: 700/466, loss: 0.13368430733680725 2023-01-24 02:44:15.397649: step: 702/466, loss: 0.1267334520816803 2023-01-24 02:44:16.207048: step: 704/466, loss: 0.3822780251502991 2023-01-24 02:44:16.908523: step: 706/466, loss: 0.22256867587566376 2023-01-24 02:44:17.627194: step: 708/466, loss: 0.6377112865447998 2023-01-24 02:44:18.311235: step: 710/466, loss: 0.15049827098846436 2023-01-24 02:44:19.155405: step: 712/466, loss: 0.5327929854393005 2023-01-24 02:44:19.981061: step: 714/466, loss: 0.3772541880607605 2023-01-24 02:44:20.734711: step: 716/466, loss: 0.254294216632843 2023-01-24 02:44:21.544390: step: 718/466, loss: 1.3840159177780151 2023-01-24 02:44:22.346218: step: 720/466, loss: 0.3449174761772156 2023-01-24 02:44:23.032035: step: 722/466, loss: 1.406064510345459 2023-01-24 02:44:23.684804: step: 724/466, loss: 0.3576720058917999 2023-01-24 02:44:24.429050: step: 726/466, loss: 4.103994846343994 2023-01-24 02:44:25.271866: step: 728/466, loss: 0.2885291278362274 2023-01-24 02:44:26.076428: step: 730/466, loss: 0.16140568256378174 2023-01-24 02:44:26.824580: step: 732/466, loss: 0.6693004965782166 2023-01-24 02:44:27.611287: step: 734/466, loss: 0.43078896403312683 2023-01-24 02:44:28.425928: step: 736/466, loss: 0.10041685402393341 2023-01-24 02:44:29.215918: step: 738/466, loss: 0.4233916699886322 2023-01-24 02:44:29.939927: step: 740/466, loss: 0.23425817489624023 2023-01-24 02:44:30.700189: step: 742/466, loss: 0.1729540079832077 2023-01-24 02:44:31.495876: step: 744/466, loss: 0.26733776926994324 2023-01-24 02:44:32.329946: step: 746/466, loss: 0.14470168948173523 2023-01-24 02:44:33.074980: step: 748/466, loss: 0.12287446856498718 2023-01-24 02:44:33.883354: step: 750/466, loss: 0.4447556138038635 2023-01-24 02:44:34.602756: step: 752/466, loss: 0.11110031604766846 2023-01-24 02:44:35.331385: step: 754/466, loss: 0.41622406244277954 2023-01-24 02:44:36.100085: step: 756/466, loss: 0.25940385460853577 2023-01-24 02:44:36.890692: step: 758/466, loss: 0.11219124495983124 2023-01-24 02:44:37.632681: step: 760/466, loss: 0.634303629398346 2023-01-24 02:44:38.351455: step: 762/466, loss: 0.40231674909591675 2023-01-24 02:44:39.059325: step: 764/466, loss: 0.33801835775375366 2023-01-24 02:44:39.800135: step: 766/466, loss: 0.1518666297197342 2023-01-24 02:44:40.556647: step: 768/466, loss: 0.1566280722618103 2023-01-24 02:44:41.316183: step: 770/466, loss: 0.5563924312591553 2023-01-24 02:44:42.039972: step: 772/466, loss: 0.3823583126068115 2023-01-24 02:44:42.906914: step: 774/466, loss: 0.20732498168945312 2023-01-24 02:44:43.750535: step: 776/466, loss: 0.2604410648345947 2023-01-24 02:44:44.516469: step: 778/466, loss: 0.8115687370300293 2023-01-24 02:44:45.255234: step: 780/466, loss: 0.21109074354171753 2023-01-24 02:44:45.973013: step: 782/466, loss: 0.4132833182811737 2023-01-24 02:44:46.711183: step: 784/466, loss: 0.3729992210865021 2023-01-24 02:44:47.541818: step: 786/466, loss: 0.166608989238739 2023-01-24 02:44:48.344303: step: 788/466, loss: 0.45757102966308594 2023-01-24 02:44:49.086126: step: 790/466, loss: 0.31187009811401367 2023-01-24 02:44:49.893003: step: 792/466, loss: 0.35437431931495667 2023-01-24 02:44:50.751795: step: 794/466, loss: 0.15766958892345428 2023-01-24 02:44:51.454332: step: 796/466, loss: 1.3682160377502441 2023-01-24 02:44:52.214570: step: 798/466, loss: 0.23890097439289093 2023-01-24 02:44:53.026271: step: 800/466, loss: 0.0744379311800003 2023-01-24 02:44:53.795893: step: 802/466, loss: 0.1655988246202469 2023-01-24 02:44:54.599133: step: 804/466, loss: 0.5713003873825073 2023-01-24 02:44:55.428038: step: 806/466, loss: 0.26792052388191223 2023-01-24 02:44:56.203827: step: 808/466, loss: 0.6057258248329163 2023-01-24 02:44:57.088325: step: 810/466, loss: 0.22376419603824615 2023-01-24 02:44:57.857191: step: 812/466, loss: 0.15296228229999542 2023-01-24 02:44:58.754908: step: 814/466, loss: 0.4715817868709564 2023-01-24 02:44:59.587019: step: 816/466, loss: 0.2729172110557556 2023-01-24 02:45:00.315798: step: 818/466, loss: 0.10306338220834732 2023-01-24 02:45:01.079394: step: 820/466, loss: 0.6315586566925049 2023-01-24 02:45:01.824431: step: 822/466, loss: 0.21267792582511902 2023-01-24 02:45:02.679098: step: 824/466, loss: 0.26321327686309814 2023-01-24 02:45:03.559904: step: 826/466, loss: 0.3587478995323181 2023-01-24 02:45:04.305719: step: 828/466, loss: 0.3764268755912781 2023-01-24 02:45:04.994330: step: 830/466, loss: 1.0579025745391846 2023-01-24 02:45:05.724058: step: 832/466, loss: 0.4254581928253174 2023-01-24 02:45:06.460800: step: 834/466, loss: 0.6968392133712769 2023-01-24 02:45:07.215558: step: 836/466, loss: 3.7670774459838867 2023-01-24 02:45:07.942753: step: 838/466, loss: 0.36055028438568115 2023-01-24 02:45:08.747926: step: 840/466, loss: 0.2476329505443573 2023-01-24 02:45:09.483934: step: 842/466, loss: 0.2701147198677063 2023-01-24 02:45:10.221831: step: 844/466, loss: 0.19635479152202606 2023-01-24 02:45:10.940496: step: 846/466, loss: 0.7163576483726501 2023-01-24 02:45:11.682513: step: 848/466, loss: 0.2725900709629059 2023-01-24 02:45:12.409587: step: 850/466, loss: 0.1970960944890976 2023-01-24 02:45:13.147392: step: 852/466, loss: 0.24540355801582336 2023-01-24 02:45:13.904644: step: 854/466, loss: 0.11632157117128372 2023-01-24 02:45:14.698779: step: 856/466, loss: 0.12717144191265106 2023-01-24 02:45:15.474877: step: 858/466, loss: 0.15465322136878967 2023-01-24 02:45:16.241146: step: 860/466, loss: 0.11278307437896729 2023-01-24 02:45:17.006253: step: 862/466, loss: 0.2683349847793579 2023-01-24 02:45:17.713316: step: 864/466, loss: 0.2555452585220337 2023-01-24 02:45:18.524937: step: 866/466, loss: 0.235856831073761 2023-01-24 02:45:19.256801: step: 868/466, loss: 0.2737419903278351 2023-01-24 02:45:20.067352: step: 870/466, loss: 0.2201920598745346 2023-01-24 02:45:20.788981: step: 872/466, loss: 0.7243627309799194 2023-01-24 02:45:21.516282: step: 874/466, loss: 0.18630512058734894 2023-01-24 02:45:22.308437: step: 876/466, loss: 0.24475634098052979 2023-01-24 02:45:23.044111: step: 878/466, loss: 0.18331408500671387 2023-01-24 02:45:23.793635: step: 880/466, loss: 0.37412190437316895 2023-01-24 02:45:24.505661: step: 882/466, loss: 0.10743524134159088 2023-01-24 02:45:25.144210: step: 884/466, loss: 0.7666282653808594 2023-01-24 02:45:25.924608: step: 886/466, loss: 0.25264352560043335 2023-01-24 02:45:26.704293: step: 888/466, loss: 0.2977299392223358 2023-01-24 02:45:27.548603: step: 890/466, loss: 0.25733569264411926 2023-01-24 02:45:28.352366: step: 892/466, loss: 1.2413746118545532 2023-01-24 02:45:29.098049: step: 894/466, loss: 0.2269682139158249 2023-01-24 02:45:29.880637: step: 896/466, loss: 0.4371106028556824 2023-01-24 02:45:30.740176: step: 898/466, loss: 1.4118807315826416 2023-01-24 02:45:31.532409: step: 900/466, loss: 0.7781394720077515 2023-01-24 02:45:32.311596: step: 902/466, loss: 0.42764055728912354 2023-01-24 02:45:33.111885: step: 904/466, loss: 0.604682207107544 2023-01-24 02:45:33.854849: step: 906/466, loss: 0.25415295362472534 2023-01-24 02:45:34.619461: step: 908/466, loss: 0.31604674458503723 2023-01-24 02:45:35.412596: step: 910/466, loss: 0.3052256405353546 2023-01-24 02:45:36.222266: step: 912/466, loss: 0.21037594974040985 2023-01-24 02:45:36.991811: step: 914/466, loss: 0.47760850191116333 2023-01-24 02:45:37.782872: step: 916/466, loss: 0.4769919514656067 2023-01-24 02:45:38.533820: step: 918/466, loss: 0.4393412470817566 2023-01-24 02:45:39.309328: step: 920/466, loss: 0.25017476081848145 2023-01-24 02:45:40.096045: step: 922/466, loss: 5.702282905578613 2023-01-24 02:45:40.928602: step: 924/466, loss: 0.6336638331413269 2023-01-24 02:45:41.654207: step: 926/466, loss: 0.17593809962272644 2023-01-24 02:45:42.420602: step: 928/466, loss: 0.6609342098236084 2023-01-24 02:45:43.172410: step: 930/466, loss: 0.33767199516296387 2023-01-24 02:45:43.900529: step: 932/466, loss: 0.4533933401107788 ================================================== Loss: 0.466 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33593902713806645, 'r': 0.2785680357862145, 'f1': 0.30457542501936724}, 'combined': 0.22442399738269164, 'epoch': 8} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3689524574716157, 'r': 0.2552320463375073, 'f1': 0.3017328695187523}, 'combined': 0.18545532467981848, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31427003788795727, 'r': 0.28982018674297383, 'f1': 0.3015503226328672}, 'combined': 0.22219497457158632, 'epoch': 8} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35272614491605897, 'r': 0.2602728387659155, 'f1': 0.2995275277385747}, 'combined': 0.18409984631736787, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3436898244146144, 'r': 0.28760381891241926, 'f1': 0.3131553978653821}, 'combined': 0.23074608263764995, 'epoch': 8} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3702821454852311, 'r': 0.2572115595368863, 'f1': 0.3035595332563966}, 'combined': 0.18749265289365677, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.391304347826087, 'r': 0.2571428571428571, 'f1': 0.3103448275862069}, 'combined': 0.20689655172413793, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31521739130434784, 'r': 0.31521739130434784, 'f1': 0.31521739130434784}, 'combined': 0.15760869565217392, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.10344827586206896, 'f1': 0.16666666666666663}, 'combined': 0.11111111111111108, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34321716589861756, 'r': 0.28264943074003795, 'f1': 0.31000260145681585}, 'combined': 0.22842296949449586, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3400251231205871, 'r': 0.24846857481712165, 'f1': 0.28712476629692396}, 'combined': 0.17647668562640204, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3371212121212121, 'r': 0.31785714285714284, 'f1': 0.3272058823529411}, 'combined': 0.21813725490196073, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35633182648907596, 'r': 0.3171150410310752, 'f1': 0.33558157956501333}, 'combined': 0.24727063757422033, 'epoch': 5} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38903978397030203, 'r': 0.22009690200395982, 'f1': 0.28114035217301386}, 'combined': 0.17364551163627331, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 5} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:48:27.750172: step: 2/466, loss: 0.7617669105529785 2023-01-24 02:48:28.562689: step: 4/466, loss: 0.47193193435668945 2023-01-24 02:48:29.349474: step: 6/466, loss: 0.399549663066864 2023-01-24 02:48:30.120338: step: 8/466, loss: 1.0091228485107422 2023-01-24 02:48:30.844647: step: 10/466, loss: 0.16260093450546265 2023-01-24 02:48:31.754617: step: 12/466, loss: 0.24605731666088104 2023-01-24 02:48:32.528276: step: 14/466, loss: 0.4598664343357086 2023-01-24 02:48:33.248082: step: 16/466, loss: 0.22285336256027222 2023-01-24 02:48:34.062463: step: 18/466, loss: 0.2630546987056732 2023-01-24 02:48:34.837387: step: 20/466, loss: 0.9710225462913513 2023-01-24 02:48:35.552691: step: 22/466, loss: 0.138289675116539 2023-01-24 02:48:36.262902: step: 24/466, loss: 0.18287083506584167 2023-01-24 02:48:37.018616: step: 26/466, loss: 1.1075892448425293 2023-01-24 02:48:37.740402: step: 28/466, loss: 0.21186289191246033 2023-01-24 02:48:38.465870: step: 30/466, loss: 0.7421730756759644 2023-01-24 02:48:39.226090: step: 32/466, loss: 0.25038522481918335 2023-01-24 02:48:39.994159: step: 34/466, loss: 0.8326919674873352 2023-01-24 02:48:40.743753: step: 36/466, loss: 0.11848055571317673 2023-01-24 02:48:41.574488: step: 38/466, loss: 0.5838706493377686 2023-01-24 02:48:42.288001: step: 40/466, loss: 0.5546318292617798 2023-01-24 02:48:43.082310: step: 42/466, loss: 0.2589946985244751 2023-01-24 02:48:43.799466: step: 44/466, loss: 0.3935251832008362 2023-01-24 02:48:44.557550: step: 46/466, loss: 0.4007113575935364 2023-01-24 02:48:45.396276: step: 48/466, loss: 0.1532585769891739 2023-01-24 02:48:46.170393: step: 50/466, loss: 0.24019591510295868 2023-01-24 02:48:46.925294: step: 52/466, loss: 0.16741792857646942 2023-01-24 02:48:47.655036: step: 54/466, loss: 0.1578289419412613 2023-01-24 02:48:48.408381: step: 56/466, loss: 0.24181082844734192 2023-01-24 02:48:49.298412: step: 58/466, loss: 0.2726386785507202 2023-01-24 02:48:49.989731: step: 60/466, loss: 0.2632262110710144 2023-01-24 02:48:50.737964: step: 62/466, loss: 0.382368803024292 2023-01-24 02:48:51.477282: step: 64/466, loss: 0.7976775169372559 2023-01-24 02:48:52.217221: step: 66/466, loss: 0.4883986711502075 2023-01-24 02:48:53.006435: step: 68/466, loss: 0.3494545519351959 2023-01-24 02:48:53.739179: step: 70/466, loss: 0.3140970468521118 2023-01-24 02:48:54.494192: step: 72/466, loss: 0.3909737765789032 2023-01-24 02:48:55.226247: step: 74/466, loss: 0.249564528465271 2023-01-24 02:48:55.972525: step: 76/466, loss: 0.03772410377860069 2023-01-24 02:48:56.699833: step: 78/466, loss: 0.4875684082508087 2023-01-24 02:48:57.472300: step: 80/466, loss: 0.09279076755046844 2023-01-24 02:48:58.257015: step: 82/466, loss: 0.13388334214687347 2023-01-24 02:48:58.964670: step: 84/466, loss: 0.3358129858970642 2023-01-24 02:48:59.707935: step: 86/466, loss: 0.33978158235549927 2023-01-24 02:49:00.463771: step: 88/466, loss: 0.3743342161178589 2023-01-24 02:49:01.276430: step: 90/466, loss: 0.6772381067276001 2023-01-24 02:49:01.958897: step: 92/466, loss: 0.16973809897899628 2023-01-24 02:49:02.770680: step: 94/466, loss: 0.1664164513349533 2023-01-24 02:49:03.467023: step: 96/466, loss: 0.6884454488754272 2023-01-24 02:49:04.212934: step: 98/466, loss: 0.143169105052948 2023-01-24 02:49:05.029430: step: 100/466, loss: 0.5162626504898071 2023-01-24 02:49:05.817333: step: 102/466, loss: 0.24762707948684692 2023-01-24 02:49:06.688115: step: 104/466, loss: 0.1087818592786789 2023-01-24 02:49:07.424757: step: 106/466, loss: 0.1368941068649292 2023-01-24 02:49:08.206271: step: 108/466, loss: 0.3564227223396301 2023-01-24 02:49:08.961263: step: 110/466, loss: 0.36613261699676514 2023-01-24 02:49:09.720721: step: 112/466, loss: 0.2938078045845032 2023-01-24 02:49:10.490055: step: 114/466, loss: 0.7527884244918823 2023-01-24 02:49:11.261984: step: 116/466, loss: 0.15450149774551392 2023-01-24 02:49:12.105105: step: 118/466, loss: 1.102258563041687 2023-01-24 02:49:12.834386: step: 120/466, loss: 0.17737160623073578 2023-01-24 02:49:13.589635: step: 122/466, loss: 0.3365913927555084 2023-01-24 02:49:14.484585: step: 124/466, loss: 0.10386361926794052 2023-01-24 02:49:15.279186: step: 126/466, loss: 0.35903674364089966 2023-01-24 02:49:16.004193: step: 128/466, loss: 0.6990206241607666 2023-01-24 02:49:16.781966: step: 130/466, loss: 0.6001828908920288 2023-01-24 02:49:17.546837: step: 132/466, loss: 0.1223577931523323 2023-01-24 02:49:18.315129: step: 134/466, loss: 0.11910473555326462 2023-01-24 02:49:19.011199: step: 136/466, loss: 0.1736944168806076 2023-01-24 02:49:19.767769: step: 138/466, loss: 0.25195005536079407 2023-01-24 02:49:20.479625: step: 140/466, loss: 0.24543677270412445 2023-01-24 02:49:21.198884: step: 142/466, loss: 0.21577134728431702 2023-01-24 02:49:21.948595: step: 144/466, loss: 0.13015221059322357 2023-01-24 02:49:22.691522: step: 146/466, loss: 0.0657990425825119 2023-01-24 02:49:23.454710: step: 148/466, loss: 0.22850392758846283 2023-01-24 02:49:24.294806: step: 150/466, loss: 0.22996000945568085 2023-01-24 02:49:25.111771: step: 152/466, loss: 0.10576073080301285 2023-01-24 02:49:25.911926: step: 154/466, loss: 0.2323477864265442 2023-01-24 02:49:26.622326: step: 156/466, loss: 0.269827663898468 2023-01-24 02:49:27.331468: step: 158/466, loss: 0.33273160457611084 2023-01-24 02:49:28.082463: step: 160/466, loss: 0.2759661078453064 2023-01-24 02:49:28.768532: step: 162/466, loss: 0.357607901096344 2023-01-24 02:49:29.528768: step: 164/466, loss: 0.24747194349765778 2023-01-24 02:49:30.261853: step: 166/466, loss: 0.24419671297073364 2023-01-24 02:49:31.031783: step: 168/466, loss: 0.3568973243236542 2023-01-24 02:49:31.709038: step: 170/466, loss: 0.43960994482040405 2023-01-24 02:49:32.498669: step: 172/466, loss: 0.2388741672039032 2023-01-24 02:49:33.262216: step: 174/466, loss: 0.13412341475486755 2023-01-24 02:49:34.014426: step: 176/466, loss: 0.17737331986427307 2023-01-24 02:49:34.729219: step: 178/466, loss: 0.04712221026420593 2023-01-24 02:49:35.419787: step: 180/466, loss: 0.44618692994117737 2023-01-24 02:49:36.164439: step: 182/466, loss: 0.4233967065811157 2023-01-24 02:49:36.993813: step: 184/466, loss: 0.4268166422843933 2023-01-24 02:49:37.731796: step: 186/466, loss: 0.3471412658691406 2023-01-24 02:49:38.489574: step: 188/466, loss: 0.1013251468539238 2023-01-24 02:49:39.239408: step: 190/466, loss: 0.47036483883857727 2023-01-24 02:49:39.992958: step: 192/466, loss: 0.1508309692144394 2023-01-24 02:49:40.784558: step: 194/466, loss: 0.5607315897941589 2023-01-24 02:49:41.548058: step: 196/466, loss: 0.18536363542079926 2023-01-24 02:49:42.284750: step: 198/466, loss: 0.29159408807754517 2023-01-24 02:49:42.990588: step: 200/466, loss: 0.18396173417568207 2023-01-24 02:49:43.791473: step: 202/466, loss: 0.0968318060040474 2023-01-24 02:49:44.571129: step: 204/466, loss: 0.09180274605751038 2023-01-24 02:49:45.286200: step: 206/466, loss: 0.2655928432941437 2023-01-24 02:49:46.004899: step: 208/466, loss: 0.1596938669681549 2023-01-24 02:49:46.751617: step: 210/466, loss: 0.3983745872974396 2023-01-24 02:49:47.470523: step: 212/466, loss: 0.2768838703632355 2023-01-24 02:49:48.240757: step: 214/466, loss: 0.325339674949646 2023-01-24 02:49:48.967609: step: 216/466, loss: 0.36471548676490784 2023-01-24 02:49:49.691210: step: 218/466, loss: 0.0601777583360672 2023-01-24 02:49:50.408482: step: 220/466, loss: 0.9182330369949341 2023-01-24 02:49:51.254293: step: 222/466, loss: 0.2803994119167328 2023-01-24 02:49:52.073795: step: 224/466, loss: 0.12595197558403015 2023-01-24 02:49:52.846825: step: 226/466, loss: 0.1642196774482727 2023-01-24 02:49:53.527612: step: 228/466, loss: 0.19627025723457336 2023-01-24 02:49:54.299552: step: 230/466, loss: 0.5533586144447327 2023-01-24 02:49:55.030496: step: 232/466, loss: 0.0949460119009018 2023-01-24 02:49:55.844434: step: 234/466, loss: 0.31896600127220154 2023-01-24 02:49:56.708559: step: 236/466, loss: 0.27299320697784424 2023-01-24 02:49:57.450245: step: 238/466, loss: 0.3383888304233551 2023-01-24 02:49:58.212567: step: 240/466, loss: 0.7563154101371765 2023-01-24 02:49:58.971694: step: 242/466, loss: 0.17838376760482788 2023-01-24 02:49:59.731017: step: 244/466, loss: 0.28580132126808167 2023-01-24 02:50:00.521153: step: 246/466, loss: 0.26450544595718384 2023-01-24 02:50:01.300203: step: 248/466, loss: 0.1821730136871338 2023-01-24 02:50:02.185343: step: 250/466, loss: 2.6174964904785156 2023-01-24 02:50:02.954219: step: 252/466, loss: 0.24531832337379456 2023-01-24 02:50:03.647150: step: 254/466, loss: 0.1795777529478073 2023-01-24 02:50:04.437570: step: 256/466, loss: 0.2463114857673645 2023-01-24 02:50:05.124860: step: 258/466, loss: 0.43065595626831055 2023-01-24 02:50:05.929363: step: 260/466, loss: 0.23510803282260895 2023-01-24 02:50:06.733575: step: 262/466, loss: 0.3374987244606018 2023-01-24 02:50:07.537651: step: 264/466, loss: 0.2109946459531784 2023-01-24 02:50:08.291286: step: 266/466, loss: 0.4068831503391266 2023-01-24 02:50:09.168636: step: 268/466, loss: 0.2021590620279312 2023-01-24 02:50:09.862632: step: 270/466, loss: 0.468037486076355 2023-01-24 02:50:10.649280: step: 272/466, loss: 0.13995757699012756 2023-01-24 02:50:11.477801: step: 274/466, loss: 3.66815185546875 2023-01-24 02:50:12.278546: step: 276/466, loss: 1.1114652156829834 2023-01-24 02:50:13.084257: step: 278/466, loss: 0.22365108132362366 2023-01-24 02:50:13.930357: step: 280/466, loss: 0.40952688455581665 2023-01-24 02:50:14.738709: step: 282/466, loss: 0.44825875759124756 2023-01-24 02:50:15.474242: step: 284/466, loss: 0.308834969997406 2023-01-24 02:50:16.192004: step: 286/466, loss: 0.07672157138586044 2023-01-24 02:50:16.969522: step: 288/466, loss: 0.27245256304740906 2023-01-24 02:50:17.756315: step: 290/466, loss: 0.21029876172542572 2023-01-24 02:50:18.477105: step: 292/466, loss: 0.30801981687545776 2023-01-24 02:50:19.264685: step: 294/466, loss: 0.272743284702301 2023-01-24 02:50:20.047850: step: 296/466, loss: 1.1342663764953613 2023-01-24 02:50:20.762193: step: 298/466, loss: 0.2068939507007599 2023-01-24 02:50:21.555945: step: 300/466, loss: 0.4226273000240326 2023-01-24 02:50:22.379224: step: 302/466, loss: 0.35881322622299194 2023-01-24 02:50:23.214248: step: 304/466, loss: 0.0775582492351532 2023-01-24 02:50:23.955149: step: 306/466, loss: 0.2552018165588379 2023-01-24 02:50:24.697971: step: 308/466, loss: 0.20988473296165466 2023-01-24 02:50:25.426698: step: 310/466, loss: 0.09981327503919601 2023-01-24 02:50:26.177814: step: 312/466, loss: 0.26077914237976074 2023-01-24 02:50:26.988858: step: 314/466, loss: 0.24229076504707336 2023-01-24 02:50:27.717339: step: 316/466, loss: 0.19912472367286682 2023-01-24 02:50:28.519756: step: 318/466, loss: 0.3713231682777405 2023-01-24 02:50:29.341785: step: 320/466, loss: 0.20230260491371155 2023-01-24 02:50:30.108480: step: 322/466, loss: 0.356352299451828 2023-01-24 02:50:30.860141: step: 324/466, loss: 0.22180378437042236 2023-01-24 02:50:31.636867: step: 326/466, loss: 1.2216380834579468 2023-01-24 02:50:32.477997: step: 328/466, loss: 0.4283653795719147 2023-01-24 02:50:33.258848: step: 330/466, loss: 0.16802679002285004 2023-01-24 02:50:34.020778: step: 332/466, loss: 0.458918035030365 2023-01-24 02:50:34.748194: step: 334/466, loss: 0.4632418751716614 2023-01-24 02:50:35.527773: step: 336/466, loss: 0.06715085357427597 2023-01-24 02:50:36.251434: step: 338/466, loss: 0.33738797903060913 2023-01-24 02:50:37.068849: step: 340/466, loss: 0.5246156454086304 2023-01-24 02:50:37.853247: step: 342/466, loss: 0.456807404756546 2023-01-24 02:50:38.621107: step: 344/466, loss: 0.8150843381881714 2023-01-24 02:50:39.414402: step: 346/466, loss: 0.6331696510314941 2023-01-24 02:50:40.187578: step: 348/466, loss: 0.23484434187412262 2023-01-24 02:50:40.989292: step: 350/466, loss: 0.9283328652381897 2023-01-24 02:50:41.729538: step: 352/466, loss: 0.20326565206050873 2023-01-24 02:50:42.547703: step: 354/466, loss: 0.13701821863651276 2023-01-24 02:50:43.325416: step: 356/466, loss: 0.47411486506462097 2023-01-24 02:50:44.096316: step: 358/466, loss: 0.41743534803390503 2023-01-24 02:50:44.871616: step: 360/466, loss: 0.1875670850276947 2023-01-24 02:50:45.618815: step: 362/466, loss: 0.2520168721675873 2023-01-24 02:50:46.324040: step: 364/466, loss: 0.33447718620300293 2023-01-24 02:50:47.118882: step: 366/466, loss: 0.4753393530845642 2023-01-24 02:50:47.909790: step: 368/466, loss: 0.13572938740253448 2023-01-24 02:50:48.847921: step: 370/466, loss: 0.19028696417808533 2023-01-24 02:50:49.684171: step: 372/466, loss: 0.2276836782693863 2023-01-24 02:50:50.531136: step: 374/466, loss: 0.2572558522224426 2023-01-24 02:50:51.194299: step: 376/466, loss: 0.285245418548584 2023-01-24 02:50:51.998373: step: 378/466, loss: 0.061036914587020874 2023-01-24 02:50:52.801210: step: 380/466, loss: 0.48633047938346863 2023-01-24 02:50:53.537734: step: 382/466, loss: 0.20658385753631592 2023-01-24 02:50:54.252192: step: 384/466, loss: 0.11859611421823502 2023-01-24 02:50:54.956129: step: 386/466, loss: 0.7415170073509216 2023-01-24 02:50:55.740017: step: 388/466, loss: 0.27088481187820435 2023-01-24 02:50:56.579583: step: 390/466, loss: 0.29326778650283813 2023-01-24 02:50:57.392790: step: 392/466, loss: 0.3623054623603821 2023-01-24 02:50:58.144240: step: 394/466, loss: 0.6543350219726562 2023-01-24 02:50:58.864918: step: 396/466, loss: 0.7425453066825867 2023-01-24 02:50:59.679010: step: 398/466, loss: 0.28374147415161133 2023-01-24 02:51:00.441987: step: 400/466, loss: 0.1749078333377838 2023-01-24 02:51:01.211685: step: 402/466, loss: 0.21680453419685364 2023-01-24 02:51:01.968382: step: 404/466, loss: 0.09921462088823318 2023-01-24 02:51:02.727764: step: 406/466, loss: 0.2960980236530304 2023-01-24 02:51:03.472828: step: 408/466, loss: 0.2041424661874771 2023-01-24 02:51:04.261456: step: 410/466, loss: 1.829854965209961 2023-01-24 02:51:05.049959: step: 412/466, loss: 1.0091545581817627 2023-01-24 02:51:05.858222: step: 414/466, loss: 0.8316347002983093 2023-01-24 02:51:06.614461: step: 416/466, loss: 0.51683109998703 2023-01-24 02:51:07.340671: step: 418/466, loss: 0.44507837295532227 2023-01-24 02:51:08.133594: step: 420/466, loss: 0.3541668951511383 2023-01-24 02:51:08.919729: step: 422/466, loss: 0.09778696298599243 2023-01-24 02:51:09.660682: step: 424/466, loss: 0.27539190649986267 2023-01-24 02:51:10.407926: step: 426/466, loss: 0.11397480964660645 2023-01-24 02:51:11.266358: step: 428/466, loss: 0.44090360403060913 2023-01-24 02:51:12.053659: step: 430/466, loss: 0.1974068135023117 2023-01-24 02:51:12.785495: step: 432/466, loss: 0.5693545937538147 2023-01-24 02:51:13.534970: step: 434/466, loss: 0.3817470669746399 2023-01-24 02:51:14.278905: step: 436/466, loss: 0.17790962755680084 2023-01-24 02:51:14.979122: step: 438/466, loss: 0.22396616637706757 2023-01-24 02:51:15.714725: step: 440/466, loss: 0.3058200776576996 2023-01-24 02:51:16.499362: step: 442/466, loss: 0.21500913798809052 2023-01-24 02:51:17.280930: step: 444/466, loss: 0.25797945261001587 2023-01-24 02:51:18.013408: step: 446/466, loss: 0.3602447509765625 2023-01-24 02:51:18.711544: step: 448/466, loss: 1.1268690824508667 2023-01-24 02:51:19.501716: step: 450/466, loss: 0.12626045942306519 2023-01-24 02:51:20.269783: step: 452/466, loss: 0.523021399974823 2023-01-24 02:51:21.010774: step: 454/466, loss: 0.21925614774227142 2023-01-24 02:51:21.771699: step: 456/466, loss: 0.6794977188110352 2023-01-24 02:51:22.518037: step: 458/466, loss: 0.09772194921970367 2023-01-24 02:51:23.265190: step: 460/466, loss: 0.5687675476074219 2023-01-24 02:51:24.059011: step: 462/466, loss: 0.38782799243927 2023-01-24 02:51:24.787517: step: 464/466, loss: 0.35689419507980347 2023-01-24 02:51:25.510889: step: 466/466, loss: 0.6951501965522766 2023-01-24 02:51:26.264851: step: 468/466, loss: 0.12729394435882568 2023-01-24 02:51:27.034819: step: 470/466, loss: 0.6348598599433899 2023-01-24 02:51:27.752537: step: 472/466, loss: 1.082305669784546 2023-01-24 02:51:28.542070: step: 474/466, loss: 3.6716866493225098 2023-01-24 02:51:29.292854: step: 476/466, loss: 0.23165543377399445 2023-01-24 02:51:30.083940: step: 478/466, loss: 0.11158735305070877 2023-01-24 02:51:30.833690: step: 480/466, loss: 1.666973352432251 2023-01-24 02:51:31.575797: step: 482/466, loss: 0.3452420234680176 2023-01-24 02:51:32.250056: step: 484/466, loss: 0.5956166386604309 2023-01-24 02:51:32.978078: step: 486/466, loss: 0.13574360311031342 2023-01-24 02:51:33.814343: step: 488/466, loss: 0.20491208136081696 2023-01-24 02:51:34.599782: step: 490/466, loss: 2.7010035514831543 2023-01-24 02:51:35.331422: step: 492/466, loss: 0.15974898636341095 2023-01-24 02:51:36.117464: step: 494/466, loss: 0.374811589717865 2023-01-24 02:51:36.851585: step: 496/466, loss: 0.21302439272403717 2023-01-24 02:51:37.633018: step: 498/466, loss: 0.21756063401699066 2023-01-24 02:51:38.369447: step: 500/466, loss: 0.2711212933063507 2023-01-24 02:51:39.217216: step: 502/466, loss: 0.30291667580604553 2023-01-24 02:51:40.092065: step: 504/466, loss: 0.26272809505462646 2023-01-24 02:51:40.825918: step: 506/466, loss: 0.8830516338348389 2023-01-24 02:51:41.577291: step: 508/466, loss: 0.17511196434497833 2023-01-24 02:51:42.292325: step: 510/466, loss: 0.2928393483161926 2023-01-24 02:51:43.221494: step: 512/466, loss: 0.9481021761894226 2023-01-24 02:51:43.997721: step: 514/466, loss: 0.22100970149040222 2023-01-24 02:51:44.805933: step: 516/466, loss: 0.42827150225639343 2023-01-24 02:51:45.588840: step: 518/466, loss: 0.686099112033844 2023-01-24 02:51:46.374416: step: 520/466, loss: 0.35829856991767883 2023-01-24 02:51:47.098289: step: 522/466, loss: 0.8147534728050232 2023-01-24 02:51:47.813284: step: 524/466, loss: 0.3499932587146759 2023-01-24 02:51:48.582310: step: 526/466, loss: 0.15522895753383636 2023-01-24 02:51:49.326802: step: 528/466, loss: 0.27899911999702454 2023-01-24 02:51:50.082695: step: 530/466, loss: 0.2625729441642761 2023-01-24 02:51:50.772025: step: 532/466, loss: 0.3538782000541687 2023-01-24 02:51:51.495668: step: 534/466, loss: 0.22810465097427368 2023-01-24 02:51:52.229847: step: 536/466, loss: 0.39624813199043274 2023-01-24 02:51:53.055534: step: 538/466, loss: 0.2748154401779175 2023-01-24 02:51:53.864583: step: 540/466, loss: 0.2919783294200897 2023-01-24 02:51:54.616798: step: 542/466, loss: 0.5167675614356995 2023-01-24 02:51:55.366154: step: 544/466, loss: 0.17415225505828857 2023-01-24 02:51:56.110997: step: 546/466, loss: 0.11248274147510529 2023-01-24 02:51:56.799529: step: 548/466, loss: 0.5512552857398987 2023-01-24 02:51:57.595640: step: 550/466, loss: 0.3146078884601593 2023-01-24 02:51:58.459617: step: 552/466, loss: 1.2088881731033325 2023-01-24 02:51:59.263049: step: 554/466, loss: 0.17362341284751892 2023-01-24 02:52:00.172695: step: 556/466, loss: 0.16103479266166687 2023-01-24 02:52:00.928685: step: 558/466, loss: 0.6395021677017212 2023-01-24 02:52:01.636374: step: 560/466, loss: 0.22681139409542084 2023-01-24 02:52:02.404578: step: 562/466, loss: 0.6811214089393616 2023-01-24 02:52:03.182498: step: 564/466, loss: 0.3851960599422455 2023-01-24 02:52:03.984661: step: 566/466, loss: 0.13778236508369446 2023-01-24 02:52:04.839260: step: 568/466, loss: 0.2697453498840332 2023-01-24 02:52:05.556654: step: 570/466, loss: 0.15944616496562958 2023-01-24 02:52:06.332356: step: 572/466, loss: 0.06645628064870834 2023-01-24 02:52:07.069447: step: 574/466, loss: 0.16914452612400055 2023-01-24 02:52:07.720273: step: 576/466, loss: 0.38253024220466614 2023-01-24 02:52:08.558133: step: 578/466, loss: 1.3886417150497437 2023-01-24 02:52:09.347975: step: 580/466, loss: 0.1061757430434227 2023-01-24 02:52:10.191657: step: 582/466, loss: 0.3046887218952179 2023-01-24 02:52:11.005498: step: 584/466, loss: 0.42094412446022034 2023-01-24 02:52:11.826118: step: 586/466, loss: 0.09330093115568161 2023-01-24 02:52:12.578016: step: 588/466, loss: 0.40052181482315063 2023-01-24 02:52:13.295218: step: 590/466, loss: 0.22441357374191284 2023-01-24 02:52:14.035327: step: 592/466, loss: 0.09866520762443542 2023-01-24 02:52:14.857352: step: 594/466, loss: 0.37762102484703064 2023-01-24 02:52:15.576593: step: 596/466, loss: 0.102919802069664 2023-01-24 02:52:16.398095: step: 598/466, loss: 4.134809494018555 2023-01-24 02:52:17.163346: step: 600/466, loss: 0.17482620477676392 2023-01-24 02:52:17.909741: step: 602/466, loss: 0.9702067971229553 2023-01-24 02:52:18.649725: step: 604/466, loss: 0.12988156080245972 2023-01-24 02:52:19.361244: step: 606/466, loss: 3.7468557357788086 2023-01-24 02:52:20.137427: step: 608/466, loss: 0.04453423246741295 2023-01-24 02:52:20.875688: step: 610/466, loss: 0.14342595636844635 2023-01-24 02:52:21.598593: step: 612/466, loss: 0.12794727087020874 2023-01-24 02:52:22.378141: step: 614/466, loss: 0.17742207646369934 2023-01-24 02:52:23.129967: step: 616/466, loss: 0.5601744651794434 2023-01-24 02:52:23.837351: step: 618/466, loss: 0.10394865274429321 2023-01-24 02:52:24.711295: step: 620/466, loss: 0.1495993435382843 2023-01-24 02:52:25.423598: step: 622/466, loss: 0.1939275562763214 2023-01-24 02:52:26.180906: step: 624/466, loss: 0.26793724298477173 2023-01-24 02:52:26.940235: step: 626/466, loss: 0.26115572452545166 2023-01-24 02:52:27.669975: step: 628/466, loss: 0.223658487200737 2023-01-24 02:52:28.380043: step: 630/466, loss: 0.21691951155662537 2023-01-24 02:52:29.106564: step: 632/466, loss: 0.16291409730911255 2023-01-24 02:52:29.888634: step: 634/466, loss: 0.11505762487649918 2023-01-24 02:52:30.598172: step: 636/466, loss: 0.5818736553192139 2023-01-24 02:52:31.347111: step: 638/466, loss: 0.35348910093307495 2023-01-24 02:52:32.161454: step: 640/466, loss: 0.6107161641120911 2023-01-24 02:52:32.930215: step: 642/466, loss: 0.5039299726486206 2023-01-24 02:52:33.629329: step: 644/466, loss: 0.4757615327835083 2023-01-24 02:52:34.448218: step: 646/466, loss: 0.09726975858211517 2023-01-24 02:52:35.254243: step: 648/466, loss: 0.644382655620575 2023-01-24 02:52:35.955708: step: 650/466, loss: 0.2625083923339844 2023-01-24 02:52:36.721313: step: 652/466, loss: 0.1770208179950714 2023-01-24 02:52:37.481323: step: 654/466, loss: 0.3707251250743866 2023-01-24 02:52:38.173925: step: 656/466, loss: 1.1968345642089844 2023-01-24 02:52:39.058418: step: 658/466, loss: 0.3440450429916382 2023-01-24 02:52:39.792488: step: 660/466, loss: 0.6458595395088196 2023-01-24 02:52:40.540035: step: 662/466, loss: 0.09910833090543747 2023-01-24 02:52:41.254306: step: 664/466, loss: 0.13965816795825958 2023-01-24 02:52:41.952031: step: 666/466, loss: 0.15654996037483215 2023-01-24 02:52:42.760911: step: 668/466, loss: 0.36312490701675415 2023-01-24 02:52:43.521444: step: 670/466, loss: 0.820220947265625 2023-01-24 02:52:44.362469: step: 672/466, loss: 0.40227824449539185 2023-01-24 02:52:45.122182: step: 674/466, loss: 0.2854505479335785 2023-01-24 02:52:45.869867: step: 676/466, loss: 0.1591416746377945 2023-01-24 02:52:46.654919: step: 678/466, loss: 0.23695874214172363 2023-01-24 02:52:47.458344: step: 680/466, loss: 0.16190719604492188 2023-01-24 02:52:48.182977: step: 682/466, loss: 0.6181747913360596 2023-01-24 02:52:48.950193: step: 684/466, loss: 0.4995764493942261 2023-01-24 02:52:49.656376: step: 686/466, loss: 0.21558211743831635 2023-01-24 02:52:50.404472: step: 688/466, loss: 0.18158429861068726 2023-01-24 02:52:51.189382: step: 690/466, loss: 0.5408704876899719 2023-01-24 02:52:51.915426: step: 692/466, loss: 0.28732776641845703 2023-01-24 02:52:52.659016: step: 694/466, loss: 0.07246675342321396 2023-01-24 02:52:53.405833: step: 696/466, loss: 0.41384613513946533 2023-01-24 02:52:54.161911: step: 698/466, loss: 0.23722220957279205 2023-01-24 02:52:54.961088: step: 700/466, loss: 0.36485758423805237 2023-01-24 02:52:55.785464: step: 702/466, loss: 0.2078809142112732 2023-01-24 02:52:56.674045: step: 704/466, loss: 0.12964241206645966 2023-01-24 02:52:57.381500: step: 706/466, loss: 0.4992145895957947 2023-01-24 02:52:58.201141: step: 708/466, loss: 0.19601070880889893 2023-01-24 02:52:58.899221: step: 710/466, loss: 0.703117847442627 2023-01-24 02:52:59.636226: step: 712/466, loss: 0.39133965969085693 2023-01-24 02:53:00.481554: step: 714/466, loss: 0.5801993012428284 2023-01-24 02:53:01.262039: step: 716/466, loss: 0.15397311747074127 2023-01-24 02:53:02.002180: step: 718/466, loss: 0.06464719772338867 2023-01-24 02:53:02.864501: step: 720/466, loss: 0.36740589141845703 2023-01-24 02:53:03.674216: step: 722/466, loss: 0.09521396458148956 2023-01-24 02:53:04.466527: step: 724/466, loss: 0.46077385544776917 2023-01-24 02:53:05.166665: step: 726/466, loss: 0.18440701067447662 2023-01-24 02:53:05.899333: step: 728/466, loss: 0.2911909520626068 2023-01-24 02:53:06.682513: step: 730/466, loss: 0.17030131816864014 2023-01-24 02:53:07.505764: step: 732/466, loss: 0.305722713470459 2023-01-24 02:53:08.258484: step: 734/466, loss: 0.20065052807331085 2023-01-24 02:53:09.059928: step: 736/466, loss: 0.5773312449455261 2023-01-24 02:53:09.899859: step: 738/466, loss: 0.6475232839584351 2023-01-24 02:53:10.690431: step: 740/466, loss: 0.29985684156417847 2023-01-24 02:53:11.406246: step: 742/466, loss: 0.573562502861023 2023-01-24 02:53:12.156733: step: 744/466, loss: 0.08346546441316605 2023-01-24 02:53:12.887535: step: 746/466, loss: 0.2765767276287079 2023-01-24 02:53:13.621991: step: 748/466, loss: 0.40180301666259766 2023-01-24 02:53:14.392681: step: 750/466, loss: 0.6855344176292419 2023-01-24 02:53:15.201184: step: 752/466, loss: 0.12331660836935043 2023-01-24 02:53:15.978868: step: 754/466, loss: 0.6254172325134277 2023-01-24 02:53:16.766309: step: 756/466, loss: 0.2580622136592865 2023-01-24 02:53:17.518193: step: 758/466, loss: 0.29950806498527527 2023-01-24 02:53:18.240635: step: 760/466, loss: 0.16383399069309235 2023-01-24 02:53:19.051899: step: 762/466, loss: 0.140916109085083 2023-01-24 02:53:19.789129: step: 764/466, loss: 0.15139026939868927 2023-01-24 02:53:20.531865: step: 766/466, loss: 0.11691779643297195 2023-01-24 02:53:21.271138: step: 768/466, loss: 0.2745065689086914 2023-01-24 02:53:22.024128: step: 770/466, loss: 0.5456153154373169 2023-01-24 02:53:22.777098: step: 772/466, loss: 1.0830720663070679 2023-01-24 02:53:23.534753: step: 774/466, loss: 0.3399311900138855 2023-01-24 02:53:24.306139: step: 776/466, loss: 0.7403802275657654 2023-01-24 02:53:25.013654: step: 778/466, loss: 0.18672645092010498 2023-01-24 02:53:25.768742: step: 780/466, loss: 0.2659226953983307 2023-01-24 02:53:26.526169: step: 782/466, loss: 0.33941036462783813 2023-01-24 02:53:27.404168: step: 784/466, loss: 0.40195438265800476 2023-01-24 02:53:28.239373: step: 786/466, loss: 0.20171843469142914 2023-01-24 02:53:28.981497: step: 788/466, loss: 0.2638154625892639 2023-01-24 02:53:29.661726: step: 790/466, loss: 0.25721925497055054 2023-01-24 02:53:30.500803: step: 792/466, loss: 0.6966685056686401 2023-01-24 02:53:31.291478: step: 794/466, loss: 0.9811992049217224 2023-01-24 02:53:32.048159: step: 796/466, loss: 0.26549002528190613 2023-01-24 02:53:32.820300: step: 798/466, loss: 0.36354395747184753 2023-01-24 02:53:33.590416: step: 800/466, loss: 1.4915454387664795 2023-01-24 02:53:34.349926: step: 802/466, loss: 0.2320493459701538 2023-01-24 02:53:35.038241: step: 804/466, loss: 0.5532269477844238 2023-01-24 02:53:35.880516: step: 806/466, loss: 0.3147946000099182 2023-01-24 02:53:36.697647: step: 808/466, loss: 0.6030541062355042 2023-01-24 02:53:37.439530: step: 810/466, loss: 0.25675368309020996 2023-01-24 02:53:38.186247: step: 812/466, loss: 0.12279326468706131 2023-01-24 02:53:38.874534: step: 814/466, loss: 0.09616804122924805 2023-01-24 02:53:39.619273: step: 816/466, loss: 0.22024525701999664 2023-01-24 02:53:40.479391: step: 818/466, loss: 0.9077953100204468 2023-01-24 02:53:41.221546: step: 820/466, loss: 0.10984183847904205 2023-01-24 02:53:41.948767: step: 822/466, loss: 0.19995927810668945 2023-01-24 02:53:42.680439: step: 824/466, loss: 0.2009981870651245 2023-01-24 02:53:43.552089: step: 826/466, loss: 0.42771244049072266 2023-01-24 02:53:44.371890: step: 828/466, loss: 0.16702905297279358 2023-01-24 02:53:45.097160: step: 830/466, loss: 0.32878464460372925 2023-01-24 02:53:45.853349: step: 832/466, loss: 0.3727845549583435 2023-01-24 02:53:46.540272: step: 834/466, loss: 0.1593688279390335 2023-01-24 02:53:47.224462: step: 836/466, loss: 0.15850010514259338 2023-01-24 02:53:47.989437: step: 838/466, loss: 0.1300395280122757 2023-01-24 02:53:48.708341: step: 840/466, loss: 0.29905185103416443 2023-01-24 02:53:49.478310: step: 842/466, loss: 0.1523512899875641 2023-01-24 02:53:50.222117: step: 844/466, loss: 2.24468994140625 2023-01-24 02:53:50.981547: step: 846/466, loss: 0.5050785541534424 2023-01-24 02:53:51.790402: step: 848/466, loss: 0.21456189453601837 2023-01-24 02:53:52.526974: step: 850/466, loss: 0.22099438309669495 2023-01-24 02:53:53.284553: step: 852/466, loss: 0.6934733986854553 2023-01-24 02:53:53.972498: step: 854/466, loss: 2.9839396476745605 2023-01-24 02:53:54.672024: step: 856/466, loss: 0.40766406059265137 2023-01-24 02:53:55.359731: step: 858/466, loss: 0.14863476157188416 2023-01-24 02:53:56.096166: step: 860/466, loss: 0.10019338876008987 2023-01-24 02:53:56.823602: step: 862/466, loss: 0.4024325907230377 2023-01-24 02:53:57.505603: step: 864/466, loss: 0.12153172492980957 2023-01-24 02:53:58.265747: step: 866/466, loss: 0.41037943959236145 2023-01-24 02:53:58.992333: step: 868/466, loss: 0.1819164752960205 2023-01-24 02:53:59.765425: step: 870/466, loss: 0.13573968410491943 2023-01-24 02:54:00.668945: step: 872/466, loss: 0.4748840630054474 2023-01-24 02:54:01.440574: step: 874/466, loss: 0.09928253293037415 2023-01-24 02:54:02.287141: step: 876/466, loss: 0.27209317684173584 2023-01-24 02:54:03.060683: step: 878/466, loss: 0.2346189022064209 2023-01-24 02:54:03.758747: step: 880/466, loss: 1.9735147953033447 2023-01-24 02:54:04.571697: step: 882/466, loss: 0.4819077253341675 2023-01-24 02:54:05.500988: step: 884/466, loss: 0.22298583388328552 2023-01-24 02:54:06.296101: step: 886/466, loss: 0.23651434481143951 2023-01-24 02:54:07.049442: step: 888/466, loss: 0.1375943422317505 2023-01-24 02:54:07.888180: step: 890/466, loss: 0.39386996626853943 2023-01-24 02:54:08.678253: step: 892/466, loss: 0.6610428094863892 2023-01-24 02:54:09.502872: step: 894/466, loss: 0.33950868248939514 2023-01-24 02:54:10.340906: step: 896/466, loss: 0.41958087682724 2023-01-24 02:54:11.046350: step: 898/466, loss: 0.2866438031196594 2023-01-24 02:54:11.885906: step: 900/466, loss: 0.609981894493103 2023-01-24 02:54:12.686770: step: 902/466, loss: 0.7020177245140076 2023-01-24 02:54:13.451205: step: 904/466, loss: 0.34889107942581177 2023-01-24 02:54:14.220687: step: 906/466, loss: 0.7393907308578491 2023-01-24 02:54:14.967092: step: 908/466, loss: 0.34370091557502747 2023-01-24 02:54:15.634542: step: 910/466, loss: 0.3134104013442993 2023-01-24 02:54:16.413898: step: 912/466, loss: 0.3483279347419739 2023-01-24 02:54:17.142416: step: 914/466, loss: 0.38782113790512085 2023-01-24 02:54:17.921363: step: 916/466, loss: 0.118745356798172 2023-01-24 02:54:18.704432: step: 918/466, loss: 0.6202765107154846 2023-01-24 02:54:19.401068: step: 920/466, loss: 0.1659417301416397 2023-01-24 02:54:20.192581: step: 922/466, loss: 0.19849182665348053 2023-01-24 02:54:20.969606: step: 924/466, loss: 0.17553061246871948 2023-01-24 02:54:21.652012: step: 926/466, loss: 0.0439554899930954 2023-01-24 02:54:22.395827: step: 928/466, loss: 0.15662969648838043 2023-01-24 02:54:23.167163: step: 930/466, loss: 0.199647918343544 2023-01-24 02:54:24.024421: step: 932/466, loss: 0.6263249516487122 ================================================== Loss: 0.405 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35376006036217306, 'r': 0.2859616427216048, 'f1': 0.3162681756858043}, 'combined': 0.2330397084000663, 'epoch': 9} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3797055970630157, 'r': 0.2827245138304706, 'f1': 0.3241159438949812}, 'combined': 0.19921272649154942, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33100057227191954, 'r': 0.2920593284752231, 'f1': 0.31031303650492453}, 'combined': 0.22865171110889174, 'epoch': 9} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.36167892528646656, 'r': 0.2871511467425886, 'f1': 0.320134724408967}, 'combined': 0.19676573305136508, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35736398531375174, 'r': 0.2902310924369748, 'f1': 0.3203178758414361}, 'combined': 0.2360236979884266, 'epoch': 9} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3830159985376813, 'r': 0.28087839892763294, 'f1': 0.324090460301115}, 'combined': 0.20017351959774754, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3162878787878788, 'r': 0.2982142857142857, 'f1': 0.3069852941176471}, 'combined': 0.20465686274509803, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34321716589861756, 'r': 0.28264943074003795, 'f1': 0.31000260145681585}, 'combined': 0.22842296949449586, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3400251231205871, 'r': 0.24846857481712165, 'f1': 0.28712476629692396}, 'combined': 0.17647668562640204, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3371212121212121, 'r': 0.31785714285714284, 'f1': 0.3272058823529411}, 'combined': 0.21813725490196073, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35633182648907596, 'r': 0.3171150410310752, 'f1': 0.33558157956501333}, 'combined': 0.24727063757422033, 'epoch': 5} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38903978397030203, 'r': 0.22009690200395982, 'f1': 0.28114035217301386}, 'combined': 0.17364551163627331, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 5} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:57:07.560704: step: 2/466, loss: 0.2337709665298462 2023-01-24 02:57:08.381454: step: 4/466, loss: 0.26833581924438477 2023-01-24 02:57:09.123248: step: 6/466, loss: 0.14239007234573364 2023-01-24 02:57:09.992730: step: 8/466, loss: 0.1602407991886139 2023-01-24 02:57:10.824542: step: 10/466, loss: 0.30238527059555054 2023-01-24 02:57:11.658460: step: 12/466, loss: 0.4026970863342285 2023-01-24 02:57:12.405689: step: 14/466, loss: 1.2485120296478271 2023-01-24 02:57:13.195796: step: 16/466, loss: 0.3447570204734802 2023-01-24 02:57:14.010786: step: 18/466, loss: 0.09246478229761124 2023-01-24 02:57:14.839489: step: 20/466, loss: 0.26743602752685547 2023-01-24 02:57:15.569332: step: 22/466, loss: 0.35447368025779724 2023-01-24 02:57:16.391559: step: 24/466, loss: 0.17007564008235931 2023-01-24 02:57:17.106919: step: 26/466, loss: 0.1545749008655548 2023-01-24 02:57:17.787876: step: 28/466, loss: 0.11063441634178162 2023-01-24 02:57:18.600609: step: 30/466, loss: 1.414306879043579 2023-01-24 02:57:19.379788: step: 32/466, loss: 0.15004459023475647 2023-01-24 02:57:20.105884: step: 34/466, loss: 0.34166383743286133 2023-01-24 02:57:20.911715: step: 36/466, loss: 0.1305786520242691 2023-01-24 02:57:21.662597: step: 38/466, loss: 0.16415190696716309 2023-01-24 02:57:22.493274: step: 40/466, loss: 0.9625821709632874 2023-01-24 02:57:23.251093: step: 42/466, loss: 0.11631202697753906 2023-01-24 02:57:23.926115: step: 44/466, loss: 0.09589099138975143 2023-01-24 02:57:24.683410: step: 46/466, loss: 0.24927857518196106 2023-01-24 02:57:25.444173: step: 48/466, loss: 0.0878019705414772 2023-01-24 02:57:26.202219: step: 50/466, loss: 0.19715267419815063 2023-01-24 02:57:26.985798: step: 52/466, loss: 0.13326187431812286 2023-01-24 02:57:27.730945: step: 54/466, loss: 0.07522831857204437 2023-01-24 02:57:28.446877: step: 56/466, loss: 0.09932447969913483 2023-01-24 02:57:29.192692: step: 58/466, loss: 0.2271547019481659 2023-01-24 02:57:29.936740: step: 60/466, loss: 0.9119616746902466 2023-01-24 02:57:30.670637: step: 62/466, loss: 0.13493891060352325 2023-01-24 02:57:31.460546: step: 64/466, loss: 0.21414585411548615 2023-01-24 02:57:32.264735: step: 66/466, loss: 1.22073233127594 2023-01-24 02:57:33.065979: step: 68/466, loss: 0.07765951752662659 2023-01-24 02:57:33.812839: step: 70/466, loss: 0.2033333033323288 2023-01-24 02:57:34.584096: step: 72/466, loss: 0.26286882162094116 2023-01-24 02:57:35.384683: step: 74/466, loss: 0.9073281288146973 2023-01-24 02:57:36.117238: step: 76/466, loss: 0.2290145456790924 2023-01-24 02:57:36.860011: step: 78/466, loss: 0.80869060754776 2023-01-24 02:57:37.584598: step: 80/466, loss: 0.14732767641544342 2023-01-24 02:57:38.369548: step: 82/466, loss: 0.10995560884475708 2023-01-24 02:57:39.171868: step: 84/466, loss: 0.27620553970336914 2023-01-24 02:57:39.918835: step: 86/466, loss: 0.10498054325580597 2023-01-24 02:57:40.616164: step: 88/466, loss: 0.2640683352947235 2023-01-24 02:57:41.362083: step: 90/466, loss: 0.143999382853508 2023-01-24 02:57:42.135935: step: 92/466, loss: 0.38430488109588623 2023-01-24 02:57:42.872584: step: 94/466, loss: 0.12708406150341034 2023-01-24 02:57:43.738996: step: 96/466, loss: 0.16920247673988342 2023-01-24 02:57:44.511164: step: 98/466, loss: 0.13652513921260834 2023-01-24 02:57:45.373708: step: 100/466, loss: 1.5320005416870117 2023-01-24 02:57:46.139014: step: 102/466, loss: 0.2467314749956131 2023-01-24 02:57:46.818543: step: 104/466, loss: 0.3934531807899475 2023-01-24 02:57:47.657956: step: 106/466, loss: 0.18988072872161865 2023-01-24 02:57:48.451225: step: 108/466, loss: 0.16739220917224884 2023-01-24 02:57:49.248741: step: 110/466, loss: 0.1678118109703064 2023-01-24 02:57:50.043391: step: 112/466, loss: 0.0868569165468216 2023-01-24 02:57:50.799327: step: 114/466, loss: 0.2291712462902069 2023-01-24 02:57:51.716105: step: 116/466, loss: 0.264020711183548 2023-01-24 02:57:52.474003: step: 118/466, loss: 0.5153694748878479 2023-01-24 02:57:53.171852: step: 120/466, loss: 0.051767732948064804 2023-01-24 02:57:53.860841: step: 122/466, loss: 0.2249072939157486 2023-01-24 02:57:54.724317: step: 124/466, loss: 0.26452016830444336 2023-01-24 02:57:55.419580: step: 126/466, loss: 0.4013260304927826 2023-01-24 02:57:56.102295: step: 128/466, loss: 0.24689669907093048 2023-01-24 02:57:56.940267: step: 130/466, loss: 0.3453787863254547 2023-01-24 02:57:57.765922: step: 132/466, loss: 0.14687740802764893 2023-01-24 02:57:58.480921: step: 134/466, loss: 0.18214935064315796 2023-01-24 02:57:59.301950: step: 136/466, loss: 0.7668765187263489 2023-01-24 02:58:00.051406: step: 138/466, loss: 0.33379390835762024 2023-01-24 02:58:00.738828: step: 140/466, loss: 0.14274349808692932 2023-01-24 02:58:01.548907: step: 142/466, loss: 0.0996280089020729 2023-01-24 02:58:02.332665: step: 144/466, loss: 0.1344163566827774 2023-01-24 02:58:03.106089: step: 146/466, loss: 0.19428536295890808 2023-01-24 02:58:03.876437: step: 148/466, loss: 0.2296874076128006 2023-01-24 02:58:04.595526: step: 150/466, loss: 0.1182233989238739 2023-01-24 02:58:05.317232: step: 152/466, loss: 0.10741811245679855 2023-01-24 02:58:06.031001: step: 154/466, loss: 0.29721999168395996 2023-01-24 02:58:06.751216: step: 156/466, loss: 0.2870020568370819 2023-01-24 02:58:07.471978: step: 158/466, loss: 0.30465492606163025 2023-01-24 02:58:08.194146: step: 160/466, loss: 0.23985272645950317 2023-01-24 02:58:08.941969: step: 162/466, loss: 0.3943251371383667 2023-01-24 02:58:09.757000: step: 164/466, loss: 0.317905068397522 2023-01-24 02:58:10.563993: step: 166/466, loss: 0.21616436541080475 2023-01-24 02:58:11.289108: step: 168/466, loss: 1.09933340549469 2023-01-24 02:58:12.116855: step: 170/466, loss: 0.13349558413028717 2023-01-24 02:58:12.807251: step: 172/466, loss: 0.2912863790988922 2023-01-24 02:58:13.526246: step: 174/466, loss: 0.11956477910280228 2023-01-24 02:58:14.353235: step: 176/466, loss: 0.1785416156053543 2023-01-24 02:58:15.126303: step: 178/466, loss: 0.17102572321891785 2023-01-24 02:58:15.927587: step: 180/466, loss: 0.27333664894104004 2023-01-24 02:58:16.724867: step: 182/466, loss: 0.1813071221113205 2023-01-24 02:58:17.430832: step: 184/466, loss: 0.20739442110061646 2023-01-24 02:58:18.241405: step: 186/466, loss: 0.1849229484796524 2023-01-24 02:58:18.952169: step: 188/466, loss: 0.048303648829460144 2023-01-24 02:58:19.763194: step: 190/466, loss: 0.0667133778333664 2023-01-24 02:58:20.511428: step: 192/466, loss: 0.38608261942863464 2023-01-24 02:58:21.260669: step: 194/466, loss: 0.15903745591640472 2023-01-24 02:58:22.040849: step: 196/466, loss: 0.3151916265487671 2023-01-24 02:58:22.902409: step: 198/466, loss: 0.6694477200508118 2023-01-24 02:58:23.649512: step: 200/466, loss: 0.3216400146484375 2023-01-24 02:58:24.378163: step: 202/466, loss: 0.16271351277828217 2023-01-24 02:58:25.178646: step: 204/466, loss: 0.5475163459777832 2023-01-24 02:58:25.923662: step: 206/466, loss: 0.12444935739040375 2023-01-24 02:58:26.686782: step: 208/466, loss: 0.3612194061279297 2023-01-24 02:58:27.445817: step: 210/466, loss: 0.19568414986133575 2023-01-24 02:58:28.223333: step: 212/466, loss: 0.23034150898456573 2023-01-24 02:58:29.045666: step: 214/466, loss: 0.21345168352127075 2023-01-24 02:58:29.801253: step: 216/466, loss: 0.3210800290107727 2023-01-24 02:58:30.646284: step: 218/466, loss: 0.17796467244625092 2023-01-24 02:58:31.426219: step: 220/466, loss: 0.19314827024936676 2023-01-24 02:58:32.275047: step: 222/466, loss: 0.15028682351112366 2023-01-24 02:58:33.033947: step: 224/466, loss: 0.11573804914951324 2023-01-24 02:58:33.795699: step: 226/466, loss: 0.7474660873413086 2023-01-24 02:58:34.565332: step: 228/466, loss: 0.11379896104335785 2023-01-24 02:58:35.426063: step: 230/466, loss: 0.4897193908691406 2023-01-24 02:58:36.129815: step: 232/466, loss: 0.11260432004928589 2023-01-24 02:58:36.879498: step: 234/466, loss: 0.6882609128952026 2023-01-24 02:58:37.660839: step: 236/466, loss: 0.5560853481292725 2023-01-24 02:58:38.384973: step: 238/466, loss: 0.767204999923706 2023-01-24 02:58:39.108552: step: 240/466, loss: 0.34223759174346924 2023-01-24 02:58:39.862098: step: 242/466, loss: 0.08386547863483429 2023-01-24 02:58:40.617619: step: 244/466, loss: 0.22089608013629913 2023-01-24 02:58:41.314284: step: 246/466, loss: 0.17293940484523773 2023-01-24 02:58:42.039192: step: 248/466, loss: 0.8853538632392883 2023-01-24 02:58:42.839813: step: 250/466, loss: 0.696312665939331 2023-01-24 02:58:43.595159: step: 252/466, loss: 0.19818630814552307 2023-01-24 02:58:44.375680: step: 254/466, loss: 0.5379956364631653 2023-01-24 02:58:45.111416: step: 256/466, loss: 0.27241238951683044 2023-01-24 02:58:45.842192: step: 258/466, loss: 0.49338239431381226 2023-01-24 02:58:46.563125: step: 260/466, loss: 0.23142513632774353 2023-01-24 02:58:47.383668: step: 262/466, loss: 0.3157171607017517 2023-01-24 02:58:48.119303: step: 264/466, loss: 0.09880304336547852 2023-01-24 02:58:48.857186: step: 266/466, loss: 0.2063807100057602 2023-01-24 02:58:49.528640: step: 268/466, loss: 0.09504813700914383 2023-01-24 02:58:50.322370: step: 270/466, loss: 0.10063324868679047 2023-01-24 02:58:51.064877: step: 272/466, loss: 0.26294028759002686 2023-01-24 02:58:51.770245: step: 274/466, loss: 0.22391608357429504 2023-01-24 02:58:52.531325: step: 276/466, loss: 0.12411212176084518 2023-01-24 02:58:53.311978: step: 278/466, loss: 0.10606664419174194 2023-01-24 02:58:54.110386: step: 280/466, loss: 0.12701769173145294 2023-01-24 02:58:54.861466: step: 282/466, loss: 0.32284846901893616 2023-01-24 02:58:55.568519: step: 284/466, loss: 0.13506066799163818 2023-01-24 02:58:56.282933: step: 286/466, loss: 0.29167723655700684 2023-01-24 02:58:57.018247: step: 288/466, loss: 0.2490895539522171 2023-01-24 02:58:57.809015: step: 290/466, loss: 0.17621193826198578 2023-01-24 02:58:58.558794: step: 292/466, loss: 0.09964149445295334 2023-01-24 02:58:59.243121: step: 294/466, loss: 0.08445818722248077 2023-01-24 02:59:00.014438: step: 296/466, loss: 0.07993566989898682 2023-01-24 02:59:00.748450: step: 298/466, loss: 0.10784658789634705 2023-01-24 02:59:01.489751: step: 300/466, loss: 0.16252484917640686 2023-01-24 02:59:02.249129: step: 302/466, loss: 0.24444356560707092 2023-01-24 02:59:02.981503: step: 304/466, loss: 0.09691224247217178 2023-01-24 02:59:03.838339: step: 306/466, loss: 0.08311986178159714 2023-01-24 02:59:04.608469: step: 308/466, loss: 0.19063793122768402 2023-01-24 02:59:05.451970: step: 310/466, loss: 0.24725835025310516 2023-01-24 02:59:06.148614: step: 312/466, loss: 0.29137295484542847 2023-01-24 02:59:06.930698: step: 314/466, loss: 0.1480897068977356 2023-01-24 02:59:07.659165: step: 316/466, loss: 0.2394569218158722 2023-01-24 02:59:08.454182: step: 318/466, loss: 0.4203174114227295 2023-01-24 02:59:09.228883: step: 320/466, loss: 0.034369274973869324 2023-01-24 02:59:09.986269: step: 322/466, loss: 0.10814063251018524 2023-01-24 02:59:10.798289: step: 324/466, loss: 0.14247576892375946 2023-01-24 02:59:11.512325: step: 326/466, loss: 1.1015814542770386 2023-01-24 02:59:12.227969: step: 328/466, loss: 0.10019680112600327 2023-01-24 02:59:13.099058: step: 330/466, loss: 0.4012758433818817 2023-01-24 02:59:13.876452: step: 332/466, loss: 0.39217323064804077 2023-01-24 02:59:14.635955: step: 334/466, loss: 0.27039122581481934 2023-01-24 02:59:15.472817: step: 336/466, loss: 0.40216994285583496 2023-01-24 02:59:16.271614: step: 338/466, loss: 0.24465128779411316 2023-01-24 02:59:16.977879: step: 340/466, loss: 0.1533263623714447 2023-01-24 02:59:17.696376: step: 342/466, loss: 0.25162526965141296 2023-01-24 02:59:18.423438: step: 344/466, loss: 0.20700925588607788 2023-01-24 02:59:19.134085: step: 346/466, loss: 0.1375453919172287 2023-01-24 02:59:19.963130: step: 348/466, loss: 0.20698057115077972 2023-01-24 02:59:20.722330: step: 350/466, loss: 0.15939858555793762 2023-01-24 02:59:21.551924: step: 352/466, loss: 0.2313118427991867 2023-01-24 02:59:22.346965: step: 354/466, loss: 0.345257431268692 2023-01-24 02:59:23.103263: step: 356/466, loss: 0.5385666489601135 2023-01-24 02:59:23.957677: step: 358/466, loss: 0.5636390447616577 2023-01-24 02:59:24.662067: step: 360/466, loss: 0.20566923916339874 2023-01-24 02:59:25.456253: step: 362/466, loss: 0.16982313990592957 2023-01-24 02:59:26.194131: step: 364/466, loss: 0.1668519228696823 2023-01-24 02:59:26.903708: step: 366/466, loss: 0.5705475211143494 2023-01-24 02:59:27.675184: step: 368/466, loss: 0.19253084063529968 2023-01-24 02:59:28.418747: step: 370/466, loss: 0.1730499565601349 2023-01-24 02:59:29.204009: step: 372/466, loss: 0.17300289869308472 2023-01-24 02:59:29.921079: step: 374/466, loss: 0.29056882858276367 2023-01-24 02:59:30.688716: step: 376/466, loss: 0.07716875523328781 2023-01-24 02:59:31.672921: step: 378/466, loss: 0.6006758809089661 2023-01-24 02:59:32.401007: step: 380/466, loss: 0.26666271686553955 2023-01-24 02:59:33.169200: step: 382/466, loss: 0.9369261860847473 2023-01-24 02:59:33.874316: step: 384/466, loss: 0.18641993403434753 2023-01-24 02:59:34.612172: step: 386/466, loss: 0.12803220748901367 2023-01-24 02:59:35.377600: step: 388/466, loss: 0.24852482974529266 2023-01-24 02:59:36.112511: step: 390/466, loss: 0.20162753760814667 2023-01-24 02:59:36.834920: step: 392/466, loss: 0.22263064980506897 2023-01-24 02:59:37.680897: step: 394/466, loss: 0.5578153133392334 2023-01-24 02:59:38.422802: step: 396/466, loss: 0.43902575969696045 2023-01-24 02:59:39.215157: step: 398/466, loss: 0.18315844237804413 2023-01-24 02:59:39.923125: step: 400/466, loss: 0.5645053386688232 2023-01-24 02:59:40.689406: step: 402/466, loss: 0.04956785589456558 2023-01-24 02:59:41.441933: step: 404/466, loss: 0.100751131772995 2023-01-24 02:59:42.163249: step: 406/466, loss: 0.15760697424411774 2023-01-24 02:59:42.936295: step: 408/466, loss: 0.5888820290565491 2023-01-24 02:59:43.640937: step: 410/466, loss: 0.15951798856258392 2023-01-24 02:59:44.349098: step: 412/466, loss: 1.0511513948440552 2023-01-24 02:59:45.132826: step: 414/466, loss: 0.20933635532855988 2023-01-24 02:59:45.827201: step: 416/466, loss: 0.22570045292377472 2023-01-24 02:59:46.568776: step: 418/466, loss: 0.5592967867851257 2023-01-24 02:59:47.380419: step: 420/466, loss: 21.276796340942383 2023-01-24 02:59:48.251038: step: 422/466, loss: 0.39449530839920044 2023-01-24 02:59:49.014882: step: 424/466, loss: 0.2394774854183197 2023-01-24 02:59:49.792601: step: 426/466, loss: 0.12702800333499908 2023-01-24 02:59:50.514548: step: 428/466, loss: 0.1220950037240982 2023-01-24 02:59:51.236927: step: 430/466, loss: 0.1692110002040863 2023-01-24 02:59:52.020785: step: 432/466, loss: 0.6647356748580933 2023-01-24 02:59:52.766452: step: 434/466, loss: 0.25400546193122864 2023-01-24 02:59:53.554048: step: 436/466, loss: 0.516399085521698 2023-01-24 02:59:54.307856: step: 438/466, loss: 0.2164851427078247 2023-01-24 02:59:55.133320: step: 440/466, loss: 0.7619146108627319 2023-01-24 02:59:55.880675: step: 442/466, loss: 0.09502183645963669 2023-01-24 02:59:56.699449: step: 444/466, loss: 0.16723360121250153 2023-01-24 02:59:57.455974: step: 446/466, loss: 0.18075765669345856 2023-01-24 02:59:58.172459: step: 448/466, loss: 0.24220411479473114 2023-01-24 02:59:58.942973: step: 450/466, loss: 0.08047514408826828 2023-01-24 02:59:59.677698: step: 452/466, loss: 0.186679407954216 2023-01-24 03:00:00.421582: step: 454/466, loss: 0.14323924481868744 2023-01-24 03:00:01.227553: step: 456/466, loss: 0.3566244840621948 2023-01-24 03:00:02.011640: step: 458/466, loss: 0.20824034512043 2023-01-24 03:00:02.668493: step: 460/466, loss: 0.5536962151527405 2023-01-24 03:00:03.458625: step: 462/466, loss: 0.22645868360996246 2023-01-24 03:00:04.191336: step: 464/466, loss: 0.1923508644104004 2023-01-24 03:00:04.923521: step: 466/466, loss: 0.1315545290708542 2023-01-24 03:00:05.667967: step: 468/466, loss: 0.6798703670501709 2023-01-24 03:00:06.441149: step: 470/466, loss: 0.31831642985343933 2023-01-24 03:00:07.171633: step: 472/466, loss: 0.45602738857269287 2023-01-24 03:00:07.911282: step: 474/466, loss: 0.12411798536777496 2023-01-24 03:00:08.672700: step: 476/466, loss: 0.30964699387550354 2023-01-24 03:00:09.430224: step: 478/466, loss: 0.305565744638443 2023-01-24 03:00:10.222068: step: 480/466, loss: 0.11159797012805939 2023-01-24 03:00:10.954428: step: 482/466, loss: 0.44753023982048035 2023-01-24 03:00:11.635859: step: 484/466, loss: 0.1689191460609436 2023-01-24 03:00:12.409348: step: 486/466, loss: 0.1801457405090332 2023-01-24 03:00:13.258984: step: 488/466, loss: 0.7594742774963379 2023-01-24 03:00:13.958328: step: 490/466, loss: 0.3323618173599243 2023-01-24 03:00:14.750052: step: 492/466, loss: 0.17707262933254242 2023-01-24 03:00:15.570344: step: 494/466, loss: 0.3458724617958069 2023-01-24 03:00:16.418617: step: 496/466, loss: 0.2138400375843048 2023-01-24 03:00:17.240739: step: 498/466, loss: 0.22486338019371033 2023-01-24 03:00:18.053644: step: 500/466, loss: 0.6938997507095337 2023-01-24 03:00:18.820540: step: 502/466, loss: 0.4290332496166229 2023-01-24 03:00:19.605773: step: 504/466, loss: 0.25466781854629517 2023-01-24 03:00:20.335178: step: 506/466, loss: 0.11359059810638428 2023-01-24 03:00:21.160070: step: 508/466, loss: 0.15057611465454102 2023-01-24 03:00:21.888375: step: 510/466, loss: 0.14240585267543793 2023-01-24 03:00:22.665116: step: 512/466, loss: 0.8652889728546143 2023-01-24 03:00:23.504309: step: 514/466, loss: 0.25626131892204285 2023-01-24 03:00:24.236201: step: 516/466, loss: 0.865880012512207 2023-01-24 03:00:24.933857: step: 518/466, loss: 0.17434607446193695 2023-01-24 03:00:25.661761: step: 520/466, loss: 0.44801265001296997 2023-01-24 03:00:26.366957: step: 522/466, loss: 0.3766295313835144 2023-01-24 03:00:27.225450: step: 524/466, loss: 0.12937021255493164 2023-01-24 03:00:28.072807: step: 526/466, loss: 1.208333969116211 2023-01-24 03:00:28.813345: step: 528/466, loss: 0.1700485199689865 2023-01-24 03:00:29.570563: step: 530/466, loss: 0.27223414182662964 2023-01-24 03:00:30.417302: step: 532/466, loss: 0.32058337330818176 2023-01-24 03:00:31.099413: step: 534/466, loss: 0.2612878382205963 2023-01-24 03:00:31.895099: step: 536/466, loss: 0.24139459431171417 2023-01-24 03:00:32.693564: step: 538/466, loss: 0.1782309114933014 2023-01-24 03:00:33.549464: step: 540/466, loss: 1.4186257123947144 2023-01-24 03:00:34.409352: step: 542/466, loss: 0.8059676289558411 2023-01-24 03:00:35.173271: step: 544/466, loss: 0.14394286274909973 2023-01-24 03:00:35.894204: step: 546/466, loss: 0.18445007503032684 2023-01-24 03:00:36.771090: step: 548/466, loss: 0.42247700691223145 2023-01-24 03:00:37.529496: step: 550/466, loss: 0.19051989912986755 2023-01-24 03:00:38.427763: step: 552/466, loss: 0.5361776351928711 2023-01-24 03:00:39.176945: step: 554/466, loss: 0.2054612785577774 2023-01-24 03:00:39.886876: step: 556/466, loss: 0.15531039237976074 2023-01-24 03:00:40.595159: step: 558/466, loss: 0.3972584307193756 2023-01-24 03:00:41.249869: step: 560/466, loss: 0.12098507583141327 2023-01-24 03:00:42.140669: step: 562/466, loss: 0.1388738453388214 2023-01-24 03:00:42.868144: step: 564/466, loss: 0.4729807674884796 2023-01-24 03:00:43.636191: step: 566/466, loss: 0.17271478474140167 2023-01-24 03:00:44.329015: step: 568/466, loss: 0.16412785649299622 2023-01-24 03:00:45.062984: step: 570/466, loss: 0.31249192357063293 2023-01-24 03:00:45.788387: step: 572/466, loss: 0.4589084982872009 2023-01-24 03:00:46.508408: step: 574/466, loss: 0.07088687270879745 2023-01-24 03:00:47.290191: step: 576/466, loss: 0.1241055577993393 2023-01-24 03:00:48.097228: step: 578/466, loss: 0.34827089309692383 2023-01-24 03:00:48.803150: step: 580/466, loss: 0.1643725484609604 2023-01-24 03:00:49.594066: step: 582/466, loss: 0.5115247964859009 2023-01-24 03:00:50.348554: step: 584/466, loss: 0.3221728801727295 2023-01-24 03:00:51.235379: step: 586/466, loss: 0.49480682611465454 2023-01-24 03:00:51.981344: step: 588/466, loss: 0.1289515346288681 2023-01-24 03:00:52.726332: step: 590/466, loss: 0.22824212908744812 2023-01-24 03:00:53.508257: step: 592/466, loss: 0.08253544569015503 2023-01-24 03:00:54.248948: step: 594/466, loss: 0.07791145145893097 2023-01-24 03:00:55.070697: step: 596/466, loss: 0.22855186462402344 2023-01-24 03:00:55.757856: step: 598/466, loss: 0.5620360374450684 2023-01-24 03:00:56.439868: step: 600/466, loss: 0.9512036442756653 2023-01-24 03:00:57.169681: step: 602/466, loss: 0.20425112545490265 2023-01-24 03:00:57.846900: step: 604/466, loss: 0.15751442313194275 2023-01-24 03:00:58.720453: step: 606/466, loss: 0.12556593120098114 2023-01-24 03:00:59.387915: step: 608/466, loss: 0.1434406191110611 2023-01-24 03:01:00.160605: step: 610/466, loss: 0.09092912077903748 2023-01-24 03:01:00.876674: step: 612/466, loss: 0.1337815523147583 2023-01-24 03:01:01.641097: step: 614/466, loss: 0.7102845311164856 2023-01-24 03:01:02.448819: step: 616/466, loss: 0.38301992416381836 2023-01-24 03:01:03.320646: step: 618/466, loss: 0.3137530982494354 2023-01-24 03:01:04.123091: step: 620/466, loss: 0.3401144742965698 2023-01-24 03:01:04.826842: step: 622/466, loss: 0.04092513769865036 2023-01-24 03:01:05.514906: step: 624/466, loss: 0.2598436772823334 2023-01-24 03:01:06.243233: step: 626/466, loss: 0.20543630421161652 2023-01-24 03:01:07.046334: step: 628/466, loss: 2.207848310470581 2023-01-24 03:01:07.813113: step: 630/466, loss: 0.3994409441947937 2023-01-24 03:01:08.646486: step: 632/466, loss: 0.13118356466293335 2023-01-24 03:01:09.399087: step: 634/466, loss: 0.15899503231048584 2023-01-24 03:01:10.146942: step: 636/466, loss: 0.2652314007282257 2023-01-24 03:01:10.841476: step: 638/466, loss: 0.4009181559085846 2023-01-24 03:01:11.635286: step: 640/466, loss: 0.2774149775505066 2023-01-24 03:01:12.443966: step: 642/466, loss: 0.5266385674476624 2023-01-24 03:01:13.383746: step: 644/466, loss: 0.138621523976326 2023-01-24 03:01:14.182131: step: 646/466, loss: 0.20024169981479645 2023-01-24 03:01:14.979849: step: 648/466, loss: 0.604051947593689 2023-01-24 03:01:15.707125: step: 650/466, loss: 0.08497483283281326 2023-01-24 03:01:16.493921: step: 652/466, loss: 0.1440829187631607 2023-01-24 03:01:17.214614: step: 654/466, loss: 0.1202130913734436 2023-01-24 03:01:17.992553: step: 656/466, loss: 0.37918657064437866 2023-01-24 03:01:18.788616: step: 658/466, loss: 0.2415979504585266 2023-01-24 03:01:19.541805: step: 660/466, loss: 0.3267868757247925 2023-01-24 03:01:20.286759: step: 662/466, loss: 0.38840076327323914 2023-01-24 03:01:21.006297: step: 664/466, loss: 0.7931300401687622 2023-01-24 03:01:21.805199: step: 666/466, loss: 0.07995925843715668 2023-01-24 03:01:22.561320: step: 668/466, loss: 0.16449187695980072 2023-01-24 03:01:23.442215: step: 670/466, loss: 0.3676665723323822 2023-01-24 03:01:24.219352: step: 672/466, loss: 0.14478278160095215 2023-01-24 03:01:24.986643: step: 674/466, loss: 0.28152671456336975 2023-01-24 03:01:25.920280: step: 676/466, loss: 0.1067856177687645 2023-01-24 03:01:26.684593: step: 678/466, loss: 0.15938341617584229 2023-01-24 03:01:27.453301: step: 680/466, loss: 0.23811189830303192 2023-01-24 03:01:28.174326: step: 682/466, loss: 0.3324274718761444 2023-01-24 03:01:28.910473: step: 684/466, loss: 0.1873573213815689 2023-01-24 03:01:29.779917: step: 686/466, loss: 0.8957513570785522 2023-01-24 03:01:30.510524: step: 688/466, loss: 1.4601404666900635 2023-01-24 03:01:31.285583: step: 690/466, loss: 0.18743014335632324 2023-01-24 03:01:32.048995: step: 692/466, loss: 0.14349307119846344 2023-01-24 03:01:32.830256: step: 694/466, loss: 0.16869373619556427 2023-01-24 03:01:33.559018: step: 696/466, loss: 0.15919862687587738 2023-01-24 03:01:34.309778: step: 698/466, loss: 0.25145259499549866 2023-01-24 03:01:35.028176: step: 700/466, loss: 0.16693904995918274 2023-01-24 03:01:35.705000: step: 702/466, loss: 0.1935565024614334 2023-01-24 03:01:36.512274: step: 704/466, loss: 0.2286834865808487 2023-01-24 03:01:37.340459: step: 706/466, loss: 0.20977507531642914 2023-01-24 03:01:38.136101: step: 708/466, loss: 9.643488883972168 2023-01-24 03:01:38.957602: step: 710/466, loss: 0.6023985147476196 2023-01-24 03:01:39.639454: step: 712/466, loss: 0.13850431144237518 2023-01-24 03:01:40.409253: step: 714/466, loss: 0.1854768544435501 2023-01-24 03:01:41.174712: step: 716/466, loss: 0.03611788526177406 2023-01-24 03:01:41.936227: step: 718/466, loss: 0.08938011527061462 2023-01-24 03:01:42.723357: step: 720/466, loss: 0.2490069717168808 2023-01-24 03:01:43.498162: step: 722/466, loss: 0.4968907833099365 2023-01-24 03:01:44.262041: step: 724/466, loss: 1.0069706439971924 2023-01-24 03:01:45.084184: step: 726/466, loss: 1.2807743549346924 2023-01-24 03:01:45.760327: step: 728/466, loss: 0.17751643061637878 2023-01-24 03:01:46.517326: step: 730/466, loss: 0.04696459323167801 2023-01-24 03:01:47.252932: step: 732/466, loss: 1.1253899335861206 2023-01-24 03:01:48.026817: step: 734/466, loss: 0.1938478946685791 2023-01-24 03:01:48.748911: step: 736/466, loss: 0.39816173911094666 2023-01-24 03:01:49.477156: step: 738/466, loss: 0.19523125886917114 2023-01-24 03:01:50.197302: step: 740/466, loss: 0.1278812736272812 2023-01-24 03:01:50.949023: step: 742/466, loss: 0.19035612046718597 2023-01-24 03:01:51.678520: step: 744/466, loss: 0.09937532991170883 2023-01-24 03:01:52.461717: step: 746/466, loss: 0.1287962794303894 2023-01-24 03:01:53.140051: step: 748/466, loss: 0.6626227498054504 2023-01-24 03:01:53.916918: step: 750/466, loss: 0.18322832882404327 2023-01-24 03:01:54.625299: step: 752/466, loss: 1.757911205291748 2023-01-24 03:01:55.456840: step: 754/466, loss: 0.22883504629135132 2023-01-24 03:01:56.184853: step: 756/466, loss: 1.5663551092147827 2023-01-24 03:01:56.956990: step: 758/466, loss: 0.48100045323371887 2023-01-24 03:01:57.685187: step: 760/466, loss: 0.357120156288147 2023-01-24 03:01:58.484079: step: 762/466, loss: 0.16780208051204681 2023-01-24 03:01:59.307325: step: 764/466, loss: 0.2773645222187042 2023-01-24 03:02:00.127655: step: 766/466, loss: 0.21462607383728027 2023-01-24 03:02:00.789794: step: 768/466, loss: 0.30235159397125244 2023-01-24 03:02:01.482979: step: 770/466, loss: 0.1719624400138855 2023-01-24 03:02:02.289707: step: 772/466, loss: 0.4989997446537018 2023-01-24 03:02:03.165859: step: 774/466, loss: 0.49282678961753845 2023-01-24 03:02:04.043110: step: 776/466, loss: 1.6167389154434204 2023-01-24 03:02:04.833063: step: 778/466, loss: 0.3437347412109375 2023-01-24 03:02:05.602480: step: 780/466, loss: 0.27999237179756165 2023-01-24 03:02:06.328679: step: 782/466, loss: 0.10780219733715057 2023-01-24 03:02:07.105729: step: 784/466, loss: 0.2819616496562958 2023-01-24 03:02:07.807340: step: 786/466, loss: 0.3817901015281677 2023-01-24 03:02:08.546591: step: 788/466, loss: 0.20044463872909546 2023-01-24 03:02:09.353115: step: 790/466, loss: 0.06816612184047699 2023-01-24 03:02:10.207565: step: 792/466, loss: 0.2158653736114502 2023-01-24 03:02:11.030847: step: 794/466, loss: 0.2796313166618347 2023-01-24 03:02:11.683113: step: 796/466, loss: 0.08614753186702728 2023-01-24 03:02:12.475079: step: 798/466, loss: 0.6492806673049927 2023-01-24 03:02:13.230224: step: 800/466, loss: 0.08297394216060638 2023-01-24 03:02:13.974798: step: 802/466, loss: 0.09081264585256577 2023-01-24 03:02:14.668053: step: 804/466, loss: 0.1946927309036255 2023-01-24 03:02:15.484127: step: 806/466, loss: 0.3332517743110657 2023-01-24 03:02:16.250211: step: 808/466, loss: 0.18870267271995544 2023-01-24 03:02:17.120160: step: 810/466, loss: 0.10972864180803299 2023-01-24 03:02:17.865815: step: 812/466, loss: 0.33559009432792664 2023-01-24 03:02:18.574429: step: 814/466, loss: 0.13296253979206085 2023-01-24 03:02:19.366438: step: 816/466, loss: 0.23679058253765106 2023-01-24 03:02:20.078664: step: 818/466, loss: 0.9596275091171265 2023-01-24 03:02:20.884504: step: 820/466, loss: 0.36564570665359497 2023-01-24 03:02:21.682836: step: 822/466, loss: 0.4165021479129791 2023-01-24 03:02:22.496690: step: 824/466, loss: 0.29203930497169495 2023-01-24 03:02:23.360085: step: 826/466, loss: 0.17134632170200348 2023-01-24 03:02:24.040575: step: 828/466, loss: 0.08343864232301712 2023-01-24 03:02:24.747045: step: 830/466, loss: 0.16457146406173706 2023-01-24 03:02:25.452307: step: 832/466, loss: 0.14254069328308105 2023-01-24 03:02:26.187484: step: 834/466, loss: 0.2560867965221405 2023-01-24 03:02:26.988263: step: 836/466, loss: 0.5032888054847717 2023-01-24 03:02:27.748441: step: 838/466, loss: 0.31330007314682007 2023-01-24 03:02:28.535957: step: 840/466, loss: 0.5630853176116943 2023-01-24 03:02:29.208004: step: 842/466, loss: 0.2139889895915985 2023-01-24 03:02:29.976515: step: 844/466, loss: 0.269824743270874 2023-01-24 03:02:30.716460: step: 846/466, loss: 0.4968046545982361 2023-01-24 03:02:31.508023: step: 848/466, loss: 0.10947266966104507 2023-01-24 03:02:32.312304: step: 850/466, loss: 0.3745446503162384 2023-01-24 03:02:33.290880: step: 852/466, loss: 0.14542731642723083 2023-01-24 03:02:34.042914: step: 854/466, loss: 0.0944216251373291 2023-01-24 03:02:34.842892: step: 856/466, loss: 0.5812382102012634 2023-01-24 03:02:35.610770: step: 858/466, loss: 0.15963409841060638 2023-01-24 03:02:36.385380: step: 860/466, loss: 0.3798730671405792 2023-01-24 03:02:37.114580: step: 862/466, loss: 0.056237805634737015 2023-01-24 03:02:37.933855: step: 864/466, loss: 0.1034381166100502 2023-01-24 03:02:38.791115: step: 866/466, loss: 0.18533749878406525 2023-01-24 03:02:39.582893: step: 868/466, loss: 0.26592233777046204 2023-01-24 03:02:40.352480: step: 870/466, loss: 0.167646586894989 2023-01-24 03:02:41.102993: step: 872/466, loss: 0.15275219082832336 2023-01-24 03:02:41.913699: step: 874/466, loss: 0.5274092555046082 2023-01-24 03:02:42.678563: step: 876/466, loss: 0.17686963081359863 2023-01-24 03:02:43.419231: step: 878/466, loss: 0.24046087265014648 2023-01-24 03:02:44.223837: step: 880/466, loss: 0.5760931372642517 2023-01-24 03:02:45.074380: step: 882/466, loss: 0.12636913359165192 2023-01-24 03:02:45.975262: step: 884/466, loss: 0.13510404527187347 2023-01-24 03:02:46.783507: step: 886/466, loss: 1.0734281539916992 2023-01-24 03:02:47.442995: step: 888/466, loss: 0.16917142271995544 2023-01-24 03:02:48.232062: step: 890/466, loss: 0.29217538237571716 2023-01-24 03:02:48.931630: step: 892/466, loss: 0.25594523549079895 2023-01-24 03:02:49.703550: step: 894/466, loss: 1.6487802267074585 2023-01-24 03:02:50.446549: step: 896/466, loss: 0.13572891056537628 2023-01-24 03:02:51.162469: step: 898/466, loss: 0.516257107257843 2023-01-24 03:02:51.919573: step: 900/466, loss: 0.14448872208595276 2023-01-24 03:02:52.718701: step: 902/466, loss: 0.1506912112236023 2023-01-24 03:02:53.429910: step: 904/466, loss: 0.09637308120727539 2023-01-24 03:02:54.259218: step: 906/466, loss: 0.1871449053287506 2023-01-24 03:02:54.986950: step: 908/466, loss: 0.14197932183742523 2023-01-24 03:02:55.677393: step: 910/466, loss: 0.1988995522260666 2023-01-24 03:02:56.394743: step: 912/466, loss: 0.48657023906707764 2023-01-24 03:02:57.184382: step: 914/466, loss: 0.9628162384033203 2023-01-24 03:02:57.904053: step: 916/466, loss: 0.2303006649017334 2023-01-24 03:02:58.640990: step: 918/466, loss: 0.13100191950798035 2023-01-24 03:02:59.276541: step: 920/466, loss: 0.16516250371932983 2023-01-24 03:03:00.054499: step: 922/466, loss: 0.24126259982585907 2023-01-24 03:03:00.842171: step: 924/466, loss: 0.6147301197052002 2023-01-24 03:03:01.709354: step: 926/466, loss: 0.26411890983581543 2023-01-24 03:03:02.436257: step: 928/466, loss: 0.18852995336055756 2023-01-24 03:03:03.214733: step: 930/466, loss: 0.4089183509349823 2023-01-24 03:03:03.998010: step: 932/466, loss: 0.4652842879295349 ================================================== Loss: 0.383 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3482605754752493, 'r': 0.30398456303342447, 'f1': 0.3246197866638595}, 'combined': 0.2391935270154754, 'epoch': 10} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3566930984140036, 'r': 0.2760198759824135, 'f1': 0.3112134214789499}, 'combined': 0.19128239564072044, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3215683116934148, 'r': 0.3044830883017343, 'f1': 0.31279256829437424}, 'combined': 0.2304787345326968, 'epoch': 10} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3391187859173863, 'r': 0.2865171718106167, 'f1': 0.3106066850816831}, 'combined': 0.19090947473313205, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3566033231474408, 'r': 0.31058999112841623, 'f1': 0.33200999051658286}, 'combined': 0.24463894038064, 'epoch': 10} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.34918835076574467, 'r': 0.2769320600012313, 'f1': 0.3088909022708084}, 'combined': 0.19078555728491112, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3254310344827586, 'r': 0.26964285714285713, 'f1': 0.294921875}, 'combined': 0.19661458333333331, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25806451612903225, 'r': 0.34782608695652173, 'f1': 0.2962962962962963}, 'combined': 0.14814814814814814, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.10344827586206896, 'f1': 0.15789473684210528}, 'combined': 0.10526315789473685, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34321716589861756, 'r': 0.28264943074003795, 'f1': 0.31000260145681585}, 'combined': 0.22842296949449586, 'epoch': 6} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3400251231205871, 'r': 0.24846857481712165, 'f1': 0.28712476629692396}, 'combined': 0.17647668562640204, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3371212121212121, 'r': 0.31785714285714284, 'f1': 0.3272058823529411}, 'combined': 0.21813725490196073, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35633182648907596, 'r': 0.3171150410310752, 'f1': 0.33558157956501333}, 'combined': 0.24727063757422033, 'epoch': 5} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38903978397030203, 'r': 0.22009690200395982, 'f1': 0.28114035217301386}, 'combined': 0.17364551163627331, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 5} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:05:48.282941: step: 2/466, loss: 2.1529102325439453 2023-01-24 03:05:49.026529: step: 4/466, loss: 1.2928975820541382 2023-01-24 03:05:49.796048: step: 6/466, loss: 0.7491808533668518 2023-01-24 03:05:50.719082: step: 8/466, loss: 0.5165972113609314 2023-01-24 03:05:51.427532: step: 10/466, loss: 0.6967114210128784 2023-01-24 03:05:52.160374: step: 12/466, loss: 0.181780144572258 2023-01-24 03:05:52.911432: step: 14/466, loss: 0.2290743887424469 2023-01-24 03:05:53.684083: step: 16/466, loss: 0.10063283145427704 2023-01-24 03:05:54.435028: step: 18/466, loss: 0.24175231158733368 2023-01-24 03:05:55.160681: step: 20/466, loss: 0.14193516969680786 2023-01-24 03:05:55.950481: step: 22/466, loss: 0.355695515871048 2023-01-24 03:05:56.790544: step: 24/466, loss: 0.3076014816761017 2023-01-24 03:05:57.573380: step: 26/466, loss: 0.07201249897480011 2023-01-24 03:05:58.325852: step: 28/466, loss: 0.09827329963445663 2023-01-24 03:05:59.144877: step: 30/466, loss: 1.4906624555587769 2023-01-24 03:05:59.946963: step: 32/466, loss: 0.16106005012989044 2023-01-24 03:06:00.701046: step: 34/466, loss: 0.2692556381225586 2023-01-24 03:06:01.491270: step: 36/466, loss: 0.6596773862838745 2023-01-24 03:06:02.349373: step: 38/466, loss: 0.7450871467590332 2023-01-24 03:06:03.036436: step: 40/466, loss: 0.16723114252090454 2023-01-24 03:06:03.835841: step: 42/466, loss: 0.17070387303829193 2023-01-24 03:06:04.627360: step: 44/466, loss: 4.699317932128906 2023-01-24 03:06:05.351746: step: 46/466, loss: 0.16521230340003967 2023-01-24 03:06:06.082587: step: 48/466, loss: 0.3028544783592224 2023-01-24 03:06:06.795740: step: 50/466, loss: 0.06732210516929626 2023-01-24 03:06:07.483354: step: 52/466, loss: 0.342848002910614 2023-01-24 03:06:08.265664: step: 54/466, loss: 0.1531759351491928 2023-01-24 03:06:09.085577: step: 56/466, loss: 0.38209885358810425 2023-01-24 03:06:09.874667: step: 58/466, loss: 0.11774308234453201 2023-01-24 03:06:10.627779: step: 60/466, loss: 0.24460706114768982 2023-01-24 03:06:11.388997: step: 62/466, loss: 0.5347704291343689 2023-01-24 03:06:12.161080: step: 64/466, loss: 0.15005452930927277 2023-01-24 03:06:12.930774: step: 66/466, loss: 0.31499019265174866 2023-01-24 03:06:13.706741: step: 68/466, loss: 0.29045701026916504 2023-01-24 03:06:14.526556: step: 70/466, loss: 0.49788540601730347 2023-01-24 03:06:15.242856: step: 72/466, loss: 0.1132664903998375 2023-01-24 03:06:15.957729: step: 74/466, loss: 1.393949031829834 2023-01-24 03:06:16.738720: step: 76/466, loss: 0.26555508375167847 2023-01-24 03:06:17.410914: step: 78/466, loss: 0.07243114709854126 2023-01-24 03:06:18.128148: step: 80/466, loss: 0.27598562836647034 2023-01-24 03:06:18.827543: step: 82/466, loss: 0.11839446425437927 2023-01-24 03:06:19.555767: step: 84/466, loss: 0.28125977516174316 2023-01-24 03:06:20.267105: step: 86/466, loss: 0.10161054879426956 2023-01-24 03:06:21.124907: step: 88/466, loss: 0.4249451160430908 2023-01-24 03:06:21.871449: step: 90/466, loss: 0.21938398480415344 2023-01-24 03:06:22.597717: step: 92/466, loss: 0.11637397855520248 2023-01-24 03:06:23.399870: step: 94/466, loss: 0.10267498344182968 2023-01-24 03:06:24.176837: step: 96/466, loss: 0.17808358371257782 2023-01-24 03:06:25.015154: step: 98/466, loss: 0.22559547424316406 2023-01-24 03:06:25.878317: step: 100/466, loss: 0.08399305492639542 2023-01-24 03:06:26.616510: step: 102/466, loss: 0.1191306933760643 2023-01-24 03:06:27.331255: step: 104/466, loss: 0.4601157605648041 2023-01-24 03:06:28.022438: step: 106/466, loss: 0.10419661551713943 2023-01-24 03:06:28.804638: step: 108/466, loss: 0.20737458765506744 2023-01-24 03:06:29.530896: step: 110/466, loss: 0.0793699324131012 2023-01-24 03:06:30.299491: step: 112/466, loss: 0.0680171549320221 2023-01-24 03:06:31.064219: step: 114/466, loss: 0.2625187337398529 2023-01-24 03:06:31.813187: step: 116/466, loss: 0.762864887714386 2023-01-24 03:06:32.588181: step: 118/466, loss: 0.1961923986673355 2023-01-24 03:06:33.522154: step: 120/466, loss: 0.07939931005239487 2023-01-24 03:06:34.286764: step: 122/466, loss: 0.20566321909427643 2023-01-24 03:06:35.037191: step: 124/466, loss: 0.1146789938211441 2023-01-24 03:06:35.811774: step: 126/466, loss: 0.10061171650886536 2023-01-24 03:06:36.610501: step: 128/466, loss: 0.09796812385320663 2023-01-24 03:06:37.407442: step: 130/466, loss: 0.17923253774642944 2023-01-24 03:06:38.170228: step: 132/466, loss: 0.07315370440483093 2023-01-24 03:06:38.930351: step: 134/466, loss: 0.12449519336223602 2023-01-24 03:06:39.651089: step: 136/466, loss: 0.6119828820228577 2023-01-24 03:06:40.378415: step: 138/466, loss: 0.09807797521352768 2023-01-24 03:06:41.115127: step: 140/466, loss: 0.27021315693855286 2023-01-24 03:06:41.854220: step: 142/466, loss: 0.2910257577896118 2023-01-24 03:06:42.641036: step: 144/466, loss: 0.16442370414733887 2023-01-24 03:06:43.408008: step: 146/466, loss: 0.22531013190746307 2023-01-24 03:06:44.187294: step: 148/466, loss: 0.4174662232398987 2023-01-24 03:06:44.974743: step: 150/466, loss: 0.17463970184326172 2023-01-24 03:06:45.679116: step: 152/466, loss: 0.12008628994226456 2023-01-24 03:06:46.400866: step: 154/466, loss: 0.09626641869544983 2023-01-24 03:06:47.215198: step: 156/466, loss: 0.16386191546916962 2023-01-24 03:06:47.953788: step: 158/466, loss: 0.09380415081977844 2023-01-24 03:06:48.706174: step: 160/466, loss: 0.1615980565547943 2023-01-24 03:06:49.444525: step: 162/466, loss: 0.12062174081802368 2023-01-24 03:06:50.252928: step: 164/466, loss: 0.04635758325457573 2023-01-24 03:06:51.009857: step: 166/466, loss: 0.11345676332712173 2023-01-24 03:06:51.851064: step: 168/466, loss: 0.34069615602493286 2023-01-24 03:06:52.629559: step: 170/466, loss: 0.11447493731975555 2023-01-24 03:06:53.448522: step: 172/466, loss: 0.19096854329109192 2023-01-24 03:06:54.192243: step: 174/466, loss: 0.1390686333179474 2023-01-24 03:06:54.974957: step: 176/466, loss: 0.15758000314235687 2023-01-24 03:06:55.766856: step: 178/466, loss: 0.24189817905426025 2023-01-24 03:06:56.522933: step: 180/466, loss: 0.1777925193309784 2023-01-24 03:06:57.222043: step: 182/466, loss: 0.09924599528312683 2023-01-24 03:06:57.984984: step: 184/466, loss: 0.06437882035970688 2023-01-24 03:06:58.732630: step: 186/466, loss: 0.24384798109531403 2023-01-24 03:06:59.451343: step: 188/466, loss: 0.09469848871231079 2023-01-24 03:07:00.245621: step: 190/466, loss: 0.27627843618392944 2023-01-24 03:07:01.034164: step: 192/466, loss: 0.09130019694566727 2023-01-24 03:07:01.817319: step: 194/466, loss: 0.1484045386314392 2023-01-24 03:07:02.546551: step: 196/466, loss: 0.10212335735559464 2023-01-24 03:07:03.397476: step: 198/466, loss: 0.42030757665634155 2023-01-24 03:07:04.167535: step: 200/466, loss: 0.19676528871059418 2023-01-24 03:07:04.842234: step: 202/466, loss: 0.09120253473520279 2023-01-24 03:07:05.642065: step: 204/466, loss: 0.29615336656570435 2023-01-24 03:07:06.509011: step: 206/466, loss: 0.4958255887031555 2023-01-24 03:07:07.176177: step: 208/466, loss: 0.13587284088134766 2023-01-24 03:07:07.877608: step: 210/466, loss: 0.13223013281822205 2023-01-24 03:07:08.746162: step: 212/466, loss: 0.12652768194675446 2023-01-24 03:07:09.487751: step: 214/466, loss: 0.20821043848991394 2023-01-24 03:07:10.210011: step: 216/466, loss: 0.37757372856140137 2023-01-24 03:07:10.971456: step: 218/466, loss: 0.10182490944862366 2023-01-24 03:07:11.701185: step: 220/466, loss: 0.2610222101211548 2023-01-24 03:07:12.460617: step: 222/466, loss: 0.23877929151058197 2023-01-24 03:07:13.130827: step: 224/466, loss: 0.3908085227012634 2023-01-24 03:07:13.981355: step: 226/466, loss: 0.24849556386470795 2023-01-24 03:07:14.657983: step: 228/466, loss: 0.07584869116544724 2023-01-24 03:07:15.382511: step: 230/466, loss: 0.34698769450187683 2023-01-24 03:07:16.138620: step: 232/466, loss: 0.4174465835094452 2023-01-24 03:07:16.938916: step: 234/466, loss: 0.24441280961036682 2023-01-24 03:07:17.706798: step: 236/466, loss: 0.22616979479789734 2023-01-24 03:07:18.469610: step: 238/466, loss: 0.8674289584159851 2023-01-24 03:07:19.220129: step: 240/466, loss: 0.05681760236620903 2023-01-24 03:07:20.004383: step: 242/466, loss: 0.4447229504585266 2023-01-24 03:07:20.939866: step: 244/466, loss: 0.20784218609333038 2023-01-24 03:07:21.689978: step: 246/466, loss: 0.11302675306797028 2023-01-24 03:07:22.399337: step: 248/466, loss: 0.7658733129501343 2023-01-24 03:07:23.158577: step: 250/466, loss: 0.4308475852012634 2023-01-24 03:07:23.915180: step: 252/466, loss: 0.1710461974143982 2023-01-24 03:07:24.705852: step: 254/466, loss: 0.18237625062465668 2023-01-24 03:07:25.398610: step: 256/466, loss: 0.06273120641708374 2023-01-24 03:07:26.253787: step: 258/466, loss: 0.21046406030654907 2023-01-24 03:07:26.965662: step: 260/466, loss: 0.15667913854122162 2023-01-24 03:07:27.732193: step: 262/466, loss: 0.07241981476545334 2023-01-24 03:07:28.528855: step: 264/466, loss: 0.32974952459335327 2023-01-24 03:07:29.296404: step: 266/466, loss: 0.26676201820373535 2023-01-24 03:07:30.065282: step: 268/466, loss: 0.0830921083688736 2023-01-24 03:07:30.760454: step: 270/466, loss: 0.1152021586894989 2023-01-24 03:07:31.468527: step: 272/466, loss: 0.23809200525283813 2023-01-24 03:07:32.296073: step: 274/466, loss: 0.4443970322608948 2023-01-24 03:07:33.052178: step: 276/466, loss: 0.06326255202293396 2023-01-24 03:07:33.871702: step: 278/466, loss: 0.15755707025527954 2023-01-24 03:07:34.694042: step: 280/466, loss: 1.1373233795166016 2023-01-24 03:07:35.471252: step: 282/466, loss: 0.1670621931552887 2023-01-24 03:07:36.307236: step: 284/466, loss: 6.769673824310303 2023-01-24 03:07:37.115100: step: 286/466, loss: 0.3943406343460083 2023-01-24 03:07:37.916612: step: 288/466, loss: 0.28536364436149597 2023-01-24 03:07:38.640785: step: 290/466, loss: 0.3271265923976898 2023-01-24 03:07:39.321427: step: 292/466, loss: 0.2132055014371872 2023-01-24 03:07:40.155686: step: 294/466, loss: 0.15132853388786316 2023-01-24 03:07:40.954072: step: 296/466, loss: 0.6087186932563782 2023-01-24 03:07:41.732796: step: 298/466, loss: 0.10347239673137665 2023-01-24 03:07:42.461732: step: 300/466, loss: 0.05397750809788704 2023-01-24 03:07:43.231712: step: 302/466, loss: 0.0957869216799736 2023-01-24 03:07:43.974587: step: 304/466, loss: 0.11784403026103973 2023-01-24 03:07:44.690034: step: 306/466, loss: 0.30135923624038696 2023-01-24 03:07:45.398207: step: 308/466, loss: 0.07710994780063629 2023-01-24 03:07:46.181073: step: 310/466, loss: 0.4046939015388489 2023-01-24 03:07:46.995176: step: 312/466, loss: 0.09228195995092392 2023-01-24 03:07:47.722900: step: 314/466, loss: 0.15427225828170776 2023-01-24 03:07:48.454297: step: 316/466, loss: 0.12082573771476746 2023-01-24 03:07:49.152963: step: 318/466, loss: 0.10438908636569977 2023-01-24 03:07:49.898957: step: 320/466, loss: 0.1299682855606079 2023-01-24 03:07:50.683434: step: 322/466, loss: 0.15788854658603668 2023-01-24 03:07:51.445067: step: 324/466, loss: 0.5104916095733643 2023-01-24 03:07:52.163570: step: 326/466, loss: 0.7637230753898621 2023-01-24 03:07:52.913350: step: 328/466, loss: 0.24904048442840576 2023-01-24 03:07:53.634194: step: 330/466, loss: 0.09890061616897583 2023-01-24 03:07:54.325201: step: 332/466, loss: 0.08374074846506119 2023-01-24 03:07:55.068202: step: 334/466, loss: 0.14553312957286835 2023-01-24 03:07:55.847173: step: 336/466, loss: 0.10662180930376053 2023-01-24 03:07:56.571153: step: 338/466, loss: 0.2679828405380249 2023-01-24 03:07:57.340200: step: 340/466, loss: 0.11776454746723175 2023-01-24 03:07:58.060626: step: 342/466, loss: 0.1401228904724121 2023-01-24 03:07:58.791210: step: 344/466, loss: 0.40193748474121094 2023-01-24 03:07:59.575880: step: 346/466, loss: 0.08663631230592728 2023-01-24 03:08:00.342056: step: 348/466, loss: 0.23300756514072418 2023-01-24 03:08:01.030886: step: 350/466, loss: 0.034195512533187866 2023-01-24 03:08:01.736027: step: 352/466, loss: 0.1955024152994156 2023-01-24 03:08:02.467785: step: 354/466, loss: 0.15643028914928436 2023-01-24 03:08:03.234253: step: 356/466, loss: 1.1213116645812988 2023-01-24 03:08:04.069944: step: 358/466, loss: 0.07308322191238403 2023-01-24 03:08:04.805910: step: 360/466, loss: 0.8623815178871155 2023-01-24 03:08:05.508501: step: 362/466, loss: 0.13978946208953857 2023-01-24 03:08:06.315297: step: 364/466, loss: 0.4244544208049774 2023-01-24 03:08:07.095917: step: 366/466, loss: 0.38631099462509155 2023-01-24 03:08:07.895659: step: 368/466, loss: 0.5736966729164124 2023-01-24 03:08:08.664299: step: 370/466, loss: 0.23332534730434418 2023-01-24 03:08:09.409091: step: 372/466, loss: 0.07546471804380417 2023-01-24 03:08:10.176250: step: 374/466, loss: 0.2998636066913605 2023-01-24 03:08:10.948799: step: 376/466, loss: 0.1286058872938156 2023-01-24 03:08:11.781807: step: 378/466, loss: 0.37791845202445984 2023-01-24 03:08:12.501206: step: 380/466, loss: 0.14797884225845337 2023-01-24 03:08:13.301894: step: 382/466, loss: 0.25256213545799255 2023-01-24 03:08:14.073201: step: 384/466, loss: 0.17319680750370026 2023-01-24 03:08:14.849975: step: 386/466, loss: 0.16688449680805206 2023-01-24 03:08:15.620723: step: 388/466, loss: 1.3531017303466797 2023-01-24 03:08:16.338410: step: 390/466, loss: 0.1990767866373062 2023-01-24 03:08:17.079355: step: 392/466, loss: 0.16120776534080505 2023-01-24 03:08:17.813340: step: 394/466, loss: 0.14971502125263214 2023-01-24 03:08:18.512443: step: 396/466, loss: 0.20626650750637054 2023-01-24 03:08:19.238466: step: 398/466, loss: 0.5205829739570618 2023-01-24 03:08:20.032918: step: 400/466, loss: 0.10441724210977554 2023-01-24 03:08:20.835127: step: 402/466, loss: 0.11944933980703354 2023-01-24 03:08:21.780147: step: 404/466, loss: 0.12934282422065735 2023-01-24 03:08:22.588506: step: 406/466, loss: 0.2357649952173233 2023-01-24 03:08:23.296863: step: 408/466, loss: 0.20092816650867462 2023-01-24 03:08:24.105774: step: 410/466, loss: 0.31943678855895996 2023-01-24 03:08:24.975094: step: 412/466, loss: 0.07797721028327942 2023-01-24 03:08:25.775835: step: 414/466, loss: 0.15454114973545074 2023-01-24 03:08:26.556975: step: 416/466, loss: 0.14024995267391205 2023-01-24 03:08:27.320031: step: 418/466, loss: 0.19680440425872803 2023-01-24 03:08:28.065998: step: 420/466, loss: 0.10583780705928802 2023-01-24 03:08:28.780639: step: 422/466, loss: 1.6686673164367676 2023-01-24 03:08:29.555918: step: 424/466, loss: 0.2630821466445923 2023-01-24 03:08:30.286560: step: 426/466, loss: 0.1577325165271759 2023-01-24 03:08:31.021311: step: 428/466, loss: 0.5519349575042725 2023-01-24 03:08:31.886663: step: 430/466, loss: 0.37767624855041504 2023-01-24 03:08:32.643867: step: 432/466, loss: 0.15149228274822235 2023-01-24 03:08:33.355050: step: 434/466, loss: 0.24351133406162262 2023-01-24 03:08:34.104918: step: 436/466, loss: 0.17260925471782684 2023-01-24 03:08:34.867189: step: 438/466, loss: 0.19760867953300476 2023-01-24 03:08:35.558845: step: 440/466, loss: 0.2500152885913849 2023-01-24 03:08:36.264868: step: 442/466, loss: 0.15466606616973877 2023-01-24 03:08:37.032738: step: 444/466, loss: 0.15376563370227814 2023-01-24 03:08:37.857122: step: 446/466, loss: 0.09276437014341354 2023-01-24 03:08:38.627432: step: 448/466, loss: 0.10054701566696167 2023-01-24 03:08:39.385639: step: 450/466, loss: 0.23553961515426636 2023-01-24 03:08:40.230606: step: 452/466, loss: 0.20160697400569916 2023-01-24 03:08:41.011320: step: 454/466, loss: 0.24913400411605835 2023-01-24 03:08:41.830108: step: 456/466, loss: 0.11520318686962128 2023-01-24 03:08:42.563684: step: 458/466, loss: 0.32482796907424927 2023-01-24 03:08:43.349485: step: 460/466, loss: 0.5226231217384338 2023-01-24 03:08:44.092758: step: 462/466, loss: 0.4979248046875 2023-01-24 03:08:44.993014: step: 464/466, loss: 0.40230900049209595 2023-01-24 03:08:45.753898: step: 466/466, loss: 0.3771675229072571 2023-01-24 03:08:46.553832: step: 468/466, loss: 0.13193966448307037 2023-01-24 03:08:47.385162: step: 470/466, loss: 0.6421751976013184 2023-01-24 03:08:48.169041: step: 472/466, loss: 0.32884228229522705 2023-01-24 03:08:48.991839: step: 474/466, loss: 0.13494789600372314 2023-01-24 03:08:49.815053: step: 476/466, loss: 0.3239307999610901 2023-01-24 03:08:50.622037: step: 478/466, loss: 1.8106663227081299 2023-01-24 03:08:51.452605: step: 480/466, loss: 0.653140127658844 2023-01-24 03:08:52.270456: step: 482/466, loss: 0.11913179606199265 2023-01-24 03:08:53.015746: step: 484/466, loss: 0.038208846002817154 2023-01-24 03:08:53.788192: step: 486/466, loss: 0.11867286264896393 2023-01-24 03:08:54.578091: step: 488/466, loss: 0.11869148910045624 2023-01-24 03:08:55.294719: step: 490/466, loss: 0.05254372954368591 2023-01-24 03:08:56.051848: step: 492/466, loss: 0.23922109603881836 2023-01-24 03:08:56.822493: step: 494/466, loss: 0.16811156272888184 2023-01-24 03:08:57.600456: step: 496/466, loss: 0.10078416764736176 2023-01-24 03:08:58.346484: step: 498/466, loss: 0.21667678654193878 2023-01-24 03:08:59.179648: step: 500/466, loss: 0.3497439920902252 2023-01-24 03:08:59.924195: step: 502/466, loss: 0.39367035031318665 2023-01-24 03:09:00.648051: step: 504/466, loss: 0.20031200349330902 2023-01-24 03:09:01.396315: step: 506/466, loss: 0.1277354657649994 2023-01-24 03:09:02.108084: step: 508/466, loss: 0.06326362490653992 2023-01-24 03:09:02.845470: step: 510/466, loss: 0.10629107058048248 2023-01-24 03:09:03.643218: step: 512/466, loss: 0.24034050107002258 2023-01-24 03:09:04.370004: step: 514/466, loss: 0.155228853225708 2023-01-24 03:09:05.105539: step: 516/466, loss: 0.5224460363388062 2023-01-24 03:09:05.904703: step: 518/466, loss: 0.10733848065137863 2023-01-24 03:09:06.634271: step: 520/466, loss: 0.152258038520813 2023-01-24 03:09:07.443644: step: 522/466, loss: 0.16879776120185852 2023-01-24 03:09:08.295927: step: 524/466, loss: 0.12189304083585739 2023-01-24 03:09:09.103664: step: 526/466, loss: 0.06921043246984482 2023-01-24 03:09:09.816612: step: 528/466, loss: 0.11970750242471695 2023-01-24 03:09:10.519707: step: 530/466, loss: 0.11582281440496445 2023-01-24 03:09:11.168574: step: 532/466, loss: 0.24655069410800934 2023-01-24 03:09:11.909596: step: 534/466, loss: 0.2609686255455017 2023-01-24 03:09:12.634814: step: 536/466, loss: 0.09174732863903046 2023-01-24 03:09:13.352641: step: 538/466, loss: 0.12777909636497498 2023-01-24 03:09:14.101701: step: 540/466, loss: 0.1914782077074051 2023-01-24 03:09:14.841244: step: 542/466, loss: 0.20091712474822998 2023-01-24 03:09:15.605704: step: 544/466, loss: 0.36752355098724365 2023-01-24 03:09:16.344336: step: 546/466, loss: 0.2852557599544525 2023-01-24 03:09:17.050905: step: 548/466, loss: 0.08599811047315598 2023-01-24 03:09:17.830908: step: 550/466, loss: 1.1437206268310547 2023-01-24 03:09:18.617306: step: 552/466, loss: 0.1743803322315216 2023-01-24 03:09:19.413517: step: 554/466, loss: 0.13963526487350464 2023-01-24 03:09:20.180753: step: 556/466, loss: 0.2871672809123993 2023-01-24 03:09:20.923544: step: 558/466, loss: 0.10863711684942245 2023-01-24 03:09:21.643654: step: 560/466, loss: 0.2510855793952942 2023-01-24 03:09:22.452956: step: 562/466, loss: 0.5121045112609863 2023-01-24 03:09:23.319125: step: 564/466, loss: 0.15737612545490265 2023-01-24 03:09:24.122821: step: 566/466, loss: 0.5852841138839722 2023-01-24 03:09:24.957670: step: 568/466, loss: 0.17177380621433258 2023-01-24 03:09:25.622328: step: 570/466, loss: 1.1846047639846802 2023-01-24 03:09:26.345780: step: 572/466, loss: 0.14318576455116272 2023-01-24 03:09:27.103971: step: 574/466, loss: 0.10378453880548477 2023-01-24 03:09:27.868289: step: 576/466, loss: 0.11457131803035736 2023-01-24 03:09:28.609783: step: 578/466, loss: 0.08289653807878494 2023-01-24 03:09:29.352349: step: 580/466, loss: 0.732667863368988 2023-01-24 03:09:30.089977: step: 582/466, loss: 0.44659432768821716 2023-01-24 03:09:30.795038: step: 584/466, loss: 0.18047739565372467 2023-01-24 03:09:31.580111: step: 586/466, loss: 0.30189192295074463 2023-01-24 03:09:32.371623: step: 588/466, loss: 0.09621741622686386 2023-01-24 03:09:33.179896: step: 590/466, loss: 0.3058559000492096 2023-01-24 03:09:33.993509: step: 592/466, loss: 0.20640884339809418 2023-01-24 03:09:34.771788: step: 594/466, loss: 0.26927196979522705 2023-01-24 03:09:35.486107: step: 596/466, loss: 0.44683629274368286 2023-01-24 03:09:36.308694: step: 598/466, loss: 0.17201684415340424 2023-01-24 03:09:37.065667: step: 600/466, loss: 0.3946480453014374 2023-01-24 03:09:37.878048: step: 602/466, loss: 0.1997591257095337 2023-01-24 03:09:38.722389: step: 604/466, loss: 0.16095568239688873 2023-01-24 03:09:39.463319: step: 606/466, loss: 0.26406875252723694 2023-01-24 03:09:40.172702: step: 608/466, loss: 0.0596434623003006 2023-01-24 03:09:40.958182: step: 610/466, loss: 0.22998706996440887 2023-01-24 03:09:41.691438: step: 612/466, loss: 0.4247513711452484 2023-01-24 03:09:42.480906: step: 614/466, loss: 0.2173185646533966 2023-01-24 03:09:43.181023: step: 616/466, loss: 0.15110743045806885 2023-01-24 03:09:43.961469: step: 618/466, loss: 0.22173559665679932 2023-01-24 03:09:44.815863: step: 620/466, loss: 0.16269834339618683 2023-01-24 03:09:45.614251: step: 622/466, loss: 0.8579102754592896 2023-01-24 03:09:46.415307: step: 624/466, loss: 0.29735979437828064 2023-01-24 03:09:47.103621: step: 626/466, loss: 0.28379639983177185 2023-01-24 03:09:47.804911: step: 628/466, loss: 0.22198012471199036 2023-01-24 03:09:48.625335: step: 630/466, loss: 0.36806726455688477 2023-01-24 03:09:49.371625: step: 632/466, loss: 0.07528834789991379 2023-01-24 03:09:50.241593: step: 634/466, loss: 0.5660086870193481 2023-01-24 03:09:51.009878: step: 636/466, loss: 0.28243446350097656 2023-01-24 03:09:51.767384: step: 638/466, loss: 0.8265565037727356 2023-01-24 03:09:52.529131: step: 640/466, loss: 0.17058542370796204 2023-01-24 03:09:53.307228: step: 642/466, loss: 1.0795800685882568 2023-01-24 03:09:54.003815: step: 644/466, loss: 0.17732802033424377 2023-01-24 03:09:54.817468: step: 646/466, loss: 0.6980006098747253 2023-01-24 03:09:55.590203: step: 648/466, loss: 0.745697557926178 2023-01-24 03:09:56.393059: step: 650/466, loss: 0.4132540822029114 2023-01-24 03:09:57.229363: step: 652/466, loss: 0.1508013904094696 2023-01-24 03:09:57.986818: step: 654/466, loss: 0.43576478958129883 2023-01-24 03:09:58.771593: step: 656/466, loss: 0.1567678302526474 2023-01-24 03:09:59.562064: step: 658/466, loss: 0.09988389909267426 2023-01-24 03:10:00.389760: step: 660/466, loss: 0.5101841688156128 2023-01-24 03:10:01.190646: step: 662/466, loss: 0.1614990085363388 2023-01-24 03:10:01.918210: step: 664/466, loss: 0.24654972553253174 2023-01-24 03:10:02.776854: step: 666/466, loss: 0.1733606457710266 2023-01-24 03:10:03.534054: step: 668/466, loss: 0.3766717314720154 2023-01-24 03:10:04.268156: step: 670/466, loss: 1.201568841934204 2023-01-24 03:10:04.979764: step: 672/466, loss: 0.11440683901309967 2023-01-24 03:10:05.771154: step: 674/466, loss: 0.06100420653820038 2023-01-24 03:10:06.595382: step: 676/466, loss: 0.5432313084602356 2023-01-24 03:10:07.347539: step: 678/466, loss: 0.13945043087005615 2023-01-24 03:10:08.171178: step: 680/466, loss: 0.16164690256118774 2023-01-24 03:10:09.118331: step: 682/466, loss: 0.2367585301399231 2023-01-24 03:10:09.852549: step: 684/466, loss: 0.2855292856693268 2023-01-24 03:10:10.653958: step: 686/466, loss: 0.0582096241414547 2023-01-24 03:10:11.401203: step: 688/466, loss: 0.13798856735229492 2023-01-24 03:10:12.244199: step: 690/466, loss: 0.27871906757354736 2023-01-24 03:10:13.073207: step: 692/466, loss: 0.5407863855361938 2023-01-24 03:10:13.903094: step: 694/466, loss: 0.13680404424667358 2023-01-24 03:10:14.691318: step: 696/466, loss: 0.5465109348297119 2023-01-24 03:10:15.459283: step: 698/466, loss: 0.1029527336359024 2023-01-24 03:10:16.191667: step: 700/466, loss: 0.28009337186813354 2023-01-24 03:10:17.010037: step: 702/466, loss: 0.23946930468082428 2023-01-24 03:10:17.748082: step: 704/466, loss: 0.19813068211078644 2023-01-24 03:10:18.475563: step: 706/466, loss: 0.2371053397655487 2023-01-24 03:10:19.270073: step: 708/466, loss: 4.844532489776611 2023-01-24 03:10:20.059266: step: 710/466, loss: 0.2899446487426758 2023-01-24 03:10:20.812737: step: 712/466, loss: 0.3364908695220947 2023-01-24 03:10:21.554627: step: 714/466, loss: 0.16732342541217804 2023-01-24 03:10:22.319435: step: 716/466, loss: 0.31874820590019226 2023-01-24 03:10:23.057243: step: 718/466, loss: 0.24993471801280975 2023-01-24 03:10:23.763602: step: 720/466, loss: 0.14156503975391388 2023-01-24 03:10:24.463646: step: 722/466, loss: 0.06063876673579216 2023-01-24 03:10:25.492354: step: 724/466, loss: 0.1900947093963623 2023-01-24 03:10:26.263836: step: 726/466, loss: 0.10233768075704575 2023-01-24 03:10:26.984367: step: 728/466, loss: 0.12381672859191895 2023-01-24 03:10:27.679151: step: 730/466, loss: 0.22875846922397614 2023-01-24 03:10:28.444340: step: 732/466, loss: 0.1475868672132492 2023-01-24 03:10:29.390881: step: 734/466, loss: 0.20701956748962402 2023-01-24 03:10:30.121367: step: 736/466, loss: 0.23301541805267334 2023-01-24 03:10:30.919212: step: 738/466, loss: 0.3107610046863556 2023-01-24 03:10:31.736199: step: 740/466, loss: 0.0913168340921402 2023-01-24 03:10:32.513493: step: 742/466, loss: 0.22894158959388733 2023-01-24 03:10:33.314254: step: 744/466, loss: 0.8507946729660034 2023-01-24 03:10:34.110248: step: 746/466, loss: 0.45970121026039124 2023-01-24 03:10:34.869626: step: 748/466, loss: 0.07593411207199097 2023-01-24 03:10:35.728377: step: 750/466, loss: 0.21511617302894592 2023-01-24 03:10:36.480328: step: 752/466, loss: 0.29151296615600586 2023-01-24 03:10:37.158235: step: 754/466, loss: 0.20864735543727875 2023-01-24 03:10:37.930863: step: 756/466, loss: 0.23786227405071259 2023-01-24 03:10:38.646155: step: 758/466, loss: 0.12289852648973465 2023-01-24 03:10:39.551506: step: 760/466, loss: 0.35148900747299194 2023-01-24 03:10:40.255990: step: 762/466, loss: 0.06791818141937256 2023-01-24 03:10:41.009135: step: 764/466, loss: 0.11666157096624374 2023-01-24 03:10:41.773976: step: 766/466, loss: 0.157499298453331 2023-01-24 03:10:42.551724: step: 768/466, loss: 0.14626558125019073 2023-01-24 03:10:43.293019: step: 770/466, loss: 0.2578248381614685 2023-01-24 03:10:44.048401: step: 772/466, loss: 0.32343581318855286 2023-01-24 03:10:44.806934: step: 774/466, loss: 0.29889240860939026 2023-01-24 03:10:45.624167: step: 776/466, loss: 0.1174267828464508 2023-01-24 03:10:46.374921: step: 778/466, loss: 0.14662732183933258 2023-01-24 03:10:47.151929: step: 780/466, loss: 0.21824069321155548 2023-01-24 03:10:47.896208: step: 782/466, loss: 0.15058116614818573 2023-01-24 03:10:48.590838: step: 784/466, loss: 0.1504792422056198 2023-01-24 03:10:49.397093: step: 786/466, loss: 0.16521036624908447 2023-01-24 03:10:50.179020: step: 788/466, loss: 0.18496575951576233 2023-01-24 03:10:50.959655: step: 790/466, loss: 0.3798065781593323 2023-01-24 03:10:51.729895: step: 792/466, loss: 0.12481054663658142 2023-01-24 03:10:52.482112: step: 794/466, loss: 0.13360081613063812 2023-01-24 03:10:53.255707: step: 796/466, loss: 0.07607656717300415 2023-01-24 03:10:54.012248: step: 798/466, loss: 0.1492290198802948 2023-01-24 03:10:54.850310: step: 800/466, loss: 0.07505486160516739 2023-01-24 03:10:55.624739: step: 802/466, loss: 0.16869288682937622 2023-01-24 03:10:56.358448: step: 804/466, loss: 0.1634787917137146 2023-01-24 03:10:57.012519: step: 806/466, loss: 0.09497810155153275 2023-01-24 03:10:57.806490: step: 808/466, loss: 0.20512060821056366 2023-01-24 03:10:58.558171: step: 810/466, loss: 0.2127516269683838 2023-01-24 03:10:59.216499: step: 812/466, loss: 0.18286651372909546 2023-01-24 03:10:59.932523: step: 814/466, loss: 0.7895347476005554 2023-01-24 03:11:00.747087: step: 816/466, loss: 0.29801860451698303 2023-01-24 03:11:01.510758: step: 818/466, loss: 0.14022906124591827 2023-01-24 03:11:02.239146: step: 820/466, loss: 0.5178606510162354 2023-01-24 03:11:03.011513: step: 822/466, loss: 0.05689023435115814 2023-01-24 03:11:03.783205: step: 824/466, loss: 0.25089576840400696 2023-01-24 03:11:04.598650: step: 826/466, loss: 0.17668074369430542 2023-01-24 03:11:05.402300: step: 828/466, loss: 0.09522435069084167 2023-01-24 03:11:06.154921: step: 830/466, loss: 0.16681769490242004 2023-01-24 03:11:06.888817: step: 832/466, loss: 0.06689116358757019 2023-01-24 03:11:07.706017: step: 834/466, loss: 0.13664615154266357 2023-01-24 03:11:08.446873: step: 836/466, loss: 0.2148696333169937 2023-01-24 03:11:09.180042: step: 838/466, loss: 0.1512245088815689 2023-01-24 03:11:09.977000: step: 840/466, loss: 0.3020787537097931 2023-01-24 03:11:10.684130: step: 842/466, loss: 0.11236050724983215 2023-01-24 03:11:11.460708: step: 844/466, loss: 0.10167965292930603 2023-01-24 03:11:12.184699: step: 846/466, loss: 0.1063070073723793 2023-01-24 03:11:12.866483: step: 848/466, loss: 0.4301778972148895 2023-01-24 03:11:13.601508: step: 850/466, loss: 0.2793833911418915 2023-01-24 03:11:14.482511: step: 852/466, loss: 0.06376925855875015 2023-01-24 03:11:15.189932: step: 854/466, loss: 0.20013092458248138 2023-01-24 03:11:16.009490: step: 856/466, loss: 0.09704309701919556 2023-01-24 03:11:16.724658: step: 858/466, loss: 0.1062927320599556 2023-01-24 03:11:17.610523: step: 860/466, loss: 0.249484583735466 2023-01-24 03:11:18.310762: step: 862/466, loss: 0.12209375947713852 2023-01-24 03:11:19.091331: step: 864/466, loss: 1.1528286933898926 2023-01-24 03:11:19.811879: step: 866/466, loss: 0.1208798959851265 2023-01-24 03:11:20.534980: step: 868/466, loss: 0.2088085114955902 2023-01-24 03:11:21.231231: step: 870/466, loss: 0.1554957628250122 2023-01-24 03:11:22.020337: step: 872/466, loss: 0.5452127456665039 2023-01-24 03:11:22.743698: step: 874/466, loss: 0.1534930318593979 2023-01-24 03:11:23.485227: step: 876/466, loss: 0.13185469806194305 2023-01-24 03:11:24.274817: step: 878/466, loss: 0.0991528108716011 2023-01-24 03:11:25.001998: step: 880/466, loss: 0.24972985684871674 2023-01-24 03:11:25.866403: step: 882/466, loss: 0.15756390988826752 2023-01-24 03:11:26.616003: step: 884/466, loss: 0.1563219279050827 2023-01-24 03:11:27.398467: step: 886/466, loss: 0.0598449744284153 2023-01-24 03:11:28.142242: step: 888/466, loss: 0.052008356899023056 2023-01-24 03:11:28.860354: step: 890/466, loss: 0.3348170518875122 2023-01-24 03:11:29.628113: step: 892/466, loss: 0.3671996295452118 2023-01-24 03:11:30.365035: step: 894/466, loss: 0.7377856969833374 2023-01-24 03:11:31.092259: step: 896/466, loss: 0.2085656374692917 2023-01-24 03:11:31.864122: step: 898/466, loss: 0.31188899278640747 2023-01-24 03:11:32.651483: step: 900/466, loss: 0.5782420635223389 2023-01-24 03:11:33.411919: step: 902/466, loss: 0.058877550065517426 2023-01-24 03:11:34.187288: step: 904/466, loss: 0.20558245480060577 2023-01-24 03:11:34.974310: step: 906/466, loss: 0.37686750292778015 2023-01-24 03:11:35.711753: step: 908/466, loss: 0.33963459730148315 2023-01-24 03:11:36.485328: step: 910/466, loss: 0.14356964826583862 2023-01-24 03:11:37.231437: step: 912/466, loss: 0.21850159764289856 2023-01-24 03:11:38.081195: step: 914/466, loss: 0.26700299978256226 2023-01-24 03:11:38.785677: step: 916/466, loss: 0.21178553998470306 2023-01-24 03:11:39.579620: step: 918/466, loss: 0.5047131180763245 2023-01-24 03:11:40.374361: step: 920/466, loss: 0.475437194108963 2023-01-24 03:11:41.216460: step: 922/466, loss: 0.04170737788081169 2023-01-24 03:11:41.984379: step: 924/466, loss: 0.03637902811169624 2023-01-24 03:11:42.747765: step: 926/466, loss: 0.349740594625473 2023-01-24 03:11:43.572425: step: 928/466, loss: 0.2327994704246521 2023-01-24 03:11:44.337522: step: 930/466, loss: 0.15047289431095123 2023-01-24 03:11:45.172228: step: 932/466, loss: 0.3883518874645233 ================================================== Loss: 0.304 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491135620915033, 'r': 0.33785183428209997, 'f1': 0.3433903889424622}, 'combined': 0.25302449711549846, 'epoch': 11} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.36623080617100867, 'r': 0.27578385664004035, 'f1': 0.3146362536456812}, 'combined': 0.19338618516758943, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3172673757002801, 'r': 0.3371342132678498, 'f1': 0.3268992279524505}, 'combined': 0.24087311533338457, 'epoch': 11} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.348894820779356, 'r': 0.28056706558339894, 'f1': 0.3110224723181963}, 'combined': 0.191165031766306, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35420321637426905, 'r': 0.3447936432637571, 'f1': 0.34943509615384616}, 'combined': 0.257478491902834, 'epoch': 11} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36325261061235115, 'r': 0.27102296164405054, 'f1': 0.31043225581859485}, 'combined': 0.19173756977030862, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.2857142857142857, 'f1': 0.30769230769230765}, 'combined': 0.2051282051282051, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29838709677419356, 'r': 0.40217391304347827, 'f1': 0.34259259259259267}, 'combined': 0.17129629629629634, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.13793103448275862, 'f1': 0.2162162162162162}, 'combined': 0.14414414414414412, 'epoch': 11} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491135620915033, 'r': 0.33785183428209997, 'f1': 0.3433903889424622}, 'combined': 0.25302449711549846, 'epoch': 11} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.36623080617100867, 'r': 0.27578385664004035, 'f1': 0.3146362536456812}, 'combined': 0.19338618516758943, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.2857142857142857, 'f1': 0.30769230769230765}, 'combined': 0.2051282051282051, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35420321637426905, 'r': 0.3447936432637571, 'f1': 0.34943509615384616}, 'combined': 0.257478491902834, 'epoch': 11} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36325261061235115, 'r': 0.27102296164405054, 'f1': 0.31043225581859485}, 'combined': 0.19173756977030862, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.13793103448275862, 'f1': 0.2162162162162162}, 'combined': 0.14414414414414412, 'epoch': 11} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:14:42.491549: step: 2/466, loss: 0.22970537841320038 2023-01-24 03:14:43.236809: step: 4/466, loss: 3.4017958641052246 2023-01-24 03:14:44.022714: step: 6/466, loss: 0.19457271695137024 2023-01-24 03:14:44.800175: step: 8/466, loss: 0.05014999583363533 2023-01-24 03:14:45.483998: step: 10/466, loss: 0.12878787517547607 2023-01-24 03:14:46.292372: step: 12/466, loss: 0.20097599923610687 2023-01-24 03:14:46.977275: step: 14/466, loss: 0.07929079234600067 2023-01-24 03:14:47.695538: step: 16/466, loss: 0.3970954418182373 2023-01-24 03:14:48.489365: step: 18/466, loss: 0.1011417880654335 2023-01-24 03:14:49.174228: step: 20/466, loss: 0.10177238285541534 2023-01-24 03:14:49.941215: step: 22/466, loss: 2.5332489013671875 2023-01-24 03:14:50.695665: step: 24/466, loss: 0.0751798003911972 2023-01-24 03:14:51.534106: step: 26/466, loss: 0.10535023361444473 2023-01-24 03:14:52.305653: step: 28/466, loss: 0.18587830662727356 2023-01-24 03:14:53.052916: step: 30/466, loss: 0.10108933597803116 2023-01-24 03:14:53.839148: step: 32/466, loss: 0.11795395612716675 2023-01-24 03:14:54.572435: step: 34/466, loss: 0.10065092146396637 2023-01-24 03:14:55.336669: step: 36/466, loss: 0.5440007448196411 2023-01-24 03:14:56.130469: step: 38/466, loss: 0.18395252525806427 2023-01-24 03:14:56.908529: step: 40/466, loss: 0.11210142821073532 2023-01-24 03:14:57.627967: step: 42/466, loss: 0.26954972743988037 2023-01-24 03:14:58.342185: step: 44/466, loss: 0.08725812286138535 2023-01-24 03:14:59.122337: step: 46/466, loss: 0.12701453268527985 2023-01-24 03:14:59.851744: step: 48/466, loss: 0.14181667566299438 2023-01-24 03:15:00.697372: step: 50/466, loss: 0.1456719934940338 2023-01-24 03:15:01.444491: step: 52/466, loss: 0.10307493805885315 2023-01-24 03:15:02.306491: step: 54/466, loss: 0.14766325056552887 2023-01-24 03:15:03.079341: step: 56/466, loss: 0.06563329696655273 2023-01-24 03:15:03.861521: step: 58/466, loss: 0.08325640857219696 2023-01-24 03:15:04.604770: step: 60/466, loss: 0.3567356467247009 2023-01-24 03:15:05.389488: step: 62/466, loss: 0.0927126333117485 2023-01-24 03:15:06.210057: step: 64/466, loss: 0.8182318806648254 2023-01-24 03:15:06.989345: step: 66/466, loss: 0.22731924057006836 2023-01-24 03:15:07.713915: step: 68/466, loss: 0.1506653130054474 2023-01-24 03:15:08.461632: step: 70/466, loss: 0.14179442822933197 2023-01-24 03:15:09.186765: step: 72/466, loss: 0.05986696109175682 2023-01-24 03:15:09.941396: step: 74/466, loss: 0.26590049266815186 2023-01-24 03:15:10.675894: step: 76/466, loss: 0.05869297683238983 2023-01-24 03:15:11.409865: step: 78/466, loss: 0.0641389861702919 2023-01-24 03:15:12.139206: step: 80/466, loss: 0.15808598697185516 2023-01-24 03:15:12.938402: step: 82/466, loss: 0.1286565214395523 2023-01-24 03:15:13.725524: step: 84/466, loss: 0.1533057689666748 2023-01-24 03:15:14.506483: step: 86/466, loss: 0.16723588109016418 2023-01-24 03:15:15.261259: step: 88/466, loss: 0.12770910561084747 2023-01-24 03:15:15.926208: step: 90/466, loss: 0.2282274216413498 2023-01-24 03:15:16.682717: step: 92/466, loss: 0.39215442538261414 2023-01-24 03:15:17.506796: step: 94/466, loss: 0.40199926495552063 2023-01-24 03:15:18.253003: step: 96/466, loss: 0.0938708484172821 2023-01-24 03:15:19.189161: step: 98/466, loss: 0.1018475815653801 2023-01-24 03:15:19.845952: step: 100/466, loss: 0.04701365530490875 2023-01-24 03:15:20.612254: step: 102/466, loss: 0.17152880132198334 2023-01-24 03:15:21.414160: step: 104/466, loss: 0.10114247351884842 2023-01-24 03:15:22.248475: step: 106/466, loss: 0.1542118936777115 2023-01-24 03:15:22.939980: step: 108/466, loss: 0.21466155350208282 2023-01-24 03:15:23.666041: step: 110/466, loss: 0.15568910539150238 2023-01-24 03:15:24.427492: step: 112/466, loss: 0.11884459853172302 2023-01-24 03:15:25.225149: step: 114/466, loss: 1.404669165611267 2023-01-24 03:15:25.930321: step: 116/466, loss: 0.6263872385025024 2023-01-24 03:15:26.658358: step: 118/466, loss: 0.10001714527606964 2023-01-24 03:15:27.357731: step: 120/466, loss: 0.2752031683921814 2023-01-24 03:15:28.135656: step: 122/466, loss: 0.21351397037506104 2023-01-24 03:15:28.879251: step: 124/466, loss: 0.33566728234291077 2023-01-24 03:15:29.621224: step: 126/466, loss: 0.0852104052901268 2023-01-24 03:15:30.477272: step: 128/466, loss: 0.1525503247976303 2023-01-24 03:15:31.427468: step: 130/466, loss: 5.630551815032959 2023-01-24 03:15:32.273042: step: 132/466, loss: 0.3894355595111847 2023-01-24 03:15:33.016192: step: 134/466, loss: 0.07047511637210846 2023-01-24 03:15:33.852916: step: 136/466, loss: 1.2956987619400024 2023-01-24 03:15:34.698177: step: 138/466, loss: 0.29221078753471375 2023-01-24 03:15:35.522428: step: 140/466, loss: 0.2097444236278534 2023-01-24 03:15:36.254755: step: 142/466, loss: 0.16492792963981628 2023-01-24 03:15:37.032516: step: 144/466, loss: 0.4171257019042969 2023-01-24 03:15:37.904634: step: 146/466, loss: 0.18491220474243164 2023-01-24 03:15:38.626362: step: 148/466, loss: 0.13337670266628265 2023-01-24 03:15:39.417487: step: 150/466, loss: 0.13845552504062653 2023-01-24 03:15:40.104457: step: 152/466, loss: 0.3049972653388977 2023-01-24 03:15:40.884547: step: 154/466, loss: 0.10930734872817993 2023-01-24 03:15:41.743783: step: 156/466, loss: 0.3641570806503296 2023-01-24 03:15:42.370122: step: 158/466, loss: 0.19187471270561218 2023-01-24 03:15:43.138788: step: 160/466, loss: 0.036166153848171234 2023-01-24 03:15:43.961905: step: 162/466, loss: 0.22070954740047455 2023-01-24 03:15:44.719103: step: 164/466, loss: 0.16025584936141968 2023-01-24 03:15:45.435325: step: 166/466, loss: 0.1772567182779312 2023-01-24 03:15:46.208158: step: 168/466, loss: 0.27021461725234985 2023-01-24 03:15:46.969002: step: 170/466, loss: 0.1123526319861412 2023-01-24 03:15:47.818822: step: 172/466, loss: 0.15609031915664673 2023-01-24 03:15:48.479801: step: 174/466, loss: 0.5921962857246399 2023-01-24 03:15:49.271501: step: 176/466, loss: 0.17766670882701874 2023-01-24 03:15:50.142571: step: 178/466, loss: 0.1023736447095871 2023-01-24 03:15:50.984589: step: 180/466, loss: 0.5316795110702515 2023-01-24 03:15:51.721231: step: 182/466, loss: 0.07270383089780807 2023-01-24 03:15:52.520761: step: 184/466, loss: 0.19070985913276672 2023-01-24 03:15:53.294295: step: 186/466, loss: 0.4144967496395111 2023-01-24 03:15:54.034780: step: 188/466, loss: 0.046569474041461945 2023-01-24 03:15:54.811909: step: 190/466, loss: 0.13963574171066284 2023-01-24 03:15:55.607395: step: 192/466, loss: 0.15261642634868622 2023-01-24 03:15:56.371822: step: 194/466, loss: 0.11186033487319946 2023-01-24 03:15:57.170315: step: 196/466, loss: 0.2546413838863373 2023-01-24 03:15:57.861890: step: 198/466, loss: 0.5899943709373474 2023-01-24 03:15:58.568419: step: 200/466, loss: 0.04048832505941391 2023-01-24 03:15:59.295225: step: 202/466, loss: 0.10920899361371994 2023-01-24 03:16:00.018483: step: 204/466, loss: 0.28868985176086426 2023-01-24 03:16:00.785532: step: 206/466, loss: 0.12218084186315536 2023-01-24 03:16:01.534357: step: 208/466, loss: 0.05156202241778374 2023-01-24 03:16:02.324568: step: 210/466, loss: 0.10586714744567871 2023-01-24 03:16:02.966360: step: 212/466, loss: 0.1500854194164276 2023-01-24 03:16:03.735980: step: 214/466, loss: 0.09757547080516815 2023-01-24 03:16:04.508138: step: 216/466, loss: 0.028935061767697334 2023-01-24 03:16:05.262964: step: 218/466, loss: 0.028134481981396675 2023-01-24 03:16:06.040981: step: 220/466, loss: 0.12261322140693665 2023-01-24 03:16:06.860613: step: 222/466, loss: 0.09786012023687363 2023-01-24 03:16:07.576292: step: 224/466, loss: 0.26669418811798096 2023-01-24 03:16:08.302385: step: 226/466, loss: 0.06629175692796707 2023-01-24 03:16:09.060268: step: 228/466, loss: 0.35725879669189453 2023-01-24 03:16:09.794258: step: 230/466, loss: 0.23570282757282257 2023-01-24 03:16:10.601367: step: 232/466, loss: 0.127173513174057 2023-01-24 03:16:11.419660: step: 234/466, loss: 0.10487955063581467 2023-01-24 03:16:12.155492: step: 236/466, loss: 0.06401833891868591 2023-01-24 03:16:12.949150: step: 238/466, loss: 0.3104228079319 2023-01-24 03:16:13.684197: step: 240/466, loss: 0.8340924978256226 2023-01-24 03:16:14.392688: step: 242/466, loss: 0.5680950880050659 2023-01-24 03:16:15.246588: step: 244/466, loss: 0.2969139814376831 2023-01-24 03:16:16.019508: step: 246/466, loss: 0.12126941233873367 2023-01-24 03:16:16.795255: step: 248/466, loss: 0.42154020071029663 2023-01-24 03:16:17.554891: step: 250/466, loss: 0.0735100582242012 2023-01-24 03:16:18.275224: step: 252/466, loss: 0.0969298928976059 2023-01-24 03:16:18.990308: step: 254/466, loss: 0.14617589116096497 2023-01-24 03:16:19.722387: step: 256/466, loss: 0.35524335503578186 2023-01-24 03:16:20.488116: step: 258/466, loss: 0.1707388311624527 2023-01-24 03:16:21.267173: step: 260/466, loss: 0.13604384660720825 2023-01-24 03:16:22.063396: step: 262/466, loss: 0.19494149088859558 2023-01-24 03:16:22.819082: step: 264/466, loss: 0.3368360698223114 2023-01-24 03:16:23.504064: step: 266/466, loss: 0.17181900143623352 2023-01-24 03:16:24.285535: step: 268/466, loss: 0.2624906301498413 2023-01-24 03:16:25.071983: step: 270/466, loss: 0.3313490152359009 2023-01-24 03:16:25.994899: step: 272/466, loss: 0.15030698478221893 2023-01-24 03:16:26.781087: step: 274/466, loss: 0.019664861261844635 2023-01-24 03:16:27.529950: step: 276/466, loss: 0.13571712374687195 2023-01-24 03:16:28.326183: step: 278/466, loss: 0.2538709342479706 2023-01-24 03:16:29.048150: step: 280/466, loss: 0.11656523495912552 2023-01-24 03:16:29.848380: step: 282/466, loss: 0.11484860628843307 2023-01-24 03:16:30.571342: step: 284/466, loss: 0.1368667632341385 2023-01-24 03:16:31.366108: step: 286/466, loss: 0.16884687542915344 2023-01-24 03:16:32.170700: step: 288/466, loss: 0.13645488023757935 2023-01-24 03:16:32.892252: step: 290/466, loss: 0.1285877227783203 2023-01-24 03:16:33.680303: step: 292/466, loss: 0.16469940543174744 2023-01-24 03:16:34.469274: step: 294/466, loss: 0.14539150893688202 2023-01-24 03:16:35.218405: step: 296/466, loss: 0.09342527389526367 2023-01-24 03:16:36.043672: step: 298/466, loss: 0.11741075664758682 2023-01-24 03:16:36.825166: step: 300/466, loss: 0.1407267451286316 2023-01-24 03:16:37.704252: step: 302/466, loss: 0.2229546159505844 2023-01-24 03:16:38.431011: step: 304/466, loss: 0.2091650664806366 2023-01-24 03:16:39.150662: step: 306/466, loss: 0.15506994724273682 2023-01-24 03:16:39.912708: step: 308/466, loss: 0.24538929760456085 2023-01-24 03:16:40.698226: step: 310/466, loss: 0.1123206615447998 2023-01-24 03:16:41.474135: step: 312/466, loss: 0.15521487593650818 2023-01-24 03:16:42.232986: step: 314/466, loss: 0.48486581444740295 2023-01-24 03:16:43.007571: step: 316/466, loss: 0.07889270037412643 2023-01-24 03:16:43.784854: step: 318/466, loss: 0.18789488077163696 2023-01-24 03:16:44.571122: step: 320/466, loss: 0.13976812362670898 2023-01-24 03:16:45.291811: step: 322/466, loss: 0.12298387289047241 2023-01-24 03:16:46.028551: step: 324/466, loss: 0.024222377687692642 2023-01-24 03:16:46.792119: step: 326/466, loss: 0.0798078179359436 2023-01-24 03:16:47.522341: step: 328/466, loss: 0.12812082469463348 2023-01-24 03:16:48.431750: step: 330/466, loss: 0.14643944799900055 2023-01-24 03:16:49.228554: step: 332/466, loss: 0.07890229672193527 2023-01-24 03:16:49.973402: step: 334/466, loss: 0.03876285254955292 2023-01-24 03:16:50.737007: step: 336/466, loss: 0.0667174905538559 2023-01-24 03:16:51.582723: step: 338/466, loss: 0.11607632786035538 2023-01-24 03:16:52.343976: step: 340/466, loss: 0.058539051562547684 2023-01-24 03:16:53.045743: step: 342/466, loss: 0.09484546631574631 2023-01-24 03:16:53.803143: step: 344/466, loss: 0.06643007695674896 2023-01-24 03:16:54.590829: step: 346/466, loss: 0.17572882771492004 2023-01-24 03:16:55.378180: step: 348/466, loss: 0.23891420662403107 2023-01-24 03:16:56.134471: step: 350/466, loss: 0.10195959359407425 2023-01-24 03:16:56.899687: step: 352/466, loss: 0.09479983896017075 2023-01-24 03:16:57.643545: step: 354/466, loss: 0.31346720457077026 2023-01-24 03:16:58.469250: step: 356/466, loss: 0.05729576200246811 2023-01-24 03:16:59.242607: step: 358/466, loss: 0.30581486225128174 2023-01-24 03:17:00.008961: step: 360/466, loss: 0.4423132538795471 2023-01-24 03:17:00.778716: step: 362/466, loss: 0.07643415778875351 2023-01-24 03:17:01.541491: step: 364/466, loss: 0.279776930809021 2023-01-24 03:17:02.303940: step: 366/466, loss: 0.27822229266166687 2023-01-24 03:17:03.064198: step: 368/466, loss: 0.389532208442688 2023-01-24 03:17:03.849361: step: 370/466, loss: 0.12308437377214432 2023-01-24 03:17:04.604808: step: 372/466, loss: 0.0807652547955513 2023-01-24 03:17:05.401467: step: 374/466, loss: 0.18257291615009308 2023-01-24 03:17:06.207592: step: 376/466, loss: 0.12374410033226013 2023-01-24 03:17:07.011321: step: 378/466, loss: 0.27145010232925415 2023-01-24 03:17:07.800682: step: 380/466, loss: 0.2303091585636139 2023-01-24 03:17:08.566748: step: 382/466, loss: 0.15865585207939148 2023-01-24 03:17:09.254940: step: 384/466, loss: 0.13504156470298767 2023-01-24 03:17:09.953745: step: 386/466, loss: 0.16256369650363922 2023-01-24 03:17:10.655879: step: 388/466, loss: 0.3930529057979584 2023-01-24 03:17:11.423255: step: 390/466, loss: 0.14539262652397156 2023-01-24 03:17:12.305527: step: 392/466, loss: 0.41078057885169983 2023-01-24 03:17:13.083503: step: 394/466, loss: 0.11577159911394119 2023-01-24 03:17:13.765514: step: 396/466, loss: 0.11931080371141434 2023-01-24 03:17:14.604263: step: 398/466, loss: 0.1519489884376526 2023-01-24 03:17:15.330176: step: 400/466, loss: 0.18284772336483002 2023-01-24 03:17:16.047849: step: 402/466, loss: 0.05453243479132652 2023-01-24 03:17:16.814234: step: 404/466, loss: 0.1171683669090271 2023-01-24 03:17:17.547070: step: 406/466, loss: 0.15749157965183258 2023-01-24 03:17:18.280894: step: 408/466, loss: 0.13453346490859985 2023-01-24 03:17:19.086758: step: 410/466, loss: 0.16586779057979584 2023-01-24 03:17:19.955374: step: 412/466, loss: 0.1531343013048172 2023-01-24 03:17:20.786325: step: 414/466, loss: 0.24512223899364471 2023-01-24 03:17:21.568974: step: 416/466, loss: 0.1436801552772522 2023-01-24 03:17:22.398461: step: 418/466, loss: 0.12498243153095245 2023-01-24 03:17:23.122861: step: 420/466, loss: 0.5236779451370239 2023-01-24 03:17:23.867407: step: 422/466, loss: 1.3416144847869873 2023-01-24 03:17:24.567530: step: 424/466, loss: 0.21465624868869781 2023-01-24 03:17:25.313877: step: 426/466, loss: 0.40788188576698303 2023-01-24 03:17:26.071369: step: 428/466, loss: 0.10750317573547363 2023-01-24 03:17:26.801243: step: 430/466, loss: 0.4191490411758423 2023-01-24 03:17:27.518480: step: 432/466, loss: 0.20027561485767365 2023-01-24 03:17:28.249745: step: 434/466, loss: 0.05397174507379532 2023-01-24 03:17:28.933692: step: 436/466, loss: 0.07795599102973938 2023-01-24 03:17:29.739556: step: 438/466, loss: 0.21288935840129852 2023-01-24 03:17:30.482603: step: 440/466, loss: 0.4418155550956726 2023-01-24 03:17:31.208141: step: 442/466, loss: 0.3671300709247589 2023-01-24 03:17:31.959794: step: 444/466, loss: 0.06263580918312073 2023-01-24 03:17:32.704959: step: 446/466, loss: 0.15847350656986237 2023-01-24 03:17:33.430695: step: 448/466, loss: 0.2654990553855896 2023-01-24 03:17:34.222559: step: 450/466, loss: 0.10745230317115784 2023-01-24 03:17:34.939162: step: 452/466, loss: 0.17414699494838715 2023-01-24 03:17:35.604860: step: 454/466, loss: 0.15538115799427032 2023-01-24 03:17:36.409927: step: 456/466, loss: 1.7021269798278809 2023-01-24 03:17:37.224174: step: 458/466, loss: 0.05081811919808388 2023-01-24 03:17:37.968471: step: 460/466, loss: 0.5242745876312256 2023-01-24 03:17:38.739716: step: 462/466, loss: 1.2987644672393799 2023-01-24 03:17:39.482085: step: 464/466, loss: 0.19540849328041077 2023-01-24 03:17:40.207124: step: 466/466, loss: 0.07759097963571548 2023-01-24 03:17:40.949813: step: 468/466, loss: 0.3391871154308319 2023-01-24 03:17:41.711028: step: 470/466, loss: 0.13671857118606567 2023-01-24 03:17:42.467958: step: 472/466, loss: 0.4496214985847473 2023-01-24 03:17:43.192842: step: 474/466, loss: 0.08343297243118286 2023-01-24 03:17:43.984343: step: 476/466, loss: 0.11408674716949463 2023-01-24 03:17:44.763654: step: 478/466, loss: 0.06497704982757568 2023-01-24 03:17:45.504126: step: 480/466, loss: 0.1774224489927292 2023-01-24 03:17:46.221762: step: 482/466, loss: 0.13437721133232117 2023-01-24 03:17:46.973594: step: 484/466, loss: 0.2139037400484085 2023-01-24 03:17:47.798816: step: 486/466, loss: 0.48550844192504883 2023-01-24 03:17:48.493565: step: 488/466, loss: 1.2269116640090942 2023-01-24 03:17:49.277723: step: 490/466, loss: 0.083436980843544 2023-01-24 03:17:50.090801: step: 492/466, loss: 0.26875337958335876 2023-01-24 03:17:50.840205: step: 494/466, loss: 0.10697463899850845 2023-01-24 03:17:51.559636: step: 496/466, loss: 0.0854906216263771 2023-01-24 03:17:52.276132: step: 498/466, loss: 0.5183632969856262 2023-01-24 03:17:53.083522: step: 500/466, loss: 0.3039102852344513 2023-01-24 03:17:53.774482: step: 502/466, loss: 0.12715557217597961 2023-01-24 03:17:54.455555: step: 504/466, loss: 0.16347670555114746 2023-01-24 03:17:55.251519: step: 506/466, loss: 0.14180181920528412 2023-01-24 03:17:55.956146: step: 508/466, loss: 0.08178799599409103 2023-01-24 03:17:56.719007: step: 510/466, loss: 0.1895528882741928 2023-01-24 03:17:57.433008: step: 512/466, loss: 0.2194291651248932 2023-01-24 03:17:58.320003: step: 514/466, loss: 0.17503222823143005 2023-01-24 03:17:58.994442: step: 516/466, loss: 0.2034883201122284 2023-01-24 03:17:59.715860: step: 518/466, loss: 0.1403059959411621 2023-01-24 03:18:00.440668: step: 520/466, loss: 0.04848955571651459 2023-01-24 03:18:01.221978: step: 522/466, loss: 0.1673567295074463 2023-01-24 03:18:01.992526: step: 524/466, loss: 0.12073955684900284 2023-01-24 03:18:02.761530: step: 526/466, loss: 0.42349761724472046 2023-01-24 03:18:03.483809: step: 528/466, loss: 0.10180588066577911 2023-01-24 03:18:04.240996: step: 530/466, loss: 0.32483330368995667 2023-01-24 03:18:05.007700: step: 532/466, loss: 0.08331071585416794 2023-01-24 03:18:05.767348: step: 534/466, loss: 0.05236487463116646 2023-01-24 03:18:06.593093: step: 536/466, loss: 0.4884677529335022 2023-01-24 03:18:07.363608: step: 538/466, loss: 0.6043692827224731 2023-01-24 03:18:08.113747: step: 540/466, loss: 0.39901936054229736 2023-01-24 03:18:08.977274: step: 542/466, loss: 0.10290895402431488 2023-01-24 03:18:09.706054: step: 544/466, loss: 0.2578374445438385 2023-01-24 03:18:10.514478: step: 546/466, loss: 0.34540289640426636 2023-01-24 03:18:11.254102: step: 548/466, loss: 0.12169482558965683 2023-01-24 03:18:12.010064: step: 550/466, loss: 0.20996147394180298 2023-01-24 03:18:12.755373: step: 552/466, loss: 0.14128772914409637 2023-01-24 03:18:13.460363: step: 554/466, loss: 0.21965238451957703 2023-01-24 03:18:14.218666: step: 556/466, loss: 0.09792616218328476 2023-01-24 03:18:14.998574: step: 558/466, loss: 0.10246670246124268 2023-01-24 03:18:15.740642: step: 560/466, loss: 0.0716271847486496 2023-01-24 03:18:16.477449: step: 562/466, loss: 0.10876203328371048 2023-01-24 03:18:17.257490: step: 564/466, loss: 0.08965716511011124 2023-01-24 03:18:18.056137: step: 566/466, loss: 0.2969381511211395 2023-01-24 03:18:18.779853: step: 568/466, loss: 0.10171569138765335 2023-01-24 03:18:19.587174: step: 570/466, loss: 0.2210264503955841 2023-01-24 03:18:20.297459: step: 572/466, loss: 0.1645101010799408 2023-01-24 03:18:21.005717: step: 574/466, loss: 0.15573878586292267 2023-01-24 03:18:21.858872: step: 576/466, loss: 0.15432459115982056 2023-01-24 03:18:22.666877: step: 578/466, loss: 0.10344555974006653 2023-01-24 03:18:23.382901: step: 580/466, loss: 0.08119934052228928 2023-01-24 03:18:24.171749: step: 582/466, loss: 0.03565558046102524 2023-01-24 03:18:24.887621: step: 584/466, loss: 0.0471508614718914 2023-01-24 03:18:25.636615: step: 586/466, loss: 0.30499204993247986 2023-01-24 03:18:26.368366: step: 588/466, loss: 0.07960199564695358 2023-01-24 03:18:27.115479: step: 590/466, loss: 0.07188259810209274 2023-01-24 03:18:27.869174: step: 592/466, loss: 0.20376189053058624 2023-01-24 03:18:28.664313: step: 594/466, loss: 0.08409322798252106 2023-01-24 03:18:29.419953: step: 596/466, loss: 0.1693294495344162 2023-01-24 03:18:30.229437: step: 598/466, loss: 0.3855529725551605 2023-01-24 03:18:30.947463: step: 600/466, loss: 0.1346471756696701 2023-01-24 03:18:31.717581: step: 602/466, loss: 0.05187489464879036 2023-01-24 03:18:32.441024: step: 604/466, loss: 0.09368216246366501 2023-01-24 03:18:33.200924: step: 606/466, loss: 0.16668803989887238 2023-01-24 03:18:34.022899: step: 608/466, loss: 0.4426323175430298 2023-01-24 03:18:34.741899: step: 610/466, loss: 0.05768425762653351 2023-01-24 03:18:35.583047: step: 612/466, loss: 0.46049514412879944 2023-01-24 03:18:36.317623: step: 614/466, loss: 0.11522240936756134 2023-01-24 03:18:37.089158: step: 616/466, loss: 0.17588311433792114 2023-01-24 03:18:37.895136: step: 618/466, loss: 0.11678693443536758 2023-01-24 03:18:38.609847: step: 620/466, loss: 0.05687686800956726 2023-01-24 03:18:39.380705: step: 622/466, loss: 0.17744433879852295 2023-01-24 03:18:40.128731: step: 624/466, loss: 0.07499007880687714 2023-01-24 03:18:40.816431: step: 626/466, loss: 0.0555715411901474 2023-01-24 03:18:41.612104: step: 628/466, loss: 0.08503931015729904 2023-01-24 03:18:42.313495: step: 630/466, loss: 0.4157727360725403 2023-01-24 03:18:43.013765: step: 632/466, loss: 0.10179923474788666 2023-01-24 03:18:43.799543: step: 634/466, loss: 0.39125341176986694 2023-01-24 03:18:44.547478: step: 636/466, loss: 0.10476453602313995 2023-01-24 03:18:45.318668: step: 638/466, loss: 0.332133948802948 2023-01-24 03:18:46.156589: step: 640/466, loss: 4.197464942932129 2023-01-24 03:18:46.913011: step: 642/466, loss: 0.11542638391256332 2023-01-24 03:18:47.726795: step: 644/466, loss: 0.17988237738609314 2023-01-24 03:18:48.509294: step: 646/466, loss: 0.40194782614707947 2023-01-24 03:18:49.208371: step: 648/466, loss: 0.24533627927303314 2023-01-24 03:18:49.969270: step: 650/466, loss: 0.23072265088558197 2023-01-24 03:18:50.684285: step: 652/466, loss: 0.08752790838479996 2023-01-24 03:18:51.471334: step: 654/466, loss: 0.3541397154331207 2023-01-24 03:18:52.237772: step: 656/466, loss: 0.11867779493331909 2023-01-24 03:18:53.044153: step: 658/466, loss: 0.4159494936466217 2023-01-24 03:18:53.813847: step: 660/466, loss: 0.07973338663578033 2023-01-24 03:18:54.540103: step: 662/466, loss: 0.24685896933078766 2023-01-24 03:18:55.313732: step: 664/466, loss: 0.07399331033229828 2023-01-24 03:18:56.061855: step: 666/466, loss: 0.18225261569023132 2023-01-24 03:18:56.887089: step: 668/466, loss: 0.08262845873832703 2023-01-24 03:18:57.657971: step: 670/466, loss: 0.10257098078727722 2023-01-24 03:18:58.389469: step: 672/466, loss: 0.3494648039340973 2023-01-24 03:18:59.187427: step: 674/466, loss: 0.21490906178951263 2023-01-24 03:18:59.892007: step: 676/466, loss: 0.1882047951221466 2023-01-24 03:19:00.617824: step: 678/466, loss: 0.08048104494810104 2023-01-24 03:19:01.347195: step: 680/466, loss: 0.12107175588607788 2023-01-24 03:19:02.110527: step: 682/466, loss: 0.10289272665977478 2023-01-24 03:19:02.893461: step: 684/466, loss: 0.09581567347049713 2023-01-24 03:19:03.687801: step: 686/466, loss: 0.39118218421936035 2023-01-24 03:19:04.429286: step: 688/466, loss: 0.14677421748638153 2023-01-24 03:19:05.186652: step: 690/466, loss: 0.18316148221492767 2023-01-24 03:19:05.936395: step: 692/466, loss: 0.07292038947343826 2023-01-24 03:19:06.674114: step: 694/466, loss: 0.04308658093214035 2023-01-24 03:19:07.404873: step: 696/466, loss: 0.09988658875226974 2023-01-24 03:19:08.140575: step: 698/466, loss: 0.2860415577888489 2023-01-24 03:19:08.933745: step: 700/466, loss: 0.24483168125152588 2023-01-24 03:19:09.766920: step: 702/466, loss: 0.12841112911701202 2023-01-24 03:19:10.576304: step: 704/466, loss: 0.18972891569137573 2023-01-24 03:19:11.391125: step: 706/466, loss: 0.9659270644187927 2023-01-24 03:19:12.137336: step: 708/466, loss: 0.1148655042052269 2023-01-24 03:19:12.926568: step: 710/466, loss: 0.1329548954963684 2023-01-24 03:19:13.718884: step: 712/466, loss: 0.2517092227935791 2023-01-24 03:19:14.537463: step: 714/466, loss: 0.32274332642555237 2023-01-24 03:19:15.327655: step: 716/466, loss: 0.3467109799385071 2023-01-24 03:19:16.039520: step: 718/466, loss: 0.9646264910697937 2023-01-24 03:19:16.876266: step: 720/466, loss: 0.06586325913667679 2023-01-24 03:19:17.657359: step: 722/466, loss: 0.16391877830028534 2023-01-24 03:19:18.360709: step: 724/466, loss: 0.11094661056995392 2023-01-24 03:19:19.069068: step: 726/466, loss: 0.06109832599759102 2023-01-24 03:19:19.854688: step: 728/466, loss: 0.18235573172569275 2023-01-24 03:19:20.601657: step: 730/466, loss: 0.24906662106513977 2023-01-24 03:19:21.357965: step: 732/466, loss: 0.1062256470322609 2023-01-24 03:19:22.231994: step: 734/466, loss: 0.20727825164794922 2023-01-24 03:19:22.982863: step: 736/466, loss: 0.197353795170784 2023-01-24 03:19:23.761112: step: 738/466, loss: 0.13762034475803375 2023-01-24 03:19:24.531153: step: 740/466, loss: 0.2729426622390747 2023-01-24 03:19:25.430703: step: 742/466, loss: 0.2156907469034195 2023-01-24 03:19:26.208632: step: 744/466, loss: 0.42388179898262024 2023-01-24 03:19:26.903861: step: 746/466, loss: 0.10986457765102386 2023-01-24 03:19:27.678232: step: 748/466, loss: 0.2412114143371582 2023-01-24 03:19:28.413484: step: 750/466, loss: 0.16270828247070312 2023-01-24 03:19:29.247447: step: 752/466, loss: 0.26500195264816284 2023-01-24 03:19:29.909841: step: 754/466, loss: 0.7490832209587097 2023-01-24 03:19:30.651587: step: 756/466, loss: 0.10206577926874161 2023-01-24 03:19:31.464819: step: 758/466, loss: 0.11831139028072357 2023-01-24 03:19:32.238386: step: 760/466, loss: 0.6326764225959778 2023-01-24 03:19:33.001145: step: 762/466, loss: 0.03746001422405243 2023-01-24 03:19:33.708292: step: 764/466, loss: 0.40483012795448303 2023-01-24 03:19:34.464803: step: 766/466, loss: 0.09523598849773407 2023-01-24 03:19:35.224826: step: 768/466, loss: 0.10036483407020569 2023-01-24 03:19:35.956929: step: 770/466, loss: 0.07891888171434402 2023-01-24 03:19:36.792094: step: 772/466, loss: 0.12781232595443726 2023-01-24 03:19:37.615513: step: 774/466, loss: 0.13502788543701172 2023-01-24 03:19:38.370725: step: 776/466, loss: 0.29947882890701294 2023-01-24 03:19:39.150193: step: 778/466, loss: 0.2242499589920044 2023-01-24 03:19:39.877745: step: 780/466, loss: 0.15417909622192383 2023-01-24 03:19:40.624114: step: 782/466, loss: 0.08405672013759613 2023-01-24 03:19:41.309416: step: 784/466, loss: 0.051594078540802 2023-01-24 03:19:41.986429: step: 786/466, loss: 0.05530751869082451 2023-01-24 03:19:42.761572: step: 788/466, loss: 0.09490536153316498 2023-01-24 03:19:43.545150: step: 790/466, loss: 0.25493767857551575 2023-01-24 03:19:44.316505: step: 792/466, loss: 0.09009568393230438 2023-01-24 03:19:45.155612: step: 794/466, loss: 0.39803338050842285 2023-01-24 03:19:45.876541: step: 796/466, loss: 0.10020395368337631 2023-01-24 03:19:46.645467: step: 798/466, loss: 0.16874580085277557 2023-01-24 03:19:47.574820: step: 800/466, loss: 0.18689240515232086 2023-01-24 03:19:48.413446: step: 802/466, loss: 0.26988959312438965 2023-01-24 03:19:49.200576: step: 804/466, loss: 0.07876058667898178 2023-01-24 03:19:49.933116: step: 806/466, loss: 0.056754451245069504 2023-01-24 03:19:50.594344: step: 808/466, loss: 0.12084650993347168 2023-01-24 03:19:51.290646: step: 810/466, loss: 0.16054895520210266 2023-01-24 03:19:52.041735: step: 812/466, loss: 0.03489147499203682 2023-01-24 03:19:52.866009: step: 814/466, loss: 0.2401704043149948 2023-01-24 03:19:53.611035: step: 816/466, loss: 0.08901477605104446 2023-01-24 03:19:54.306268: step: 818/466, loss: 0.19765466451644897 2023-01-24 03:19:55.080085: step: 820/466, loss: 0.08183137327432632 2023-01-24 03:19:55.798089: step: 822/466, loss: 0.08816594630479813 2023-01-24 03:19:56.553009: step: 824/466, loss: 0.077956922352314 2023-01-24 03:19:57.362006: step: 826/466, loss: 0.036140188574790955 2023-01-24 03:19:58.177441: step: 828/466, loss: 0.1455797553062439 2023-01-24 03:19:58.902918: step: 830/466, loss: 1.6206011772155762 2023-01-24 03:19:59.655039: step: 832/466, loss: 0.5392410159111023 2023-01-24 03:20:00.470584: step: 834/466, loss: 1.0381767749786377 2023-01-24 03:20:01.189162: step: 836/466, loss: 0.02795933187007904 2023-01-24 03:20:01.992510: step: 838/466, loss: 0.041050322353839874 2023-01-24 03:20:02.739171: step: 840/466, loss: 0.17207692563533783 2023-01-24 03:20:03.477656: step: 842/466, loss: 0.17557108402252197 2023-01-24 03:20:04.202157: step: 844/466, loss: 0.10534816980361938 2023-01-24 03:20:04.978701: step: 846/466, loss: 0.13698969781398773 2023-01-24 03:20:05.838397: step: 848/466, loss: 0.3698488473892212 2023-01-24 03:20:06.552369: step: 850/466, loss: 0.14198416471481323 2023-01-24 03:20:07.298352: step: 852/466, loss: 0.14994223415851593 2023-01-24 03:20:08.116122: step: 854/466, loss: 0.7133137583732605 2023-01-24 03:20:08.827003: step: 856/466, loss: 0.1454135626554489 2023-01-24 03:20:09.572738: step: 858/466, loss: 0.15097977221012115 2023-01-24 03:20:10.425287: step: 860/466, loss: 0.05939953401684761 2023-01-24 03:20:11.239207: step: 862/466, loss: 0.6255478858947754 2023-01-24 03:20:12.022125: step: 864/466, loss: 0.21900486946105957 2023-01-24 03:20:12.869131: step: 866/466, loss: 0.14697346091270447 2023-01-24 03:20:13.829294: step: 868/466, loss: 0.13551542162895203 2023-01-24 03:20:14.666230: step: 870/466, loss: 0.13633784651756287 2023-01-24 03:20:15.504461: step: 872/466, loss: 0.22378049790859222 2023-01-24 03:20:16.291934: step: 874/466, loss: 0.290413498878479 2023-01-24 03:20:17.026234: step: 876/466, loss: 0.35654738545417786 2023-01-24 03:20:17.799523: step: 878/466, loss: 0.27310025691986084 2023-01-24 03:20:18.599172: step: 880/466, loss: 0.09667938202619553 2023-01-24 03:20:19.401966: step: 882/466, loss: 0.17184355854988098 2023-01-24 03:20:20.234469: step: 884/466, loss: 0.1590942144393921 2023-01-24 03:20:20.966432: step: 886/466, loss: 0.1663396954536438 2023-01-24 03:20:21.808611: step: 888/466, loss: 0.18245753645896912 2023-01-24 03:20:22.585820: step: 890/466, loss: 0.10235494375228882 2023-01-24 03:20:23.363633: step: 892/466, loss: 0.09549881517887115 2023-01-24 03:20:24.165712: step: 894/466, loss: 0.04158993065357208 2023-01-24 03:20:24.907765: step: 896/466, loss: 2.234426975250244 2023-01-24 03:20:25.666437: step: 898/466, loss: 0.29457899928092957 2023-01-24 03:20:26.475303: step: 900/466, loss: 0.21517448127269745 2023-01-24 03:20:27.213472: step: 902/466, loss: 0.06916851550340652 2023-01-24 03:20:27.995441: step: 904/466, loss: 0.21449489891529083 2023-01-24 03:20:28.906169: step: 906/466, loss: 0.15825435519218445 2023-01-24 03:20:29.688362: step: 908/466, loss: 0.09181555360555649 2023-01-24 03:20:30.496885: step: 910/466, loss: 0.17901822924613953 2023-01-24 03:20:31.253395: step: 912/466, loss: 0.25450900197029114 2023-01-24 03:20:32.059075: step: 914/466, loss: 0.06662459671497345 2023-01-24 03:20:32.925327: step: 916/466, loss: 0.06792336702346802 2023-01-24 03:20:33.706772: step: 918/466, loss: 0.10544149577617645 2023-01-24 03:20:34.527751: step: 920/466, loss: 0.13937614858150482 2023-01-24 03:20:35.359738: step: 922/466, loss: 0.11666586250066757 2023-01-24 03:20:36.104847: step: 924/466, loss: 0.14547590911388397 2023-01-24 03:20:36.833120: step: 926/466, loss: 0.11564742773771286 2023-01-24 03:20:37.602037: step: 928/466, loss: 0.08576952666044235 2023-01-24 03:20:38.326924: step: 930/466, loss: 0.611613392829895 2023-01-24 03:20:39.060811: step: 932/466, loss: 0.5802958607673645 ================================================== Loss: 0.249 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33896708683473387, 'r': 0.32803266467877473, 'f1': 0.33341024934563984}, 'combined': 0.24567071004415567, 'epoch': 12} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37669707011417414, 'r': 0.2766572772724341, 'f1': 0.3190182668597844}, 'combined': 0.19607952011869675, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3124126758798605, 'r': 0.3355323995218236, 'f1': 0.3235600632168363}, 'combined': 0.2384126781597741, 'epoch': 12} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3588964470264492, 'r': 0.2774844391295404, 'f1': 0.31298293671349525}, 'combined': 0.19237000012634342, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34589285714285717, 'r': 0.338016738953646, 'f1': 0.3419094461200987}, 'combined': 0.25193327608849375, 'epoch': 12} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37148994173116306, 'r': 0.27227611978221805, 'f1': 0.31423787589828056}, 'combined': 0.19408809981952627, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2867647058823529, 'r': 0.2785714285714286, 'f1': 0.2826086956521739}, 'combined': 0.18840579710144925, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29838709677419356, 'r': 0.40217391304347827, 'f1': 0.34259259259259267}, 'combined': 0.17129629629629634, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491135620915033, 'r': 0.33785183428209997, 'f1': 0.3433903889424622}, 'combined': 0.25302449711549846, 'epoch': 11} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.36623080617100867, 'r': 0.27578385664004035, 'f1': 0.3146362536456812}, 'combined': 0.19338618516758943, 'epoch': 11} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.2857142857142857, 'f1': 0.30769230769230765}, 'combined': 0.2051282051282051, 'epoch': 11} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35420321637426905, 'r': 0.3447936432637571, 'f1': 0.34943509615384616}, 'combined': 0.257478491902834, 'epoch': 11} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36325261061235115, 'r': 0.27102296164405054, 'f1': 0.31043225581859485}, 'combined': 0.19173756977030862, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.13793103448275862, 'f1': 0.2162162162162162}, 'combined': 0.14414414414414412, 'epoch': 11} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:23:23.608120: step: 2/466, loss: 0.382373571395874 2023-01-24 03:23:24.386787: step: 4/466, loss: 0.1019504964351654 2023-01-24 03:23:25.318945: step: 6/466, loss: 0.0512908473610878 2023-01-24 03:23:26.122410: step: 8/466, loss: 0.2527649700641632 2023-01-24 03:23:26.899676: step: 10/466, loss: 0.5246418714523315 2023-01-24 03:23:27.717016: step: 12/466, loss: 0.10989820212125778 2023-01-24 03:23:28.437313: step: 14/466, loss: 0.05986408516764641 2023-01-24 03:23:29.166637: step: 16/466, loss: 0.18218691647052765 2023-01-24 03:23:29.947207: step: 18/466, loss: 0.0748194083571434 2023-01-24 03:23:30.709986: step: 20/466, loss: 0.2327689379453659 2023-01-24 03:23:31.509480: step: 22/466, loss: 0.4737565815448761 2023-01-24 03:23:32.307962: step: 24/466, loss: 0.10894951969385147 2023-01-24 03:23:33.129427: step: 26/466, loss: 0.17812326550483704 2023-01-24 03:23:33.945990: step: 28/466, loss: 0.12489855289459229 2023-01-24 03:23:34.691724: step: 30/466, loss: 0.13165737688541412 2023-01-24 03:23:35.427471: step: 32/466, loss: 0.12805214524269104 2023-01-24 03:23:36.117077: step: 34/466, loss: 0.07385516166687012 2023-01-24 03:23:36.834085: step: 36/466, loss: 0.037543077021837234 2023-01-24 03:23:37.573968: step: 38/466, loss: 0.1814260482788086 2023-01-24 03:23:38.382089: step: 40/466, loss: 0.07305333018302917 2023-01-24 03:23:39.124543: step: 42/466, loss: 0.20866356790065765 2023-01-24 03:23:39.893847: step: 44/466, loss: 0.08367782831192017 2023-01-24 03:23:40.638177: step: 46/466, loss: 0.14819931983947754 2023-01-24 03:23:41.462639: step: 48/466, loss: 0.09913279116153717 2023-01-24 03:23:42.230003: step: 50/466, loss: 0.3708493113517761 2023-01-24 03:23:42.973842: step: 52/466, loss: 0.06336794793605804 2023-01-24 03:23:43.715000: step: 54/466, loss: 0.11002914607524872 2023-01-24 03:23:44.492095: step: 56/466, loss: 0.2921782433986664 2023-01-24 03:23:45.271211: step: 58/466, loss: 0.09743791818618774 2023-01-24 03:23:45.998609: step: 60/466, loss: 0.13039200007915497 2023-01-24 03:23:46.716271: step: 62/466, loss: 0.23460814356803894 2023-01-24 03:23:47.502231: step: 64/466, loss: 0.08859889954328537 2023-01-24 03:23:48.252849: step: 66/466, loss: 0.7760970592498779 2023-01-24 03:23:48.969668: step: 68/466, loss: 0.18919910490512848 2023-01-24 03:23:49.810588: step: 70/466, loss: 0.04972463101148605 2023-01-24 03:23:50.511943: step: 72/466, loss: 0.13533838093280792 2023-01-24 03:23:51.195046: step: 74/466, loss: 0.2750799357891083 2023-01-24 03:23:51.876735: step: 76/466, loss: 0.09845469892024994 2023-01-24 03:23:52.609474: step: 78/466, loss: 0.05216136947274208 2023-01-24 03:23:53.368175: step: 80/466, loss: 0.12190327048301697 2023-01-24 03:23:54.114640: step: 82/466, loss: 0.11941462010145187 2023-01-24 03:23:54.790048: step: 84/466, loss: 0.14103659987449646 2023-01-24 03:23:55.505841: step: 86/466, loss: 0.13708628714084625 2023-01-24 03:23:56.224073: step: 88/466, loss: 0.5550259947776794 2023-01-24 03:23:56.899278: step: 90/466, loss: 0.10664961487054825 2023-01-24 03:23:57.603227: step: 92/466, loss: 0.19297471642494202 2023-01-24 03:23:58.356523: step: 94/466, loss: 0.1483359932899475 2023-01-24 03:23:59.061663: step: 96/466, loss: 0.128028005361557 2023-01-24 03:23:59.833443: step: 98/466, loss: 0.17673911154270172 2023-01-24 03:24:00.754965: step: 100/466, loss: 0.11707588285207748 2023-01-24 03:24:01.563904: step: 102/466, loss: 0.38031941652297974 2023-01-24 03:24:02.305462: step: 104/466, loss: 0.06883509457111359 2023-01-24 03:24:03.089422: step: 106/466, loss: 0.20923328399658203 2023-01-24 03:24:03.721030: step: 108/466, loss: 0.08676006644964218 2023-01-24 03:24:04.507824: step: 110/466, loss: 0.04772452265024185 2023-01-24 03:24:05.263609: step: 112/466, loss: 0.6443043947219849 2023-01-24 03:24:06.080493: step: 114/466, loss: 0.12380842864513397 2023-01-24 03:24:06.809124: step: 116/466, loss: 0.09070102870464325 2023-01-24 03:24:07.526988: step: 118/466, loss: 0.07567242532968521 2023-01-24 03:24:08.290677: step: 120/466, loss: 0.12793821096420288 2023-01-24 03:24:09.009754: step: 122/466, loss: 0.11282803863286972 2023-01-24 03:24:09.719680: step: 124/466, loss: 0.049191106110811234 2023-01-24 03:24:10.554491: step: 126/466, loss: 0.3585178554058075 2023-01-24 03:24:11.272461: step: 128/466, loss: 0.25831490755081177 2023-01-24 03:24:12.063626: step: 130/466, loss: 0.07179155200719833 2023-01-24 03:24:12.879351: step: 132/466, loss: 0.0197906531393528 2023-01-24 03:24:13.583288: step: 134/466, loss: 0.03828202560544014 2023-01-24 03:24:14.337244: step: 136/466, loss: 0.10379919409751892 2023-01-24 03:24:15.122095: step: 138/466, loss: 0.16227275133132935 2023-01-24 03:24:15.856465: step: 140/466, loss: 0.17869989573955536 2023-01-24 03:24:16.632626: step: 142/466, loss: 0.16198000311851501 2023-01-24 03:24:17.378898: step: 144/466, loss: 0.08363251388072968 2023-01-24 03:24:18.202168: step: 146/466, loss: 1.946467399597168 2023-01-24 03:24:18.925655: step: 148/466, loss: 0.1306145340204239 2023-01-24 03:24:19.671415: step: 150/466, loss: 0.162128284573555 2023-01-24 03:24:20.346116: step: 152/466, loss: 0.1288970559835434 2023-01-24 03:24:21.115505: step: 154/466, loss: 3.0442185401916504 2023-01-24 03:24:21.833334: step: 156/466, loss: 0.08906519412994385 2023-01-24 03:24:22.507447: step: 158/466, loss: 0.14633415639400482 2023-01-24 03:24:23.179471: step: 160/466, loss: 0.0988697037100792 2023-01-24 03:24:23.914625: step: 162/466, loss: 0.17721976339817047 2023-01-24 03:24:24.774956: step: 164/466, loss: 0.14720726013183594 2023-01-24 03:24:25.574051: step: 166/466, loss: 0.07897642999887466 2023-01-24 03:24:26.365473: step: 168/466, loss: 0.050784893333911896 2023-01-24 03:24:27.116222: step: 170/466, loss: 0.09608414769172668 2023-01-24 03:24:27.896551: step: 172/466, loss: 0.29185426235198975 2023-01-24 03:24:28.648767: step: 174/466, loss: 0.36591893434524536 2023-01-24 03:24:29.488018: step: 176/466, loss: 0.19042915105819702 2023-01-24 03:24:30.317587: step: 178/466, loss: 0.41831472516059875 2023-01-24 03:24:31.095361: step: 180/466, loss: 0.04827124997973442 2023-01-24 03:24:31.818644: step: 182/466, loss: 0.20153003931045532 2023-01-24 03:24:32.577395: step: 184/466, loss: 0.07360716909170151 2023-01-24 03:24:33.313319: step: 186/466, loss: 0.05430116504430771 2023-01-24 03:24:34.145032: step: 188/466, loss: 0.21173177659511566 2023-01-24 03:24:34.840922: step: 190/466, loss: 0.09703905880451202 2023-01-24 03:24:35.660273: step: 192/466, loss: 0.10637550055980682 2023-01-24 03:24:36.508965: step: 194/466, loss: 0.1917353719472885 2023-01-24 03:24:37.314023: step: 196/466, loss: 0.04204834625124931 2023-01-24 03:24:38.107430: step: 198/466, loss: 0.11851033568382263 2023-01-24 03:24:38.830277: step: 200/466, loss: 0.2094639390707016 2023-01-24 03:24:39.596752: step: 202/466, loss: 0.02567223832011223 2023-01-24 03:24:40.370598: step: 204/466, loss: 0.11470548808574677 2023-01-24 03:24:41.191221: step: 206/466, loss: 0.18110646307468414 2023-01-24 03:24:41.960206: step: 208/466, loss: 0.041296664625406265 2023-01-24 03:24:42.707767: step: 210/466, loss: 0.15020208060741425 2023-01-24 03:24:43.477624: step: 212/466, loss: 0.16802366077899933 2023-01-24 03:24:44.319406: step: 214/466, loss: 0.06758114695549011 2023-01-24 03:24:45.099047: step: 216/466, loss: 0.2651681900024414 2023-01-24 03:24:45.853071: step: 218/466, loss: 0.37192392349243164 2023-01-24 03:24:46.627448: step: 220/466, loss: 0.1015489399433136 2023-01-24 03:24:47.343676: step: 222/466, loss: 0.15365496277809143 2023-01-24 03:24:48.147856: step: 224/466, loss: 0.11318421363830566 2023-01-24 03:24:48.935194: step: 226/466, loss: 0.3373357653617859 2023-01-24 03:24:49.713227: step: 228/466, loss: 0.24359995126724243 2023-01-24 03:24:50.434385: step: 230/466, loss: 0.41684919595718384 2023-01-24 03:24:51.155732: step: 232/466, loss: 0.015662597492337227 2023-01-24 03:24:51.921745: step: 234/466, loss: 0.12667682766914368 2023-01-24 03:24:52.694715: step: 236/466, loss: 0.192781463265419 2023-01-24 03:24:53.483310: step: 238/466, loss: 0.06522570550441742 2023-01-24 03:24:54.256528: step: 240/466, loss: 0.13598352670669556 2023-01-24 03:24:55.034412: step: 242/466, loss: 0.04600071534514427 2023-01-24 03:24:55.761251: step: 244/466, loss: 0.07195093482732773 2023-01-24 03:24:56.552540: step: 246/466, loss: 0.1215813159942627 2023-01-24 03:24:57.270645: step: 248/466, loss: 0.07753612101078033 2023-01-24 03:24:58.055147: step: 250/466, loss: 0.13835875689983368 2023-01-24 03:24:58.853060: step: 252/466, loss: 0.20455078780651093 2023-01-24 03:24:59.582525: step: 254/466, loss: 0.6708722114562988 2023-01-24 03:25:00.285889: step: 256/466, loss: 0.12679585814476013 2023-01-24 03:25:00.983960: step: 258/466, loss: 0.13701783120632172 2023-01-24 03:25:01.729657: step: 260/466, loss: 0.35117268562316895 2023-01-24 03:25:02.512850: step: 262/466, loss: 1.0611485242843628 2023-01-24 03:25:03.322713: step: 264/466, loss: 0.03565460443496704 2023-01-24 03:25:04.147114: step: 266/466, loss: 0.11983101069927216 2023-01-24 03:25:04.848477: step: 268/466, loss: 0.11047709733247757 2023-01-24 03:25:05.568788: step: 270/466, loss: 0.5995880961418152 2023-01-24 03:25:06.274251: step: 272/466, loss: 0.07326005399227142 2023-01-24 03:25:07.043137: step: 274/466, loss: 0.039359912276268005 2023-01-24 03:25:07.828151: step: 276/466, loss: 0.038333382457494736 2023-01-24 03:25:08.564860: step: 278/466, loss: 0.1003439798951149 2023-01-24 03:25:09.411932: step: 280/466, loss: 0.5510913729667664 2023-01-24 03:25:10.187190: step: 282/466, loss: 0.1375330090522766 2023-01-24 03:25:10.920923: step: 284/466, loss: 0.09970781207084656 2023-01-24 03:25:11.677265: step: 286/466, loss: 0.08100002259016037 2023-01-24 03:25:12.414116: step: 288/466, loss: 0.24892517924308777 2023-01-24 03:25:13.257040: step: 290/466, loss: 0.14371928572654724 2023-01-24 03:25:14.021227: step: 292/466, loss: 0.22054694592952728 2023-01-24 03:25:14.801331: step: 294/466, loss: 0.06324626505374908 2023-01-24 03:25:15.644322: step: 296/466, loss: 0.2058716118335724 2023-01-24 03:25:16.375594: step: 298/466, loss: 0.0764944925904274 2023-01-24 03:25:17.125152: step: 300/466, loss: 0.17809653282165527 2023-01-24 03:25:17.893288: step: 302/466, loss: 0.10286303609609604 2023-01-24 03:25:18.627681: step: 304/466, loss: 0.14660319685935974 2023-01-24 03:25:19.511529: step: 306/466, loss: 9.334784507751465 2023-01-24 03:25:20.249336: step: 308/466, loss: 1.5064904689788818 2023-01-24 03:25:21.101808: step: 310/466, loss: 0.13779456913471222 2023-01-24 03:25:21.822323: step: 312/466, loss: 0.13367831707000732 2023-01-24 03:25:22.662305: step: 314/466, loss: 0.08212552964687347 2023-01-24 03:25:23.443545: step: 316/466, loss: 4.54255485534668 2023-01-24 03:25:24.253832: step: 318/466, loss: 0.23854738473892212 2023-01-24 03:25:25.050844: step: 320/466, loss: 0.17678216099739075 2023-01-24 03:25:25.734695: step: 322/466, loss: 0.42571911215782166 2023-01-24 03:25:26.507581: step: 324/466, loss: 0.25691738724708557 2023-01-24 03:25:27.278849: step: 326/466, loss: 0.18332761526107788 2023-01-24 03:25:28.063947: step: 328/466, loss: 0.12483546882867813 2023-01-24 03:25:28.801551: step: 330/466, loss: 0.02915043570101261 2023-01-24 03:25:29.513566: step: 332/466, loss: 0.15927720069885254 2023-01-24 03:25:30.302602: step: 334/466, loss: 0.23633554577827454 2023-01-24 03:25:31.100954: step: 336/466, loss: 0.16368862986564636 2023-01-24 03:25:31.886763: step: 338/466, loss: 0.07220807671546936 2023-01-24 03:25:32.674994: step: 340/466, loss: 0.20814989507198334 2023-01-24 03:25:33.410116: step: 342/466, loss: 0.2206103801727295 2023-01-24 03:25:34.184961: step: 344/466, loss: 0.14292998611927032 2023-01-24 03:25:34.889890: step: 346/466, loss: 0.12245476245880127 2023-01-24 03:25:35.687617: step: 348/466, loss: 0.12565559148788452 2023-01-24 03:25:36.362001: step: 350/466, loss: 1.2255115509033203 2023-01-24 03:25:37.161847: step: 352/466, loss: 0.05467798188328743 2023-01-24 03:25:37.979826: step: 354/466, loss: 0.1506376415491104 2023-01-24 03:25:38.703980: step: 356/466, loss: 0.7821328639984131 2023-01-24 03:25:39.494153: step: 358/466, loss: 0.048696957528591156 2023-01-24 03:25:40.314353: step: 360/466, loss: 0.17461726069450378 2023-01-24 03:25:41.106175: step: 362/466, loss: 0.19472767412662506 2023-01-24 03:25:41.847438: step: 364/466, loss: 0.1759956032037735 2023-01-24 03:25:42.538136: step: 366/466, loss: 0.040069859474897385 2023-01-24 03:25:43.286041: step: 368/466, loss: 0.26998400688171387 2023-01-24 03:25:44.009709: step: 370/466, loss: 0.35295793414115906 2023-01-24 03:25:44.760267: step: 372/466, loss: 0.1004018560051918 2023-01-24 03:25:45.612393: step: 374/466, loss: 0.26358577609062195 2023-01-24 03:25:46.415709: step: 376/466, loss: 0.17438524961471558 2023-01-24 03:25:47.132394: step: 378/466, loss: 0.06375767290592194 2023-01-24 03:25:47.939083: step: 380/466, loss: 0.39776426553726196 2023-01-24 03:25:48.701651: step: 382/466, loss: 0.049622464925050735 2023-01-24 03:25:49.429416: step: 384/466, loss: 0.0817975103855133 2023-01-24 03:25:50.409440: step: 386/466, loss: 0.08786547183990479 2023-01-24 03:25:51.203395: step: 388/466, loss: 0.14453229308128357 2023-01-24 03:25:51.921700: step: 390/466, loss: 0.12139902263879776 2023-01-24 03:25:52.629676: step: 392/466, loss: 0.08930321037769318 2023-01-24 03:25:53.347568: step: 394/466, loss: 0.19578050076961517 2023-01-24 03:25:54.125551: step: 396/466, loss: 0.07919494807720184 2023-01-24 03:25:54.912934: step: 398/466, loss: 0.11601810902357101 2023-01-24 03:25:55.711409: step: 400/466, loss: 0.19614189863204956 2023-01-24 03:25:56.459134: step: 402/466, loss: 0.1866857409477234 2023-01-24 03:25:57.233956: step: 404/466, loss: 0.12435347586870193 2023-01-24 03:25:57.964343: step: 406/466, loss: 0.07915801554918289 2023-01-24 03:25:58.636079: step: 408/466, loss: 0.07421186566352844 2023-01-24 03:25:59.406291: step: 410/466, loss: 0.08340541273355484 2023-01-24 03:26:00.169128: step: 412/466, loss: 1.495497703552246 2023-01-24 03:26:01.046616: step: 414/466, loss: 1.0178102254867554 2023-01-24 03:26:01.823804: step: 416/466, loss: 0.09015622735023499 2023-01-24 03:26:02.602265: step: 418/466, loss: 0.10119029134511948 2023-01-24 03:26:03.282080: step: 420/466, loss: 0.04814134165644646 2023-01-24 03:26:04.030678: step: 422/466, loss: 0.07919945567846298 2023-01-24 03:26:04.761586: step: 424/466, loss: 0.25124919414520264 2023-01-24 03:26:05.547930: step: 426/466, loss: 0.10618660598993301 2023-01-24 03:26:06.304364: step: 428/466, loss: 0.2987133860588074 2023-01-24 03:26:07.116927: step: 430/466, loss: 0.08233465254306793 2023-01-24 03:26:07.844919: step: 432/466, loss: 0.7407652139663696 2023-01-24 03:26:08.629484: step: 434/466, loss: 0.024903899058699608 2023-01-24 03:26:09.441466: step: 436/466, loss: 0.17847418785095215 2023-01-24 03:26:10.238643: step: 438/466, loss: 0.17466199398040771 2023-01-24 03:26:11.026932: step: 440/466, loss: 0.1522059440612793 2023-01-24 03:26:11.807040: step: 442/466, loss: 0.07020722329616547 2023-01-24 03:26:12.757088: step: 444/466, loss: 0.7656755447387695 2023-01-24 03:26:13.555346: step: 446/466, loss: 0.06377051770687103 2023-01-24 03:26:14.263254: step: 448/466, loss: 0.1730051189661026 2023-01-24 03:26:15.002370: step: 450/466, loss: 0.09168146550655365 2023-01-24 03:26:15.771687: step: 452/466, loss: 0.1671973019838333 2023-01-24 03:26:16.560949: step: 454/466, loss: 0.39160823822021484 2023-01-24 03:26:17.393139: step: 456/466, loss: 0.06910758465528488 2023-01-24 03:26:18.147633: step: 458/466, loss: 0.24151785671710968 2023-01-24 03:26:18.879138: step: 460/466, loss: 0.24184173345565796 2023-01-24 03:26:19.666114: step: 462/466, loss: 0.06884318590164185 2023-01-24 03:26:20.491111: step: 464/466, loss: 0.18182960152626038 2023-01-24 03:26:21.231709: step: 466/466, loss: 0.18162575364112854 2023-01-24 03:26:22.144185: step: 468/466, loss: 0.062338173389434814 2023-01-24 03:26:22.870438: step: 470/466, loss: 0.13774867355823517 2023-01-24 03:26:23.615758: step: 472/466, loss: 0.06745638698339462 2023-01-24 03:26:24.462404: step: 474/466, loss: 0.04645160213112831 2023-01-24 03:26:25.208511: step: 476/466, loss: 0.14952293038368225 2023-01-24 03:26:25.994434: step: 478/466, loss: 0.09848786145448685 2023-01-24 03:26:26.742944: step: 480/466, loss: 0.2773403525352478 2023-01-24 03:26:27.414057: step: 482/466, loss: 0.03563261032104492 2023-01-24 03:26:28.124043: step: 484/466, loss: 0.30415090918540955 2023-01-24 03:26:29.049120: step: 486/466, loss: 0.44185060262680054 2023-01-24 03:26:29.835172: step: 488/466, loss: 0.6004049777984619 2023-01-24 03:26:30.572800: step: 490/466, loss: 0.5131444334983826 2023-01-24 03:26:31.334879: step: 492/466, loss: 0.11282426118850708 2023-01-24 03:26:32.108932: step: 494/466, loss: 0.07206586748361588 2023-01-24 03:26:32.832066: step: 496/466, loss: 0.11763086169958115 2023-01-24 03:26:33.535006: step: 498/466, loss: 0.17410100996494293 2023-01-24 03:26:34.335776: step: 500/466, loss: 0.4359200894832611 2023-01-24 03:26:34.998482: step: 502/466, loss: 0.10102607309818268 2023-01-24 03:26:35.773551: step: 504/466, loss: 0.19833999872207642 2023-01-24 03:26:36.577612: step: 506/466, loss: 0.2305871993303299 2023-01-24 03:26:37.320475: step: 508/466, loss: 0.10187660902738571 2023-01-24 03:26:38.023865: step: 510/466, loss: 0.25623825192451477 2023-01-24 03:26:38.809900: step: 512/466, loss: 0.07170939445495605 2023-01-24 03:26:39.578022: step: 514/466, loss: 0.1135045662522316 2023-01-24 03:26:40.330332: step: 516/466, loss: 0.26936087012290955 2023-01-24 03:26:41.183112: step: 518/466, loss: 1.3775463104248047 2023-01-24 03:26:42.046578: step: 520/466, loss: 0.16075366735458374 2023-01-24 03:26:42.828422: step: 522/466, loss: 0.11880484968423843 2023-01-24 03:26:43.527564: step: 524/466, loss: 0.2878868877887726 2023-01-24 03:26:44.271014: step: 526/466, loss: 0.09803323447704315 2023-01-24 03:26:44.989575: step: 528/466, loss: 1.1583340167999268 2023-01-24 03:26:45.770789: step: 530/466, loss: 0.11727601289749146 2023-01-24 03:26:46.520988: step: 532/466, loss: 0.15182524919509888 2023-01-24 03:26:47.425070: step: 534/466, loss: 0.056869395077228546 2023-01-24 03:26:48.182117: step: 536/466, loss: 0.34635502099990845 2023-01-24 03:26:48.837005: step: 538/466, loss: 0.060244232416152954 2023-01-24 03:26:49.654484: step: 540/466, loss: 0.48993760347366333 2023-01-24 03:26:50.457933: step: 542/466, loss: 0.1992948055267334 2023-01-24 03:26:51.263900: step: 544/466, loss: 0.08805844187736511 2023-01-24 03:26:52.012133: step: 546/466, loss: 1.0619785785675049 2023-01-24 03:26:52.772134: step: 548/466, loss: 0.0847192108631134 2023-01-24 03:26:53.550123: step: 550/466, loss: 0.2873402535915375 2023-01-24 03:26:54.250012: step: 552/466, loss: 0.1951029747724533 2023-01-24 03:26:55.045050: step: 554/466, loss: 0.401314914226532 2023-01-24 03:26:55.861041: step: 556/466, loss: 0.26130008697509766 2023-01-24 03:26:56.608895: step: 558/466, loss: 0.3713858425617218 2023-01-24 03:26:57.328117: step: 560/466, loss: 0.12568676471710205 2023-01-24 03:26:58.061307: step: 562/466, loss: 0.40286916494369507 2023-01-24 03:26:58.908457: step: 564/466, loss: 0.06690298020839691 2023-01-24 03:26:59.586407: step: 566/466, loss: 0.2356967180967331 2023-01-24 03:27:00.395714: step: 568/466, loss: 0.08960768580436707 2023-01-24 03:27:01.150254: step: 570/466, loss: 0.4388246536254883 2023-01-24 03:27:01.886280: step: 572/466, loss: 0.17967738211154938 2023-01-24 03:27:02.705594: step: 574/466, loss: 0.08611579239368439 2023-01-24 03:27:03.426562: step: 576/466, loss: 0.12125842273235321 2023-01-24 03:27:04.204593: step: 578/466, loss: 0.2086947113275528 2023-01-24 03:27:04.930150: step: 580/466, loss: 0.096670962870121 2023-01-24 03:27:05.636981: step: 582/466, loss: 0.12036548554897308 2023-01-24 03:27:06.282990: step: 584/466, loss: 0.024983666837215424 2023-01-24 03:27:07.077422: step: 586/466, loss: 0.3695588707923889 2023-01-24 03:27:07.820652: step: 588/466, loss: 0.16849878430366516 2023-01-24 03:27:08.594124: step: 590/466, loss: 0.15996284782886505 2023-01-24 03:27:09.464650: step: 592/466, loss: 0.08652857691049576 2023-01-24 03:27:10.300758: step: 594/466, loss: 0.0542147234082222 2023-01-24 03:27:11.200346: step: 596/466, loss: 0.034066833555698395 2023-01-24 03:27:11.916033: step: 598/466, loss: 0.21105864644050598 2023-01-24 03:27:12.721419: step: 600/466, loss: 0.1679028868675232 2023-01-24 03:27:13.428030: step: 602/466, loss: 0.27978837490081787 2023-01-24 03:27:14.192881: step: 604/466, loss: 0.5022321343421936 2023-01-24 03:27:14.955450: step: 606/466, loss: 0.10941528528928757 2023-01-24 03:27:15.713868: step: 608/466, loss: 0.14708594977855682 2023-01-24 03:27:16.391833: step: 610/466, loss: 0.09767206758260727 2023-01-24 03:27:17.127003: step: 612/466, loss: 0.0527835339307785 2023-01-24 03:27:17.881518: step: 614/466, loss: 0.1914864331483841 2023-01-24 03:27:18.641508: step: 616/466, loss: 0.10214617848396301 2023-01-24 03:27:19.336352: step: 618/466, loss: 3.6446030139923096 2023-01-24 03:27:20.080574: step: 620/466, loss: 0.04150984436273575 2023-01-24 03:27:20.874984: step: 622/466, loss: 0.8228867650032043 2023-01-24 03:27:21.604707: step: 624/466, loss: 0.07708427309989929 2023-01-24 03:27:22.312886: step: 626/466, loss: 0.6810038685798645 2023-01-24 03:27:23.036326: step: 628/466, loss: 6.378917217254639 2023-01-24 03:27:23.828591: step: 630/466, loss: 0.3479134738445282 2023-01-24 03:27:24.569152: step: 632/466, loss: 0.06563637405633926 2023-01-24 03:27:25.374171: step: 634/466, loss: 0.29593950510025024 2023-01-24 03:27:26.099400: step: 636/466, loss: 0.5084291696548462 2023-01-24 03:27:26.819264: step: 638/466, loss: 0.21293936669826508 2023-01-24 03:27:27.518244: step: 640/466, loss: 0.05789226293563843 2023-01-24 03:27:28.317094: step: 642/466, loss: 0.2178761065006256 2023-01-24 03:27:29.137178: step: 644/466, loss: 0.18674539029598236 2023-01-24 03:27:29.802753: step: 646/466, loss: 0.28289663791656494 2023-01-24 03:27:30.567787: step: 648/466, loss: 0.05782013759016991 2023-01-24 03:27:31.337356: step: 650/466, loss: 0.12950697541236877 2023-01-24 03:27:32.232909: step: 652/466, loss: 0.044820286333560944 2023-01-24 03:27:33.027014: step: 654/466, loss: 0.08404266089200974 2023-01-24 03:27:33.753036: step: 656/466, loss: 0.1321253478527069 2023-01-24 03:27:34.440193: step: 658/466, loss: 0.16074199974536896 2023-01-24 03:27:35.166034: step: 660/466, loss: 0.26025640964508057 2023-01-24 03:27:35.986505: step: 662/466, loss: 0.7558488845825195 2023-01-24 03:27:36.707471: step: 664/466, loss: 0.05508873984217644 2023-01-24 03:27:37.512085: step: 666/466, loss: 0.5754992365837097 2023-01-24 03:27:38.272641: step: 668/466, loss: 0.10862943530082703 2023-01-24 03:27:39.114895: step: 670/466, loss: 0.2049148827791214 2023-01-24 03:27:39.867059: step: 672/466, loss: 0.11638084799051285 2023-01-24 03:27:40.685027: step: 674/466, loss: 0.3473494052886963 2023-01-24 03:27:41.413033: step: 676/466, loss: 0.03944385051727295 2023-01-24 03:27:42.234822: step: 678/466, loss: 0.19364267587661743 2023-01-24 03:27:43.017620: step: 680/466, loss: 0.2050262987613678 2023-01-24 03:27:43.730992: step: 682/466, loss: 0.12509407103061676 2023-01-24 03:27:44.497240: step: 684/466, loss: 0.3392498791217804 2023-01-24 03:27:45.291009: step: 686/466, loss: 0.40512919425964355 2023-01-24 03:27:46.024958: step: 688/466, loss: 0.22947798669338226 2023-01-24 03:27:46.735874: step: 690/466, loss: 0.08351828902959824 2023-01-24 03:27:47.507576: step: 692/466, loss: 0.33065587282180786 2023-01-24 03:27:48.234534: step: 694/466, loss: 0.09166646748781204 2023-01-24 03:27:48.961879: step: 696/466, loss: 0.18775784969329834 2023-01-24 03:27:49.727316: step: 698/466, loss: 0.15353037416934967 2023-01-24 03:27:50.449101: step: 700/466, loss: 0.0546298585832119 2023-01-24 03:27:51.198881: step: 702/466, loss: 0.12193844467401505 2023-01-24 03:27:51.937562: step: 704/466, loss: 0.0375572144985199 2023-01-24 03:27:52.712773: step: 706/466, loss: 0.07901520282030106 2023-01-24 03:27:53.443276: step: 708/466, loss: 0.1349632441997528 2023-01-24 03:27:54.252041: step: 710/466, loss: 0.30271032452583313 2023-01-24 03:27:54.915964: step: 712/466, loss: 0.05177057161927223 2023-01-24 03:27:55.690026: step: 714/466, loss: 0.1005883440375328 2023-01-24 03:27:56.452040: step: 716/466, loss: 0.07584855705499649 2023-01-24 03:27:57.235977: step: 718/466, loss: 0.13204872608184814 2023-01-24 03:27:57.966018: step: 720/466, loss: 0.03609362989664078 2023-01-24 03:27:58.662584: step: 722/466, loss: 0.012614982202649117 2023-01-24 03:27:59.413590: step: 724/466, loss: 0.08021419495344162 2023-01-24 03:28:00.124806: step: 726/466, loss: 0.2547991871833801 2023-01-24 03:28:00.890199: step: 728/466, loss: 0.9453699588775635 2023-01-24 03:28:01.670170: step: 730/466, loss: 0.25624772906303406 2023-01-24 03:28:02.508723: step: 732/466, loss: 0.1573476493358612 2023-01-24 03:28:03.413861: step: 734/466, loss: 0.14874303340911865 2023-01-24 03:28:04.206989: step: 736/466, loss: 0.11184926331043243 2023-01-24 03:28:04.933743: step: 738/466, loss: 0.2025962769985199 2023-01-24 03:28:05.641507: step: 740/466, loss: 0.04139607027173042 2023-01-24 03:28:06.391851: step: 742/466, loss: 0.08207973837852478 2023-01-24 03:28:07.125017: step: 744/466, loss: 0.18964071571826935 2023-01-24 03:28:07.906084: step: 746/466, loss: 0.3260148763656616 2023-01-24 03:28:08.631841: step: 748/466, loss: 0.13423208892345428 2023-01-24 03:28:09.367266: step: 750/466, loss: 0.06044828146696091 2023-01-24 03:28:10.145770: step: 752/466, loss: 0.2879416346549988 2023-01-24 03:28:10.893124: step: 754/466, loss: 0.08656201511621475 2023-01-24 03:28:11.639015: step: 756/466, loss: 0.14122888445854187 2023-01-24 03:28:12.383873: step: 758/466, loss: 0.07630780339241028 2023-01-24 03:28:13.081800: step: 760/466, loss: 0.031992603093385696 2023-01-24 03:28:13.858747: step: 762/466, loss: 0.040653154253959656 2023-01-24 03:28:14.623079: step: 764/466, loss: 0.040650829672813416 2023-01-24 03:28:15.381973: step: 766/466, loss: 0.2310640513896942 2023-01-24 03:28:16.111332: step: 768/466, loss: 0.3783543109893799 2023-01-24 03:28:16.854206: step: 770/466, loss: 0.6130688190460205 2023-01-24 03:28:17.675260: step: 772/466, loss: 0.09674729406833649 2023-01-24 03:28:18.581361: step: 774/466, loss: 0.19277401268482208 2023-01-24 03:28:19.347232: step: 776/466, loss: 0.4734606146812439 2023-01-24 03:28:20.092115: step: 778/466, loss: 0.010649221017956734 2023-01-24 03:28:20.845889: step: 780/466, loss: 0.18568529188632965 2023-01-24 03:28:21.540158: step: 782/466, loss: 0.08577094972133636 2023-01-24 03:28:22.279698: step: 784/466, loss: 0.1252678632736206 2023-01-24 03:28:23.104079: step: 786/466, loss: 0.07036635279655457 2023-01-24 03:28:23.809682: step: 788/466, loss: 3.049663782119751 2023-01-24 03:28:24.626469: step: 790/466, loss: 0.2606522738933563 2023-01-24 03:28:25.439517: step: 792/466, loss: 0.11692289263010025 2023-01-24 03:28:26.249322: step: 794/466, loss: 0.09116509556770325 2023-01-24 03:28:26.995249: step: 796/466, loss: 0.1748039275407791 2023-01-24 03:28:27.849169: step: 798/466, loss: 0.06679972261190414 2023-01-24 03:28:28.574589: step: 800/466, loss: 0.7025142312049866 2023-01-24 03:28:29.294743: step: 802/466, loss: 0.11592617630958557 2023-01-24 03:28:30.086745: step: 804/466, loss: 0.9633765816688538 2023-01-24 03:28:30.956460: step: 806/466, loss: 0.1293555349111557 2023-01-24 03:28:31.917663: step: 808/466, loss: 0.1482734978199005 2023-01-24 03:28:32.684719: step: 810/466, loss: 0.028712695464491844 2023-01-24 03:28:33.457324: step: 812/466, loss: 0.22466611862182617 2023-01-24 03:28:34.180701: step: 814/466, loss: 0.3807362914085388 2023-01-24 03:28:34.919562: step: 816/466, loss: 0.11186535656452179 2023-01-24 03:28:35.758427: step: 818/466, loss: 0.15200169384479523 2023-01-24 03:28:36.562401: step: 820/466, loss: 0.13248823583126068 2023-01-24 03:28:37.310432: step: 822/466, loss: 0.5440301299095154 2023-01-24 03:28:38.022497: step: 824/466, loss: 0.033675309270620346 2023-01-24 03:28:38.736374: step: 826/466, loss: 0.13058869540691376 2023-01-24 03:28:39.501914: step: 828/466, loss: 0.09893123805522919 2023-01-24 03:28:40.273834: step: 830/466, loss: 0.5407994985580444 2023-01-24 03:28:41.043214: step: 832/466, loss: 0.11411946266889572 2023-01-24 03:28:41.728787: step: 834/466, loss: 0.06883563101291656 2023-01-24 03:28:42.500999: step: 836/466, loss: 0.26703953742980957 2023-01-24 03:28:43.328877: step: 838/466, loss: 0.9148061275482178 2023-01-24 03:28:44.112126: step: 840/466, loss: 0.0823197290301323 2023-01-24 03:28:44.949773: step: 842/466, loss: 0.054216478019952774 2023-01-24 03:28:45.729793: step: 844/466, loss: 0.17121522128582 2023-01-24 03:28:46.493042: step: 846/466, loss: 0.27832192182540894 2023-01-24 03:28:47.256655: step: 848/466, loss: 0.2275869995355606 2023-01-24 03:28:47.990717: step: 850/466, loss: 0.07137225568294525 2023-01-24 03:28:48.795698: step: 852/466, loss: 0.054541222751140594 2023-01-24 03:28:49.505663: step: 854/466, loss: 0.3268347978591919 2023-01-24 03:28:50.186366: step: 856/466, loss: 0.21499894559383392 2023-01-24 03:28:50.936867: step: 858/466, loss: 0.1782616823911667 2023-01-24 03:28:51.756005: step: 860/466, loss: 0.44444623589515686 2023-01-24 03:28:52.541933: step: 862/466, loss: 0.31309425830841064 2023-01-24 03:28:53.273754: step: 864/466, loss: 0.16758395731449127 2023-01-24 03:28:54.056808: step: 866/466, loss: 0.06523101776838303 2023-01-24 03:28:54.839992: step: 868/466, loss: 0.2688615918159485 2023-01-24 03:28:55.677697: step: 870/466, loss: 0.18860988318920135 2023-01-24 03:28:56.488639: step: 872/466, loss: 0.07963003218173981 2023-01-24 03:28:57.303522: step: 874/466, loss: 0.1542077660560608 2023-01-24 03:28:58.060238: step: 876/466, loss: 0.0746893659234047 2023-01-24 03:28:58.842629: step: 878/466, loss: 0.18620958924293518 2023-01-24 03:28:59.534693: step: 880/466, loss: 0.21131157875061035 2023-01-24 03:29:00.210390: step: 882/466, loss: 0.020140519365668297 2023-01-24 03:29:01.006043: step: 884/466, loss: 0.09388389438390732 2023-01-24 03:29:01.793689: step: 886/466, loss: 0.36890143156051636 2023-01-24 03:29:02.579767: step: 888/466, loss: 0.0899510309100151 2023-01-24 03:29:03.316355: step: 890/466, loss: 0.0766301304101944 2023-01-24 03:29:03.974787: step: 892/466, loss: 0.22256304323673248 2023-01-24 03:29:04.696902: step: 894/466, loss: 0.10125160962343216 2023-01-24 03:29:05.545491: step: 896/466, loss: 0.03004208765923977 2023-01-24 03:29:06.407843: step: 898/466, loss: 0.10825519263744354 2023-01-24 03:29:07.118132: step: 900/466, loss: 0.03943290561437607 2023-01-24 03:29:07.873762: step: 902/466, loss: 0.25123417377471924 2023-01-24 03:29:08.638736: step: 904/466, loss: 0.0888330340385437 2023-01-24 03:29:09.444158: step: 906/466, loss: 0.45769384503364563 2023-01-24 03:29:10.224296: step: 908/466, loss: 0.14918996393680573 2023-01-24 03:29:10.997566: step: 910/466, loss: 0.12507207691669464 2023-01-24 03:29:11.734645: step: 912/466, loss: 0.6189706921577454 2023-01-24 03:29:12.527779: step: 914/466, loss: 0.13033513724803925 2023-01-24 03:29:13.272366: step: 916/466, loss: 0.13682545721530914 2023-01-24 03:29:14.007937: step: 918/466, loss: 0.08202681690454483 2023-01-24 03:29:14.818875: step: 920/466, loss: 0.20634540915489197 2023-01-24 03:29:15.508142: step: 922/466, loss: 0.9963807463645935 2023-01-24 03:29:16.230061: step: 924/466, loss: 0.37003475427627563 2023-01-24 03:29:16.944038: step: 926/466, loss: 0.25558382272720337 2023-01-24 03:29:17.690084: step: 928/466, loss: 0.04824042692780495 2023-01-24 03:29:18.402972: step: 930/466, loss: 0.17482222616672516 2023-01-24 03:29:19.167287: step: 932/466, loss: 0.2238139510154724 ================================================== Loss: 0.270 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3269390207849, 'r': 0.34492997259279773, 'f1': 0.3356936206027782}, 'combined': 0.24735319412836287, 'epoch': 13} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.36627061271293215, 'r': 0.27779485431734074, 'f1': 0.3159557427243019}, 'combined': 0.19419718821103432, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3663210818307906, 'r': 0.3579798048251559, 'f1': 0.36210241294214424}, 'combined': 0.2668123042731589, 'epoch': 13} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37875489782431904, 'r': 0.26988336009473124, 'f1': 0.31518228605603094}, 'combined': 0.19467141197578386, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.43478260869565216, 'f1': 0.3174603174603175}, 'combined': 0.15873015873015875, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 13} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3663210818307906, 'r': 0.3579798048251559, 'f1': 0.36210241294214424}, 'combined': 0.2668123042731589, 'epoch': 13} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37875489782431904, 'r': 0.26988336009473124, 'f1': 0.31518228605603094}, 'combined': 0.19467141197578386, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 13} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:32:16.427896: step: 2/466, loss: 1.7948763370513916 2023-01-24 03:32:17.263456: step: 4/466, loss: 0.11857905983924866 2023-01-24 03:32:17.948358: step: 6/466, loss: 0.014733104966580868 2023-01-24 03:32:18.713206: step: 8/466, loss: 0.0764663890004158 2023-01-24 03:32:19.462107: step: 10/466, loss: 0.09650014340877533 2023-01-24 03:32:20.341544: step: 12/466, loss: 0.08068323135375977 2023-01-24 03:32:21.066224: step: 14/466, loss: 0.07612251490354538 2023-01-24 03:32:21.838148: step: 16/466, loss: 0.14870595932006836 2023-01-24 03:32:22.607246: step: 18/466, loss: 0.01473545003682375 2023-01-24 03:32:23.433401: step: 20/466, loss: 0.13693945109844208 2023-01-24 03:32:24.243900: step: 22/466, loss: 0.09258382767438889 2023-01-24 03:32:24.927803: step: 24/466, loss: 0.0811443030834198 2023-01-24 03:32:25.677143: step: 26/466, loss: 0.08367400616407394 2023-01-24 03:32:26.475243: step: 28/466, loss: 0.04633022099733353 2023-01-24 03:32:27.225469: step: 30/466, loss: 0.14591136574745178 2023-01-24 03:32:28.007354: step: 32/466, loss: 0.2338826209306717 2023-01-24 03:32:28.769711: step: 34/466, loss: 0.0796242207288742 2023-01-24 03:32:29.510632: step: 36/466, loss: 0.04654328525066376 2023-01-24 03:32:30.304094: step: 38/466, loss: 0.2032957375049591 2023-01-24 03:32:31.073649: step: 40/466, loss: 0.15072587132453918 2023-01-24 03:32:31.877102: step: 42/466, loss: 0.03954103961586952 2023-01-24 03:32:32.656904: step: 44/466, loss: 0.16800735890865326 2023-01-24 03:32:33.413509: step: 46/466, loss: 0.08304127305746078 2023-01-24 03:32:34.135435: step: 48/466, loss: 1.2582231760025024 2023-01-24 03:32:34.844186: step: 50/466, loss: 0.17893579602241516 2023-01-24 03:32:35.598566: step: 52/466, loss: 0.14870749413967133 2023-01-24 03:32:36.328746: step: 54/466, loss: 0.052345480769872665 2023-01-24 03:32:37.143016: step: 56/466, loss: 0.3695552945137024 2023-01-24 03:32:37.971020: step: 58/466, loss: 0.05745256692171097 2023-01-24 03:32:38.661686: step: 60/466, loss: 0.05403880774974823 2023-01-24 03:32:39.355577: step: 62/466, loss: 0.09356357157230377 2023-01-24 03:32:40.089683: step: 64/466, loss: 0.06248932331800461 2023-01-24 03:32:40.882665: step: 66/466, loss: 0.04734089598059654 2023-01-24 03:32:41.697313: step: 68/466, loss: 0.11174242943525314 2023-01-24 03:32:42.484170: step: 70/466, loss: 0.059995852410793304 2023-01-24 03:32:43.320659: step: 72/466, loss: 0.09739916026592255 2023-01-24 03:32:44.121495: step: 74/466, loss: 0.14582285284996033 2023-01-24 03:32:44.896544: step: 76/466, loss: 0.08362985402345657 2023-01-24 03:32:45.632016: step: 78/466, loss: 0.17142032086849213 2023-01-24 03:32:46.366247: step: 80/466, loss: 0.07356587052345276 2023-01-24 03:32:47.221938: step: 82/466, loss: 0.1305818408727646 2023-01-24 03:32:47.982447: step: 84/466, loss: 0.13178014755249023 2023-01-24 03:32:48.733929: step: 86/466, loss: 0.06631729006767273 2023-01-24 03:32:49.433600: step: 88/466, loss: 0.11189496517181396 2023-01-24 03:32:50.224771: step: 90/466, loss: 0.08014704287052155 2023-01-24 03:32:50.966483: step: 92/466, loss: 0.08639585971832275 2023-01-24 03:32:51.753323: step: 94/466, loss: 0.05464732274413109 2023-01-24 03:32:52.446474: step: 96/466, loss: 0.048334747552871704 2023-01-24 03:32:53.207391: step: 98/466, loss: 0.3570219576358795 2023-01-24 03:32:53.944744: step: 100/466, loss: 0.037182196974754333 2023-01-24 03:32:54.748166: step: 102/466, loss: 0.15509562194347382 2023-01-24 03:32:55.480333: step: 104/466, loss: 0.6345539093017578 2023-01-24 03:32:56.289565: step: 106/466, loss: 0.05826098099350929 2023-01-24 03:32:57.051534: step: 108/466, loss: 0.10702861845493317 2023-01-24 03:32:57.863977: step: 110/466, loss: 0.3654964566230774 2023-01-24 03:32:58.693257: step: 112/466, loss: 0.18933402001857758 2023-01-24 03:32:59.456311: step: 114/466, loss: 0.13730598986148834 2023-01-24 03:33:00.139996: step: 116/466, loss: 0.030305171385407448 2023-01-24 03:33:00.949339: step: 118/466, loss: 0.12774544954299927 2023-01-24 03:33:01.714367: step: 120/466, loss: 0.05463085323572159 2023-01-24 03:33:02.510415: step: 122/466, loss: 0.085533507168293 2023-01-24 03:33:03.282545: step: 124/466, loss: 0.09212604910135269 2023-01-24 03:33:04.004789: step: 126/466, loss: 0.048779770731925964 2023-01-24 03:33:04.786926: step: 128/466, loss: 0.05446954816579819 2023-01-24 03:33:05.503247: step: 130/466, loss: 0.16687119007110596 2023-01-24 03:33:06.313800: step: 132/466, loss: 0.06060370057821274 2023-01-24 03:33:07.082615: step: 134/466, loss: 0.09922155737876892 2023-01-24 03:33:07.842808: step: 136/466, loss: 0.3732868731021881 2023-01-24 03:33:08.566188: step: 138/466, loss: 0.03313040733337402 2023-01-24 03:33:09.291961: step: 140/466, loss: 0.18710584938526154 2023-01-24 03:33:10.099140: step: 142/466, loss: 0.16831213235855103 2023-01-24 03:33:10.893110: step: 144/466, loss: 0.11231508105993271 2023-01-24 03:33:11.641174: step: 146/466, loss: 0.029249688610434532 2023-01-24 03:33:12.388952: step: 148/466, loss: 0.062171828001737595 2023-01-24 03:33:13.100000: step: 150/466, loss: 0.235786572098732 2023-01-24 03:33:13.874827: step: 152/466, loss: 0.06076842173933983 2023-01-24 03:33:14.648535: step: 154/466, loss: 0.13541647791862488 2023-01-24 03:33:15.425745: step: 156/466, loss: 5.351958751678467 2023-01-24 03:33:16.205935: step: 158/466, loss: 0.09068594127893448 2023-01-24 03:33:16.982831: step: 160/466, loss: 0.06700941920280457 2023-01-24 03:33:17.796495: step: 162/466, loss: 0.19954699277877808 2023-01-24 03:33:18.622000: step: 164/466, loss: 0.14579974114894867 2023-01-24 03:33:19.395063: step: 166/466, loss: 0.10689794272184372 2023-01-24 03:33:20.140252: step: 168/466, loss: 0.018217479810118675 2023-01-24 03:33:20.859370: step: 170/466, loss: 0.04633485898375511 2023-01-24 03:33:21.619672: step: 172/466, loss: 0.11263962835073471 2023-01-24 03:33:22.350869: step: 174/466, loss: 0.14510999619960785 2023-01-24 03:33:23.294373: step: 176/466, loss: 0.15911799669265747 2023-01-24 03:33:24.071738: step: 178/466, loss: 0.27858880162239075 2023-01-24 03:33:24.864022: step: 180/466, loss: 0.10577763617038727 2023-01-24 03:33:25.664940: step: 182/466, loss: 0.08947893977165222 2023-01-24 03:33:26.413183: step: 184/466, loss: 0.06114058196544647 2023-01-24 03:33:27.127273: step: 186/466, loss: 0.0358128622174263 2023-01-24 03:33:27.902681: step: 188/466, loss: 0.053163789212703705 2023-01-24 03:33:28.656246: step: 190/466, loss: 0.19581694900989532 2023-01-24 03:33:29.384498: step: 192/466, loss: 0.30720922350883484 2023-01-24 03:33:30.226183: step: 194/466, loss: 0.16692815721035004 2023-01-24 03:33:30.920303: step: 196/466, loss: 0.019776280969381332 2023-01-24 03:33:31.670903: step: 198/466, loss: 1.4734183549880981 2023-01-24 03:33:32.445042: step: 200/466, loss: 0.07742732763290405 2023-01-24 03:33:33.171780: step: 202/466, loss: 0.0917370393872261 2023-01-24 03:33:34.040395: step: 204/466, loss: 0.21086423099040985 2023-01-24 03:33:34.878816: step: 206/466, loss: 0.13101069629192352 2023-01-24 03:33:35.639396: step: 208/466, loss: 0.12070529162883759 2023-01-24 03:33:36.390002: step: 210/466, loss: 0.6178359985351562 2023-01-24 03:33:37.168328: step: 212/466, loss: 0.14963263273239136 2023-01-24 03:33:37.931991: step: 214/466, loss: 0.17099174857139587 2023-01-24 03:33:38.627550: step: 216/466, loss: 0.1138356477022171 2023-01-24 03:33:39.359113: step: 218/466, loss: 0.12635937333106995 2023-01-24 03:33:40.154536: step: 220/466, loss: 0.2430417686700821 2023-01-24 03:33:40.814936: step: 222/466, loss: 0.009499253705143929 2023-01-24 03:33:41.612320: step: 224/466, loss: 0.7551463842391968 2023-01-24 03:33:42.392796: step: 226/466, loss: 0.08994847536087036 2023-01-24 03:33:43.193600: step: 228/466, loss: 0.10397832840681076 2023-01-24 03:33:43.986467: step: 230/466, loss: 0.29179275035858154 2023-01-24 03:33:44.777332: step: 232/466, loss: 0.036685217171907425 2023-01-24 03:33:45.516134: step: 234/466, loss: 0.060931991785764694 2023-01-24 03:33:46.255882: step: 236/466, loss: 0.07507771253585815 2023-01-24 03:33:46.986086: step: 238/466, loss: 0.1963038593530655 2023-01-24 03:33:47.741859: step: 240/466, loss: 0.5022599101066589 2023-01-24 03:33:48.564746: step: 242/466, loss: 1.7030168771743774 2023-01-24 03:33:49.355219: step: 244/466, loss: 0.944342851638794 2023-01-24 03:33:50.137134: step: 246/466, loss: 0.19377201795578003 2023-01-24 03:33:50.868750: step: 248/466, loss: 0.07232809066772461 2023-01-24 03:33:51.713947: step: 250/466, loss: 0.1252468079328537 2023-01-24 03:33:52.443725: step: 252/466, loss: 0.06834293901920319 2023-01-24 03:33:53.265070: step: 254/466, loss: 0.09259863197803497 2023-01-24 03:33:53.956484: step: 256/466, loss: 0.15927770733833313 2023-01-24 03:33:54.764274: step: 258/466, loss: 0.08494063466787338 2023-01-24 03:33:55.515627: step: 260/466, loss: 0.19152744114398956 2023-01-24 03:33:56.309302: step: 262/466, loss: 0.09181059896945953 2023-01-24 03:33:57.071915: step: 264/466, loss: 0.19823607802391052 2023-01-24 03:33:57.811529: step: 266/466, loss: 0.09367074817419052 2023-01-24 03:33:58.662214: step: 268/466, loss: 0.4473530650138855 2023-01-24 03:33:59.402175: step: 270/466, loss: 0.35830551385879517 2023-01-24 03:34:00.245457: step: 272/466, loss: 0.143303781747818 2023-01-24 03:34:01.064444: step: 274/466, loss: 0.12418963015079498 2023-01-24 03:34:01.892611: step: 276/466, loss: 0.18688806891441345 2023-01-24 03:34:02.651185: step: 278/466, loss: 0.13030363619327545 2023-01-24 03:34:03.411900: step: 280/466, loss: 0.12113102525472641 2023-01-24 03:34:04.258625: step: 282/466, loss: 0.05125928670167923 2023-01-24 03:34:05.070446: step: 284/466, loss: 0.052229441702365875 2023-01-24 03:34:05.902924: step: 286/466, loss: 0.20199353992938995 2023-01-24 03:34:06.706155: step: 288/466, loss: 0.4484192430973053 2023-01-24 03:34:07.383193: step: 290/466, loss: 0.10080991685390472 2023-01-24 03:34:08.141460: step: 292/466, loss: 0.17666229605674744 2023-01-24 03:34:09.017355: step: 294/466, loss: 0.19794604182243347 2023-01-24 03:34:09.799050: step: 296/466, loss: 0.2858116626739502 2023-01-24 03:34:10.502067: step: 298/466, loss: 0.06229928135871887 2023-01-24 03:34:11.298072: step: 300/466, loss: 0.04586632549762726 2023-01-24 03:34:12.060632: step: 302/466, loss: 0.4339018166065216 2023-01-24 03:34:12.822803: step: 304/466, loss: 0.2180899679660797 2023-01-24 03:34:13.558155: step: 306/466, loss: 0.10189656913280487 2023-01-24 03:34:14.281218: step: 308/466, loss: 0.08927162736654282 2023-01-24 03:34:14.996660: step: 310/466, loss: 0.05226528272032738 2023-01-24 03:34:15.717393: step: 312/466, loss: 0.08544369041919708 2023-01-24 03:34:16.381259: step: 314/466, loss: 0.10923538357019424 2023-01-24 03:34:17.175631: step: 316/466, loss: 0.059774525463581085 2023-01-24 03:34:17.850272: step: 318/466, loss: 0.07523495703935623 2023-01-24 03:34:18.536695: step: 320/466, loss: 0.13578970730304718 2023-01-24 03:34:19.489419: step: 322/466, loss: 0.046819448471069336 2023-01-24 03:34:20.269824: step: 324/466, loss: 0.37413427233695984 2023-01-24 03:34:21.086615: step: 326/466, loss: 0.057528331875801086 2023-01-24 03:34:21.804356: step: 328/466, loss: 0.07809772342443466 2023-01-24 03:34:22.537028: step: 330/466, loss: 0.20363347232341766 2023-01-24 03:34:23.322068: step: 332/466, loss: 0.20195549726486206 2023-01-24 03:34:24.066668: step: 334/466, loss: 0.18331404030323029 2023-01-24 03:34:24.749566: step: 336/466, loss: 0.11527480185031891 2023-01-24 03:34:25.417445: step: 338/466, loss: 0.08166998624801636 2023-01-24 03:34:26.213925: step: 340/466, loss: 0.06871407479047775 2023-01-24 03:34:26.975016: step: 342/466, loss: 0.23992781341075897 2023-01-24 03:34:27.911756: step: 344/466, loss: 0.10774870216846466 2023-01-24 03:34:28.780890: step: 346/466, loss: 0.08146621286869049 2023-01-24 03:34:29.510815: step: 348/466, loss: 0.15509775280952454 2023-01-24 03:34:30.214577: step: 350/466, loss: 0.2556403875350952 2023-01-24 03:34:30.971504: step: 352/466, loss: 0.10612188279628754 2023-01-24 03:34:31.888660: step: 354/466, loss: 0.1254601627588272 2023-01-24 03:34:32.586047: step: 356/466, loss: 0.09702162444591522 2023-01-24 03:34:33.322167: step: 358/466, loss: 0.07175100594758987 2023-01-24 03:34:34.048527: step: 360/466, loss: 0.10510864108800888 2023-01-24 03:34:34.768296: step: 362/466, loss: 0.407501220703125 2023-01-24 03:34:35.529789: step: 364/466, loss: 0.13762032985687256 2023-01-24 03:34:36.310775: step: 366/466, loss: 0.11738991737365723 2023-01-24 03:34:37.049877: step: 368/466, loss: 0.07951025664806366 2023-01-24 03:34:37.806428: step: 370/466, loss: 0.351298063993454 2023-01-24 03:34:38.576045: step: 372/466, loss: 0.08246075361967087 2023-01-24 03:34:39.307918: step: 374/466, loss: 0.09868600219488144 2023-01-24 03:34:40.074173: step: 376/466, loss: 0.05318663269281387 2023-01-24 03:34:40.792039: step: 378/466, loss: 0.1378527283668518 2023-01-24 03:34:41.539503: step: 380/466, loss: 0.12403824925422668 2023-01-24 03:34:42.197695: step: 382/466, loss: 0.0518980547785759 2023-01-24 03:34:42.980835: step: 384/466, loss: 0.036360908299684525 2023-01-24 03:34:43.697638: step: 386/466, loss: 0.12337429821491241 2023-01-24 03:34:44.445444: step: 388/466, loss: 0.07348990440368652 2023-01-24 03:34:45.206795: step: 390/466, loss: 0.21546392142772675 2023-01-24 03:34:45.965506: step: 392/466, loss: 0.15438151359558105 2023-01-24 03:34:46.698639: step: 394/466, loss: 0.34767404198646545 2023-01-24 03:34:47.442754: step: 396/466, loss: 0.07150114327669144 2023-01-24 03:34:48.164680: step: 398/466, loss: 0.1381443440914154 2023-01-24 03:34:48.899202: step: 400/466, loss: 0.21789340674877167 2023-01-24 03:34:49.664752: step: 402/466, loss: 0.07714305073022842 2023-01-24 03:34:50.372676: step: 404/466, loss: 0.13851290941238403 2023-01-24 03:34:51.141347: step: 406/466, loss: 0.06918556243181229 2023-01-24 03:34:51.897557: step: 408/466, loss: 0.1138911172747612 2023-01-24 03:34:52.674178: step: 410/466, loss: 0.36651408672332764 2023-01-24 03:34:53.389215: step: 412/466, loss: 0.12470576912164688 2023-01-24 03:34:54.139026: step: 414/466, loss: 0.20686028897762299 2023-01-24 03:34:54.891630: step: 416/466, loss: 0.04287987947463989 2023-01-24 03:34:55.611279: step: 418/466, loss: 0.7809166312217712 2023-01-24 03:34:56.323229: step: 420/466, loss: 0.07137995958328247 2023-01-24 03:34:57.088152: step: 422/466, loss: 0.053179506212472916 2023-01-24 03:34:57.938345: step: 424/466, loss: 0.6887791156768799 2023-01-24 03:34:58.757161: step: 426/466, loss: 0.1992788314819336 2023-01-24 03:34:59.559691: step: 428/466, loss: 0.16830606758594513 2023-01-24 03:35:00.301430: step: 430/466, loss: 0.19737331569194794 2023-01-24 03:35:01.115592: step: 432/466, loss: 0.1718023270368576 2023-01-24 03:35:01.920021: step: 434/466, loss: 0.10333762317895889 2023-01-24 03:35:02.573313: step: 436/466, loss: 0.07898228615522385 2023-01-24 03:35:03.316246: step: 438/466, loss: 0.0929722934961319 2023-01-24 03:35:04.145006: step: 440/466, loss: 0.07579855620861053 2023-01-24 03:35:04.889147: step: 442/466, loss: 0.07746347039937973 2023-01-24 03:35:05.590465: step: 444/466, loss: 0.07698897272348404 2023-01-24 03:35:06.303034: step: 446/466, loss: 0.08299127966165543 2023-01-24 03:35:07.089957: step: 448/466, loss: 0.14330194890499115 2023-01-24 03:35:07.879936: step: 450/466, loss: 0.0593377910554409 2023-01-24 03:35:08.679089: step: 452/466, loss: 0.2686997354030609 2023-01-24 03:35:09.419138: step: 454/466, loss: 0.18140004575252533 2023-01-24 03:35:10.146438: step: 456/466, loss: 0.3100077211856842 2023-01-24 03:35:10.870931: step: 458/466, loss: 0.036361850798130035 2023-01-24 03:35:11.538256: step: 460/466, loss: 0.15428365767002106 2023-01-24 03:35:12.412308: step: 462/466, loss: 0.3123149871826172 2023-01-24 03:35:13.266195: step: 464/466, loss: 0.043799079954624176 2023-01-24 03:35:14.033007: step: 466/466, loss: 0.33713820576667786 2023-01-24 03:35:14.848982: step: 468/466, loss: 0.09103430807590485 2023-01-24 03:35:15.622888: step: 470/466, loss: 0.1020338162779808 2023-01-24 03:35:16.382086: step: 472/466, loss: 0.12119297683238983 2023-01-24 03:35:17.103207: step: 474/466, loss: 0.17621614038944244 2023-01-24 03:35:17.807609: step: 476/466, loss: 0.008811583742499352 2023-01-24 03:35:18.575352: step: 478/466, loss: 0.5255606770515442 2023-01-24 03:35:19.420582: step: 480/466, loss: 0.48379725217819214 2023-01-24 03:35:20.185811: step: 482/466, loss: 0.44753915071487427 2023-01-24 03:35:20.890036: step: 484/466, loss: 0.06750224530696869 2023-01-24 03:35:21.670035: step: 486/466, loss: 0.1774691641330719 2023-01-24 03:35:22.465300: step: 488/466, loss: 0.08389052003622055 2023-01-24 03:35:23.241437: step: 490/466, loss: 0.11968137323856354 2023-01-24 03:35:24.065120: step: 492/466, loss: 0.24325041472911835 2023-01-24 03:35:24.782570: step: 494/466, loss: 0.06990397721529007 2023-01-24 03:35:25.514390: step: 496/466, loss: 0.16294898092746735 2023-01-24 03:35:26.240118: step: 498/466, loss: 0.08192439377307892 2023-01-24 03:35:26.955876: step: 500/466, loss: 0.058084335178136826 2023-01-24 03:35:27.695547: step: 502/466, loss: 0.11363398283720016 2023-01-24 03:35:28.458778: step: 504/466, loss: 0.2798217535018921 2023-01-24 03:35:29.174792: step: 506/466, loss: 0.11963935196399689 2023-01-24 03:35:29.906835: step: 508/466, loss: 0.13938367366790771 2023-01-24 03:35:30.607572: step: 510/466, loss: 0.0852971151471138 2023-01-24 03:35:31.324063: step: 512/466, loss: 0.16567742824554443 2023-01-24 03:35:32.098073: step: 514/466, loss: 0.09919524937868118 2023-01-24 03:35:32.791046: step: 516/466, loss: 0.06752198934555054 2023-01-24 03:35:33.511263: step: 518/466, loss: 0.5552704930305481 2023-01-24 03:35:34.282864: step: 520/466, loss: 0.3073229193687439 2023-01-24 03:35:35.115048: step: 522/466, loss: 0.1807357370853424 2023-01-24 03:35:35.904464: step: 524/466, loss: 0.08142665773630142 2023-01-24 03:35:36.661696: step: 526/466, loss: 0.4611741304397583 2023-01-24 03:35:37.371878: step: 528/466, loss: 0.06939984858036041 2023-01-24 03:35:38.199337: step: 530/466, loss: 0.23970821499824524 2023-01-24 03:35:38.962894: step: 532/466, loss: 0.15627221763134003 2023-01-24 03:35:39.712880: step: 534/466, loss: 0.18242160975933075 2023-01-24 03:35:40.484276: step: 536/466, loss: 0.06681209057569504 2023-01-24 03:35:41.305995: step: 538/466, loss: 0.3042953312397003 2023-01-24 03:35:41.993457: step: 540/466, loss: 0.03705006465315819 2023-01-24 03:35:42.752329: step: 542/466, loss: 0.030975710600614548 2023-01-24 03:35:43.431204: step: 544/466, loss: 0.047146331518888474 2023-01-24 03:35:44.113423: step: 546/466, loss: 0.1211782693862915 2023-01-24 03:35:44.924236: step: 548/466, loss: 0.04829741269350052 2023-01-24 03:35:45.778565: step: 550/466, loss: 0.06972920894622803 2023-01-24 03:35:46.507892: step: 552/466, loss: 0.033795811235904694 2023-01-24 03:35:47.312811: step: 554/466, loss: 0.03054910898208618 2023-01-24 03:35:48.134545: step: 556/466, loss: 0.14491669833660126 2023-01-24 03:35:48.909122: step: 558/466, loss: 0.11190073192119598 2023-01-24 03:35:49.703070: step: 560/466, loss: 0.3582926392555237 2023-01-24 03:35:50.398863: step: 562/466, loss: 0.0762907937169075 2023-01-24 03:35:51.139539: step: 564/466, loss: 0.6806654334068298 2023-01-24 03:35:51.901836: step: 566/466, loss: 0.18192371726036072 2023-01-24 03:35:52.658837: step: 568/466, loss: 0.02502519078552723 2023-01-24 03:35:53.399550: step: 570/466, loss: 0.3706054389476776 2023-01-24 03:35:54.120598: step: 572/466, loss: 0.0466209352016449 2023-01-24 03:35:54.892103: step: 574/466, loss: 0.06274183094501495 2023-01-24 03:35:55.684744: step: 576/466, loss: 0.09402534365653992 2023-01-24 03:35:56.466480: step: 578/466, loss: 0.06700240820646286 2023-01-24 03:35:57.294814: step: 580/466, loss: 0.2377341240644455 2023-01-24 03:35:58.162088: step: 582/466, loss: 0.6597070693969727 2023-01-24 03:35:58.878893: step: 584/466, loss: 0.08730103075504303 2023-01-24 03:35:59.734507: step: 586/466, loss: 0.11759735643863678 2023-01-24 03:36:00.451166: step: 588/466, loss: 0.0636318176984787 2023-01-24 03:36:01.218010: step: 590/466, loss: 0.08937416970729828 2023-01-24 03:36:01.923922: step: 592/466, loss: 0.0803782120347023 2023-01-24 03:36:02.670641: step: 594/466, loss: 0.09570712596178055 2023-01-24 03:36:03.467037: step: 596/466, loss: 0.2582423686981201 2023-01-24 03:36:04.229986: step: 598/466, loss: 0.10370142012834549 2023-01-24 03:36:05.093078: step: 600/466, loss: 0.40866079926490784 2023-01-24 03:36:05.855793: step: 602/466, loss: 0.03324064239859581 2023-01-24 03:36:06.635098: step: 604/466, loss: 0.13952693343162537 2023-01-24 03:36:07.419806: step: 606/466, loss: 0.054963547736406326 2023-01-24 03:36:08.213645: step: 608/466, loss: 0.057081256061792374 2023-01-24 03:36:09.032890: step: 610/466, loss: 0.16398000717163086 2023-01-24 03:36:09.811970: step: 612/466, loss: 0.14669935405254364 2023-01-24 03:36:10.527906: step: 614/466, loss: 0.10255993157625198 2023-01-24 03:36:11.289553: step: 616/466, loss: 0.5874757170677185 2023-01-24 03:36:12.114648: step: 618/466, loss: 0.06501025706529617 2023-01-24 03:36:12.830662: step: 620/466, loss: 0.163362056016922 2023-01-24 03:36:13.572242: step: 622/466, loss: 0.06703763455152512 2023-01-24 03:36:14.444026: step: 624/466, loss: 4.936920642852783 2023-01-24 03:36:15.177656: step: 626/466, loss: 0.03846811503171921 2023-01-24 03:36:15.922450: step: 628/466, loss: 0.03890600800514221 2023-01-24 03:36:16.660761: step: 630/466, loss: 0.19648754596710205 2023-01-24 03:36:17.423426: step: 632/466, loss: 0.18167728185653687 2023-01-24 03:36:18.245848: step: 634/466, loss: 0.09970265626907349 2023-01-24 03:36:18.937319: step: 636/466, loss: 0.04559174180030823 2023-01-24 03:36:19.739027: step: 638/466, loss: 0.14223453402519226 2023-01-24 03:36:20.550882: step: 640/466, loss: 0.054243456572294235 2023-01-24 03:36:21.283652: step: 642/466, loss: 0.0635480135679245 2023-01-24 03:36:22.152789: step: 644/466, loss: 0.07275538891553879 2023-01-24 03:36:22.889655: step: 646/466, loss: 0.0707472488284111 2023-01-24 03:36:23.588322: step: 648/466, loss: 0.44649559259414673 2023-01-24 03:36:24.334702: step: 650/466, loss: 0.19551318883895874 2023-01-24 03:36:25.118696: step: 652/466, loss: 0.16433806717395782 2023-01-24 03:36:25.926903: step: 654/466, loss: 0.20102214813232422 2023-01-24 03:36:26.599476: step: 656/466, loss: 0.07433684915304184 2023-01-24 03:36:27.410139: step: 658/466, loss: 0.17160046100616455 2023-01-24 03:36:28.175116: step: 660/466, loss: 0.1790246218442917 2023-01-24 03:36:29.131134: step: 662/466, loss: 0.15379475057125092 2023-01-24 03:36:29.881240: step: 664/466, loss: 0.15680082142353058 2023-01-24 03:36:30.640156: step: 666/466, loss: 0.23924851417541504 2023-01-24 03:36:31.370415: step: 668/466, loss: 0.0730941891670227 2023-01-24 03:36:32.218058: step: 670/466, loss: 0.2315608114004135 2023-01-24 03:36:33.063470: step: 672/466, loss: 0.04345105215907097 2023-01-24 03:36:33.834595: step: 674/466, loss: 0.32656824588775635 2023-01-24 03:36:34.683219: step: 676/466, loss: 0.16333246231079102 2023-01-24 03:36:35.420031: step: 678/466, loss: 0.302473783493042 2023-01-24 03:36:36.173008: step: 680/466, loss: 0.09493256360292435 2023-01-24 03:36:36.989965: step: 682/466, loss: 0.10015727579593658 2023-01-24 03:36:37.895263: step: 684/466, loss: 0.29031530022621155 2023-01-24 03:36:38.601899: step: 686/466, loss: 0.05644798278808594 2023-01-24 03:36:39.339111: step: 688/466, loss: 0.08461704850196838 2023-01-24 03:36:40.080542: step: 690/466, loss: 0.07248745113611221 2023-01-24 03:36:40.812537: step: 692/466, loss: 1.287148356437683 2023-01-24 03:36:41.630725: step: 694/466, loss: 0.10873213410377502 2023-01-24 03:36:42.467655: step: 696/466, loss: 0.10134746134281158 2023-01-24 03:36:43.249940: step: 698/466, loss: 0.4512856900691986 2023-01-24 03:36:43.986341: step: 700/466, loss: 0.17885048687458038 2023-01-24 03:36:44.759489: step: 702/466, loss: 0.19928614795207977 2023-01-24 03:36:45.460658: step: 704/466, loss: 0.02061893790960312 2023-01-24 03:36:46.174485: step: 706/466, loss: 0.11236510425806046 2023-01-24 03:36:46.989840: step: 708/466, loss: 0.11860001087188721 2023-01-24 03:36:47.709475: step: 710/466, loss: 0.09537634998559952 2023-01-24 03:36:48.402688: step: 712/466, loss: 0.9762457609176636 2023-01-24 03:36:49.130590: step: 714/466, loss: 0.07860858738422394 2023-01-24 03:36:49.840161: step: 716/466, loss: 0.102113276720047 2023-01-24 03:36:50.614645: step: 718/466, loss: 0.6726066470146179 2023-01-24 03:36:51.382552: step: 720/466, loss: 0.12842540442943573 2023-01-24 03:36:52.143274: step: 722/466, loss: 0.04332917556166649 2023-01-24 03:36:52.954797: step: 724/466, loss: 0.08657079935073853 2023-01-24 03:36:53.767527: step: 726/466, loss: 0.9335070848464966 2023-01-24 03:36:54.526713: step: 728/466, loss: 0.04679827764630318 2023-01-24 03:36:55.311165: step: 730/466, loss: 0.2225496470928192 2023-01-24 03:36:56.109685: step: 732/466, loss: 0.2062837779521942 2023-01-24 03:36:56.879468: step: 734/466, loss: 0.06761132925748825 2023-01-24 03:36:57.656804: step: 736/466, loss: 0.12684382498264313 2023-01-24 03:36:58.455542: step: 738/466, loss: 0.11122657358646393 2023-01-24 03:36:59.245382: step: 740/466, loss: 0.17465952038764954 2023-01-24 03:36:59.996798: step: 742/466, loss: 0.10215871036052704 2023-01-24 03:37:00.767965: step: 744/466, loss: 0.047759778797626495 2023-01-24 03:37:01.576416: step: 746/466, loss: 0.10460063815116882 2023-01-24 03:37:02.374860: step: 748/466, loss: 0.1175285130739212 2023-01-24 03:37:03.064062: step: 750/466, loss: 0.3711400330066681 2023-01-24 03:37:03.771890: step: 752/466, loss: 0.10916385054588318 2023-01-24 03:37:04.627718: step: 754/466, loss: 0.09719958901405334 2023-01-24 03:37:05.394734: step: 756/466, loss: 0.1982584148645401 2023-01-24 03:37:06.207125: step: 758/466, loss: 0.15777599811553955 2023-01-24 03:37:06.973810: step: 760/466, loss: 0.0677880272269249 2023-01-24 03:37:07.827939: step: 762/466, loss: 0.08650727570056915 2023-01-24 03:37:08.624526: step: 764/466, loss: 0.06624545156955719 2023-01-24 03:37:09.482887: step: 766/466, loss: 0.05742808058857918 2023-01-24 03:37:10.208453: step: 768/466, loss: 0.7379640936851501 2023-01-24 03:37:11.001156: step: 770/466, loss: 0.16413678228855133 2023-01-24 03:37:11.759533: step: 772/466, loss: 0.11502152681350708 2023-01-24 03:37:12.517671: step: 774/466, loss: 0.14976269006729126 2023-01-24 03:37:13.235056: step: 776/466, loss: 0.24236641824245453 2023-01-24 03:37:13.993789: step: 778/466, loss: 0.5738264918327332 2023-01-24 03:37:14.704799: step: 780/466, loss: 0.17038989067077637 2023-01-24 03:37:15.501095: step: 782/466, loss: 0.22923089563846588 2023-01-24 03:37:16.318551: step: 784/466, loss: 0.09711220115423203 2023-01-24 03:37:17.076140: step: 786/466, loss: 0.10119055956602097 2023-01-24 03:37:17.770092: step: 788/466, loss: 0.039102327078580856 2023-01-24 03:37:18.510639: step: 790/466, loss: 0.4208541512489319 2023-01-24 03:37:19.274720: step: 792/466, loss: 0.09367494285106659 2023-01-24 03:37:20.129126: step: 794/466, loss: 0.09790325909852982 2023-01-24 03:37:20.902068: step: 796/466, loss: 0.2581421732902527 2023-01-24 03:37:21.597712: step: 798/466, loss: 0.09112636744976044 2023-01-24 03:37:22.374978: step: 800/466, loss: 0.13206596672534943 2023-01-24 03:37:23.116697: step: 802/466, loss: 0.25599294900894165 2023-01-24 03:37:23.852272: step: 804/466, loss: 0.059491004794836044 2023-01-24 03:37:24.631588: step: 806/466, loss: 0.32217538356781006 2023-01-24 03:37:25.405187: step: 808/466, loss: 0.045017991214990616 2023-01-24 03:37:26.118030: step: 810/466, loss: 0.1430712193250656 2023-01-24 03:37:26.940077: step: 812/466, loss: 0.2719506025314331 2023-01-24 03:37:27.690855: step: 814/466, loss: 0.054632265120744705 2023-01-24 03:37:28.449104: step: 816/466, loss: 0.08650518208742142 2023-01-24 03:37:29.210750: step: 818/466, loss: 0.1173226535320282 2023-01-24 03:37:29.931055: step: 820/466, loss: 0.090061254799366 2023-01-24 03:37:30.724364: step: 822/466, loss: 0.07849156111478806 2023-01-24 03:37:31.531832: step: 824/466, loss: 0.41988658905029297 2023-01-24 03:37:32.290513: step: 826/466, loss: 0.0609484426677227 2023-01-24 03:37:33.051885: step: 828/466, loss: 0.2581329345703125 2023-01-24 03:37:33.822766: step: 830/466, loss: 0.12320809066295624 2023-01-24 03:37:34.590303: step: 832/466, loss: 0.5082396864891052 2023-01-24 03:37:35.335895: step: 834/466, loss: 0.10686381906270981 2023-01-24 03:37:36.001422: step: 836/466, loss: 0.06501047313213348 2023-01-24 03:37:36.756522: step: 838/466, loss: 0.09969601035118103 2023-01-24 03:37:37.557527: step: 840/466, loss: 0.9600458145141602 2023-01-24 03:37:38.321588: step: 842/466, loss: 0.5189383029937744 2023-01-24 03:37:39.076648: step: 844/466, loss: 0.3567603826522827 2023-01-24 03:37:39.871068: step: 846/466, loss: 0.1487482786178589 2023-01-24 03:37:40.756526: step: 848/466, loss: 0.2470036894083023 2023-01-24 03:37:41.441145: step: 850/466, loss: 0.18263716995716095 2023-01-24 03:37:42.134785: step: 852/466, loss: 0.13464903831481934 2023-01-24 03:37:42.934460: step: 854/466, loss: 0.027120299637317657 2023-01-24 03:37:43.731126: step: 856/466, loss: 0.11120583117008209 2023-01-24 03:37:44.504486: step: 858/466, loss: 0.1276436448097229 2023-01-24 03:37:45.182422: step: 860/466, loss: 0.05004655197262764 2023-01-24 03:37:45.964131: step: 862/466, loss: 0.5223639607429504 2023-01-24 03:37:46.804495: step: 864/466, loss: 0.06866031885147095 2023-01-24 03:37:47.608293: step: 866/466, loss: 0.25382503867149353 2023-01-24 03:37:48.330195: step: 868/466, loss: 0.20900730788707733 2023-01-24 03:37:49.111429: step: 870/466, loss: 0.1347699910402298 2023-01-24 03:37:49.853920: step: 872/466, loss: 0.19099219143390656 2023-01-24 03:37:50.731901: step: 874/466, loss: 0.050141043961048126 2023-01-24 03:37:51.447670: step: 876/466, loss: 0.168554425239563 2023-01-24 03:37:52.266284: step: 878/466, loss: 0.05101846903562546 2023-01-24 03:37:53.036061: step: 880/466, loss: 0.0809459388256073 2023-01-24 03:37:53.738246: step: 882/466, loss: 0.265508770942688 2023-01-24 03:37:54.538848: step: 884/466, loss: 0.06232677027583122 2023-01-24 03:37:55.430629: step: 886/466, loss: 0.24122491478919983 2023-01-24 03:37:56.263057: step: 888/466, loss: 0.07205895334482193 2023-01-24 03:37:56.971211: step: 890/466, loss: 0.07484681904315948 2023-01-24 03:37:57.651546: step: 892/466, loss: 0.09441087394952774 2023-01-24 03:37:58.416908: step: 894/466, loss: 0.6755983829498291 2023-01-24 03:37:59.157154: step: 896/466, loss: 0.034746140241622925 2023-01-24 03:37:59.893047: step: 898/466, loss: 0.08571317791938782 2023-01-24 03:38:00.657479: step: 900/466, loss: 0.24418793618679047 2023-01-24 03:38:01.422545: step: 902/466, loss: 0.0639563649892807 2023-01-24 03:38:02.237224: step: 904/466, loss: 0.080899678170681 2023-01-24 03:38:02.990486: step: 906/466, loss: 0.41414880752563477 2023-01-24 03:38:03.740525: step: 908/466, loss: 0.13303065299987793 2023-01-24 03:38:04.552388: step: 910/466, loss: 0.06787938624620438 2023-01-24 03:38:05.287701: step: 912/466, loss: 0.08366450667381287 2023-01-24 03:38:06.017719: step: 914/466, loss: 0.08863049000501633 2023-01-24 03:38:06.772994: step: 916/466, loss: 0.11649972200393677 2023-01-24 03:38:07.573794: step: 918/466, loss: 0.020747818052768707 2023-01-24 03:38:08.286480: step: 920/466, loss: 0.10685139894485474 2023-01-24 03:38:09.014338: step: 922/466, loss: 0.09153237193822861 2023-01-24 03:38:09.722470: step: 924/466, loss: 0.1741751730442047 2023-01-24 03:38:10.507022: step: 926/466, loss: 0.16743814945220947 2023-01-24 03:38:11.261409: step: 928/466, loss: 0.07640694826841354 2023-01-24 03:38:12.081548: step: 930/466, loss: 0.20740586519241333 2023-01-24 03:38:12.888574: step: 932/466, loss: 0.128241166472435 ================================================== Loss: 0.198 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3419748520710059, 'r': 0.32899667931688803, 'f1': 0.335360251450677}, 'combined': 0.24710755370049883, 'epoch': 14} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3651257193530019, 'r': 0.29210057548240154, 'f1': 0.32455619498044613}, 'combined': 0.19948331984164006, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30819910858336935, 'r': 0.3304221942117717, 'f1': 0.31892398598828514}, 'combined': 0.2349966212545259, 'epoch': 14} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3511600818873107, 'r': 0.2973459394682163, 'f1': 0.32202021573913725}, 'combined': 0.1979246204055185, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3446929400386847, 'r': 0.33815227703984824, 'f1': 0.3413912835249043}, 'combined': 0.2515514720709821, 'epoch': 14} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36195833194362603, 'r': 0.28988004939208145, 'f1': 0.3219340933152443}, 'combined': 0.19884164587118036, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.33405172413793105, 'r': 0.2767857142857143, 'f1': 0.30273437500000006}, 'combined': 0.20182291666666669, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2777777777777778, 'r': 0.43478260869565216, 'f1': 0.3389830508474576}, 'combined': 0.1694915254237288, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.10344827586206896, 'f1': 0.16216216216216217}, 'combined': 0.10810810810810811, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3131914328000232, 'r': 0.29239124276586603, 'f1': 0.30243412156547866}, 'combined': 0.2228461948377211, 'epoch': 4} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3393879352810207, 'r': 0.2281180153229698, 'f1': 0.2728447239662215}, 'combined': 0.16769968399875076, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3663210818307906, 'r': 0.3579798048251559, 'f1': 0.36210241294214424}, 'combined': 0.2668123042731589, 'epoch': 13} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37875489782431904, 'r': 0.26988336009473124, 'f1': 0.31518228605603094}, 'combined': 0.19467141197578386, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 13} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:40:56.876189: step: 2/466, loss: 0.05210694670677185 2023-01-24 03:40:57.640150: step: 4/466, loss: 0.018808016553521156 2023-01-24 03:40:58.444918: step: 6/466, loss: 0.24722591042518616 2023-01-24 03:40:59.228118: step: 8/466, loss: 0.07486678659915924 2023-01-24 03:41:00.066986: step: 10/466, loss: 0.49716898798942566 2023-01-24 03:41:00.831702: step: 12/466, loss: 0.09887971729040146 2023-01-24 03:41:01.631974: step: 14/466, loss: 0.26036396622657776 2023-01-24 03:41:02.365273: step: 16/466, loss: 0.08663032948970795 2023-01-24 03:41:03.141777: step: 18/466, loss: 0.11957651376724243 2023-01-24 03:41:03.969977: step: 20/466, loss: 0.08551111817359924 2023-01-24 03:41:04.757171: step: 22/466, loss: 0.15386079251766205 2023-01-24 03:41:05.563680: step: 24/466, loss: 0.06206200644373894 2023-01-24 03:41:06.270790: step: 26/466, loss: 0.4639705717563629 2023-01-24 03:41:07.002477: step: 28/466, loss: 0.06706234812736511 2023-01-24 03:41:07.774681: step: 30/466, loss: 0.0980425551533699 2023-01-24 03:41:08.615011: step: 32/466, loss: 0.07218775898218155 2023-01-24 03:41:09.437395: step: 34/466, loss: 0.0752536952495575 2023-01-24 03:41:10.154855: step: 36/466, loss: 0.05118757486343384 2023-01-24 03:41:10.843868: step: 38/466, loss: 0.026366863399744034 2023-01-24 03:41:11.667128: step: 40/466, loss: 0.11569967120885849 2023-01-24 03:41:12.397497: step: 42/466, loss: 0.11791163682937622 2023-01-24 03:41:13.259972: step: 44/466, loss: 0.05543734133243561 2023-01-24 03:41:14.000787: step: 46/466, loss: 0.11040161550045013 2023-01-24 03:41:14.767032: step: 48/466, loss: 0.14670924842357635 2023-01-24 03:41:15.501849: step: 50/466, loss: 0.0680384710431099 2023-01-24 03:41:16.229822: step: 52/466, loss: 0.04696614667773247 2023-01-24 03:41:16.926500: step: 54/466, loss: 0.08242233097553253 2023-01-24 03:41:17.688364: step: 56/466, loss: 0.3122200071811676 2023-01-24 03:41:18.393112: step: 58/466, loss: 0.02097785286605358 2023-01-24 03:41:19.231976: step: 60/466, loss: 0.03667465224862099 2023-01-24 03:41:19.895686: step: 62/466, loss: 0.027150847017765045 2023-01-24 03:41:20.618947: step: 64/466, loss: 0.10212913900613785 2023-01-24 03:41:21.342081: step: 66/466, loss: 0.09340247511863708 2023-01-24 03:41:22.026567: step: 68/466, loss: 0.13316011428833008 2023-01-24 03:41:22.749055: step: 70/466, loss: 0.05900976061820984 2023-01-24 03:41:23.464471: step: 72/466, loss: 0.065632164478302 2023-01-24 03:41:24.236615: step: 74/466, loss: 0.07361248880624771 2023-01-24 03:41:24.944285: step: 76/466, loss: 0.05891217291355133 2023-01-24 03:41:25.704123: step: 78/466, loss: 0.01654049940407276 2023-01-24 03:41:26.455948: step: 80/466, loss: 0.06702452152967453 2023-01-24 03:41:27.247225: step: 82/466, loss: 0.3018524944782257 2023-01-24 03:41:28.025916: step: 84/466, loss: 0.3223186731338501 2023-01-24 03:41:28.735862: step: 86/466, loss: 0.028132904320955276 2023-01-24 03:41:29.562291: step: 88/466, loss: 0.32144275307655334 2023-01-24 03:41:30.244158: step: 90/466, loss: 0.11325498670339584 2023-01-24 03:41:30.956697: step: 92/466, loss: 0.06781657785177231 2023-01-24 03:41:31.862195: step: 94/466, loss: 0.020502163097262383 2023-01-24 03:41:32.595934: step: 96/466, loss: 0.029743617400527 2023-01-24 03:41:33.348167: step: 98/466, loss: 0.6470071077346802 2023-01-24 03:41:34.073867: step: 100/466, loss: 0.08369266241788864 2023-01-24 03:41:34.769005: step: 102/466, loss: 0.1308288723230362 2023-01-24 03:41:35.470365: step: 104/466, loss: 0.1260998249053955 2023-01-24 03:41:36.238041: step: 106/466, loss: 0.09659962356090546 2023-01-24 03:41:36.982133: step: 108/466, loss: 0.04915435239672661 2023-01-24 03:41:37.729366: step: 110/466, loss: 0.0700543075799942 2023-01-24 03:41:38.466445: step: 112/466, loss: 0.06394853442907333 2023-01-24 03:41:39.234736: step: 114/466, loss: 0.1292410045862198 2023-01-24 03:41:39.988277: step: 116/466, loss: 0.09758917987346649 2023-01-24 03:41:40.919342: step: 118/466, loss: 1.9617502689361572 2023-01-24 03:41:41.705040: step: 120/466, loss: 0.06086967885494232 2023-01-24 03:41:42.394396: step: 122/466, loss: 0.059228383004665375 2023-01-24 03:41:43.122266: step: 124/466, loss: 0.19763793051242828 2023-01-24 03:41:43.974792: step: 126/466, loss: 0.050040893256664276 2023-01-24 03:41:44.667981: step: 128/466, loss: 0.08526965230703354 2023-01-24 03:41:45.407731: step: 130/466, loss: 0.1221991628408432 2023-01-24 03:41:46.138809: step: 132/466, loss: 0.06798665970563889 2023-01-24 03:41:46.840222: step: 134/466, loss: 0.1459055095911026 2023-01-24 03:41:47.613976: step: 136/466, loss: 0.08939648419618607 2023-01-24 03:41:48.458520: step: 138/466, loss: 0.14021599292755127 2023-01-24 03:41:49.197670: step: 140/466, loss: 0.04460885748267174 2023-01-24 03:41:49.988229: step: 142/466, loss: 0.09865774214267731 2023-01-24 03:41:50.800077: step: 144/466, loss: 0.03751792013645172 2023-01-24 03:41:51.517739: step: 146/466, loss: 0.025001395493745804 2023-01-24 03:41:52.306748: step: 148/466, loss: 0.02535279095172882 2023-01-24 03:41:53.117717: step: 150/466, loss: 0.17281517386436462 2023-01-24 03:41:53.867652: step: 152/466, loss: 0.07138783484697342 2023-01-24 03:41:54.566630: step: 154/466, loss: 0.14332233369350433 2023-01-24 03:41:55.327348: step: 156/466, loss: 0.05977214500308037 2023-01-24 03:41:56.161045: step: 158/466, loss: 0.02197592705488205 2023-01-24 03:41:56.869575: step: 160/466, loss: 0.05003592371940613 2023-01-24 03:41:57.567339: step: 162/466, loss: 0.1948169469833374 2023-01-24 03:41:58.362665: step: 164/466, loss: 0.06136566773056984 2023-01-24 03:41:59.137963: step: 166/466, loss: 0.054979994893074036 2023-01-24 03:41:59.955507: step: 168/466, loss: 0.1970011591911316 2023-01-24 03:42:00.724553: step: 170/466, loss: 0.21378380060195923 2023-01-24 03:42:01.438341: step: 172/466, loss: 0.0801737830042839 2023-01-24 03:42:02.198072: step: 174/466, loss: 0.09108097851276398 2023-01-24 03:42:02.906372: step: 176/466, loss: 0.031183486804366112 2023-01-24 03:42:03.659101: step: 178/466, loss: 0.03109663724899292 2023-01-24 03:42:04.474751: step: 180/466, loss: 0.12059830874204636 2023-01-24 03:42:05.258973: step: 182/466, loss: 0.14663125574588776 2023-01-24 03:42:05.993489: step: 184/466, loss: 0.0532103069126606 2023-01-24 03:42:06.760989: step: 186/466, loss: 0.08011765033006668 2023-01-24 03:42:07.518553: step: 188/466, loss: 0.05597788095474243 2023-01-24 03:42:08.271200: step: 190/466, loss: 0.14271435141563416 2023-01-24 03:42:08.951515: step: 192/466, loss: 0.033926501870155334 2023-01-24 03:42:09.814352: step: 194/466, loss: 0.1532621681690216 2023-01-24 03:42:10.578752: step: 196/466, loss: 0.11758232861757278 2023-01-24 03:42:11.396359: step: 198/466, loss: 0.16431008279323578 2023-01-24 03:42:12.160391: step: 200/466, loss: 0.03531648963689804 2023-01-24 03:42:12.992008: step: 202/466, loss: 0.07027041167020798 2023-01-24 03:42:13.757629: step: 204/466, loss: 0.04011674225330353 2023-01-24 03:42:14.561521: step: 206/466, loss: 0.11462801694869995 2023-01-24 03:42:15.370501: step: 208/466, loss: 0.42639032006263733 2023-01-24 03:42:16.176526: step: 210/466, loss: 0.09091605991125107 2023-01-24 03:42:16.843570: step: 212/466, loss: 0.1382666379213333 2023-01-24 03:42:17.674621: step: 214/466, loss: 0.07938707619905472 2023-01-24 03:42:18.424718: step: 216/466, loss: 0.03487627953290939 2023-01-24 03:42:19.296863: step: 218/466, loss: 0.07560927420854568 2023-01-24 03:42:19.991404: step: 220/466, loss: 0.1018633246421814 2023-01-24 03:42:20.800052: step: 222/466, loss: 0.2556101679801941 2023-01-24 03:42:21.593986: step: 224/466, loss: 0.2421107292175293 2023-01-24 03:42:22.300627: step: 226/466, loss: 0.029341835528612137 2023-01-24 03:42:23.025972: step: 228/466, loss: 0.051736246794462204 2023-01-24 03:42:23.710366: step: 230/466, loss: 0.09494776278734207 2023-01-24 03:42:24.520258: step: 232/466, loss: 0.06730318069458008 2023-01-24 03:42:25.259209: step: 234/466, loss: 0.2819020748138428 2023-01-24 03:42:26.068300: step: 236/466, loss: 0.09525111317634583 2023-01-24 03:42:26.829932: step: 238/466, loss: 0.09886281937360764 2023-01-24 03:42:27.549370: step: 240/466, loss: 0.06698231399059296 2023-01-24 03:42:28.371043: step: 242/466, loss: 0.07156889885663986 2023-01-24 03:42:29.160645: step: 244/466, loss: 0.036325473338365555 2023-01-24 03:42:29.877600: step: 246/466, loss: 0.10516617447137833 2023-01-24 03:42:30.592623: step: 248/466, loss: 0.1363402009010315 2023-01-24 03:42:31.372918: step: 250/466, loss: 0.05505356565117836 2023-01-24 03:42:32.106405: step: 252/466, loss: 0.06005243584513664 2023-01-24 03:42:32.847611: step: 254/466, loss: 0.09491105377674103 2023-01-24 03:42:33.617741: step: 256/466, loss: 0.03337240591645241 2023-01-24 03:42:34.302303: step: 258/466, loss: 0.015260940417647362 2023-01-24 03:42:35.039676: step: 260/466, loss: 0.182101309299469 2023-01-24 03:42:35.878453: step: 262/466, loss: 0.12483559548854828 2023-01-24 03:42:36.657310: step: 264/466, loss: 0.05799545347690582 2023-01-24 03:42:37.514730: step: 266/466, loss: 0.12771588563919067 2023-01-24 03:42:38.283027: step: 268/466, loss: 0.05148211494088173 2023-01-24 03:42:39.005049: step: 270/466, loss: 0.025062330067157745 2023-01-24 03:42:39.746112: step: 272/466, loss: 0.07430432736873627 2023-01-24 03:42:40.536795: step: 274/466, loss: 0.10613179206848145 2023-01-24 03:42:41.385525: step: 276/466, loss: 0.1614530235528946 2023-01-24 03:42:42.097967: step: 278/466, loss: 0.0584588497877121 2023-01-24 03:42:42.880533: step: 280/466, loss: 0.13125962018966675 2023-01-24 03:42:43.686195: step: 282/466, loss: 0.0455959290266037 2023-01-24 03:42:44.424358: step: 284/466, loss: 0.0269328560680151 2023-01-24 03:42:45.132072: step: 286/466, loss: 0.24000252783298492 2023-01-24 03:42:45.946156: step: 288/466, loss: 0.19251394271850586 2023-01-24 03:42:46.767126: step: 290/466, loss: 0.2961283326148987 2023-01-24 03:42:47.519550: step: 292/466, loss: 0.0695134773850441 2023-01-24 03:42:48.315494: step: 294/466, loss: 0.05912279337644577 2023-01-24 03:42:49.117059: step: 296/466, loss: 0.08277720957994461 2023-01-24 03:42:49.890349: step: 298/466, loss: 0.08698936551809311 2023-01-24 03:42:50.654695: step: 300/466, loss: 0.18971477448940277 2023-01-24 03:42:51.406012: step: 302/466, loss: 0.06840641796588898 2023-01-24 03:42:52.102442: step: 304/466, loss: 0.08240868151187897 2023-01-24 03:42:52.920995: step: 306/466, loss: 0.19505003094673157 2023-01-24 03:42:53.784795: step: 308/466, loss: 0.04841731861233711 2023-01-24 03:42:54.549215: step: 310/466, loss: 0.09289427101612091 2023-01-24 03:42:55.258898: step: 312/466, loss: 10.075116157531738 2023-01-24 03:42:56.024968: step: 314/466, loss: 0.2554793357849121 2023-01-24 03:42:56.840188: step: 316/466, loss: 0.19938571751117706 2023-01-24 03:42:57.583981: step: 318/466, loss: 0.05021106079220772 2023-01-24 03:42:58.302258: step: 320/466, loss: 0.03854874148964882 2023-01-24 03:42:59.052004: step: 322/466, loss: 0.08314540982246399 2023-01-24 03:42:59.794765: step: 324/466, loss: 0.11936472356319427 2023-01-24 03:43:00.526897: step: 326/466, loss: 0.15186713635921478 2023-01-24 03:43:01.303694: step: 328/466, loss: 0.07621181011199951 2023-01-24 03:43:02.042391: step: 330/466, loss: 0.016436375677585602 2023-01-24 03:43:02.802293: step: 332/466, loss: 0.14319784939289093 2023-01-24 03:43:03.596464: step: 334/466, loss: 0.07255373150110245 2023-01-24 03:43:04.319488: step: 336/466, loss: 0.0869244858622551 2023-01-24 03:43:05.120704: step: 338/466, loss: 0.06745993345975876 2023-01-24 03:43:05.831450: step: 340/466, loss: 0.10919850319623947 2023-01-24 03:43:06.603427: step: 342/466, loss: 0.20024287700653076 2023-01-24 03:43:07.363304: step: 344/466, loss: 0.5972100496292114 2023-01-24 03:43:08.097579: step: 346/466, loss: 0.08542009443044662 2023-01-24 03:43:08.919452: step: 348/466, loss: 0.07332447171211243 2023-01-24 03:43:09.624397: step: 350/466, loss: 0.10761795938014984 2023-01-24 03:43:10.372257: step: 352/466, loss: 0.4194304943084717 2023-01-24 03:43:11.110372: step: 354/466, loss: 0.12218812108039856 2023-01-24 03:43:11.952417: step: 356/466, loss: 0.09939373284578323 2023-01-24 03:43:12.705600: step: 358/466, loss: 0.20191505551338196 2023-01-24 03:43:13.524354: step: 360/466, loss: 0.14228050410747528 2023-01-24 03:43:14.346721: step: 362/466, loss: 0.09845307469367981 2023-01-24 03:43:15.194871: step: 364/466, loss: 1.1167224645614624 2023-01-24 03:43:15.965465: step: 366/466, loss: 0.0897873193025589 2023-01-24 03:43:16.746876: step: 368/466, loss: 0.19432386755943298 2023-01-24 03:43:17.523758: step: 370/466, loss: 0.04567892104387283 2023-01-24 03:43:18.287162: step: 372/466, loss: 0.09775812178850174 2023-01-24 03:43:18.966957: step: 374/466, loss: 0.04945585876703262 2023-01-24 03:43:19.725859: step: 376/466, loss: 0.2018977403640747 2023-01-24 03:43:20.594608: step: 378/466, loss: 0.05446924269199371 2023-01-24 03:43:21.308274: step: 380/466, loss: 0.7429501414299011 2023-01-24 03:43:22.080865: step: 382/466, loss: 0.7492738366127014 2023-01-24 03:43:22.818751: step: 384/466, loss: 0.016909055411815643 2023-01-24 03:43:23.637432: step: 386/466, loss: 0.12241260707378387 2023-01-24 03:43:24.437938: step: 388/466, loss: 0.12036903947591782 2023-01-24 03:43:25.251673: step: 390/466, loss: 0.2424386888742447 2023-01-24 03:43:25.973271: step: 392/466, loss: 0.022720765322446823 2023-01-24 03:43:26.711480: step: 394/466, loss: 0.1325964778661728 2023-01-24 03:43:27.427655: step: 396/466, loss: 0.05196783319115639 2023-01-24 03:43:28.204388: step: 398/466, loss: 0.0683765709400177 2023-01-24 03:43:28.953065: step: 400/466, loss: 0.1732609122991562 2023-01-24 03:43:29.767751: step: 402/466, loss: 0.1777394562959671 2023-01-24 03:43:30.506434: step: 404/466, loss: 0.06969244033098221 2023-01-24 03:43:31.238833: step: 406/466, loss: 0.04348913952708244 2023-01-24 03:43:32.024489: step: 408/466, loss: 0.05620116740465164 2023-01-24 03:43:32.834348: step: 410/466, loss: 0.13990871608257294 2023-01-24 03:43:33.643119: step: 412/466, loss: 0.16890095174312592 2023-01-24 03:43:34.428600: step: 414/466, loss: 0.05791330337524414 2023-01-24 03:43:35.196246: step: 416/466, loss: 0.0948304757475853 2023-01-24 03:43:35.935341: step: 418/466, loss: 0.2736469805240631 2023-01-24 03:43:36.688589: step: 420/466, loss: 0.034205324947834015 2023-01-24 03:43:37.480151: step: 422/466, loss: 0.8601592183113098 2023-01-24 03:43:38.218398: step: 424/466, loss: 0.11530404537916183 2023-01-24 03:43:38.997202: step: 426/466, loss: 0.18276342749595642 2023-01-24 03:43:39.738661: step: 428/466, loss: 0.4518357515335083 2023-01-24 03:43:40.493619: step: 430/466, loss: 0.10191506147384644 2023-01-24 03:43:41.212515: step: 432/466, loss: 0.4678365886211395 2023-01-24 03:43:41.951474: step: 434/466, loss: 0.23182491958141327 2023-01-24 03:43:42.673746: step: 436/466, loss: 0.13566505908966064 2023-01-24 03:43:43.427028: step: 438/466, loss: 0.1481209546327591 2023-01-24 03:43:44.183898: step: 440/466, loss: 0.08209249377250671 2023-01-24 03:43:44.895736: step: 442/466, loss: 0.043845757842063904 2023-01-24 03:43:45.654507: step: 444/466, loss: 0.04230104759335518 2023-01-24 03:43:46.450642: step: 446/466, loss: 0.10302285850048065 2023-01-24 03:43:47.143523: step: 448/466, loss: 0.14680084586143494 2023-01-24 03:43:47.965489: step: 450/466, loss: 0.28017646074295044 2023-01-24 03:43:48.750557: step: 452/466, loss: 0.09054071456193924 2023-01-24 03:43:49.467069: step: 454/466, loss: 0.10634226351976395 2023-01-24 03:43:50.234904: step: 456/466, loss: 0.07919905334711075 2023-01-24 03:43:50.983929: step: 458/466, loss: 0.04398207366466522 2023-01-24 03:43:51.686470: step: 460/466, loss: 0.13439683616161346 2023-01-24 03:43:52.439312: step: 462/466, loss: 0.17098841071128845 2023-01-24 03:43:53.122331: step: 464/466, loss: 0.10975412279367447 2023-01-24 03:43:53.869828: step: 466/466, loss: 0.19899797439575195 2023-01-24 03:43:54.668658: step: 468/466, loss: 0.23727427423000336 2023-01-24 03:43:55.428281: step: 470/466, loss: 0.04510289058089256 2023-01-24 03:43:56.270094: step: 472/466, loss: 0.8150736093521118 2023-01-24 03:43:56.940452: step: 474/466, loss: 0.09007147699594498 2023-01-24 03:43:57.728192: step: 476/466, loss: 0.6509134769439697 2023-01-24 03:43:58.464412: step: 478/466, loss: 0.07442466914653778 2023-01-24 03:43:59.247453: step: 480/466, loss: 0.0767611488699913 2023-01-24 03:44:00.001913: step: 482/466, loss: 0.036706291139125824 2023-01-24 03:44:00.688303: step: 484/466, loss: 0.0735660120844841 2023-01-24 03:44:01.367713: step: 486/466, loss: 0.10951712727546692 2023-01-24 03:44:02.095039: step: 488/466, loss: 0.8974351286888123 2023-01-24 03:44:02.839141: step: 490/466, loss: 0.05478595569729805 2023-01-24 03:44:03.576343: step: 492/466, loss: 0.10811832547187805 2023-01-24 03:44:04.334472: step: 494/466, loss: 0.09113547205924988 2023-01-24 03:44:05.167627: step: 496/466, loss: 0.043976835906505585 2023-01-24 03:44:06.000882: step: 498/466, loss: 0.33339035511016846 2023-01-24 03:44:06.752537: step: 500/466, loss: 0.08161080628633499 2023-01-24 03:44:07.550357: step: 502/466, loss: 0.04528295621275902 2023-01-24 03:44:08.326289: step: 504/466, loss: 0.07120140641927719 2023-01-24 03:44:09.058487: step: 506/466, loss: 0.11000839620828629 2023-01-24 03:44:09.826503: step: 508/466, loss: 0.1413177251815796 2023-01-24 03:44:10.591284: step: 510/466, loss: 0.27769792079925537 2023-01-24 03:44:11.446668: step: 512/466, loss: 0.03286886215209961 2023-01-24 03:44:12.189544: step: 514/466, loss: 0.1395481377840042 2023-01-24 03:44:13.004552: step: 516/466, loss: 0.14255927503108978 2023-01-24 03:44:13.794468: step: 518/466, loss: 0.10735977441072464 2023-01-24 03:44:14.582702: step: 520/466, loss: 0.4797162711620331 2023-01-24 03:44:15.335403: step: 522/466, loss: 0.12145961076021194 2023-01-24 03:44:16.105973: step: 524/466, loss: 0.05048738792538643 2023-01-24 03:44:16.847724: step: 526/466, loss: 0.14986084401607513 2023-01-24 03:44:17.643537: step: 528/466, loss: 1.2179806232452393 2023-01-24 03:44:18.425269: step: 530/466, loss: 0.482911616563797 2023-01-24 03:44:19.279213: step: 532/466, loss: 0.2774769961833954 2023-01-24 03:44:20.055035: step: 534/466, loss: 0.0876186266541481 2023-01-24 03:44:20.851560: step: 536/466, loss: 0.7969873547554016 2023-01-24 03:44:21.588369: step: 538/466, loss: 0.33211031556129456 2023-01-24 03:44:22.293448: step: 540/466, loss: 0.11518576741218567 2023-01-24 03:44:23.073731: step: 542/466, loss: 0.07682258635759354 2023-01-24 03:44:23.834680: step: 544/466, loss: 0.03671961650252342 2023-01-24 03:44:24.526775: step: 546/466, loss: 0.26386862993240356 2023-01-24 03:44:25.361070: step: 548/466, loss: 0.10726254433393478 2023-01-24 03:44:26.176087: step: 550/466, loss: 0.11239303648471832 2023-01-24 03:44:27.113877: step: 552/466, loss: 0.04918292164802551 2023-01-24 03:44:27.882413: step: 554/466, loss: 0.21519367396831512 2023-01-24 03:44:28.705946: step: 556/466, loss: 0.17288942635059357 2023-01-24 03:44:29.445589: step: 558/466, loss: 0.04853741452097893 2023-01-24 03:44:30.209701: step: 560/466, loss: 0.4925331473350525 2023-01-24 03:44:30.970082: step: 562/466, loss: 1.027655005455017 2023-01-24 03:44:31.691207: step: 564/466, loss: 0.045533567667007446 2023-01-24 03:44:32.565208: step: 566/466, loss: 0.05125496909022331 2023-01-24 03:44:33.360229: step: 568/466, loss: 0.13714663684368134 2023-01-24 03:44:34.111147: step: 570/466, loss: 0.14521551132202148 2023-01-24 03:44:34.819747: step: 572/466, loss: 0.06423871219158173 2023-01-24 03:44:35.708029: step: 574/466, loss: 0.09547813981771469 2023-01-24 03:44:36.418201: step: 576/466, loss: 0.1754569262266159 2023-01-24 03:44:37.234858: step: 578/466, loss: 0.14660441875457764 2023-01-24 03:44:37.967539: step: 580/466, loss: 0.04251260310411453 2023-01-24 03:44:38.695309: step: 582/466, loss: 0.0846245139837265 2023-01-24 03:44:39.468287: step: 584/466, loss: 0.25155988335609436 2023-01-24 03:44:40.263443: step: 586/466, loss: 0.0799722746014595 2023-01-24 03:44:41.120027: step: 588/466, loss: 0.2427946925163269 2023-01-24 03:44:41.894743: step: 590/466, loss: 0.08180870860815048 2023-01-24 03:44:42.611713: step: 592/466, loss: 0.20674677193164825 2023-01-24 03:44:43.379466: step: 594/466, loss: 0.1549101173877716 2023-01-24 03:44:44.150632: step: 596/466, loss: 0.13579262793064117 2023-01-24 03:44:44.882449: step: 598/466, loss: 0.04477398842573166 2023-01-24 03:44:45.620373: step: 600/466, loss: 0.0795651227235794 2023-01-24 03:44:46.374437: step: 602/466, loss: 0.07534074783325195 2023-01-24 03:44:47.099547: step: 604/466, loss: 0.12016644328832626 2023-01-24 03:44:47.967759: step: 606/466, loss: 0.2995753288269043 2023-01-24 03:44:48.724110: step: 608/466, loss: 0.09187112748622894 2023-01-24 03:44:49.462010: step: 610/466, loss: 0.09526897221803665 2023-01-24 03:44:50.170129: step: 612/466, loss: 0.11181322485208511 2023-01-24 03:44:50.982452: step: 614/466, loss: 0.05556326359510422 2023-01-24 03:44:51.779955: step: 616/466, loss: 0.23775111138820648 2023-01-24 03:44:52.548255: step: 618/466, loss: 0.9687085747718811 2023-01-24 03:44:53.380755: step: 620/466, loss: 0.5194430947303772 2023-01-24 03:44:54.151590: step: 622/466, loss: 0.22122785449028015 2023-01-24 03:44:54.934404: step: 624/466, loss: 0.10294534265995026 2023-01-24 03:44:55.672537: step: 626/466, loss: 0.7381730675697327 2023-01-24 03:44:56.408929: step: 628/466, loss: 0.09413321316242218 2023-01-24 03:44:57.138659: step: 630/466, loss: 0.05440503731369972 2023-01-24 03:44:57.866300: step: 632/466, loss: 0.0769491195678711 2023-01-24 03:44:58.670567: step: 634/466, loss: 0.11340983211994171 2023-01-24 03:44:59.466110: step: 636/466, loss: 0.11693871766328812 2023-01-24 03:45:00.130475: step: 638/466, loss: 0.14776034653186798 2023-01-24 03:45:01.034658: step: 640/466, loss: 0.1705910563468933 2023-01-24 03:45:01.787629: step: 642/466, loss: 0.037550777196884155 2023-01-24 03:45:02.493147: step: 644/466, loss: 0.0886608213186264 2023-01-24 03:45:03.297688: step: 646/466, loss: 0.11776689440011978 2023-01-24 03:45:04.095178: step: 648/466, loss: 0.0356062576174736 2023-01-24 03:45:04.895084: step: 650/466, loss: 0.05151690915226936 2023-01-24 03:45:05.734522: step: 652/466, loss: 0.2012919932603836 2023-01-24 03:45:06.504264: step: 654/466, loss: 0.09088915586471558 2023-01-24 03:45:07.225439: step: 656/466, loss: 0.06062782183289528 2023-01-24 03:45:08.033592: step: 658/466, loss: 0.09774986654520035 2023-01-24 03:45:08.801550: step: 660/466, loss: 0.15430264174938202 2023-01-24 03:45:09.539151: step: 662/466, loss: 0.09477879106998444 2023-01-24 03:45:10.321920: step: 664/466, loss: 0.14804497361183167 2023-01-24 03:45:11.087092: step: 666/466, loss: 0.2251739799976349 2023-01-24 03:45:11.817559: step: 668/466, loss: 0.029697343707084656 2023-01-24 03:45:12.587778: step: 670/466, loss: 0.05504726991057396 2023-01-24 03:45:13.363066: step: 672/466, loss: 0.06437207758426666 2023-01-24 03:45:14.171132: step: 674/466, loss: 0.06237734109163284 2023-01-24 03:45:14.906005: step: 676/466, loss: 0.21999919414520264 2023-01-24 03:45:15.550842: step: 678/466, loss: 0.062139302492141724 2023-01-24 03:45:16.392128: step: 680/466, loss: 0.11696934700012207 2023-01-24 03:45:17.182444: step: 682/466, loss: 0.06388358771800995 2023-01-24 03:45:18.020505: step: 684/466, loss: 0.06830720603466034 2023-01-24 03:45:18.750998: step: 686/466, loss: 0.04317271709442139 2023-01-24 03:45:19.472612: step: 688/466, loss: 0.05816539004445076 2023-01-24 03:45:20.289646: step: 690/466, loss: 0.033409614115953445 2023-01-24 03:45:21.050111: step: 692/466, loss: 0.20011916756629944 2023-01-24 03:45:21.752065: step: 694/466, loss: 0.14553718268871307 2023-01-24 03:45:22.530576: step: 696/466, loss: 0.15632416307926178 2023-01-24 03:45:23.246811: step: 698/466, loss: 0.07469306886196136 2023-01-24 03:45:24.031636: step: 700/466, loss: 0.14653725922107697 2023-01-24 03:45:24.750024: step: 702/466, loss: 0.10972332954406738 2023-01-24 03:45:25.484716: step: 704/466, loss: 0.16227854788303375 2023-01-24 03:45:26.267061: step: 706/466, loss: 0.02626815065741539 2023-01-24 03:45:27.023052: step: 708/466, loss: 0.06898698210716248 2023-01-24 03:45:27.744554: step: 710/466, loss: 0.0700167715549469 2023-01-24 03:45:28.492826: step: 712/466, loss: 0.07130112498998642 2023-01-24 03:45:29.298220: step: 714/466, loss: 0.06511213630437851 2023-01-24 03:45:29.995678: step: 716/466, loss: 0.06653962284326553 2023-01-24 03:45:30.664344: step: 718/466, loss: 0.08015184104442596 2023-01-24 03:45:31.470894: step: 720/466, loss: 0.06690537929534912 2023-01-24 03:45:32.223320: step: 722/466, loss: 0.04099201411008835 2023-01-24 03:45:32.972448: step: 724/466, loss: 0.06964768469333649 2023-01-24 03:45:33.728483: step: 726/466, loss: 0.2873595356941223 2023-01-24 03:45:34.424005: step: 728/466, loss: 0.12969258427619934 2023-01-24 03:45:35.234073: step: 730/466, loss: 0.12038178741931915 2023-01-24 03:45:35.942468: step: 732/466, loss: 0.09994393587112427 2023-01-24 03:45:36.727297: step: 734/466, loss: 0.04484577104449272 2023-01-24 03:45:37.452850: step: 736/466, loss: 0.06055283918976784 2023-01-24 03:45:38.160308: step: 738/466, loss: 0.05349590629339218 2023-01-24 03:45:38.912541: step: 740/466, loss: 0.10076868534088135 2023-01-24 03:45:39.713490: step: 742/466, loss: 0.10146256536245346 2023-01-24 03:45:40.451792: step: 744/466, loss: 0.41469091176986694 2023-01-24 03:45:41.184657: step: 746/466, loss: 0.13279423117637634 2023-01-24 03:45:42.020997: step: 748/466, loss: 0.10934709012508392 2023-01-24 03:45:42.792679: step: 750/466, loss: 0.012673179619014263 2023-01-24 03:45:43.511635: step: 752/466, loss: 0.05210401490330696 2023-01-24 03:45:44.323741: step: 754/466, loss: 0.13793087005615234 2023-01-24 03:45:45.191700: step: 756/466, loss: 0.11023826152086258 2023-01-24 03:45:46.104299: step: 758/466, loss: 0.17428648471832275 2023-01-24 03:45:46.766929: step: 760/466, loss: 0.026478417217731476 2023-01-24 03:45:47.495982: step: 762/466, loss: 0.08447739481925964 2023-01-24 03:45:48.303592: step: 764/466, loss: 0.06560572981834412 2023-01-24 03:45:49.076585: step: 766/466, loss: 0.07490119338035583 2023-01-24 03:45:49.859718: step: 768/466, loss: 0.11548873037099838 2023-01-24 03:45:50.603373: step: 770/466, loss: 0.09563510119915009 2023-01-24 03:45:51.405094: step: 772/466, loss: 0.09963957965373993 2023-01-24 03:45:52.220433: step: 774/466, loss: 0.16085292398929596 2023-01-24 03:45:53.033195: step: 776/466, loss: 0.10022449493408203 2023-01-24 03:45:53.759410: step: 778/466, loss: 0.06054616719484329 2023-01-24 03:45:54.443081: step: 780/466, loss: 1.848663091659546 2023-01-24 03:45:55.197559: step: 782/466, loss: 0.1872957944869995 2023-01-24 03:45:56.055354: step: 784/466, loss: 0.05718646198511124 2023-01-24 03:45:56.833234: step: 786/466, loss: 0.09180223941802979 2023-01-24 03:45:57.684774: step: 788/466, loss: 0.3840520679950714 2023-01-24 03:45:58.451295: step: 790/466, loss: 0.09134998917579651 2023-01-24 03:45:59.263316: step: 792/466, loss: 0.05993478000164032 2023-01-24 03:46:00.099649: step: 794/466, loss: 0.14188416302204132 2023-01-24 03:46:00.863105: step: 796/466, loss: 0.02756846323609352 2023-01-24 03:46:01.625350: step: 798/466, loss: 0.04655991867184639 2023-01-24 03:46:02.342491: step: 800/466, loss: 0.02538241073489189 2023-01-24 03:46:03.168033: step: 802/466, loss: 0.14205986261367798 2023-01-24 03:46:03.907226: step: 804/466, loss: 0.24629098176956177 2023-01-24 03:46:04.596312: step: 806/466, loss: 0.25697317719459534 2023-01-24 03:46:05.379317: step: 808/466, loss: 0.2990754246711731 2023-01-24 03:46:06.116325: step: 810/466, loss: 0.042557764798402786 2023-01-24 03:46:06.896389: step: 812/466, loss: 0.3225131332874298 2023-01-24 03:46:07.620677: step: 814/466, loss: 0.027747908607125282 2023-01-24 03:46:08.359287: step: 816/466, loss: 6.1168413162231445 2023-01-24 03:46:09.088084: step: 818/466, loss: 0.0687987357378006 2023-01-24 03:46:09.877695: step: 820/466, loss: 0.09931518882513046 2023-01-24 03:46:10.700626: step: 822/466, loss: 0.07825738191604614 2023-01-24 03:46:11.486704: step: 824/466, loss: 0.08366437256336212 2023-01-24 03:46:12.268075: step: 826/466, loss: 0.07200953364372253 2023-01-24 03:46:13.016978: step: 828/466, loss: 0.07085266709327698 2023-01-24 03:46:13.936218: step: 830/466, loss: 0.014983849599957466 2023-01-24 03:46:14.740279: step: 832/466, loss: 0.2933443486690521 2023-01-24 03:46:15.489301: step: 834/466, loss: 0.122842937707901 2023-01-24 03:46:16.311072: step: 836/466, loss: 0.16995428502559662 2023-01-24 03:46:17.055290: step: 838/466, loss: 0.06800254434347153 2023-01-24 03:46:17.832880: step: 840/466, loss: 0.03221948444843292 2023-01-24 03:46:18.489095: step: 842/466, loss: 0.4852493107318878 2023-01-24 03:46:19.234983: step: 844/466, loss: 0.28303274512290955 2023-01-24 03:46:20.026257: step: 846/466, loss: 0.24812698364257812 2023-01-24 03:46:20.777981: step: 848/466, loss: 0.020885517820715904 2023-01-24 03:46:21.506409: step: 850/466, loss: 0.3325272500514984 2023-01-24 03:46:22.239680: step: 852/466, loss: 0.09081543236970901 2023-01-24 03:46:23.056344: step: 854/466, loss: 0.0565243735909462 2023-01-24 03:46:23.797927: step: 856/466, loss: 0.1972397118806839 2023-01-24 03:46:24.511398: step: 858/466, loss: 0.11067622900009155 2023-01-24 03:46:25.313993: step: 860/466, loss: 0.027421843260526657 2023-01-24 03:46:26.052455: step: 862/466, loss: 0.060354817658662796 2023-01-24 03:46:26.801617: step: 864/466, loss: 0.1397075653076172 2023-01-24 03:46:27.548858: step: 866/466, loss: 0.05992557108402252 2023-01-24 03:46:28.258436: step: 868/466, loss: 0.00546844070777297 2023-01-24 03:46:29.077785: step: 870/466, loss: 0.07609057426452637 2023-01-24 03:46:29.805978: step: 872/466, loss: 0.05730225145816803 2023-01-24 03:46:30.572248: step: 874/466, loss: 0.10852661728858948 2023-01-24 03:46:31.435186: step: 876/466, loss: 0.09243988990783691 2023-01-24 03:46:32.234861: step: 878/466, loss: 0.026159387081861496 2023-01-24 03:46:33.041965: step: 880/466, loss: 0.17023548483848572 2023-01-24 03:46:33.832704: step: 882/466, loss: 0.15736925601959229 2023-01-24 03:46:34.640414: step: 884/466, loss: 0.7499971985816956 2023-01-24 03:46:35.318141: step: 886/466, loss: 0.1022542342543602 2023-01-24 03:46:36.034480: step: 888/466, loss: 0.15656821429729462 2023-01-24 03:46:36.862505: step: 890/466, loss: 0.11236383765935898 2023-01-24 03:46:37.576042: step: 892/466, loss: 0.1907481998205185 2023-01-24 03:46:38.283063: step: 894/466, loss: 1.1647937297821045 2023-01-24 03:46:39.027595: step: 896/466, loss: 0.031657878309488297 2023-01-24 03:46:39.833602: step: 898/466, loss: 0.07947023212909698 2023-01-24 03:46:40.612016: step: 900/466, loss: 0.12622502446174622 2023-01-24 03:46:41.309782: step: 902/466, loss: 0.19517450034618378 2023-01-24 03:46:42.146695: step: 904/466, loss: 0.024985037744045258 2023-01-24 03:46:42.871590: step: 906/466, loss: 0.4478173851966858 2023-01-24 03:46:43.561117: step: 908/466, loss: 0.09397806227207184 2023-01-24 03:46:44.346632: step: 910/466, loss: 0.3781964182853699 2023-01-24 03:46:45.195119: step: 912/466, loss: 0.08397955447435379 2023-01-24 03:46:45.970378: step: 914/466, loss: 0.05961551517248154 2023-01-24 03:46:46.741833: step: 916/466, loss: 0.08746632933616638 2023-01-24 03:46:47.441375: step: 918/466, loss: 0.03498067334294319 2023-01-24 03:46:48.225614: step: 920/466, loss: 0.020834237337112427 2023-01-24 03:46:48.970373: step: 922/466, loss: 0.15319500863552094 2023-01-24 03:46:49.716854: step: 924/466, loss: 0.3376273512840271 2023-01-24 03:46:50.489606: step: 926/466, loss: 0.12826432287693024 2023-01-24 03:46:51.236584: step: 928/466, loss: 0.18537111580371857 2023-01-24 03:46:52.064644: step: 930/466, loss: 0.12460990250110626 2023-01-24 03:46:52.846655: step: 932/466, loss: 0.07324258983135223 ================================================== Loss: 0.187 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3307988251585812, 'r': 0.334565035691696, 'f1': 0.33267127133872415}, 'combined': 0.24512619993379672, 'epoch': 15} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3619731547097076, 'r': 0.2797920745243147, 'f1': 0.3156207761496179}, 'combined': 0.19399130631635053, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3397118203624733, 'r': 0.3455133505014909, 'f1': 0.3425880258029834}, 'combined': 0.25243328217061933, 'epoch': 15} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36496986212061505, 'r': 0.28716866100998134, 'f1': 0.32142835577644857}, 'combined': 0.19852927856780653, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.325, 'r': 0.325, 'f1': 0.325}, 'combined': 0.21666666666666667, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.13793103448275862, 'f1': 0.2162162162162162}, 'combined': 0.14414414414414412, 'epoch': 15} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3663210818307906, 'r': 0.3579798048251559, 'f1': 0.36210241294214424}, 'combined': 0.2668123042731589, 'epoch': 13} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37875489782431904, 'r': 0.26988336009473124, 'f1': 0.31518228605603094}, 'combined': 0.19467141197578386, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 13} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:49:43.229984: step: 2/466, loss: 0.0018624652875587344 2023-01-24 03:49:43.950807: step: 4/466, loss: 0.14262737333774567 2023-01-24 03:49:44.753913: step: 6/466, loss: 0.12517815828323364 2023-01-24 03:49:45.538635: step: 8/466, loss: 0.07878816872835159 2023-01-24 03:49:46.246927: step: 10/466, loss: 0.6276178359985352 2023-01-24 03:49:47.024706: step: 12/466, loss: 0.0350370928645134 2023-01-24 03:49:47.771701: step: 14/466, loss: 0.1565471738576889 2023-01-24 03:49:48.507521: step: 16/466, loss: 0.1140841469168663 2023-01-24 03:49:49.242162: step: 18/466, loss: 0.05684506148099899 2023-01-24 03:49:49.952048: step: 20/466, loss: 0.20923495292663574 2023-01-24 03:49:50.706193: step: 22/466, loss: 0.08959781378507614 2023-01-24 03:49:51.400617: step: 24/466, loss: 0.08080139756202698 2023-01-24 03:49:52.114885: step: 26/466, loss: 0.03367632254958153 2023-01-24 03:49:52.854904: step: 28/466, loss: 0.06738610565662384 2023-01-24 03:49:53.627252: step: 30/466, loss: 0.05645789951086044 2023-01-24 03:49:54.412213: step: 32/466, loss: 0.06814518570899963 2023-01-24 03:49:55.214128: step: 34/466, loss: 0.03460216894745827 2023-01-24 03:49:55.925989: step: 36/466, loss: 0.015818607062101364 2023-01-24 03:49:56.611584: step: 38/466, loss: 0.19628256559371948 2023-01-24 03:49:57.353667: step: 40/466, loss: 0.1271037459373474 2023-01-24 03:49:58.063913: step: 42/466, loss: 0.02907964028418064 2023-01-24 03:49:58.755613: step: 44/466, loss: 1.326354742050171 2023-01-24 03:49:59.496332: step: 46/466, loss: 0.10550469160079956 2023-01-24 03:50:00.305048: step: 48/466, loss: 0.09840810298919678 2023-01-24 03:50:01.177534: step: 50/466, loss: 0.037362001836299896 2023-01-24 03:50:02.016797: step: 52/466, loss: 0.028449734672904015 2023-01-24 03:50:02.819938: step: 54/466, loss: 0.007634575013071299 2023-01-24 03:50:03.463108: step: 56/466, loss: 0.06437010318040848 2023-01-24 03:50:04.238190: step: 58/466, loss: 0.05331692099571228 2023-01-24 03:50:05.062295: step: 60/466, loss: 0.044606491923332214 2023-01-24 03:50:05.819318: step: 62/466, loss: 0.03616861253976822 2023-01-24 03:50:06.617450: step: 64/466, loss: 0.09568973630666733 2023-01-24 03:50:07.417393: step: 66/466, loss: 0.08516858518123627 2023-01-24 03:50:08.237549: step: 68/466, loss: 0.06501974165439606 2023-01-24 03:50:09.009257: step: 70/466, loss: 0.10325935482978821 2023-01-24 03:50:09.779926: step: 72/466, loss: 0.09397002309560776 2023-01-24 03:50:10.565710: step: 74/466, loss: 0.016955891624093056 2023-01-24 03:50:11.289705: step: 76/466, loss: 0.07174551486968994 2023-01-24 03:50:12.091194: step: 78/466, loss: 0.08200935274362564 2023-01-24 03:50:12.819489: step: 80/466, loss: 0.02655804343521595 2023-01-24 03:50:13.653789: step: 82/466, loss: 0.13291595876216888 2023-01-24 03:50:14.426749: step: 84/466, loss: 0.06990199536085129 2023-01-24 03:50:15.162649: step: 86/466, loss: 0.08203735202550888 2023-01-24 03:50:15.940944: step: 88/466, loss: 0.07973272353410721 2023-01-24 03:50:16.639274: step: 90/466, loss: 0.0974380373954773 2023-01-24 03:50:17.509744: step: 92/466, loss: 0.053154356777668 2023-01-24 03:50:18.242071: step: 94/466, loss: 0.14800702035427094 2023-01-24 03:50:18.909619: step: 96/466, loss: 0.07176072895526886 2023-01-24 03:50:19.721330: step: 98/466, loss: 0.055861346423625946 2023-01-24 03:50:20.466291: step: 100/466, loss: 0.3699154257774353 2023-01-24 03:50:21.131081: step: 102/466, loss: 0.12106865644454956 2023-01-24 03:50:21.822113: step: 104/466, loss: 0.0461328960955143 2023-01-24 03:50:22.569433: step: 106/466, loss: 0.060555242002010345 2023-01-24 03:50:23.321055: step: 108/466, loss: 0.39660879969596863 2023-01-24 03:50:24.032290: step: 110/466, loss: 0.027628857642412186 2023-01-24 03:50:24.815600: step: 112/466, loss: 0.05876636505126953 2023-01-24 03:50:25.596829: step: 114/466, loss: 0.01485330518335104 2023-01-24 03:50:26.361321: step: 116/466, loss: 0.06990094482898712 2023-01-24 03:50:27.124809: step: 118/466, loss: 0.09951207041740417 2023-01-24 03:50:28.005736: step: 120/466, loss: 0.05669524893164635 2023-01-24 03:50:28.757391: step: 122/466, loss: 0.11176761239767075 2023-01-24 03:50:29.501687: step: 124/466, loss: 0.14566385746002197 2023-01-24 03:50:30.270108: step: 126/466, loss: 0.016346879303455353 2023-01-24 03:50:31.071671: step: 128/466, loss: 0.2126549333333969 2023-01-24 03:50:31.754186: step: 130/466, loss: 0.07509668916463852 2023-01-24 03:50:32.525355: step: 132/466, loss: 0.1204756647348404 2023-01-24 03:50:33.282114: step: 134/466, loss: 0.0901917964220047 2023-01-24 03:50:34.085686: step: 136/466, loss: 0.049818042665719986 2023-01-24 03:50:34.894043: step: 138/466, loss: 0.05061956122517586 2023-01-24 03:50:35.650774: step: 140/466, loss: 0.06672712415456772 2023-01-24 03:50:36.440723: step: 142/466, loss: 0.07314980030059814 2023-01-24 03:50:37.136576: step: 144/466, loss: 0.0676993653178215 2023-01-24 03:50:37.935792: step: 146/466, loss: 0.09283817559480667 2023-01-24 03:50:38.712112: step: 148/466, loss: 0.08164266496896744 2023-01-24 03:50:39.447415: step: 150/466, loss: 0.07620931416749954 2023-01-24 03:50:40.407344: step: 152/466, loss: 0.12517696619033813 2023-01-24 03:50:41.203354: step: 154/466, loss: 0.03965863585472107 2023-01-24 03:50:41.987185: step: 156/466, loss: 0.13473178446292877 2023-01-24 03:50:42.821133: step: 158/466, loss: 0.02908577024936676 2023-01-24 03:50:43.604108: step: 160/466, loss: 0.12194164097309113 2023-01-24 03:50:44.437816: step: 162/466, loss: 0.11548975110054016 2023-01-24 03:50:45.250649: step: 164/466, loss: 0.10065381228923798 2023-01-24 03:50:45.984676: step: 166/466, loss: 0.1245603933930397 2023-01-24 03:50:46.657919: step: 168/466, loss: 0.1796674132347107 2023-01-24 03:50:47.420024: step: 170/466, loss: 0.09850645065307617 2023-01-24 03:50:48.265884: step: 172/466, loss: 0.08459986746311188 2023-01-24 03:50:49.051269: step: 174/466, loss: 0.005720966961234808 2023-01-24 03:50:49.753361: step: 176/466, loss: 0.22306662797927856 2023-01-24 03:50:50.580659: step: 178/466, loss: 0.07315231114625931 2023-01-24 03:50:51.338686: step: 180/466, loss: 0.10266924649477005 2023-01-24 03:50:52.022943: step: 182/466, loss: 0.02916317619383335 2023-01-24 03:50:52.753060: step: 184/466, loss: 0.03219641372561455 2023-01-24 03:50:53.487164: step: 186/466, loss: 0.05558239668607712 2023-01-24 03:50:54.271183: step: 188/466, loss: 0.07634638249874115 2023-01-24 03:50:55.066432: step: 190/466, loss: 0.19770660996437073 2023-01-24 03:50:55.793642: step: 192/466, loss: 0.18504159152507782 2023-01-24 03:50:56.508219: step: 194/466, loss: 0.07066548615694046 2023-01-24 03:50:57.255361: step: 196/466, loss: 0.05562664568424225 2023-01-24 03:50:58.027613: step: 198/466, loss: 0.14733237028121948 2023-01-24 03:50:58.767794: step: 200/466, loss: 0.12928493320941925 2023-01-24 03:50:59.488766: step: 202/466, loss: 0.056578729301691055 2023-01-24 03:51:00.298984: step: 204/466, loss: 0.05881396308541298 2023-01-24 03:51:01.086058: step: 206/466, loss: 0.19485962390899658 2023-01-24 03:51:01.945200: step: 208/466, loss: 0.04371248558163643 2023-01-24 03:51:02.687333: step: 210/466, loss: 0.04043707624077797 2023-01-24 03:51:03.393406: step: 212/466, loss: 0.0322677306830883 2023-01-24 03:51:04.106533: step: 214/466, loss: 0.04926469177007675 2023-01-24 03:51:04.871174: step: 216/466, loss: 0.06370534747838974 2023-01-24 03:51:05.635615: step: 218/466, loss: 0.017677977681159973 2023-01-24 03:51:06.421719: step: 220/466, loss: 0.05885789915919304 2023-01-24 03:51:07.236472: step: 222/466, loss: 0.07734831422567368 2023-01-24 03:51:07.980228: step: 224/466, loss: 0.1633378267288208 2023-01-24 03:51:08.703852: step: 226/466, loss: 0.12721040844917297 2023-01-24 03:51:09.498538: step: 228/466, loss: 0.07709307223558426 2023-01-24 03:51:10.297327: step: 230/466, loss: 0.06658844649791718 2023-01-24 03:51:11.012480: step: 232/466, loss: 0.06309105455875397 2023-01-24 03:51:11.837638: step: 234/466, loss: 0.5354830026626587 2023-01-24 03:51:12.601601: step: 236/466, loss: 0.0737801343202591 2023-01-24 03:51:13.493161: step: 238/466, loss: 0.04832407832145691 2023-01-24 03:51:14.206467: step: 240/466, loss: 0.028802694752812386 2023-01-24 03:51:14.942074: step: 242/466, loss: 0.32804444432258606 2023-01-24 03:51:15.721104: step: 244/466, loss: 0.1064852774143219 2023-01-24 03:51:16.505717: step: 246/466, loss: 0.10216860473155975 2023-01-24 03:51:17.239592: step: 248/466, loss: 0.022415174171328545 2023-01-24 03:51:18.052239: step: 250/466, loss: 0.3452892005443573 2023-01-24 03:51:18.842858: step: 252/466, loss: 0.13802990317344666 2023-01-24 03:51:19.595844: step: 254/466, loss: 0.09531251341104507 2023-01-24 03:51:20.349374: step: 256/466, loss: 0.08211075514554977 2023-01-24 03:51:21.118452: step: 258/466, loss: 0.08147446811199188 2023-01-24 03:51:21.869339: step: 260/466, loss: 0.11451318114995956 2023-01-24 03:51:22.596850: step: 262/466, loss: 0.39439642429351807 2023-01-24 03:51:23.421916: step: 264/466, loss: 0.19546227157115936 2023-01-24 03:51:24.198912: step: 266/466, loss: 0.036694370210170746 2023-01-24 03:51:24.989288: step: 268/466, loss: 0.038847874850034714 2023-01-24 03:51:25.730516: step: 270/466, loss: 0.12380710244178772 2023-01-24 03:51:26.563978: step: 272/466, loss: 0.2092113494873047 2023-01-24 03:51:27.392890: step: 274/466, loss: 0.2461710274219513 2023-01-24 03:51:28.114733: step: 276/466, loss: 0.19365479052066803 2023-01-24 03:51:28.850735: step: 278/466, loss: 0.07007157057523727 2023-01-24 03:51:29.597260: step: 280/466, loss: 0.029620543122291565 2023-01-24 03:51:30.399723: step: 282/466, loss: 0.21002694964408875 2023-01-24 03:51:31.193941: step: 284/466, loss: 0.1598701775074005 2023-01-24 03:51:31.932289: step: 286/466, loss: 1.290482997894287 2023-01-24 03:51:32.732859: step: 288/466, loss: 0.06177673488855362 2023-01-24 03:51:33.409134: step: 290/466, loss: 0.12041433155536652 2023-01-24 03:51:34.181692: step: 292/466, loss: 0.4185667037963867 2023-01-24 03:51:35.099700: step: 294/466, loss: 0.1689419448375702 2023-01-24 03:51:35.841225: step: 296/466, loss: 0.14349795877933502 2023-01-24 03:51:36.525194: step: 298/466, loss: 0.10513242334127426 2023-01-24 03:51:37.292785: step: 300/466, loss: 0.2011110484600067 2023-01-24 03:51:38.091614: step: 302/466, loss: 0.04485170543193817 2023-01-24 03:51:38.947173: step: 304/466, loss: 0.035167694091796875 2023-01-24 03:51:39.721266: step: 306/466, loss: 0.00759429857134819 2023-01-24 03:51:40.466392: step: 308/466, loss: 0.31545257568359375 2023-01-24 03:51:41.172694: step: 310/466, loss: 0.09402605146169662 2023-01-24 03:51:41.891471: step: 312/466, loss: 0.07007033377885818 2023-01-24 03:51:42.671951: step: 314/466, loss: 0.06480089575052261 2023-01-24 03:51:43.458807: step: 316/466, loss: 0.2533351182937622 2023-01-24 03:51:44.228679: step: 318/466, loss: 0.3919064998626709 2023-01-24 03:51:44.909068: step: 320/466, loss: 0.13647937774658203 2023-01-24 03:51:45.662234: step: 322/466, loss: 0.12979894876480103 2023-01-24 03:51:46.475167: step: 324/466, loss: 0.24850276112556458 2023-01-24 03:51:47.278786: step: 326/466, loss: 0.06813384592533112 2023-01-24 03:51:48.071550: step: 328/466, loss: 0.2158774584531784 2023-01-24 03:51:48.789557: step: 330/466, loss: 0.0738043487071991 2023-01-24 03:51:49.562669: step: 332/466, loss: 0.1032731756567955 2023-01-24 03:51:50.353269: step: 334/466, loss: 0.24884411692619324 2023-01-24 03:51:51.123843: step: 336/466, loss: 0.10134509950876236 2023-01-24 03:51:51.947816: step: 338/466, loss: 0.10196374356746674 2023-01-24 03:51:52.675963: step: 340/466, loss: 0.10488557815551758 2023-01-24 03:51:53.417652: step: 342/466, loss: 0.019537772983312607 2023-01-24 03:51:54.202134: step: 344/466, loss: 0.12553223967552185 2023-01-24 03:51:54.872265: step: 346/466, loss: 0.035885635763406754 2023-01-24 03:51:55.655049: step: 348/466, loss: 0.044970910996198654 2023-01-24 03:51:56.393163: step: 350/466, loss: 0.056709982454776764 2023-01-24 03:51:57.206373: step: 352/466, loss: 0.11772741377353668 2023-01-24 03:51:57.958633: step: 354/466, loss: 0.5492798089981079 2023-01-24 03:51:58.842487: step: 356/466, loss: 0.016146494075655937 2023-01-24 03:51:59.592975: step: 358/466, loss: 0.21240819990634918 2023-01-24 03:52:00.337627: step: 360/466, loss: 0.12465915828943253 2023-01-24 03:52:01.146650: step: 362/466, loss: 0.7712176442146301 2023-01-24 03:52:01.912684: step: 364/466, loss: 0.13183462619781494 2023-01-24 03:52:02.625594: step: 366/466, loss: 0.10105898231267929 2023-01-24 03:52:03.406328: step: 368/466, loss: 0.015195309184491634 2023-01-24 03:52:04.187846: step: 370/466, loss: 0.04254454746842384 2023-01-24 03:52:04.930508: step: 372/466, loss: 0.044255319982767105 2023-01-24 03:52:05.725613: step: 374/466, loss: 0.08767160028219223 2023-01-24 03:52:06.505836: step: 376/466, loss: 0.1139039471745491 2023-01-24 03:52:07.198440: step: 378/466, loss: 0.23791880905628204 2023-01-24 03:52:07.929699: step: 380/466, loss: 0.05610283091664314 2023-01-24 03:52:08.678072: step: 382/466, loss: 0.10261404514312744 2023-01-24 03:52:09.476334: step: 384/466, loss: 0.05635293200612068 2023-01-24 03:52:10.282122: step: 386/466, loss: 0.11902878433465958 2023-01-24 03:52:10.985530: step: 388/466, loss: 0.023966550827026367 2023-01-24 03:52:11.697315: step: 390/466, loss: 0.059337567538022995 2023-01-24 03:52:12.427129: step: 392/466, loss: 1.2344927787780762 2023-01-24 03:52:13.193485: step: 394/466, loss: 0.04720328375697136 2023-01-24 03:52:13.965701: step: 396/466, loss: 0.11827465891838074 2023-01-24 03:52:14.770677: step: 398/466, loss: 0.15723110735416412 2023-01-24 03:52:15.696577: step: 400/466, loss: 0.8636144995689392 2023-01-24 03:52:16.497666: step: 402/466, loss: 1.0105373859405518 2023-01-24 03:52:17.270476: step: 404/466, loss: 0.1483166366815567 2023-01-24 03:52:18.041724: step: 406/466, loss: 0.019055398181080818 2023-01-24 03:52:18.838848: step: 408/466, loss: 0.0768151581287384 2023-01-24 03:52:19.537477: step: 410/466, loss: 0.13478273153305054 2023-01-24 03:52:20.261515: step: 412/466, loss: 0.035327523946762085 2023-01-24 03:52:20.980935: step: 414/466, loss: 0.040573496371507645 2023-01-24 03:52:21.694917: step: 416/466, loss: 0.10551556944847107 2023-01-24 03:52:22.508070: step: 418/466, loss: 0.11085692793130875 2023-01-24 03:52:23.269159: step: 420/466, loss: 0.13599808514118195 2023-01-24 03:52:24.089305: step: 422/466, loss: 0.1653360277414322 2023-01-24 03:52:24.851210: step: 424/466, loss: 0.6205252408981323 2023-01-24 03:52:25.605573: step: 426/466, loss: 0.43873658776283264 2023-01-24 03:52:26.339459: step: 428/466, loss: 0.1369527131319046 2023-01-24 03:52:27.149379: step: 430/466, loss: 0.2972129285335541 2023-01-24 03:52:27.849263: step: 432/466, loss: 0.07241196185350418 2023-01-24 03:52:28.637986: step: 434/466, loss: 0.028385912999510765 2023-01-24 03:52:29.365172: step: 436/466, loss: 0.05662926286458969 2023-01-24 03:52:30.097644: step: 438/466, loss: 0.10984829068183899 2023-01-24 03:52:30.851454: step: 440/466, loss: 0.16127750277519226 2023-01-24 03:52:31.601098: step: 442/466, loss: 0.33046212792396545 2023-01-24 03:52:32.386680: step: 444/466, loss: 0.023356251418590546 2023-01-24 03:52:33.123507: step: 446/466, loss: 0.04761826992034912 2023-01-24 03:52:33.984318: step: 448/466, loss: 0.0881882831454277 2023-01-24 03:52:34.792424: step: 450/466, loss: 0.059739310294389725 2023-01-24 03:52:35.612741: step: 452/466, loss: 0.07543253898620605 2023-01-24 03:52:36.482427: step: 454/466, loss: 0.010121147148311138 2023-01-24 03:52:37.284618: step: 456/466, loss: 0.08570060133934021 2023-01-24 03:52:38.195684: step: 458/466, loss: 0.1192619800567627 2023-01-24 03:52:38.898745: step: 460/466, loss: 0.6350327730178833 2023-01-24 03:52:39.702320: step: 462/466, loss: 0.028908485546708107 2023-01-24 03:52:40.533781: step: 464/466, loss: 0.07640614360570908 2023-01-24 03:52:41.347971: step: 466/466, loss: 0.09877616912126541 2023-01-24 03:52:42.024118: step: 468/466, loss: 0.017190825194120407 2023-01-24 03:52:42.684199: step: 470/466, loss: 0.16334563493728638 2023-01-24 03:52:43.326116: step: 472/466, loss: 0.06626991927623749 2023-01-24 03:52:44.092188: step: 474/466, loss: 0.04550163075327873 2023-01-24 03:52:44.843484: step: 476/466, loss: 0.03090309165418148 2023-01-24 03:52:45.615228: step: 478/466, loss: 0.045029982924461365 2023-01-24 03:52:46.376233: step: 480/466, loss: 0.1302035003900528 2023-01-24 03:52:47.181634: step: 482/466, loss: 0.08502025902271271 2023-01-24 03:52:47.986150: step: 484/466, loss: 0.034057505428791046 2023-01-24 03:52:48.711485: step: 486/466, loss: 0.026281673461198807 2023-01-24 03:52:49.593769: step: 488/466, loss: 0.07515764981508255 2023-01-24 03:52:50.276805: step: 490/466, loss: 0.029807021841406822 2023-01-24 03:52:51.021775: step: 492/466, loss: 0.08880341798067093 2023-01-24 03:52:51.888368: step: 494/466, loss: 0.13791541755199432 2023-01-24 03:52:52.710588: step: 496/466, loss: 0.08253327012062073 2023-01-24 03:52:53.488792: step: 498/466, loss: 0.0802103653550148 2023-01-24 03:52:54.256056: step: 500/466, loss: 0.08541009575128555 2023-01-24 03:52:55.055004: step: 502/466, loss: 0.24436721205711365 2023-01-24 03:52:55.814899: step: 504/466, loss: 0.2751705050468445 2023-01-24 03:52:56.594470: step: 506/466, loss: 0.24562475085258484 2023-01-24 03:52:57.331061: step: 508/466, loss: 0.48396503925323486 2023-01-24 03:52:58.105868: step: 510/466, loss: 0.8047950863838196 2023-01-24 03:52:58.966963: step: 512/466, loss: 0.057647477835416794 2023-01-24 03:52:59.759291: step: 514/466, loss: 0.08173815906047821 2023-01-24 03:53:00.518307: step: 516/466, loss: 0.0712982639670372 2023-01-24 03:53:01.254718: step: 518/466, loss: 0.041921310126781464 2023-01-24 03:53:02.056147: step: 520/466, loss: 0.18059973418712616 2023-01-24 03:53:02.780909: step: 522/466, loss: 0.0878557562828064 2023-01-24 03:53:03.633241: step: 524/466, loss: 0.1671873927116394 2023-01-24 03:53:04.384453: step: 526/466, loss: 0.025004588067531586 2023-01-24 03:53:05.113479: step: 528/466, loss: 0.08039911836385727 2023-01-24 03:53:05.858976: step: 530/466, loss: 0.3033719062805176 2023-01-24 03:53:06.650723: step: 532/466, loss: 0.08958456665277481 2023-01-24 03:53:07.405640: step: 534/466, loss: 0.09387919306755066 2023-01-24 03:53:08.122511: step: 536/466, loss: 0.09632495045661926 2023-01-24 03:53:08.918873: step: 538/466, loss: 0.1713729053735733 2023-01-24 03:53:09.682264: step: 540/466, loss: 0.07129695266485214 2023-01-24 03:53:10.390082: step: 542/466, loss: 0.12720653414726257 2023-01-24 03:53:11.122196: step: 544/466, loss: 0.060755349695682526 2023-01-24 03:53:11.862351: step: 546/466, loss: 1.95603346824646 2023-01-24 03:53:12.678704: step: 548/466, loss: 0.03766784816980362 2023-01-24 03:53:13.389906: step: 550/466, loss: 0.07923099398612976 2023-01-24 03:53:14.107200: step: 552/466, loss: 0.04662496969103813 2023-01-24 03:53:14.820007: step: 554/466, loss: 0.20221984386444092 2023-01-24 03:53:15.656525: step: 556/466, loss: 0.07292622327804565 2023-01-24 03:53:16.343806: step: 558/466, loss: 0.3139210045337677 2023-01-24 03:53:17.091158: step: 560/466, loss: 0.23124876618385315 2023-01-24 03:53:17.906232: step: 562/466, loss: 0.08138064295053482 2023-01-24 03:53:18.680615: step: 564/466, loss: 0.16122810542583466 2023-01-24 03:53:19.356448: step: 566/466, loss: 0.026222899556159973 2023-01-24 03:53:20.222464: step: 568/466, loss: 0.11673318594694138 2023-01-24 03:53:20.966058: step: 570/466, loss: 0.4448487460613251 2023-01-24 03:53:21.720952: step: 572/466, loss: 0.10425546020269394 2023-01-24 03:53:22.588619: step: 574/466, loss: 0.3005366325378418 2023-01-24 03:53:23.343183: step: 576/466, loss: 0.15741752088069916 2023-01-24 03:53:24.118425: step: 578/466, loss: 0.2272447943687439 2023-01-24 03:53:24.849650: step: 580/466, loss: 0.10790374875068665 2023-01-24 03:53:25.580551: step: 582/466, loss: 0.03616996109485626 2023-01-24 03:53:26.319867: step: 584/466, loss: 1.3382568359375 2023-01-24 03:53:27.021042: step: 586/466, loss: 0.0517452172935009 2023-01-24 03:53:27.773505: step: 588/466, loss: 0.0477430522441864 2023-01-24 03:53:28.503410: step: 590/466, loss: 0.14656464755535126 2023-01-24 03:53:29.191068: step: 592/466, loss: 0.0638587474822998 2023-01-24 03:53:29.940435: step: 594/466, loss: 0.14820857346057892 2023-01-24 03:53:30.701284: step: 596/466, loss: 0.028037745505571365 2023-01-24 03:53:31.448882: step: 598/466, loss: 0.8110078573226929 2023-01-24 03:53:32.225895: step: 600/466, loss: 0.02175460010766983 2023-01-24 03:53:32.970173: step: 602/466, loss: 1.047738790512085 2023-01-24 03:53:33.657080: step: 604/466, loss: 0.06620313227176666 2023-01-24 03:53:34.462043: step: 606/466, loss: 0.2300693392753601 2023-01-24 03:53:35.183175: step: 608/466, loss: 0.14889350533485413 2023-01-24 03:53:35.973880: step: 610/466, loss: 0.06209733709692955 2023-01-24 03:53:36.812638: step: 612/466, loss: 0.2901458442211151 2023-01-24 03:53:37.572006: step: 614/466, loss: 0.06201798841357231 2023-01-24 03:53:38.412837: step: 616/466, loss: 0.14276079833507538 2023-01-24 03:53:39.207118: step: 618/466, loss: 1.0708260536193848 2023-01-24 03:53:39.977641: step: 620/466, loss: 0.07404862344264984 2023-01-24 03:53:40.769076: step: 622/466, loss: 0.023511115461587906 2023-01-24 03:53:41.489627: step: 624/466, loss: 0.20319001376628876 2023-01-24 03:53:42.208899: step: 626/466, loss: 0.08493014425039291 2023-01-24 03:53:43.005350: step: 628/466, loss: 0.040104154497385025 2023-01-24 03:53:43.803360: step: 630/466, loss: 0.338765412569046 2023-01-24 03:53:44.602190: step: 632/466, loss: 0.1115725114941597 2023-01-24 03:53:45.376807: step: 634/466, loss: 0.09772010147571564 2023-01-24 03:53:46.056140: step: 636/466, loss: 0.07879616320133209 2023-01-24 03:53:46.762563: step: 638/466, loss: 0.05774553120136261 2023-01-24 03:53:47.490409: step: 640/466, loss: 0.09257499128580093 2023-01-24 03:53:48.222186: step: 642/466, loss: 0.17606566846370697 2023-01-24 03:53:48.932083: step: 644/466, loss: 0.07357846200466156 2023-01-24 03:53:49.727275: step: 646/466, loss: 0.10471224784851074 2023-01-24 03:53:50.496050: step: 648/466, loss: 0.1160171777009964 2023-01-24 03:53:51.204819: step: 650/466, loss: 0.0974593535065651 2023-01-24 03:53:51.954912: step: 652/466, loss: 0.18082386255264282 2023-01-24 03:53:52.662111: step: 654/466, loss: 0.6907148361206055 2023-01-24 03:53:53.494158: step: 656/466, loss: 0.07257720082998276 2023-01-24 03:53:54.221411: step: 658/466, loss: 0.222214937210083 2023-01-24 03:53:54.973839: step: 660/466, loss: 0.13155943155288696 2023-01-24 03:53:55.657167: step: 662/466, loss: 0.14486588537693024 2023-01-24 03:53:56.451253: step: 664/466, loss: 0.06527838110923767 2023-01-24 03:53:57.149802: step: 666/466, loss: 0.11442571878433228 2023-01-24 03:53:57.966611: step: 668/466, loss: 0.05018752068281174 2023-01-24 03:53:58.787342: step: 670/466, loss: 0.03921462595462799 2023-01-24 03:53:59.536396: step: 672/466, loss: 0.28432151675224304 2023-01-24 03:54:00.206236: step: 674/466, loss: 0.3747497797012329 2023-01-24 03:54:00.998251: step: 676/466, loss: 0.0581992045044899 2023-01-24 03:54:01.763155: step: 678/466, loss: 0.019291166216135025 2023-01-24 03:54:02.548492: step: 680/466, loss: 0.06049179658293724 2023-01-24 03:54:03.298731: step: 682/466, loss: 0.0835191160440445 2023-01-24 03:54:04.030274: step: 684/466, loss: 0.10527876764535904 2023-01-24 03:54:04.906037: step: 686/466, loss: 0.030664782971143723 2023-01-24 03:54:05.710614: step: 688/466, loss: 0.03258570656180382 2023-01-24 03:54:06.563482: step: 690/466, loss: 0.37244319915771484 2023-01-24 03:54:07.288503: step: 692/466, loss: 0.07915109395980835 2023-01-24 03:54:07.999550: step: 694/466, loss: 0.5006815791130066 2023-01-24 03:54:08.743129: step: 696/466, loss: 0.0645032450556755 2023-01-24 03:54:09.556260: step: 698/466, loss: 0.13112987577915192 2023-01-24 03:54:10.425928: step: 700/466, loss: 0.22272424399852753 2023-01-24 03:54:11.225960: step: 702/466, loss: 0.04174065217375755 2023-01-24 03:54:11.949855: step: 704/466, loss: 0.0813443660736084 2023-01-24 03:54:12.670552: step: 706/466, loss: 0.14078551530838013 2023-01-24 03:54:13.443006: step: 708/466, loss: 0.17905648052692413 2023-01-24 03:54:14.178829: step: 710/466, loss: 0.0903608500957489 2023-01-24 03:54:14.890018: step: 712/466, loss: 0.10948151350021362 2023-01-24 03:54:15.668344: step: 714/466, loss: 0.04178426414728165 2023-01-24 03:54:16.443615: step: 716/466, loss: 0.029289964586496353 2023-01-24 03:54:17.148212: step: 718/466, loss: 0.09051091969013214 2023-01-24 03:54:17.959452: step: 720/466, loss: 0.025737447664141655 2023-01-24 03:54:18.861470: step: 722/466, loss: 0.2563604712486267 2023-01-24 03:54:19.568346: step: 724/466, loss: 0.09010884910821915 2023-01-24 03:54:20.299731: step: 726/466, loss: 0.052926205098629 2023-01-24 03:54:20.982484: step: 728/466, loss: 0.07164259254932404 2023-01-24 03:54:21.794856: step: 730/466, loss: 0.10287351161241531 2023-01-24 03:54:22.587473: step: 732/466, loss: 0.12792713940143585 2023-01-24 03:54:23.364833: step: 734/466, loss: 0.06267481297254562 2023-01-24 03:54:24.087719: step: 736/466, loss: 0.06375842541456223 2023-01-24 03:54:24.878305: step: 738/466, loss: 0.05980125442147255 2023-01-24 03:54:25.645308: step: 740/466, loss: 0.049242787063121796 2023-01-24 03:54:26.453287: step: 742/466, loss: 0.0792761817574501 2023-01-24 03:54:27.273730: step: 744/466, loss: 0.018085921183228493 2023-01-24 03:54:28.068251: step: 746/466, loss: 0.0719904825091362 2023-01-24 03:54:28.759654: step: 748/466, loss: 0.09041845053434372 2023-01-24 03:54:29.517207: step: 750/466, loss: 0.018958715721964836 2023-01-24 03:54:30.286089: step: 752/466, loss: 0.08061391115188599 2023-01-24 03:54:31.096160: step: 754/466, loss: 0.07450126111507416 2023-01-24 03:54:31.816792: step: 756/466, loss: 0.07704184949398041 2023-01-24 03:54:32.626735: step: 758/466, loss: 0.23017098009586334 2023-01-24 03:54:33.358002: step: 760/466, loss: 0.07831332832574844 2023-01-24 03:54:34.076975: step: 762/466, loss: 0.07351720333099365 2023-01-24 03:54:34.909729: step: 764/466, loss: 0.15741848945617676 2023-01-24 03:54:35.722505: step: 766/466, loss: 0.1123637706041336 2023-01-24 03:54:36.483699: step: 768/466, loss: 0.05872133746743202 2023-01-24 03:54:37.253977: step: 770/466, loss: 0.07663730531930923 2023-01-24 03:54:37.934549: step: 772/466, loss: 0.15055827796459198 2023-01-24 03:54:38.694496: step: 774/466, loss: 0.1707383543252945 2023-01-24 03:54:39.414185: step: 776/466, loss: 0.5265418887138367 2023-01-24 03:54:40.234652: step: 778/466, loss: 0.5557186603546143 2023-01-24 03:54:41.179505: step: 780/466, loss: 0.06326576322317123 2023-01-24 03:54:41.990812: step: 782/466, loss: 0.1562936007976532 2023-01-24 03:54:42.717708: step: 784/466, loss: 0.03154328465461731 2023-01-24 03:54:43.491646: step: 786/466, loss: 0.04813500493764877 2023-01-24 03:54:44.257060: step: 788/466, loss: 0.07419311255216599 2023-01-24 03:54:45.034555: step: 790/466, loss: 0.16150015592575073 2023-01-24 03:54:45.777775: step: 792/466, loss: 0.6626380085945129 2023-01-24 03:54:46.562094: step: 794/466, loss: 0.05854855850338936 2023-01-24 03:54:47.390922: step: 796/466, loss: 0.07601359486579895 2023-01-24 03:54:48.176840: step: 798/466, loss: 0.316074937582016 2023-01-24 03:54:49.020517: step: 800/466, loss: 0.055279359221458435 2023-01-24 03:54:49.756912: step: 802/466, loss: 0.30370137095451355 2023-01-24 03:54:50.509817: step: 804/466, loss: 2.408041477203369 2023-01-24 03:54:51.236619: step: 806/466, loss: 0.7589643597602844 2023-01-24 03:54:51.935645: step: 808/466, loss: 0.14986556768417358 2023-01-24 03:54:52.686402: step: 810/466, loss: 0.0806887075304985 2023-01-24 03:54:53.469692: step: 812/466, loss: 0.06354395300149918 2023-01-24 03:54:54.253266: step: 814/466, loss: 0.30655643343925476 2023-01-24 03:54:55.006360: step: 816/466, loss: 0.09369249641895294 2023-01-24 03:54:55.821351: step: 818/466, loss: 0.1615120768547058 2023-01-24 03:54:56.577922: step: 820/466, loss: 0.06296786665916443 2023-01-24 03:54:57.306541: step: 822/466, loss: 0.025020739063620567 2023-01-24 03:54:58.045600: step: 824/466, loss: 0.15517769753932953 2023-01-24 03:54:58.755566: step: 826/466, loss: 0.08489702641963959 2023-01-24 03:54:59.548559: step: 828/466, loss: 0.2150644212961197 2023-01-24 03:55:00.321343: step: 830/466, loss: 0.2859443128108978 2023-01-24 03:55:01.068101: step: 832/466, loss: 0.09254368394613266 2023-01-24 03:55:01.893907: step: 834/466, loss: 0.011415778659284115 2023-01-24 03:55:02.585700: step: 836/466, loss: 0.14637251198291779 2023-01-24 03:55:03.436619: step: 838/466, loss: 0.01589520275592804 2023-01-24 03:55:04.197166: step: 840/466, loss: 0.10257188230752945 2023-01-24 03:55:04.979435: step: 842/466, loss: 0.03776068240404129 2023-01-24 03:55:05.674646: step: 844/466, loss: 0.029468653723597527 2023-01-24 03:55:06.451136: step: 846/466, loss: 0.12233823537826538 2023-01-24 03:55:07.248515: step: 848/466, loss: 0.8273965716362 2023-01-24 03:55:07.944463: step: 850/466, loss: 0.0944732055068016 2023-01-24 03:55:08.715607: step: 852/466, loss: 0.11218609660863876 2023-01-24 03:55:09.617123: step: 854/466, loss: 0.027405105531215668 2023-01-24 03:55:10.358482: step: 856/466, loss: 0.05875711888074875 2023-01-24 03:55:11.197673: step: 858/466, loss: 0.1434674710035324 2023-01-24 03:55:11.910485: step: 860/466, loss: 0.03289871662855148 2023-01-24 03:55:12.663516: step: 862/466, loss: 0.11220621317625046 2023-01-24 03:55:13.422311: step: 864/466, loss: 0.17699798941612244 2023-01-24 03:55:14.167552: step: 866/466, loss: 0.1084531918168068 2023-01-24 03:55:14.926084: step: 868/466, loss: 0.06856013089418411 2023-01-24 03:55:15.786089: step: 870/466, loss: 0.09868394583463669 2023-01-24 03:55:16.469482: step: 872/466, loss: 0.039851948618888855 2023-01-24 03:55:17.245388: step: 874/466, loss: 0.03650224953889847 2023-01-24 03:55:18.053038: step: 876/466, loss: 0.04264580085873604 2023-01-24 03:55:18.750586: step: 878/466, loss: 0.018047701567411423 2023-01-24 03:55:19.476778: step: 880/466, loss: 0.041126005351543427 2023-01-24 03:55:20.261887: step: 882/466, loss: 0.03496141731739044 2023-01-24 03:55:21.014519: step: 884/466, loss: 0.22649994492530823 2023-01-24 03:55:21.780304: step: 886/466, loss: 0.11513878405094147 2023-01-24 03:55:22.545482: step: 888/466, loss: 1.339849829673767 2023-01-24 03:55:23.326301: step: 890/466, loss: 0.05769219622015953 2023-01-24 03:55:24.172623: step: 892/466, loss: 0.04100324586033821 2023-01-24 03:55:24.962832: step: 894/466, loss: 0.015059271827340126 2023-01-24 03:55:25.666908: step: 896/466, loss: 0.1016344279050827 2023-01-24 03:55:26.404241: step: 898/466, loss: 0.20613186061382294 2023-01-24 03:55:27.185604: step: 900/466, loss: 0.12245035916566849 2023-01-24 03:55:27.998791: step: 902/466, loss: 0.0788796916604042 2023-01-24 03:55:28.677677: step: 904/466, loss: 0.3605306148529053 2023-01-24 03:55:29.441761: step: 906/466, loss: 0.03475223854184151 2023-01-24 03:55:30.162330: step: 908/466, loss: 1.0116491317749023 2023-01-24 03:55:30.886238: step: 910/466, loss: 0.10003989189863205 2023-01-24 03:55:31.613277: step: 912/466, loss: 0.062395673245191574 2023-01-24 03:55:32.436526: step: 914/466, loss: 0.211165651679039 2023-01-24 03:55:33.192222: step: 916/466, loss: 0.1046147421002388 2023-01-24 03:55:33.937298: step: 918/466, loss: 0.1511353999376297 2023-01-24 03:55:34.636487: step: 920/466, loss: 0.026441054418683052 2023-01-24 03:55:35.493107: step: 922/466, loss: 1.8698264360427856 2023-01-24 03:55:36.324056: step: 924/466, loss: 0.17580001056194305 2023-01-24 03:55:37.112809: step: 926/466, loss: 0.07652544230222702 2023-01-24 03:55:37.878013: step: 928/466, loss: 0.09471571445465088 2023-01-24 03:55:38.602662: step: 930/466, loss: 0.1395745873451233 2023-01-24 03:55:39.314454: step: 932/466, loss: 0.09615115821361542 ================================================== Loss: 0.161 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3354375836680053, 'r': 0.3169789690069576, 'f1': 0.3259471544715447}, 'combined': 0.24017158750534873, 'epoch': 16} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3712652016030077, 'r': 0.27989664245634027, 'f1': 0.3191706772674078}, 'combined': 0.1961731967594799, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3181175837493669, 'r': 0.32717216393198645, 'f1': 0.32258134778701003}, 'combined': 0.2376915194220074, 'epoch': 16} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3591237499987723, 'r': 0.2810127783785887, 'f1': 0.3153026215351398}, 'combined': 0.1937957576264762, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3456037074148297, 'r': 0.3272414611005693, 'f1': 0.33617202729044837}, 'combined': 0.24770570431927774, 'epoch': 16} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37192870264364486, 'r': 0.2797522650733828, 'f1': 0.31932157655260507}, 'combined': 0.19722803257660906, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3407258064516129, 'r': 0.30178571428571427, 'f1': 0.32007575757575757}, 'combined': 0.21338383838383837, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.4782608695652174, 'f1': 0.3728813559322034}, 'combined': 0.1864406779661017, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3663210818307906, 'r': 0.3579798048251559, 'f1': 0.36210241294214424}, 'combined': 0.2668123042731589, 'epoch': 13} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37875489782431904, 'r': 0.26988336009473124, 'f1': 0.31518228605603094}, 'combined': 0.19467141197578386, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 13} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:58:22.443973: step: 2/466, loss: 0.04191667214035988 2023-01-24 03:58:23.167942: step: 4/466, loss: 0.5147823691368103 2023-01-24 03:58:23.882177: step: 6/466, loss: 0.10745085775852203 2023-01-24 03:58:24.595943: step: 8/466, loss: 0.09878411144018173 2023-01-24 03:58:25.316716: step: 10/466, loss: 0.1570444107055664 2023-01-24 03:58:26.100614: step: 12/466, loss: 0.040436554700136185 2023-01-24 03:58:26.898505: step: 14/466, loss: 0.0783068835735321 2023-01-24 03:58:27.635608: step: 16/466, loss: 0.07695181667804718 2023-01-24 03:58:28.519017: step: 18/466, loss: 0.09340395033359528 2023-01-24 03:58:29.161253: step: 20/466, loss: 0.19459334015846252 2023-01-24 03:58:29.949145: step: 22/466, loss: 0.09914836287498474 2023-01-24 03:58:30.697700: step: 24/466, loss: 0.1062348410487175 2023-01-24 03:58:31.458060: step: 26/466, loss: 0.0334099642932415 2023-01-24 03:58:32.172286: step: 28/466, loss: 0.07453502714633942 2023-01-24 03:58:32.936491: step: 30/466, loss: 0.23181527853012085 2023-01-24 03:58:33.631672: step: 32/466, loss: 0.038020357489585876 2023-01-24 03:58:34.464805: step: 34/466, loss: 0.4997105002403259 2023-01-24 03:58:35.241675: step: 36/466, loss: 0.09167198091745377 2023-01-24 03:58:35.949464: step: 38/466, loss: 0.3927045166492462 2023-01-24 03:58:36.683030: step: 40/466, loss: 0.012292815372347832 2023-01-24 03:58:37.443187: step: 42/466, loss: 0.33926093578338623 2023-01-24 03:58:38.194926: step: 44/466, loss: 0.05959390103816986 2023-01-24 03:58:38.922752: step: 46/466, loss: 0.05834142491221428 2023-01-24 03:58:39.692691: step: 48/466, loss: 0.04409831017255783 2023-01-24 03:58:40.415483: step: 50/466, loss: 0.08244270831346512 2023-01-24 03:58:41.170255: step: 52/466, loss: 0.08154886215925217 2023-01-24 03:58:42.056689: step: 54/466, loss: 0.5688467025756836 2023-01-24 03:58:42.846994: step: 56/466, loss: 0.8010183572769165 2023-01-24 03:58:43.633617: step: 58/466, loss: 0.09396448731422424 2023-01-24 03:58:44.416305: step: 60/466, loss: 0.18453781306743622 2023-01-24 03:58:45.172217: step: 62/466, loss: 0.041124697774648666 2023-01-24 03:58:45.943018: step: 64/466, loss: 0.0507088266313076 2023-01-24 03:58:46.663972: step: 66/466, loss: 0.8169896602630615 2023-01-24 03:58:47.543889: step: 68/466, loss: 0.11679229885339737 2023-01-24 03:58:48.300916: step: 70/466, loss: 0.034613557159900665 2023-01-24 03:58:49.094133: step: 72/466, loss: 0.08543278276920319 2023-01-24 03:58:49.863864: step: 74/466, loss: 0.06375060230493546 2023-01-24 03:58:50.599708: step: 76/466, loss: 0.03974442556500435 2023-01-24 03:58:51.352733: step: 78/466, loss: 0.05513717606663704 2023-01-24 03:58:52.121319: step: 80/466, loss: 0.02512964978814125 2023-01-24 03:58:52.839715: step: 82/466, loss: 0.12997297942638397 2023-01-24 03:58:53.586507: step: 84/466, loss: 0.05820440128445625 2023-01-24 03:58:54.402574: step: 86/466, loss: 0.25973352789878845 2023-01-24 03:58:55.170991: step: 88/466, loss: 0.03171452507376671 2023-01-24 03:58:55.917333: step: 90/466, loss: 0.010969765484333038 2023-01-24 03:58:56.661188: step: 92/466, loss: 0.05218341201543808 2023-01-24 03:58:57.472268: step: 94/466, loss: 0.035453230142593384 2023-01-24 03:58:58.195070: step: 96/466, loss: 0.03244630992412567 2023-01-24 03:58:58.970439: step: 98/466, loss: 0.09304963797330856 2023-01-24 03:58:59.763107: step: 100/466, loss: 0.02452549710869789 2023-01-24 03:59:00.542752: step: 102/466, loss: 0.06358002126216888 2023-01-24 03:59:01.330133: step: 104/466, loss: 0.26026400923728943 2023-01-24 03:59:02.090029: step: 106/466, loss: 0.02341284416615963 2023-01-24 03:59:02.858477: step: 108/466, loss: 0.03641442582011223 2023-01-24 03:59:03.667329: step: 110/466, loss: 0.1473744809627533 2023-01-24 03:59:04.331999: step: 112/466, loss: 0.010492063127458096 2023-01-24 03:59:05.113821: step: 114/466, loss: 0.04320019856095314 2023-01-24 03:59:05.838553: step: 116/466, loss: 0.020513758063316345 2023-01-24 03:59:06.591664: step: 118/466, loss: 0.018752820789813995 2023-01-24 03:59:07.299096: step: 120/466, loss: 0.12152481079101562 2023-01-24 03:59:08.065363: step: 122/466, loss: 0.12580151855945587 2023-01-24 03:59:08.767687: step: 124/466, loss: 0.027853872627019882 2023-01-24 03:59:09.539919: step: 126/466, loss: 0.2141602486371994 2023-01-24 03:59:10.300651: step: 128/466, loss: 0.01891392096877098 2023-01-24 03:59:11.075511: step: 130/466, loss: 0.08879508823156357 2023-01-24 03:59:11.809912: step: 132/466, loss: 0.07886034995317459 2023-01-24 03:59:12.592258: step: 134/466, loss: 0.14440208673477173 2023-01-24 03:59:13.337472: step: 136/466, loss: 0.09427177906036377 2023-01-24 03:59:14.080622: step: 138/466, loss: 0.1376834511756897 2023-01-24 03:59:14.857181: step: 140/466, loss: 0.6417616605758667 2023-01-24 03:59:15.624101: step: 142/466, loss: 0.041780468076467514 2023-01-24 03:59:16.434000: step: 144/466, loss: 0.06160823255777359 2023-01-24 03:59:17.203244: step: 146/466, loss: 0.03103504702448845 2023-01-24 03:59:17.935077: step: 148/466, loss: 0.3792994022369385 2023-01-24 03:59:18.720478: step: 150/466, loss: 0.08279043436050415 2023-01-24 03:59:19.433472: step: 152/466, loss: 0.07768604159355164 2023-01-24 03:59:20.315193: step: 154/466, loss: 0.04201593995094299 2023-01-24 03:59:21.071269: step: 156/466, loss: 0.18667390942573547 2023-01-24 03:59:21.805656: step: 158/466, loss: 0.018082771450281143 2023-01-24 03:59:22.569404: step: 160/466, loss: 0.07044660300016403 2023-01-24 03:59:23.283929: step: 162/466, loss: 0.01237710751593113 2023-01-24 03:59:24.044678: step: 164/466, loss: 0.05196648836135864 2023-01-24 03:59:24.793480: step: 166/466, loss: 0.015970459207892418 2023-01-24 03:59:25.481956: step: 168/466, loss: 0.04284393787384033 2023-01-24 03:59:26.211818: step: 170/466, loss: 0.13483668863773346 2023-01-24 03:59:26.946478: step: 172/466, loss: 0.03902854397892952 2023-01-24 03:59:27.672929: step: 174/466, loss: 0.010558566078543663 2023-01-24 03:59:28.447643: step: 176/466, loss: 0.13913175463676453 2023-01-24 03:59:29.204316: step: 178/466, loss: 0.08197152614593506 2023-01-24 03:59:29.898342: step: 180/466, loss: 0.03208223730325699 2023-01-24 03:59:30.661855: step: 182/466, loss: 0.05784508213400841 2023-01-24 03:59:31.562483: step: 184/466, loss: 0.0503309890627861 2023-01-24 03:59:32.285190: step: 186/466, loss: 0.054225169122219086 2023-01-24 03:59:33.048470: step: 188/466, loss: 0.11491405963897705 2023-01-24 03:59:33.763466: step: 190/466, loss: 0.1225811094045639 2023-01-24 03:59:34.523232: step: 192/466, loss: 0.05776900798082352 2023-01-24 03:59:35.287816: step: 194/466, loss: 0.15298278629779816 2023-01-24 03:59:36.029908: step: 196/466, loss: 0.26484397053718567 2023-01-24 03:59:36.816345: step: 198/466, loss: 0.03561859950423241 2023-01-24 03:59:37.542303: step: 200/466, loss: 0.15547730028629303 2023-01-24 03:59:38.329399: step: 202/466, loss: 0.06322099268436432 2023-01-24 03:59:39.061015: step: 204/466, loss: 0.0602981299161911 2023-01-24 03:59:39.792379: step: 206/466, loss: 0.04739827662706375 2023-01-24 03:59:40.558019: step: 208/466, loss: 0.09399860352277756 2023-01-24 03:59:41.413935: step: 210/466, loss: 0.07635460048913956 2023-01-24 03:59:42.173249: step: 212/466, loss: 0.03263647481799126 2023-01-24 03:59:42.950813: step: 214/466, loss: 0.09039748460054398 2023-01-24 03:59:43.655308: step: 216/466, loss: 0.10271228849887848 2023-01-24 03:59:44.404966: step: 218/466, loss: 0.04623044654726982 2023-01-24 03:59:45.105917: step: 220/466, loss: 0.02166702412068844 2023-01-24 03:59:45.846484: step: 222/466, loss: 0.23645983636379242 2023-01-24 03:59:46.618877: step: 224/466, loss: 0.09341173619031906 2023-01-24 03:59:47.361320: step: 226/466, loss: 0.06331127882003784 2023-01-24 03:59:48.135559: step: 228/466, loss: 0.041292134672403336 2023-01-24 03:59:48.828776: step: 230/466, loss: 0.16387499868869781 2023-01-24 03:59:49.603537: step: 232/466, loss: 0.1170310378074646 2023-01-24 03:59:50.377610: step: 234/466, loss: 0.10173127800226212 2023-01-24 03:59:51.184858: step: 236/466, loss: 0.012713445350527763 2023-01-24 03:59:51.882145: step: 238/466, loss: 0.03802645951509476 2023-01-24 03:59:52.663494: step: 240/466, loss: 0.08751117438077927 2023-01-24 03:59:53.380709: step: 242/466, loss: 0.055097710341215134 2023-01-24 03:59:54.087871: step: 244/466, loss: 0.09168267250061035 2023-01-24 03:59:54.805898: step: 246/466, loss: 0.10639012604951859 2023-01-24 03:59:55.578380: step: 248/466, loss: 0.05749332159757614 2023-01-24 03:59:56.281960: step: 250/466, loss: 0.05517353489995003 2023-01-24 03:59:57.029794: step: 252/466, loss: 0.02633926272392273 2023-01-24 03:59:57.727962: step: 254/466, loss: 0.13642267882823944 2023-01-24 03:59:58.474214: step: 256/466, loss: 0.043375641107559204 2023-01-24 03:59:59.222809: step: 258/466, loss: 0.02250557206571102 2023-01-24 04:00:00.000111: step: 260/466, loss: 0.05615265294909477 2023-01-24 04:00:00.780167: step: 262/466, loss: 0.1533478945493698 2023-01-24 04:00:01.568377: step: 264/466, loss: 0.023662343621253967 2023-01-24 04:00:02.458926: step: 266/466, loss: 0.10575315356254578 2023-01-24 04:00:03.276995: step: 268/466, loss: 0.07846268266439438 2023-01-24 04:00:04.048504: step: 270/466, loss: 0.06294034421443939 2023-01-24 04:00:04.837968: step: 272/466, loss: 0.06830111891031265 2023-01-24 04:00:05.602686: step: 274/466, loss: 0.10785622894763947 2023-01-24 04:00:06.300808: step: 276/466, loss: 0.045360252261161804 2023-01-24 04:00:07.039217: step: 278/466, loss: 0.06354059278964996 2023-01-24 04:00:07.835566: step: 280/466, loss: 0.023901205509901047 2023-01-24 04:00:08.616360: step: 282/466, loss: 0.0633501261472702 2023-01-24 04:00:09.333794: step: 284/466, loss: 0.021562637761235237 2023-01-24 04:00:10.188558: step: 286/466, loss: 0.037073515355587006 2023-01-24 04:00:11.043931: step: 288/466, loss: 0.15239278972148895 2023-01-24 04:00:11.833018: step: 290/466, loss: 0.14575928449630737 2023-01-24 04:00:12.662845: step: 292/466, loss: 0.05778518319129944 2023-01-24 04:00:13.453960: step: 294/466, loss: 0.05821090191602707 2023-01-24 04:00:14.259649: step: 296/466, loss: 0.39319416880607605 2023-01-24 04:00:15.066819: step: 298/466, loss: 0.0335598960518837 2023-01-24 04:00:15.807873: step: 300/466, loss: 0.10477923601865768 2023-01-24 04:00:16.635974: step: 302/466, loss: 0.018370570614933968 2023-01-24 04:00:17.501231: step: 304/466, loss: 0.2609383463859558 2023-01-24 04:00:18.260821: step: 306/466, loss: 0.03143971413373947 2023-01-24 04:00:19.046711: step: 308/466, loss: 0.08613347262144089 2023-01-24 04:00:19.777338: step: 310/466, loss: 0.029043348506093025 2023-01-24 04:00:20.489912: step: 312/466, loss: 0.09032338112592697 2023-01-24 04:00:21.263543: step: 314/466, loss: 0.03294346109032631 2023-01-24 04:00:22.034584: step: 316/466, loss: 0.04086478054523468 2023-01-24 04:00:22.796535: step: 318/466, loss: 0.09302819520235062 2023-01-24 04:00:23.571529: step: 320/466, loss: 0.1359679102897644 2023-01-24 04:00:24.374271: step: 322/466, loss: 0.05803457275032997 2023-01-24 04:00:25.212163: step: 324/466, loss: 0.08316502720117569 2023-01-24 04:00:25.960419: step: 326/466, loss: 0.025853624567389488 2023-01-24 04:00:26.647468: step: 328/466, loss: 0.10935669392347336 2023-01-24 04:00:27.368914: step: 330/466, loss: 0.10221309214830399 2023-01-24 04:00:28.141171: step: 332/466, loss: 0.023362543433904648 2023-01-24 04:00:28.996107: step: 334/466, loss: 0.10492447018623352 2023-01-24 04:00:29.741254: step: 336/466, loss: 0.012983668595552444 2023-01-24 04:00:30.493623: step: 338/466, loss: 0.06686433404684067 2023-01-24 04:00:31.247892: step: 340/466, loss: 0.6464180946350098 2023-01-24 04:00:31.999565: step: 342/466, loss: 0.14225780963897705 2023-01-24 04:00:32.747100: step: 344/466, loss: 0.02568856254220009 2023-01-24 04:00:33.523366: step: 346/466, loss: 0.11996930837631226 2023-01-24 04:00:34.257093: step: 348/466, loss: 0.07267715036869049 2023-01-24 04:00:34.966662: step: 350/466, loss: 0.05870138481259346 2023-01-24 04:00:35.708876: step: 352/466, loss: 0.21511659026145935 2023-01-24 04:00:36.410219: step: 354/466, loss: 0.22571636736392975 2023-01-24 04:00:37.086198: step: 356/466, loss: 0.0684482753276825 2023-01-24 04:00:37.852239: step: 358/466, loss: 0.07619721442461014 2023-01-24 04:00:38.564613: step: 360/466, loss: 0.07417423278093338 2023-01-24 04:00:39.267830: step: 362/466, loss: 0.33287811279296875 2023-01-24 04:00:40.025349: step: 364/466, loss: 0.044003941118717194 2023-01-24 04:00:40.729400: step: 366/466, loss: 0.015192612074315548 2023-01-24 04:00:41.579702: step: 368/466, loss: 0.3871832489967346 2023-01-24 04:00:42.346555: step: 370/466, loss: 0.03940851613879204 2023-01-24 04:00:43.111831: step: 372/466, loss: 0.44761019945144653 2023-01-24 04:00:43.845389: step: 374/466, loss: 0.09093191474676132 2023-01-24 04:00:44.509335: step: 376/466, loss: 0.2419499009847641 2023-01-24 04:00:45.262358: step: 378/466, loss: 0.09732669591903687 2023-01-24 04:00:46.071064: step: 380/466, loss: 0.1610875129699707 2023-01-24 04:00:46.868197: step: 382/466, loss: 0.048162516206502914 2023-01-24 04:00:47.662469: step: 384/466, loss: 0.05992291122674942 2023-01-24 04:00:48.407348: step: 386/466, loss: 0.0989617109298706 2023-01-24 04:00:49.134076: step: 388/466, loss: 0.05073726549744606 2023-01-24 04:00:50.095322: step: 390/466, loss: 0.260887086391449 2023-01-24 04:00:50.868758: step: 392/466, loss: 0.06377539038658142 2023-01-24 04:00:51.659990: step: 394/466, loss: 0.0285948496311903 2023-01-24 04:00:52.480537: step: 396/466, loss: 0.3953843116760254 2023-01-24 04:00:53.201646: step: 398/466, loss: 0.015707774087786674 2023-01-24 04:00:54.056499: step: 400/466, loss: 0.00765496538951993 2023-01-24 04:00:54.771124: step: 402/466, loss: 0.023716503754258156 2023-01-24 04:00:55.518773: step: 404/466, loss: 0.08415870368480682 2023-01-24 04:00:56.298707: step: 406/466, loss: 0.01940714195370674 2023-01-24 04:00:57.069057: step: 408/466, loss: 0.014717105776071548 2023-01-24 04:00:57.884032: step: 410/466, loss: 0.44251349568367004 2023-01-24 04:00:58.764830: step: 412/466, loss: 0.03738182783126831 2023-01-24 04:00:59.543611: step: 414/466, loss: 0.0421098917722702 2023-01-24 04:01:00.428974: step: 416/466, loss: 0.11240135133266449 2023-01-24 04:01:01.236708: step: 418/466, loss: 0.9660542607307434 2023-01-24 04:01:02.053868: step: 420/466, loss: 0.06278149783611298 2023-01-24 04:01:02.831292: step: 422/466, loss: 0.04971605911850929 2023-01-24 04:01:03.601030: step: 424/466, loss: 1.2707209587097168 2023-01-24 04:01:04.367657: step: 426/466, loss: 0.1101389154791832 2023-01-24 04:01:05.103074: step: 428/466, loss: 0.02900017239153385 2023-01-24 04:01:05.880626: step: 430/466, loss: 0.10317675769329071 2023-01-24 04:01:06.645040: step: 432/466, loss: 0.07279238849878311 2023-01-24 04:01:07.443916: step: 434/466, loss: 0.07171858102083206 2023-01-24 04:01:08.266664: step: 436/466, loss: 0.13305605947971344 2023-01-24 04:01:09.040952: step: 438/466, loss: 0.10145549476146698 2023-01-24 04:01:09.979160: step: 440/466, loss: 0.11025592684745789 2023-01-24 04:01:10.724787: step: 442/466, loss: 0.03324136510491371 2023-01-24 04:01:11.478251: step: 444/466, loss: 0.01895085908472538 2023-01-24 04:01:12.176331: step: 446/466, loss: 0.06639997661113739 2023-01-24 04:01:12.908144: step: 448/466, loss: 0.0384809672832489 2023-01-24 04:01:13.668911: step: 450/466, loss: 0.0413355752825737 2023-01-24 04:01:14.409444: step: 452/466, loss: 0.04279356077313423 2023-01-24 04:01:15.170322: step: 454/466, loss: 0.03977316617965698 2023-01-24 04:01:15.919288: step: 456/466, loss: 0.05302877724170685 2023-01-24 04:01:16.732922: step: 458/466, loss: 0.01950419880449772 2023-01-24 04:01:17.453409: step: 460/466, loss: 0.10128819197416306 2023-01-24 04:01:18.217456: step: 462/466, loss: 0.12025299668312073 2023-01-24 04:01:18.994735: step: 464/466, loss: 0.1661524772644043 2023-01-24 04:01:19.745003: step: 466/466, loss: 0.17702050507068634 2023-01-24 04:01:20.528466: step: 468/466, loss: 0.09875538945198059 2023-01-24 04:01:21.236768: step: 470/466, loss: 0.19672173261642456 2023-01-24 04:01:21.939199: step: 472/466, loss: 0.07020531594753265 2023-01-24 04:01:22.675214: step: 474/466, loss: 0.4568902850151062 2023-01-24 04:01:23.432107: step: 476/466, loss: 0.17930543422698975 2023-01-24 04:01:24.188235: step: 478/466, loss: 0.08048145473003387 2023-01-24 04:01:24.937702: step: 480/466, loss: 0.04901100695133209 2023-01-24 04:01:25.685571: step: 482/466, loss: 0.17724567651748657 2023-01-24 04:01:26.409449: step: 484/466, loss: 0.04301964491605759 2023-01-24 04:01:27.179351: step: 486/466, loss: 0.23543091118335724 2023-01-24 04:01:27.922286: step: 488/466, loss: 0.06778578460216522 2023-01-24 04:01:28.641302: step: 490/466, loss: 1.0197558403015137 2023-01-24 04:01:29.393983: step: 492/466, loss: 0.06885930895805359 2023-01-24 04:01:30.229533: step: 494/466, loss: 0.14625200629234314 2023-01-24 04:01:31.045913: step: 496/466, loss: 0.04098017141222954 2023-01-24 04:01:31.867958: step: 498/466, loss: 0.42735791206359863 2023-01-24 04:01:32.623972: step: 500/466, loss: 0.08337932080030441 2023-01-24 04:01:33.341092: step: 502/466, loss: 0.11113856732845306 2023-01-24 04:01:34.025203: step: 504/466, loss: 0.07578197866678238 2023-01-24 04:01:34.759160: step: 506/466, loss: 0.04823679476976395 2023-01-24 04:01:35.563281: step: 508/466, loss: 0.6794129610061646 2023-01-24 04:01:36.296707: step: 510/466, loss: 5.463428020477295 2023-01-24 04:01:37.082871: step: 512/466, loss: 0.019826870411634445 2023-01-24 04:01:37.846737: step: 514/466, loss: 0.298760324716568 2023-01-24 04:01:38.611333: step: 516/466, loss: 0.07626804709434509 2023-01-24 04:01:39.324173: step: 518/466, loss: 0.016729634255170822 2023-01-24 04:01:40.124370: step: 520/466, loss: 0.12583647668361664 2023-01-24 04:01:40.883011: step: 522/466, loss: 0.056236542761325836 2023-01-24 04:01:41.805609: step: 524/466, loss: 0.09457962214946747 2023-01-24 04:01:42.609029: step: 526/466, loss: 0.06622578203678131 2023-01-24 04:01:43.431724: step: 528/466, loss: 0.2225954830646515 2023-01-24 04:01:44.233686: step: 530/466, loss: 0.04802557826042175 2023-01-24 04:01:44.958628: step: 532/466, loss: 0.026738611981272697 2023-01-24 04:01:45.830964: step: 534/466, loss: 0.07819850742816925 2023-01-24 04:01:46.700063: step: 536/466, loss: 0.22115235030651093 2023-01-24 04:01:47.493517: step: 538/466, loss: 0.10885797441005707 2023-01-24 04:01:48.183309: step: 540/466, loss: 0.038471028208732605 2023-01-24 04:01:48.893468: step: 542/466, loss: 0.02333487570285797 2023-01-24 04:01:49.668937: step: 544/466, loss: 0.07428286224603653 2023-01-24 04:01:50.395666: step: 546/466, loss: 0.04125874862074852 2023-01-24 04:01:51.209278: step: 548/466, loss: 0.04482059180736542 2023-01-24 04:01:51.962305: step: 550/466, loss: 0.024111980572342873 2023-01-24 04:01:52.765897: step: 552/466, loss: 0.24125415086746216 2023-01-24 04:01:53.576123: step: 554/466, loss: 0.19689196348190308 2023-01-24 04:01:54.437593: step: 556/466, loss: 0.1388603299856186 2023-01-24 04:01:55.211609: step: 558/466, loss: 0.02577141858637333 2023-01-24 04:01:55.939928: step: 560/466, loss: 0.09173966199159622 2023-01-24 04:01:56.636019: step: 562/466, loss: 0.0680513009428978 2023-01-24 04:01:57.399740: step: 564/466, loss: 0.057585883885622025 2023-01-24 04:01:58.134832: step: 566/466, loss: 0.011553946882486343 2023-01-24 04:01:58.958094: step: 568/466, loss: 0.059238385409116745 2023-01-24 04:01:59.643810: step: 570/466, loss: 0.07379874587059021 2023-01-24 04:02:00.477352: step: 572/466, loss: 0.012319848872721195 2023-01-24 04:02:01.275306: step: 574/466, loss: 0.043450977653265 2023-01-24 04:02:02.359011: step: 576/466, loss: 0.13805872201919556 2023-01-24 04:02:03.079142: step: 578/466, loss: 0.022836795076727867 2023-01-24 04:02:03.765213: step: 580/466, loss: 0.07715574651956558 2023-01-24 04:02:04.542593: step: 582/466, loss: 0.12319004535675049 2023-01-24 04:02:05.379415: step: 584/466, loss: 0.10562030225992203 2023-01-24 04:02:06.227199: step: 586/466, loss: 0.06464926153421402 2023-01-24 04:02:06.981641: step: 588/466, loss: 0.06526906043291092 2023-01-24 04:02:07.790335: step: 590/466, loss: 0.021421361714601517 2023-01-24 04:02:08.656124: step: 592/466, loss: 0.03983045369386673 2023-01-24 04:02:09.503313: step: 594/466, loss: 0.10576335340738297 2023-01-24 04:02:10.301293: step: 596/466, loss: 6.454718589782715 2023-01-24 04:02:11.071030: step: 598/466, loss: 0.08060228824615479 2023-01-24 04:02:11.838001: step: 600/466, loss: 0.28485438227653503 2023-01-24 04:02:12.590698: step: 602/466, loss: 0.04787430539727211 2023-01-24 04:02:13.339828: step: 604/466, loss: 0.06249743700027466 2023-01-24 04:02:14.100600: step: 606/466, loss: 0.28890371322631836 2023-01-24 04:02:14.906969: step: 608/466, loss: 0.06000453978776932 2023-01-24 04:02:15.622984: step: 610/466, loss: 0.048165880143642426 2023-01-24 04:02:16.431423: step: 612/466, loss: 0.048403237015008926 2023-01-24 04:02:17.244437: step: 614/466, loss: 0.021623866632580757 2023-01-24 04:02:18.046842: step: 616/466, loss: 0.07675395905971527 2023-01-24 04:02:18.760500: step: 618/466, loss: 0.04885542392730713 2023-01-24 04:02:19.547819: step: 620/466, loss: 0.10137677192687988 2023-01-24 04:02:20.309873: step: 622/466, loss: 0.10909571498632431 2023-01-24 04:02:21.029643: step: 624/466, loss: 0.05348493903875351 2023-01-24 04:02:21.838077: step: 626/466, loss: 0.05490071326494217 2023-01-24 04:02:22.640578: step: 628/466, loss: 0.2063806653022766 2023-01-24 04:02:23.454567: step: 630/466, loss: 0.11666103452444077 2023-01-24 04:02:24.280319: step: 632/466, loss: 0.08298062533140182 2023-01-24 04:02:25.125373: step: 634/466, loss: 0.1579156070947647 2023-01-24 04:02:25.837526: step: 636/466, loss: 0.00876704789698124 2023-01-24 04:02:26.562413: step: 638/466, loss: 0.1683746874332428 2023-01-24 04:02:27.344321: step: 640/466, loss: 0.5691487193107605 2023-01-24 04:02:28.040878: step: 642/466, loss: 0.08855330944061279 2023-01-24 04:02:28.812286: step: 644/466, loss: 0.039370011538267136 2023-01-24 04:02:29.630980: step: 646/466, loss: 0.2885306179523468 2023-01-24 04:02:30.456514: step: 648/466, loss: 0.0817800760269165 2023-01-24 04:02:31.252797: step: 650/466, loss: 0.15116387605667114 2023-01-24 04:02:31.990500: step: 652/466, loss: 0.3503814935684204 2023-01-24 04:02:32.777818: step: 654/466, loss: 0.3401745855808258 2023-01-24 04:02:33.498403: step: 656/466, loss: 0.1454222947359085 2023-01-24 04:02:34.229101: step: 658/466, loss: 0.08071214705705643 2023-01-24 04:02:35.020395: step: 660/466, loss: 0.12128698825836182 2023-01-24 04:02:35.846716: step: 662/466, loss: 0.07182589918375015 2023-01-24 04:02:36.612487: step: 664/466, loss: 0.10454913228750229 2023-01-24 04:02:37.430713: step: 666/466, loss: 0.03323966637253761 2023-01-24 04:02:38.250699: step: 668/466, loss: 0.2441716492176056 2023-01-24 04:02:39.010357: step: 670/466, loss: 0.10043670982122421 2023-01-24 04:02:39.727664: step: 672/466, loss: 0.05408914014697075 2023-01-24 04:02:40.520568: step: 674/466, loss: 0.06823401153087616 2023-01-24 04:02:41.258275: step: 676/466, loss: 0.0523863360285759 2023-01-24 04:02:42.052336: step: 678/466, loss: 0.26087960600852966 2023-01-24 04:02:42.797153: step: 680/466, loss: 0.033554527908563614 2023-01-24 04:02:43.519236: step: 682/466, loss: 0.22630837559700012 2023-01-24 04:02:44.332596: step: 684/466, loss: 0.4421272575855255 2023-01-24 04:02:45.048304: step: 686/466, loss: 0.910071074962616 2023-01-24 04:02:45.825770: step: 688/466, loss: 0.015432994812726974 2023-01-24 04:02:46.669700: step: 690/466, loss: 0.1441815048456192 2023-01-24 04:02:47.347738: step: 692/466, loss: 0.2625843584537506 2023-01-24 04:02:48.170856: step: 694/466, loss: 0.10949409753084183 2023-01-24 04:02:49.001103: step: 696/466, loss: 1.3729277849197388 2023-01-24 04:02:49.796908: step: 698/466, loss: 0.07162289321422577 2023-01-24 04:02:50.670481: step: 700/466, loss: 0.049353040754795074 2023-01-24 04:02:51.450924: step: 702/466, loss: 0.08966630697250366 2023-01-24 04:02:52.183647: step: 704/466, loss: 0.015086804516613483 2023-01-24 04:02:52.983760: step: 706/466, loss: 0.10388107597827911 2023-01-24 04:02:53.739148: step: 708/466, loss: 0.14753474295139313 2023-01-24 04:02:54.587297: step: 710/466, loss: 0.10368377715349197 2023-01-24 04:02:55.282884: step: 712/466, loss: 0.02157321386039257 2023-01-24 04:02:56.018369: step: 714/466, loss: 0.08537424355745316 2023-01-24 04:02:56.825132: step: 716/466, loss: 0.03927518427371979 2023-01-24 04:02:57.597048: step: 718/466, loss: 0.1469808667898178 2023-01-24 04:02:58.401799: step: 720/466, loss: 0.13254177570343018 2023-01-24 04:02:59.102810: step: 722/466, loss: 0.019300326704978943 2023-01-24 04:02:59.867145: step: 724/466, loss: 0.09577452391386032 2023-01-24 04:03:00.668564: step: 726/466, loss: 0.09022750705480576 2023-01-24 04:03:01.471235: step: 728/466, loss: 0.05232621356844902 2023-01-24 04:03:02.254290: step: 730/466, loss: 0.18306727707386017 2023-01-24 04:03:03.003543: step: 732/466, loss: 0.0629793256521225 2023-01-24 04:03:03.831088: step: 734/466, loss: 0.20556853711605072 2023-01-24 04:03:04.507526: step: 736/466, loss: 0.022572429850697517 2023-01-24 04:03:05.252375: step: 738/466, loss: 0.1834261119365692 2023-01-24 04:03:06.019699: step: 740/466, loss: 0.6015645265579224 2023-01-24 04:03:06.836209: step: 742/466, loss: 0.04095159471035004 2023-01-24 04:03:07.577133: step: 744/466, loss: 0.07869676500558853 2023-01-24 04:03:08.268143: step: 746/466, loss: 0.0747077688574791 2023-01-24 04:03:08.983746: step: 748/466, loss: 0.21149002015590668 2023-01-24 04:03:09.766295: step: 750/466, loss: 0.05114706978201866 2023-01-24 04:03:10.511719: step: 752/466, loss: 0.10323463380336761 2023-01-24 04:03:11.308466: step: 754/466, loss: 0.24099045991897583 2023-01-24 04:03:12.097499: step: 756/466, loss: 0.08624985814094543 2023-01-24 04:03:12.878640: step: 758/466, loss: 0.10293315351009369 2023-01-24 04:03:13.617288: step: 760/466, loss: 0.010724215768277645 2023-01-24 04:03:14.405463: step: 762/466, loss: 0.242195725440979 2023-01-24 04:03:15.125332: step: 764/466, loss: 0.14631156623363495 2023-01-24 04:03:16.018506: step: 766/466, loss: 0.15860383212566376 2023-01-24 04:03:16.705623: step: 768/466, loss: 0.14705486595630646 2023-01-24 04:03:17.478962: step: 770/466, loss: 0.017465418204665184 2023-01-24 04:03:18.259842: step: 772/466, loss: 0.08447589725255966 2023-01-24 04:03:19.025962: step: 774/466, loss: 0.8887242078781128 2023-01-24 04:03:19.752363: step: 776/466, loss: 0.5055169463157654 2023-01-24 04:03:20.487680: step: 778/466, loss: 0.0075454795733094215 2023-01-24 04:03:21.176232: step: 780/466, loss: 0.07857229560613632 2023-01-24 04:03:21.901593: step: 782/466, loss: 0.07658013701438904 2023-01-24 04:03:22.561811: step: 784/466, loss: 0.14287102222442627 2023-01-24 04:03:23.391032: step: 786/466, loss: 0.13052555918693542 2023-01-24 04:03:24.134359: step: 788/466, loss: 0.5707105994224548 2023-01-24 04:03:24.882300: step: 790/466, loss: 0.0379331149160862 2023-01-24 04:03:25.579851: step: 792/466, loss: 0.02855241671204567 2023-01-24 04:03:26.355506: step: 794/466, loss: 0.08172359317541122 2023-01-24 04:03:27.023292: step: 796/466, loss: 0.00676638213917613 2023-01-24 04:03:27.799989: step: 798/466, loss: 0.18921613693237305 2023-01-24 04:03:28.593760: step: 800/466, loss: 0.10927959531545639 2023-01-24 04:03:29.323358: step: 802/466, loss: 0.3304141163825989 2023-01-24 04:03:30.105319: step: 804/466, loss: 0.16432689130306244 2023-01-24 04:03:30.860402: step: 806/466, loss: 0.035742390900850296 2023-01-24 04:03:31.601004: step: 808/466, loss: 0.11775850504636765 2023-01-24 04:03:32.432801: step: 810/466, loss: 0.2532691955566406 2023-01-24 04:03:33.235523: step: 812/466, loss: 0.17157238721847534 2023-01-24 04:03:34.070621: step: 814/466, loss: 0.031197600066661835 2023-01-24 04:03:34.786792: step: 816/466, loss: 0.1423412412405014 2023-01-24 04:03:35.623817: step: 818/466, loss: 0.036518923938274384 2023-01-24 04:03:36.345266: step: 820/466, loss: 0.023789752274751663 2023-01-24 04:03:37.145068: step: 822/466, loss: 0.057291388511657715 2023-01-24 04:03:37.887709: step: 824/466, loss: 0.09669878333806992 2023-01-24 04:03:38.714258: step: 826/466, loss: 0.529859185218811 2023-01-24 04:03:39.478891: step: 828/466, loss: 0.051852673292160034 2023-01-24 04:03:40.206623: step: 830/466, loss: 0.06867492944002151 2023-01-24 04:03:40.923037: step: 832/466, loss: 0.6612184047698975 2023-01-24 04:03:41.669654: step: 834/466, loss: 0.12671461701393127 2023-01-24 04:03:42.373437: step: 836/466, loss: 0.11311160027980804 2023-01-24 04:03:43.200540: step: 838/466, loss: 0.7831020951271057 2023-01-24 04:03:43.881772: step: 840/466, loss: 0.04771916940808296 2023-01-24 04:03:44.687336: step: 842/466, loss: 0.03580975905060768 2023-01-24 04:03:45.528075: step: 844/466, loss: 0.07718465477228165 2023-01-24 04:03:46.335896: step: 846/466, loss: 0.10081658512353897 2023-01-24 04:03:47.098676: step: 848/466, loss: 0.25340622663497925 2023-01-24 04:03:47.876856: step: 850/466, loss: 0.0629146546125412 2023-01-24 04:03:48.621535: step: 852/466, loss: 0.13862603902816772 2023-01-24 04:03:49.392131: step: 854/466, loss: 0.05720217898488045 2023-01-24 04:03:50.105986: step: 856/466, loss: 0.03626011312007904 2023-01-24 04:03:50.907149: step: 858/466, loss: 0.059770986437797546 2023-01-24 04:03:51.614761: step: 860/466, loss: 0.03019898012280464 2023-01-24 04:03:52.365770: step: 862/466, loss: 0.038489192724227905 2023-01-24 04:03:53.076104: step: 864/466, loss: 0.3332316279411316 2023-01-24 04:03:53.888436: step: 866/466, loss: 0.09183098375797272 2023-01-24 04:03:54.583893: step: 868/466, loss: 0.021056165918707848 2023-01-24 04:03:55.361901: step: 870/466, loss: 0.018868671730160713 2023-01-24 04:03:56.193128: step: 872/466, loss: 0.08701654523611069 2023-01-24 04:03:56.902782: step: 874/466, loss: 0.027373263612389565 2023-01-24 04:03:57.637604: step: 876/466, loss: 0.06184190884232521 2023-01-24 04:03:58.513019: step: 878/466, loss: 0.17068737745285034 2023-01-24 04:03:59.237301: step: 880/466, loss: 0.8000120520591736 2023-01-24 04:04:00.128903: step: 882/466, loss: 0.12541207671165466 2023-01-24 04:04:00.867400: step: 884/466, loss: 0.09928561747074127 2023-01-24 04:04:01.660230: step: 886/466, loss: 0.04651761054992676 2023-01-24 04:04:02.322227: step: 888/466, loss: 0.16089124977588654 2023-01-24 04:04:03.186111: step: 890/466, loss: 0.07804146409034729 2023-01-24 04:04:03.918653: step: 892/466, loss: 0.1407633274793625 2023-01-24 04:04:04.626899: step: 894/466, loss: 0.15471599996089935 2023-01-24 04:04:05.329522: step: 896/466, loss: 0.25648272037506104 2023-01-24 04:04:06.050399: step: 898/466, loss: 0.033358488231897354 2023-01-24 04:04:06.772689: step: 900/466, loss: 0.05572345107793808 2023-01-24 04:04:07.571188: step: 902/466, loss: 0.05574074015021324 2023-01-24 04:04:08.313914: step: 904/466, loss: 0.2865733504295349 2023-01-24 04:04:09.080302: step: 906/466, loss: 0.09892137348651886 2023-01-24 04:04:09.845362: step: 908/466, loss: 0.021022509783506393 2023-01-24 04:04:10.633141: step: 910/466, loss: 0.17897191643714905 2023-01-24 04:04:11.380945: step: 912/466, loss: 0.08533147722482681 2023-01-24 04:04:12.094036: step: 914/466, loss: 0.057386137545108795 2023-01-24 04:04:12.900180: step: 916/466, loss: 0.11319765448570251 2023-01-24 04:04:13.647275: step: 918/466, loss: 0.1495431363582611 2023-01-24 04:04:14.365566: step: 920/466, loss: 0.974583625793457 2023-01-24 04:04:15.214086: step: 922/466, loss: 0.04371223598718643 2023-01-24 04:04:15.884890: step: 924/466, loss: 0.06663193553686142 2023-01-24 04:04:16.600458: step: 926/466, loss: 0.173104390501976 2023-01-24 04:04:17.307051: step: 928/466, loss: 0.24562832713127136 2023-01-24 04:04:18.020373: step: 930/466, loss: 0.1565488576889038 2023-01-24 04:04:18.816660: step: 932/466, loss: 0.10224548727273941 ================================================== Loss: 0.157 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33391293774319064, 'r': 0.3256759962049336, 'f1': 0.32974303554274736}, 'combined': 0.24296855250518226, 'epoch': 17} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35233050724815995, 'r': 0.2915733400537199, 'f1': 0.31908547598102677}, 'combined': 0.19612082913955792, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30201537040872684, 'r': 0.32551182237600923, 'f1': 0.3133237084788253}, 'combined': 0.23087010098439756, 'epoch': 17} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3492864066480485, 'r': 0.30176650557028106, 'f1': 0.3237922337778748}, 'combined': 0.19901376320005965, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32943822957198443, 'r': 0.32131166982922205, 'f1': 0.32532420749279545}, 'combined': 0.2397125739420598, 'epoch': 17} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3630175522743405, 'r': 0.29261414819689263, 'f1': 0.3240358016945456}, 'combined': 0.20013975987016056, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.266025641025641, 'r': 0.29642857142857143, 'f1': 0.2804054054054054}, 'combined': 0.18693693693693691, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23387096774193547, 'r': 0.31521739130434784, 'f1': 0.26851851851851855}, 'combined': 0.13425925925925927, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.06896551724137931, 'f1': 0.1081081081081081}, 'combined': 0.07207207207207206, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3663210818307906, 'r': 0.3579798048251559, 'f1': 0.36210241294214424}, 'combined': 0.2668123042731589, 'epoch': 13} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37875489782431904, 'r': 0.26988336009473124, 'f1': 0.31518228605603094}, 'combined': 0.19467141197578386, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 13} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:07:02.854625: step: 2/466, loss: 0.19651255011558533 2023-01-24 04:07:03.606572: step: 4/466, loss: 0.10556065291166306 2023-01-24 04:07:04.356187: step: 6/466, loss: 0.14724516868591309 2023-01-24 04:07:05.108810: step: 8/466, loss: 0.052163269370794296 2023-01-24 04:07:05.938375: step: 10/466, loss: 0.16769303381443024 2023-01-24 04:07:06.680803: step: 12/466, loss: 0.01563209481537342 2023-01-24 04:07:07.420809: step: 14/466, loss: 0.10199791193008423 2023-01-24 04:07:08.182279: step: 16/466, loss: 0.05524469166994095 2023-01-24 04:07:08.870977: step: 18/466, loss: 0.011698777787387371 2023-01-24 04:07:09.585844: step: 20/466, loss: 0.054983723908662796 2023-01-24 04:07:10.319255: step: 22/466, loss: 0.007442728150635958 2023-01-24 04:07:11.093995: step: 24/466, loss: 0.046553682535886765 2023-01-24 04:07:11.864207: step: 26/466, loss: 0.47874942421913147 2023-01-24 04:07:12.599531: step: 28/466, loss: 0.078194759786129 2023-01-24 04:07:13.344557: step: 30/466, loss: 0.011652514338493347 2023-01-24 04:07:14.088150: step: 32/466, loss: 0.09648700803518295 2023-01-24 04:07:14.817403: step: 34/466, loss: 0.10296231508255005 2023-01-24 04:07:15.543074: step: 36/466, loss: 0.11012774705886841 2023-01-24 04:07:16.270257: step: 38/466, loss: 0.02304697409272194 2023-01-24 04:07:17.057715: step: 40/466, loss: 0.1753087341785431 2023-01-24 04:07:17.805222: step: 42/466, loss: 0.039917685091495514 2023-01-24 04:07:18.565478: step: 44/466, loss: 0.13597364723682404 2023-01-24 04:07:19.277975: step: 46/466, loss: 0.08333782851696014 2023-01-24 04:07:19.992451: step: 48/466, loss: 0.07603185623884201 2023-01-24 04:07:20.692202: step: 50/466, loss: 0.009517199359834194 2023-01-24 04:07:21.409341: step: 52/466, loss: 0.023968877270817757 2023-01-24 04:07:22.190297: step: 54/466, loss: 0.0795581191778183 2023-01-24 04:07:22.940460: step: 56/466, loss: 0.036575641483068466 2023-01-24 04:07:23.686827: step: 58/466, loss: 0.024914629757404327 2023-01-24 04:07:24.458072: step: 60/466, loss: 1.6338603496551514 2023-01-24 04:07:25.182387: step: 62/466, loss: 0.03559728339314461 2023-01-24 04:07:25.926337: step: 64/466, loss: 0.028533292934298515 2023-01-24 04:07:26.596585: step: 66/466, loss: 0.04734528437256813 2023-01-24 04:07:27.341214: step: 68/466, loss: 0.42479050159454346 2023-01-24 04:07:28.263554: step: 70/466, loss: 0.0825815424323082 2023-01-24 04:07:28.968285: step: 72/466, loss: 0.02216201275587082 2023-01-24 04:07:29.655840: step: 74/466, loss: 0.37110376358032227 2023-01-24 04:07:30.361641: step: 76/466, loss: 0.044285114854574203 2023-01-24 04:07:31.107643: step: 78/466, loss: 0.00979369506239891 2023-01-24 04:07:31.874572: step: 80/466, loss: 0.1044786274433136 2023-01-24 04:07:32.609584: step: 82/466, loss: 0.014085735194385052 2023-01-24 04:07:33.325136: step: 84/466, loss: 0.10190731287002563 2023-01-24 04:07:34.093134: step: 86/466, loss: 0.06342112272977829 2023-01-24 04:07:34.815126: step: 88/466, loss: 0.04446432739496231 2023-01-24 04:07:35.673197: step: 90/466, loss: 0.17387893795967102 2023-01-24 04:07:36.397415: step: 92/466, loss: 0.03474210202693939 2023-01-24 04:07:37.115850: step: 94/466, loss: 0.03775353729724884 2023-01-24 04:07:37.920247: step: 96/466, loss: 0.30774736404418945 2023-01-24 04:07:38.630427: step: 98/466, loss: 0.035334642976522446 2023-01-24 04:07:39.333934: step: 100/466, loss: 0.01667802408337593 2023-01-24 04:07:40.107892: step: 102/466, loss: 0.05429663509130478 2023-01-24 04:07:40.836193: step: 104/466, loss: 0.3033483922481537 2023-01-24 04:07:41.617657: step: 106/466, loss: 0.004159913398325443 2023-01-24 04:07:42.329606: step: 108/466, loss: 0.024774271994829178 2023-01-24 04:07:43.165256: step: 110/466, loss: 0.07045798003673553 2023-01-24 04:07:43.926321: step: 112/466, loss: 0.05230962857604027 2023-01-24 04:07:44.662841: step: 114/466, loss: 0.020267944782972336 2023-01-24 04:07:45.451739: step: 116/466, loss: 0.11753572523593903 2023-01-24 04:07:46.215508: step: 118/466, loss: 0.02458942122757435 2023-01-24 04:07:46.947925: step: 120/466, loss: 0.6894834637641907 2023-01-24 04:07:47.734614: step: 122/466, loss: 0.22759802639484406 2023-01-24 04:07:48.455411: step: 124/466, loss: 0.01465371623635292 2023-01-24 04:07:49.302238: step: 126/466, loss: 0.0885717123746872 2023-01-24 04:07:50.045836: step: 128/466, loss: 0.06091802567243576 2023-01-24 04:07:50.956325: step: 130/466, loss: 0.13552923500537872 2023-01-24 04:07:51.742288: step: 132/466, loss: 0.061866942793130875 2023-01-24 04:07:52.471535: step: 134/466, loss: 0.2426026165485382 2023-01-24 04:07:53.189450: step: 136/466, loss: 0.024837233126163483 2023-01-24 04:07:53.949221: step: 138/466, loss: 0.06240249425172806 2023-01-24 04:07:54.718983: step: 140/466, loss: 0.05201075226068497 2023-01-24 04:07:55.489048: step: 142/466, loss: 0.13489706814289093 2023-01-24 04:07:56.249840: step: 144/466, loss: 0.0347241647541523 2023-01-24 04:07:57.031560: step: 146/466, loss: 0.15452273190021515 2023-01-24 04:07:57.835276: step: 148/466, loss: 0.3027150630950928 2023-01-24 04:07:58.677699: step: 150/466, loss: 0.06525728106498718 2023-01-24 04:07:59.470672: step: 152/466, loss: 0.05072518065571785 2023-01-24 04:08:00.288810: step: 154/466, loss: 0.025421161204576492 2023-01-24 04:08:01.122572: step: 156/466, loss: 0.07371964305639267 2023-01-24 04:08:01.896441: step: 158/466, loss: 0.05787751078605652 2023-01-24 04:08:02.652204: step: 160/466, loss: 0.056591957807540894 2023-01-24 04:08:03.455495: step: 162/466, loss: 0.06800525635480881 2023-01-24 04:08:04.178685: step: 164/466, loss: 0.24484489858150482 2023-01-24 04:08:04.982206: step: 166/466, loss: 0.10593988746404648 2023-01-24 04:08:05.790315: step: 168/466, loss: 0.03584035485982895 2023-01-24 04:08:06.513798: step: 170/466, loss: 0.04015500470995903 2023-01-24 04:08:07.357067: step: 172/466, loss: 0.16253499686717987 2023-01-24 04:08:08.102923: step: 174/466, loss: 0.04112454131245613 2023-01-24 04:08:08.903076: step: 176/466, loss: 0.04308474063873291 2023-01-24 04:08:09.787657: step: 178/466, loss: 0.28503429889678955 2023-01-24 04:08:10.594320: step: 180/466, loss: 0.188304603099823 2023-01-24 04:08:11.346405: step: 182/466, loss: 0.03568580374121666 2023-01-24 04:08:12.072627: step: 184/466, loss: 0.08562834560871124 2023-01-24 04:08:12.760357: step: 186/466, loss: 0.029489878565073013 2023-01-24 04:08:13.561809: step: 188/466, loss: 0.041018228977918625 2023-01-24 04:08:14.450582: step: 190/466, loss: 0.4618982672691345 2023-01-24 04:08:15.154115: step: 192/466, loss: 0.11267931014299393 2023-01-24 04:08:15.954988: step: 194/466, loss: 0.10493389517068863 2023-01-24 04:08:16.612685: step: 196/466, loss: 0.0027219559997320175 2023-01-24 04:08:17.394802: step: 198/466, loss: 0.03727314621210098 2023-01-24 04:08:18.163903: step: 200/466, loss: 0.14151844382286072 2023-01-24 04:08:18.870689: step: 202/466, loss: 0.03493554890155792 2023-01-24 04:08:19.609840: step: 204/466, loss: 0.02788936160504818 2023-01-24 04:08:20.358516: step: 206/466, loss: 0.01529090479016304 2023-01-24 04:08:21.151386: step: 208/466, loss: 0.07066036015748978 2023-01-24 04:08:21.932722: step: 210/466, loss: 0.04624416306614876 2023-01-24 04:08:22.669873: step: 212/466, loss: 0.0838155597448349 2023-01-24 04:08:23.387010: step: 214/466, loss: 0.05787842348217964 2023-01-24 04:08:24.180542: step: 216/466, loss: 0.3357498049736023 2023-01-24 04:08:25.021695: step: 218/466, loss: 0.15849146246910095 2023-01-24 04:08:25.891464: step: 220/466, loss: 0.08705505728721619 2023-01-24 04:08:26.585711: step: 222/466, loss: 0.0284771379083395 2023-01-24 04:08:27.335010: step: 224/466, loss: 0.04284011945128441 2023-01-24 04:08:28.124661: step: 226/466, loss: 0.06858966499567032 2023-01-24 04:08:28.843534: step: 228/466, loss: 0.17510446906089783 2023-01-24 04:08:29.521860: step: 230/466, loss: 0.08641013503074646 2023-01-24 04:08:30.389806: step: 232/466, loss: 0.3538026809692383 2023-01-24 04:08:31.126781: step: 234/466, loss: 0.02047703228890896 2023-01-24 04:08:31.889591: step: 236/466, loss: 0.1350494623184204 2023-01-24 04:08:32.628512: step: 238/466, loss: 0.012271600775420666 2023-01-24 04:08:33.429042: step: 240/466, loss: 0.044202882796525955 2023-01-24 04:08:34.388079: step: 242/466, loss: 0.07256503403186798 2023-01-24 04:08:35.141245: step: 244/466, loss: 0.04749925807118416 2023-01-24 04:08:35.890129: step: 246/466, loss: 0.1495182067155838 2023-01-24 04:08:36.684882: step: 248/466, loss: 0.06420061737298965 2023-01-24 04:08:37.449822: step: 250/466, loss: 0.2077827900648117 2023-01-24 04:08:38.170578: step: 252/466, loss: 0.051569852977991104 2023-01-24 04:08:38.972169: step: 254/466, loss: 0.05237215757369995 2023-01-24 04:08:39.742721: step: 256/466, loss: 0.07263194024562836 2023-01-24 04:08:40.464899: step: 258/466, loss: 0.11570609360933304 2023-01-24 04:08:41.244231: step: 260/466, loss: 0.053916919976472855 2023-01-24 04:08:42.092609: step: 262/466, loss: 0.06967765837907791 2023-01-24 04:08:42.797200: step: 264/466, loss: 0.053337082266807556 2023-01-24 04:08:43.535670: step: 266/466, loss: 0.1144920140504837 2023-01-24 04:08:44.280113: step: 268/466, loss: 0.036679599434137344 2023-01-24 04:08:45.037495: step: 270/466, loss: 0.043247222900390625 2023-01-24 04:08:45.761981: step: 272/466, loss: 0.12316104024648666 2023-01-24 04:08:46.511101: step: 274/466, loss: 0.03358753025531769 2023-01-24 04:08:47.264566: step: 276/466, loss: 1.4639196395874023 2023-01-24 04:08:48.012501: step: 278/466, loss: 0.1559915691614151 2023-01-24 04:08:48.779160: step: 280/466, loss: 0.12618787586688995 2023-01-24 04:08:49.580923: step: 282/466, loss: 0.042094483971595764 2023-01-24 04:08:50.392307: step: 284/466, loss: 0.1456632912158966 2023-01-24 04:08:51.106418: step: 286/466, loss: 0.06955816596746445 2023-01-24 04:08:51.994209: step: 288/466, loss: 0.18508176505565643 2023-01-24 04:08:52.758449: step: 290/466, loss: 0.38990962505340576 2023-01-24 04:08:53.569189: step: 292/466, loss: 0.15841281414031982 2023-01-24 04:08:54.308522: step: 294/466, loss: 5.494202613830566 2023-01-24 04:08:55.042225: step: 296/466, loss: 0.04548550769686699 2023-01-24 04:08:55.761650: step: 298/466, loss: 0.08657371997833252 2023-01-24 04:08:56.553211: step: 300/466, loss: 0.09277087450027466 2023-01-24 04:08:57.326021: step: 302/466, loss: 0.06852469593286514 2023-01-24 04:08:58.086518: step: 304/466, loss: 0.09778723120689392 2023-01-24 04:08:58.867889: step: 306/466, loss: 0.1429487019777298 2023-01-24 04:08:59.665148: step: 308/466, loss: 0.20687298476696014 2023-01-24 04:09:00.543280: step: 310/466, loss: 0.11959525942802429 2023-01-24 04:09:01.299054: step: 312/466, loss: 0.10952453315258026 2023-01-24 04:09:02.089570: step: 314/466, loss: 0.02904977649450302 2023-01-24 04:09:02.920694: step: 316/466, loss: 0.020986704155802727 2023-01-24 04:09:03.623932: step: 318/466, loss: 0.11110341548919678 2023-01-24 04:09:04.460049: step: 320/466, loss: 0.059407394379377365 2023-01-24 04:09:05.248341: step: 322/466, loss: 0.06788373738527298 2023-01-24 04:09:06.087272: step: 324/466, loss: 0.16677653789520264 2023-01-24 04:09:06.820562: step: 326/466, loss: 0.09506413340568542 2023-01-24 04:09:07.606762: step: 328/466, loss: 0.0655602365732193 2023-01-24 04:09:08.351880: step: 330/466, loss: 0.04528603330254555 2023-01-24 04:09:09.120828: step: 332/466, loss: 0.05972367525100708 2023-01-24 04:09:09.956150: step: 334/466, loss: 0.10645157843828201 2023-01-24 04:09:10.722640: step: 336/466, loss: 0.09351608157157898 2023-01-24 04:09:11.493114: step: 338/466, loss: 0.13656413555145264 2023-01-24 04:09:12.339492: step: 340/466, loss: 0.3023928701877594 2023-01-24 04:09:13.133350: step: 342/466, loss: 0.11083973199129105 2023-01-24 04:09:13.874141: step: 344/466, loss: 0.04845494404435158 2023-01-24 04:09:14.581389: step: 346/466, loss: 0.08185308426618576 2023-01-24 04:09:15.347444: step: 348/466, loss: 0.01976921781897545 2023-01-24 04:09:16.119668: step: 350/466, loss: 0.026596803218126297 2023-01-24 04:09:16.994788: step: 352/466, loss: 0.025404812768101692 2023-01-24 04:09:17.702080: step: 354/466, loss: 0.06451868265867233 2023-01-24 04:09:18.455551: step: 356/466, loss: 0.09652923792600632 2023-01-24 04:09:19.191654: step: 358/466, loss: 0.07527028024196625 2023-01-24 04:09:19.951085: step: 360/466, loss: 0.132870152592659 2023-01-24 04:09:20.653748: step: 362/466, loss: 0.049478884786367416 2023-01-24 04:09:21.396084: step: 364/466, loss: 0.5129496455192566 2023-01-24 04:09:22.164401: step: 366/466, loss: 0.12872274219989777 2023-01-24 04:09:22.942857: step: 368/466, loss: 0.03989960625767708 2023-01-24 04:09:23.684073: step: 370/466, loss: 0.0571373850107193 2023-01-24 04:09:24.449864: step: 372/466, loss: 0.16923730075359344 2023-01-24 04:09:25.117206: step: 374/466, loss: 0.060910288244485855 2023-01-24 04:09:25.899525: step: 376/466, loss: 0.14860039949417114 2023-01-24 04:09:26.673977: step: 378/466, loss: 0.14830780029296875 2023-01-24 04:09:27.402169: step: 380/466, loss: 0.057242076843976974 2023-01-24 04:09:28.101579: step: 382/466, loss: 0.046780772507190704 2023-01-24 04:09:28.805623: step: 384/466, loss: 0.35573306679725647 2023-01-24 04:09:29.567663: step: 386/466, loss: 0.14995847642421722 2023-01-24 04:09:30.266660: step: 388/466, loss: 0.30169206857681274 2023-01-24 04:09:31.047798: step: 390/466, loss: 0.031957320868968964 2023-01-24 04:09:31.765888: step: 392/466, loss: 0.08432843536138535 2023-01-24 04:09:32.723634: step: 394/466, loss: 0.08463986963033676 2023-01-24 04:09:33.551523: step: 396/466, loss: 0.0620778426527977 2023-01-24 04:09:34.361864: step: 398/466, loss: 0.02887018956243992 2023-01-24 04:09:35.099998: step: 400/466, loss: 0.037249885499477386 2023-01-24 04:09:35.832512: step: 402/466, loss: 0.15724727511405945 2023-01-24 04:09:36.622988: step: 404/466, loss: 0.05442766472697258 2023-01-24 04:09:37.378472: step: 406/466, loss: 0.08185989409685135 2023-01-24 04:09:38.089606: step: 408/466, loss: 0.11819741874933243 2023-01-24 04:09:38.860813: step: 410/466, loss: 0.02740086242556572 2023-01-24 04:09:39.789374: step: 412/466, loss: 0.061571717262268066 2023-01-24 04:09:40.640859: step: 414/466, loss: 0.08704986423254013 2023-01-24 04:09:41.464765: step: 416/466, loss: 0.0622885562479496 2023-01-24 04:09:42.174494: step: 418/466, loss: 0.4812530279159546 2023-01-24 04:09:42.912882: step: 420/466, loss: 0.014615857042372227 2023-01-24 04:09:43.617427: step: 422/466, loss: 0.09261064231395721 2023-01-24 04:09:44.336843: step: 424/466, loss: 0.016920937225222588 2023-01-24 04:09:45.169802: step: 426/466, loss: 0.08510475605726242 2023-01-24 04:09:45.894672: step: 428/466, loss: 0.07296749949455261 2023-01-24 04:09:46.667960: step: 430/466, loss: 0.051430556923151016 2023-01-24 04:09:47.413665: step: 432/466, loss: 0.03637000918388367 2023-01-24 04:09:48.182338: step: 434/466, loss: 0.02003244124352932 2023-01-24 04:09:48.989382: step: 436/466, loss: 0.044223539531230927 2023-01-24 04:09:49.822536: step: 438/466, loss: 0.13895899057388306 2023-01-24 04:09:50.534084: step: 440/466, loss: 0.015650153160095215 2023-01-24 04:09:51.370022: step: 442/466, loss: 0.0724409818649292 2023-01-24 04:09:52.109742: step: 444/466, loss: 0.040596701204776764 2023-01-24 04:09:52.836593: step: 446/466, loss: 0.11499258875846863 2023-01-24 04:09:53.607462: step: 448/466, loss: 1.051138162612915 2023-01-24 04:09:54.389800: step: 450/466, loss: 0.036972444504499435 2023-01-24 04:09:55.168777: step: 452/466, loss: 0.06746082752943039 2023-01-24 04:09:55.966012: step: 454/466, loss: 0.2163953334093094 2023-01-24 04:09:56.682648: step: 456/466, loss: 0.040912926197052 2023-01-24 04:09:57.434329: step: 458/466, loss: 0.14414256811141968 2023-01-24 04:09:58.201191: step: 460/466, loss: 0.024899596348404884 2023-01-24 04:09:59.041489: step: 462/466, loss: 0.22652199864387512 2023-01-24 04:09:59.879476: step: 464/466, loss: 0.15495027601718903 2023-01-24 04:10:00.726971: step: 466/466, loss: 0.027327006682753563 2023-01-24 04:10:01.528309: step: 468/466, loss: 0.020539091899991035 2023-01-24 04:10:02.307861: step: 470/466, loss: 0.2805376648902893 2023-01-24 04:10:03.036784: step: 472/466, loss: 0.06690191477537155 2023-01-24 04:10:03.817410: step: 474/466, loss: 0.03993724286556244 2023-01-24 04:10:04.602318: step: 476/466, loss: 0.04442617669701576 2023-01-24 04:10:05.369150: step: 478/466, loss: 0.039570402354002 2023-01-24 04:10:06.119661: step: 480/466, loss: 0.09646327793598175 2023-01-24 04:10:06.836019: step: 482/466, loss: 0.33058470487594604 2023-01-24 04:10:07.567661: step: 484/466, loss: 0.05744529142975807 2023-01-24 04:10:08.360551: step: 486/466, loss: 0.07537069916725159 2023-01-24 04:10:09.001488: step: 488/466, loss: 0.03509717062115669 2023-01-24 04:10:09.781861: step: 490/466, loss: 0.04638931155204773 2023-01-24 04:10:10.526799: step: 492/466, loss: 0.03242679685354233 2023-01-24 04:10:11.330623: step: 494/466, loss: 0.007616049610078335 2023-01-24 04:10:12.031350: step: 496/466, loss: 0.021879682317376137 2023-01-24 04:10:12.814233: step: 498/466, loss: 0.08258692920207977 2023-01-24 04:10:13.466672: step: 500/466, loss: 0.016804566606879234 2023-01-24 04:10:14.228706: step: 502/466, loss: 0.1131386086344719 2023-01-24 04:10:14.948146: step: 504/466, loss: 0.012570555321872234 2023-01-24 04:10:15.666380: step: 506/466, loss: 0.1193646639585495 2023-01-24 04:10:16.456297: step: 508/466, loss: 0.04365543648600578 2023-01-24 04:10:17.189431: step: 510/466, loss: 0.05366494134068489 2023-01-24 04:10:17.933302: step: 512/466, loss: 0.28607046604156494 2023-01-24 04:10:18.708723: step: 514/466, loss: 0.06540153175592422 2023-01-24 04:10:19.457182: step: 516/466, loss: 0.030185092240571976 2023-01-24 04:10:20.204024: step: 518/466, loss: 0.14537490904331207 2023-01-24 04:10:21.063178: step: 520/466, loss: 0.08515751361846924 2023-01-24 04:10:21.752732: step: 522/466, loss: 0.056321412324905396 2023-01-24 04:10:22.578087: step: 524/466, loss: 0.09485920518636703 2023-01-24 04:10:23.394345: step: 526/466, loss: 0.06368310749530792 2023-01-24 04:10:24.050191: step: 528/466, loss: 0.030335480347275734 2023-01-24 04:10:24.854382: step: 530/466, loss: 0.06590232998132706 2023-01-24 04:10:25.610072: step: 532/466, loss: 0.012064045295119286 2023-01-24 04:10:26.388465: step: 534/466, loss: 0.051502350717782974 2023-01-24 04:10:27.141034: step: 536/466, loss: 0.08099795877933502 2023-01-24 04:10:27.929373: step: 538/466, loss: 0.29615336656570435 2023-01-24 04:10:28.666707: step: 540/466, loss: 0.060773301869630814 2023-01-24 04:10:29.347934: step: 542/466, loss: 0.07017336785793304 2023-01-24 04:10:30.196806: step: 544/466, loss: 0.45528674125671387 2023-01-24 04:10:30.994724: step: 546/466, loss: 0.07251206040382385 2023-01-24 04:10:31.758131: step: 548/466, loss: 0.10662711411714554 2023-01-24 04:10:32.547288: step: 550/466, loss: 0.420543909072876 2023-01-24 04:10:33.337077: step: 552/466, loss: 0.12031018733978271 2023-01-24 04:10:34.097943: step: 554/466, loss: 0.05517619475722313 2023-01-24 04:10:34.861233: step: 556/466, loss: 0.05768076702952385 2023-01-24 04:10:35.603166: step: 558/466, loss: 0.12633764743804932 2023-01-24 04:10:36.388647: step: 560/466, loss: 0.07834474742412567 2023-01-24 04:10:37.180144: step: 562/466, loss: 0.03466970846056938 2023-01-24 04:10:37.966771: step: 564/466, loss: 0.06017180532217026 2023-01-24 04:10:38.764648: step: 566/466, loss: 1.0684977769851685 2023-01-24 04:10:39.527282: step: 568/466, loss: 0.046178270131349564 2023-01-24 04:10:40.261594: step: 570/466, loss: 0.04773535206913948 2023-01-24 04:10:40.977055: step: 572/466, loss: 0.45768627524375916 2023-01-24 04:10:41.686701: step: 574/466, loss: 0.08987529575824738 2023-01-24 04:10:42.444607: step: 576/466, loss: 0.06217389926314354 2023-01-24 04:10:43.221359: step: 578/466, loss: 0.08079030364751816 2023-01-24 04:10:44.031592: step: 580/466, loss: 0.33118191361427307 2023-01-24 04:10:44.776767: step: 582/466, loss: 0.022694973275065422 2023-01-24 04:10:45.502540: step: 584/466, loss: 0.024916207417845726 2023-01-24 04:10:46.265447: step: 586/466, loss: 0.09800676256418228 2023-01-24 04:10:47.039748: step: 588/466, loss: 0.07045701891183853 2023-01-24 04:10:47.805360: step: 590/466, loss: 0.09660731256008148 2023-01-24 04:10:48.577321: step: 592/466, loss: 0.061863940209150314 2023-01-24 04:10:49.310697: step: 594/466, loss: 0.048730917274951935 2023-01-24 04:10:50.077647: step: 596/466, loss: 0.025510050356388092 2023-01-24 04:10:50.798766: step: 598/466, loss: 0.16134203970432281 2023-01-24 04:10:51.502765: step: 600/466, loss: 0.04883525148034096 2023-01-24 04:10:52.222182: step: 602/466, loss: 0.07418007403612137 2023-01-24 04:10:52.933401: step: 604/466, loss: 0.035486262291669846 2023-01-24 04:10:53.645797: step: 606/466, loss: 0.008110095746815205 2023-01-24 04:10:54.366305: step: 608/466, loss: 0.7604119777679443 2023-01-24 04:10:55.080602: step: 610/466, loss: 0.16966570913791656 2023-01-24 04:10:55.828635: step: 612/466, loss: 0.0026693926192820072 2023-01-24 04:10:56.582488: step: 614/466, loss: 0.1456402987241745 2023-01-24 04:10:57.309584: step: 616/466, loss: 0.01035115122795105 2023-01-24 04:10:58.159508: step: 618/466, loss: 0.1391374170780182 2023-01-24 04:10:58.943249: step: 620/466, loss: 0.12024839222431183 2023-01-24 04:10:59.697232: step: 622/466, loss: 0.02679123915731907 2023-01-24 04:11:00.496089: step: 624/466, loss: 0.18648961186408997 2023-01-24 04:11:01.388612: step: 626/466, loss: 6.7376532554626465 2023-01-24 04:11:02.221610: step: 628/466, loss: 0.04615697264671326 2023-01-24 04:11:02.990594: step: 630/466, loss: 0.1045772135257721 2023-01-24 04:11:03.736421: step: 632/466, loss: 0.0611422024667263 2023-01-24 04:11:04.442952: step: 634/466, loss: 0.002596375299617648 2023-01-24 04:11:05.260348: step: 636/466, loss: 0.06962011754512787 2023-01-24 04:11:06.040943: step: 638/466, loss: 0.04378426820039749 2023-01-24 04:11:06.834329: step: 640/466, loss: 0.05531560257077217 2023-01-24 04:11:07.540427: step: 642/466, loss: 0.04268931224942207 2023-01-24 04:11:08.341959: step: 644/466, loss: 0.09115175157785416 2023-01-24 04:11:09.070556: step: 646/466, loss: 0.1862109899520874 2023-01-24 04:11:09.783668: step: 648/466, loss: 0.0526001863181591 2023-01-24 04:11:10.540407: step: 650/466, loss: 0.9602542519569397 2023-01-24 04:11:11.299308: step: 652/466, loss: 0.026968909427523613 2023-01-24 04:11:12.026599: step: 654/466, loss: 0.06525703519582748 2023-01-24 04:11:12.926908: step: 656/466, loss: 0.09172939509153366 2023-01-24 04:11:13.679951: step: 658/466, loss: 0.08876709640026093 2023-01-24 04:11:14.508019: step: 660/466, loss: 0.0551137700676918 2023-01-24 04:11:15.249648: step: 662/466, loss: 0.04312824830412865 2023-01-24 04:11:16.054898: step: 664/466, loss: 0.06608863174915314 2023-01-24 04:11:16.789627: step: 666/466, loss: 0.03778354823589325 2023-01-24 04:11:17.615700: step: 668/466, loss: 0.057987332344055176 2023-01-24 04:11:18.406433: step: 670/466, loss: 0.02959345281124115 2023-01-24 04:11:19.094455: step: 672/466, loss: 0.04160600155591965 2023-01-24 04:11:19.881503: step: 674/466, loss: 0.04615752771496773 2023-01-24 04:11:20.711169: step: 676/466, loss: 2.3827602863311768 2023-01-24 04:11:21.451263: step: 678/466, loss: 0.12117211520671844 2023-01-24 04:11:22.238130: step: 680/466, loss: 0.021775022149086 2023-01-24 04:11:22.972186: step: 682/466, loss: 0.04267100989818573 2023-01-24 04:11:23.739204: step: 684/466, loss: 0.14708609879016876 2023-01-24 04:11:24.489487: step: 686/466, loss: 0.021864986047148705 2023-01-24 04:11:25.186083: step: 688/466, loss: 0.1048935055732727 2023-01-24 04:11:25.952385: step: 690/466, loss: 0.06021007522940636 2023-01-24 04:11:26.726648: step: 692/466, loss: 0.07317975908517838 2023-01-24 04:11:27.479550: step: 694/466, loss: 0.05775444954633713 2023-01-24 04:11:28.202163: step: 696/466, loss: 0.33284351229667664 2023-01-24 04:11:29.012301: step: 698/466, loss: 0.06062760576605797 2023-01-24 04:11:29.746794: step: 700/466, loss: 0.1711597740650177 2023-01-24 04:11:30.489314: step: 702/466, loss: 0.03754610940814018 2023-01-24 04:11:31.203254: step: 704/466, loss: 0.05172597989439964 2023-01-24 04:11:31.954098: step: 706/466, loss: 0.0590791180729866 2023-01-24 04:11:32.739997: step: 708/466, loss: 0.04053680971264839 2023-01-24 04:11:33.542215: step: 710/466, loss: 0.07455841451883316 2023-01-24 04:11:34.295977: step: 712/466, loss: 0.05597339943051338 2023-01-24 04:11:35.027839: step: 714/466, loss: 0.07891589403152466 2023-01-24 04:11:35.801466: step: 716/466, loss: 0.1539381742477417 2023-01-24 04:11:36.628577: step: 718/466, loss: 0.19610744714736938 2023-01-24 04:11:37.431428: step: 720/466, loss: 0.1722133606672287 2023-01-24 04:11:38.256852: step: 722/466, loss: 0.025108935311436653 2023-01-24 04:11:38.963565: step: 724/466, loss: 0.07888443768024445 2023-01-24 04:11:39.664715: step: 726/466, loss: 0.06843721121549606 2023-01-24 04:11:40.403824: step: 728/466, loss: 0.06443135440349579 2023-01-24 04:11:41.220008: step: 730/466, loss: 0.16125111281871796 2023-01-24 04:11:41.955185: step: 732/466, loss: 0.1632445901632309 2023-01-24 04:11:42.944085: step: 734/466, loss: 0.2908114194869995 2023-01-24 04:11:43.617696: step: 736/466, loss: 0.5993254780769348 2023-01-24 04:11:44.400023: step: 738/466, loss: 0.07324164360761642 2023-01-24 04:11:45.206879: step: 740/466, loss: 0.03170664981007576 2023-01-24 04:11:45.970342: step: 742/466, loss: 0.03607878088951111 2023-01-24 04:11:46.666082: step: 744/466, loss: 0.0928855612874031 2023-01-24 04:11:47.483860: step: 746/466, loss: 0.07790108770132065 2023-01-24 04:11:48.206404: step: 748/466, loss: 0.04500148817896843 2023-01-24 04:11:48.967203: step: 750/466, loss: 0.08000680059194565 2023-01-24 04:11:49.707685: step: 752/466, loss: 0.21646849811077118 2023-01-24 04:11:50.417264: step: 754/466, loss: 0.13076795637607574 2023-01-24 04:11:51.209447: step: 756/466, loss: 0.6503867506980896 2023-01-24 04:11:51.929549: step: 758/466, loss: 0.20445503294467926 2023-01-24 04:11:52.734333: step: 760/466, loss: 0.1860615313053131 2023-01-24 04:11:53.547046: step: 762/466, loss: 0.05238658934831619 2023-01-24 04:11:54.427769: step: 764/466, loss: 0.15584617853164673 2023-01-24 04:11:55.186760: step: 766/466, loss: 0.11111781746149063 2023-01-24 04:11:55.905377: step: 768/466, loss: 0.05492393672466278 2023-01-24 04:11:56.697183: step: 770/466, loss: 0.015201396308839321 2023-01-24 04:11:57.466479: step: 772/466, loss: 0.015985164791345596 2023-01-24 04:11:58.231274: step: 774/466, loss: 0.05980583652853966 2023-01-24 04:11:58.917741: step: 776/466, loss: 0.10083527117967606 2023-01-24 04:11:59.691141: step: 778/466, loss: 0.5878909826278687 2023-01-24 04:12:00.580705: step: 780/466, loss: 0.07962486147880554 2023-01-24 04:12:01.346559: step: 782/466, loss: 0.18757252395153046 2023-01-24 04:12:03.039804: step: 784/466, loss: 0.19431068003177643 2023-01-24 04:12:03.805600: step: 786/466, loss: 0.011340529657900333 2023-01-24 04:12:04.518139: step: 788/466, loss: 0.9627336859703064 2023-01-24 04:12:05.313994: step: 790/466, loss: 0.030145462602376938 2023-01-24 04:12:06.117595: step: 792/466, loss: 0.04864287003874779 2023-01-24 04:12:06.845908: step: 794/466, loss: 0.02933620661497116 2023-01-24 04:12:07.523587: step: 796/466, loss: 0.02567676641047001 2023-01-24 04:12:08.271844: step: 798/466, loss: 0.05721559375524521 2023-01-24 04:12:09.120178: step: 800/466, loss: 0.1796366423368454 2023-01-24 04:12:09.924152: step: 802/466, loss: 0.09617342054843903 2023-01-24 04:12:10.672593: step: 804/466, loss: 0.08915657550096512 2023-01-24 04:12:11.448576: step: 806/466, loss: 0.056109026074409485 2023-01-24 04:12:12.215683: step: 808/466, loss: 0.8052629828453064 2023-01-24 04:12:13.043435: step: 810/466, loss: 0.1469426453113556 2023-01-24 04:12:13.762308: step: 812/466, loss: 0.011045991443097591 2023-01-24 04:12:14.453156: step: 814/466, loss: 0.020637711510062218 2023-01-24 04:12:15.172721: step: 816/466, loss: 0.15615445375442505 2023-01-24 04:12:15.892028: step: 818/466, loss: 0.41356489062309265 2023-01-24 04:12:16.615484: step: 820/466, loss: 0.09979367256164551 2023-01-24 04:12:17.320567: step: 822/466, loss: 0.019931530579924583 2023-01-24 04:12:18.166538: step: 824/466, loss: 0.024886123836040497 2023-01-24 04:12:18.932398: step: 826/466, loss: 0.0668100118637085 2023-01-24 04:12:19.647437: step: 828/466, loss: 0.13887907564640045 2023-01-24 04:12:20.452679: step: 830/466, loss: 0.04640275612473488 2023-01-24 04:12:21.143214: step: 832/466, loss: 0.08429426699876785 2023-01-24 04:12:21.902355: step: 834/466, loss: 0.05646169185638428 2023-01-24 04:12:22.574799: step: 836/466, loss: 0.07541132718324661 2023-01-24 04:12:23.318393: step: 838/466, loss: 0.021639710292220116 2023-01-24 04:12:24.073461: step: 840/466, loss: 0.03935703635215759 2023-01-24 04:12:24.854805: step: 842/466, loss: 0.04783592000603676 2023-01-24 04:12:25.570442: step: 844/466, loss: 0.3185446262359619 2023-01-24 04:12:26.381097: step: 846/466, loss: 0.0960950031876564 2023-01-24 04:12:27.099826: step: 848/466, loss: 0.032344914972782135 2023-01-24 04:12:27.860209: step: 850/466, loss: 0.13070397078990936 2023-01-24 04:12:28.627306: step: 852/466, loss: 0.03631366789340973 2023-01-24 04:12:29.340009: step: 854/466, loss: 0.0661098062992096 2023-01-24 04:12:30.222769: step: 856/466, loss: 0.035494230687618256 2023-01-24 04:12:31.039368: step: 858/466, loss: 0.13137997686862946 2023-01-24 04:12:31.811358: step: 860/466, loss: 0.026316052302718163 2023-01-24 04:12:32.608915: step: 862/466, loss: 0.08850234746932983 2023-01-24 04:12:33.467960: step: 864/466, loss: 0.10195964574813843 2023-01-24 04:12:34.228053: step: 866/466, loss: 0.07952665537595749 2023-01-24 04:12:35.054817: step: 868/466, loss: 0.06374545395374298 2023-01-24 04:12:35.777039: step: 870/466, loss: 0.0911751389503479 2023-01-24 04:12:36.534035: step: 872/466, loss: 0.07596917450428009 2023-01-24 04:12:37.197254: step: 874/466, loss: 0.15608255565166473 2023-01-24 04:12:37.846716: step: 876/466, loss: 0.03925901651382446 2023-01-24 04:12:38.541162: step: 878/466, loss: 0.06515513360500336 2023-01-24 04:12:39.335592: step: 880/466, loss: 0.07271917909383774 2023-01-24 04:12:40.081203: step: 882/466, loss: 0.20699360966682434 2023-01-24 04:12:40.826601: step: 884/466, loss: 0.05623829364776611 2023-01-24 04:12:41.689642: step: 886/466, loss: 0.05620102211833 2023-01-24 04:12:42.496251: step: 888/466, loss: 0.05816996097564697 2023-01-24 04:12:43.251715: step: 890/466, loss: 0.0925942212343216 2023-01-24 04:12:43.958347: step: 892/466, loss: 0.0655890479683876 2023-01-24 04:12:44.780715: step: 894/466, loss: 0.8882849216461182 2023-01-24 04:12:45.524055: step: 896/466, loss: 0.11083611845970154 2023-01-24 04:12:46.323376: step: 898/466, loss: 0.15663686394691467 2023-01-24 04:12:47.055606: step: 900/466, loss: 0.07761363685131073 2023-01-24 04:12:47.800529: step: 902/466, loss: 0.05530351772904396 2023-01-24 04:12:48.661250: step: 904/466, loss: 0.053331077098846436 2023-01-24 04:12:49.413763: step: 906/466, loss: 0.04591398313641548 2023-01-24 04:12:50.204683: step: 908/466, loss: 0.028425119817256927 2023-01-24 04:12:50.955154: step: 910/466, loss: 0.018302908167243004 2023-01-24 04:12:51.739852: step: 912/466, loss: 0.040488116443157196 2023-01-24 04:12:52.411166: step: 914/466, loss: 0.05558871850371361 2023-01-24 04:12:53.155828: step: 916/466, loss: 0.06911761313676834 2023-01-24 04:12:53.940852: step: 918/466, loss: 0.060869622975587845 2023-01-24 04:12:54.630602: step: 920/466, loss: 0.031241292133927345 2023-01-24 04:12:55.349355: step: 922/466, loss: 0.048789240419864655 2023-01-24 04:12:56.110886: step: 924/466, loss: 0.08579286932945251 2023-01-24 04:12:56.883453: step: 926/466, loss: 0.053416553884744644 2023-01-24 04:12:57.648614: step: 928/466, loss: 0.04905460402369499 2023-01-24 04:12:58.372243: step: 930/466, loss: 0.0519297830760479 2023-01-24 04:12:59.208085: step: 932/466, loss: 0.05160481110215187 ================================================== Loss: 0.147 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33366395793499043, 'r': 0.33113140417457304, 'f1': 0.33239285714285716}, 'combined': 0.24492105263157896, 'epoch': 18} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.36391723839777995, 'r': 0.27474963799382174, 'f1': 0.31310886224258916}, 'combined': 0.19244739825642065, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30856201855742293, 'r': 0.3278837388845481, 'f1': 0.31792958673809907}, 'combined': 0.23426390601754668, 'epoch': 18} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.34291047437567, 'r': 0.27694329472974394, 'f1': 0.30641664632610205}, 'combined': 0.18833413383945785, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3453234435797665, 'r': 0.3368050284629981, 'f1': 0.34101104707012486}, 'combined': 0.2512712978411446, 'epoch': 18} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37050265385169406, 'r': 0.2784383580461216, 'f1': 0.3179399936166787}, 'combined': 0.19637470193971335, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29411764705882354, 'r': 0.2857142857142857, 'f1': 0.2898550724637681}, 'combined': 0.1932367149758454, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2702702702702703, 'r': 0.43478260869565216, 'f1': 0.3333333333333333}, 'combined': 0.16666666666666666, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3663210818307906, 'r': 0.3579798048251559, 'f1': 0.36210241294214424}, 'combined': 0.2668123042731589, 'epoch': 13} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37875489782431904, 'r': 0.26988336009473124, 'f1': 0.31518228605603094}, 'combined': 0.19467141197578386, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 13} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:15:42.867018: step: 2/466, loss: 0.07707686722278595 2023-01-24 04:15:43.700492: step: 4/466, loss: 0.07725892961025238 2023-01-24 04:15:44.481524: step: 6/466, loss: 0.03708843141794205 2023-01-24 04:15:45.248391: step: 8/466, loss: 0.036257512867450714 2023-01-24 04:15:46.044919: step: 10/466, loss: 0.017364487051963806 2023-01-24 04:15:46.747982: step: 12/466, loss: 0.04551801085472107 2023-01-24 04:15:47.438251: step: 14/466, loss: 0.06593490391969681 2023-01-24 04:15:48.253124: step: 16/466, loss: 0.10358086228370667 2023-01-24 04:15:49.105483: step: 18/466, loss: 0.19388654828071594 2023-01-24 04:15:49.867778: step: 20/466, loss: 0.066173255443573 2023-01-24 04:15:50.645943: step: 22/466, loss: 0.019162917509675026 2023-01-24 04:15:51.394698: step: 24/466, loss: 0.08188024908304214 2023-01-24 04:15:52.105839: step: 26/466, loss: 0.05701979249715805 2023-01-24 04:15:52.830981: step: 28/466, loss: 0.007061370182782412 2023-01-24 04:15:53.538720: step: 30/466, loss: 0.12841834127902985 2023-01-24 04:15:54.318288: step: 32/466, loss: 0.050115082412958145 2023-01-24 04:15:55.172730: step: 34/466, loss: 0.02729623392224312 2023-01-24 04:15:55.889948: step: 36/466, loss: 0.08978027105331421 2023-01-24 04:15:56.623723: step: 38/466, loss: 0.06600786000490189 2023-01-24 04:15:57.439772: step: 40/466, loss: 0.04679092392325401 2023-01-24 04:15:58.280309: step: 42/466, loss: 0.017778532579541206 2023-01-24 04:15:59.136937: step: 44/466, loss: 0.07528173178434372 2023-01-24 04:15:59.897346: step: 46/466, loss: 0.04950503632426262 2023-01-24 04:16:00.640564: step: 48/466, loss: 0.0496654212474823 2023-01-24 04:16:01.392445: step: 50/466, loss: 0.0461617186665535 2023-01-24 04:16:02.228431: step: 52/466, loss: 0.03887036815285683 2023-01-24 04:16:03.029416: step: 54/466, loss: 0.04674646258354187 2023-01-24 04:16:03.684832: step: 56/466, loss: 0.06008967384696007 2023-01-24 04:16:04.446606: step: 58/466, loss: 0.04620389640331268 2023-01-24 04:16:05.205959: step: 60/466, loss: 0.1120811179280281 2023-01-24 04:16:05.963502: step: 62/466, loss: 0.03864070773124695 2023-01-24 04:16:06.662058: step: 64/466, loss: 0.05045907199382782 2023-01-24 04:16:07.430349: step: 66/466, loss: 0.24931874871253967 2023-01-24 04:16:08.193447: step: 68/466, loss: 0.12652790546417236 2023-01-24 04:16:08.955026: step: 70/466, loss: 0.015776850283145905 2023-01-24 04:16:09.690732: step: 72/466, loss: 0.023323623463511467 2023-01-24 04:16:10.365700: step: 74/466, loss: 0.019689107313752174 2023-01-24 04:16:11.124222: step: 76/466, loss: 0.010504703037440777 2023-01-24 04:16:11.823733: step: 78/466, loss: 0.853032112121582 2023-01-24 04:16:12.738714: step: 80/466, loss: 0.06561411172151566 2023-01-24 04:16:13.510041: step: 82/466, loss: 0.24904441833496094 2023-01-24 04:16:14.339021: step: 84/466, loss: 0.04590437933802605 2023-01-24 04:16:15.114026: step: 86/466, loss: 0.027438893914222717 2023-01-24 04:16:15.848825: step: 88/466, loss: 0.033957649022340775 2023-01-24 04:16:16.706480: step: 90/466, loss: 0.04507846385240555 2023-01-24 04:16:17.401541: step: 92/466, loss: 0.01910211518406868 2023-01-24 04:16:18.194694: step: 94/466, loss: 0.01948835887014866 2023-01-24 04:16:19.099539: step: 96/466, loss: 0.023070571944117546 2023-01-24 04:16:19.861092: step: 98/466, loss: 0.035979241132736206 2023-01-24 04:16:20.612045: step: 100/466, loss: 0.07960603386163712 2023-01-24 04:16:21.427860: step: 102/466, loss: 0.0930582657456398 2023-01-24 04:16:22.154976: step: 104/466, loss: 0.007280975580215454 2023-01-24 04:16:22.881662: step: 106/466, loss: 0.09409566968679428 2023-01-24 04:16:23.646474: step: 108/466, loss: 0.03311523422598839 2023-01-24 04:16:24.422223: step: 110/466, loss: 0.03451240807771683 2023-01-24 04:16:25.264696: step: 112/466, loss: 0.10993362218141556 2023-01-24 04:16:25.987766: step: 114/466, loss: 0.12965621054172516 2023-01-24 04:16:26.768484: step: 116/466, loss: 0.03790228068828583 2023-01-24 04:16:27.474863: step: 118/466, loss: 0.009076929651200771 2023-01-24 04:16:28.255716: step: 120/466, loss: 0.04168053716421127 2023-01-24 04:16:29.035220: step: 122/466, loss: 0.05517464131116867 2023-01-24 04:16:29.739752: step: 124/466, loss: 0.07232671231031418 2023-01-24 04:16:30.419202: step: 126/466, loss: 0.05364637449383736 2023-01-24 04:16:31.139143: step: 128/466, loss: 0.048415087163448334 2023-01-24 04:16:31.814001: step: 130/466, loss: 0.0030281036160886288 2023-01-24 04:16:32.521360: step: 132/466, loss: 0.07231735438108444 2023-01-24 04:16:33.328188: step: 134/466, loss: 0.006062332075089216 2023-01-24 04:16:34.006372: step: 136/466, loss: 0.010802896693348885 2023-01-24 04:16:34.768723: step: 138/466, loss: 0.039841752499341965 2023-01-24 04:16:35.578417: step: 140/466, loss: 0.10794732719659805 2023-01-24 04:16:36.315103: step: 142/466, loss: 0.08357289433479309 2023-01-24 04:16:37.078700: step: 144/466, loss: 0.43468421697616577 2023-01-24 04:16:37.882578: step: 146/466, loss: 0.0399923138320446 2023-01-24 04:16:38.618196: step: 148/466, loss: 0.06643982976675034 2023-01-24 04:16:39.385101: step: 150/466, loss: 0.013120495714247227 2023-01-24 04:16:40.096265: step: 152/466, loss: 0.12737302482128143 2023-01-24 04:16:40.948317: step: 154/466, loss: 0.06080927327275276 2023-01-24 04:16:41.747266: step: 156/466, loss: 0.0551338791847229 2023-01-24 04:16:42.593274: step: 158/466, loss: 0.08971381932497025 2023-01-24 04:16:43.317694: step: 160/466, loss: 0.07730238884687424 2023-01-24 04:16:44.078719: step: 162/466, loss: 0.012164798565208912 2023-01-24 04:16:44.816583: step: 164/466, loss: 0.07556217908859253 2023-01-24 04:16:45.541281: step: 166/466, loss: 0.0272072684019804 2023-01-24 04:16:46.325519: step: 168/466, loss: 0.0506470613181591 2023-01-24 04:16:47.162156: step: 170/466, loss: 0.06211550906300545 2023-01-24 04:16:47.960563: step: 172/466, loss: 0.05883381515741348 2023-01-24 04:16:48.685792: step: 174/466, loss: 0.027972858399152756 2023-01-24 04:16:49.474119: step: 176/466, loss: 0.03771953657269478 2023-01-24 04:16:50.285926: step: 178/466, loss: 0.05252067372202873 2023-01-24 04:16:51.074270: step: 180/466, loss: 0.07096660882234573 2023-01-24 04:16:51.810571: step: 182/466, loss: 0.005565475672483444 2023-01-24 04:16:52.629276: step: 184/466, loss: 0.053429294377565384 2023-01-24 04:16:53.409585: step: 186/466, loss: 0.06567326188087463 2023-01-24 04:16:54.122070: step: 188/466, loss: 0.04723641648888588 2023-01-24 04:16:54.857804: step: 190/466, loss: 0.040017325431108475 2023-01-24 04:16:55.610860: step: 192/466, loss: 0.1834365278482437 2023-01-24 04:16:56.345553: step: 194/466, loss: 0.06563407927751541 2023-01-24 04:16:57.137736: step: 196/466, loss: 0.013922265730798244 2023-01-24 04:16:58.006561: step: 198/466, loss: 0.03886845335364342 2023-01-24 04:16:58.737365: step: 200/466, loss: 0.03570050001144409 2023-01-24 04:16:59.476093: step: 202/466, loss: 0.09004824608564377 2023-01-24 04:17:00.448187: step: 204/466, loss: 0.0846221074461937 2023-01-24 04:17:01.222848: step: 206/466, loss: 0.0383138544857502 2023-01-24 04:17:01.954853: step: 208/466, loss: 0.033008407801389694 2023-01-24 04:17:02.688651: step: 210/466, loss: 0.011824160814285278 2023-01-24 04:17:03.459016: step: 212/466, loss: 0.0965164303779602 2023-01-24 04:17:04.256758: step: 214/466, loss: 0.04126487672328949 2023-01-24 04:17:04.970319: step: 216/466, loss: 0.027900833636522293 2023-01-24 04:17:05.694645: step: 218/466, loss: 0.01755242981016636 2023-01-24 04:17:06.455142: step: 220/466, loss: 0.30426502227783203 2023-01-24 04:17:07.291093: step: 222/466, loss: 0.03312382474541664 2023-01-24 04:17:08.087340: step: 224/466, loss: 0.06691645085811615 2023-01-24 04:17:08.871407: step: 226/466, loss: 0.024335216730833054 2023-01-24 04:17:09.779689: step: 228/466, loss: 0.060176149010658264 2023-01-24 04:17:10.538269: step: 230/466, loss: 0.11157464981079102 2023-01-24 04:17:11.366661: step: 232/466, loss: 0.05215362459421158 2023-01-24 04:17:12.128131: step: 234/466, loss: 0.0009370064362883568 2023-01-24 04:17:12.860059: step: 236/466, loss: 0.06926261633634567 2023-01-24 04:17:13.581244: step: 238/466, loss: 0.04493451490998268 2023-01-24 04:17:14.305972: step: 240/466, loss: 0.3802512288093567 2023-01-24 04:17:15.155677: step: 242/466, loss: 0.03863971307873726 2023-01-24 04:17:15.844576: step: 244/466, loss: 0.024827376008033752 2023-01-24 04:17:16.543394: step: 246/466, loss: 0.04101860523223877 2023-01-24 04:17:17.239179: step: 248/466, loss: 0.03435073420405388 2023-01-24 04:17:17.987180: step: 250/466, loss: 0.04637880250811577 2023-01-24 04:17:18.842797: step: 252/466, loss: 0.011644534766674042 2023-01-24 04:17:19.631808: step: 254/466, loss: 0.48259320855140686 2023-01-24 04:17:20.355090: step: 256/466, loss: 0.4899650812149048 2023-01-24 04:17:21.077447: step: 258/466, loss: 0.07497600466012955 2023-01-24 04:17:21.810570: step: 260/466, loss: 0.11135821789503098 2023-01-24 04:17:22.554798: step: 262/466, loss: 0.09177925437688828 2023-01-24 04:17:23.276296: step: 264/466, loss: 0.03555946797132492 2023-01-24 04:17:23.998711: step: 266/466, loss: 0.03181109204888344 2023-01-24 04:17:24.765427: step: 268/466, loss: 0.05030002444982529 2023-01-24 04:17:25.538472: step: 270/466, loss: 0.06433197855949402 2023-01-24 04:17:26.317695: step: 272/466, loss: 0.14185172319412231 2023-01-24 04:17:27.143164: step: 274/466, loss: 0.06486351042985916 2023-01-24 04:17:27.933842: step: 276/466, loss: 0.13831187784671783 2023-01-24 04:17:28.732223: step: 278/466, loss: 0.07320712506771088 2023-01-24 04:17:29.519757: step: 280/466, loss: 0.04056846350431442 2023-01-24 04:17:30.298965: step: 282/466, loss: 0.0762358158826828 2023-01-24 04:17:31.091066: step: 284/466, loss: 0.13892656564712524 2023-01-24 04:17:31.808818: step: 286/466, loss: 0.14571847021579742 2023-01-24 04:17:32.553959: step: 288/466, loss: 0.056351128965616226 2023-01-24 04:17:33.360912: step: 290/466, loss: 0.47937020659446716 2023-01-24 04:17:34.146781: step: 292/466, loss: 0.017977619543671608 2023-01-24 04:17:35.030765: step: 294/466, loss: 0.022253964096307755 2023-01-24 04:17:35.928621: step: 296/466, loss: 0.021573202684521675 2023-01-24 04:17:36.694230: step: 298/466, loss: 0.10714520514011383 2023-01-24 04:17:37.466690: step: 300/466, loss: 0.20172281563282013 2023-01-24 04:17:38.131779: step: 302/466, loss: 0.010043538175523281 2023-01-24 04:17:38.889406: step: 304/466, loss: 0.13285380601882935 2023-01-24 04:17:39.669369: step: 306/466, loss: 0.11064116656780243 2023-01-24 04:17:40.382941: step: 308/466, loss: 0.014111662283539772 2023-01-24 04:17:41.190212: step: 310/466, loss: 0.026592249050736427 2023-01-24 04:17:42.002888: step: 312/466, loss: 0.5118361711502075 2023-01-24 04:17:42.776564: step: 314/466, loss: 0.07001351565122604 2023-01-24 04:17:43.530375: step: 316/466, loss: 0.08644430339336395 2023-01-24 04:17:44.218406: step: 318/466, loss: 0.03326092287898064 2023-01-24 04:17:44.965186: step: 320/466, loss: 0.045971401035785675 2023-01-24 04:17:45.670168: step: 322/466, loss: 0.05814187228679657 2023-01-24 04:17:46.504834: step: 324/466, loss: 0.05052163079380989 2023-01-24 04:17:47.348880: step: 326/466, loss: 0.09300139546394348 2023-01-24 04:17:48.238379: step: 328/466, loss: 0.08645815402269363 2023-01-24 04:17:49.015121: step: 330/466, loss: 0.043647561222314835 2023-01-24 04:17:49.799397: step: 332/466, loss: 0.05537186563014984 2023-01-24 04:17:50.572361: step: 334/466, loss: 0.07523120939731598 2023-01-24 04:17:51.237191: step: 336/466, loss: 0.0026097306981682777 2023-01-24 04:17:51.966570: step: 338/466, loss: 0.08779795467853546 2023-01-24 04:17:52.805698: step: 340/466, loss: 0.042835015803575516 2023-01-24 04:17:53.633179: step: 342/466, loss: 0.38242512941360474 2023-01-24 04:17:54.422731: step: 344/466, loss: 0.0324886180460453 2023-01-24 04:17:55.209364: step: 346/466, loss: 0.08252550661563873 2023-01-24 04:17:55.968085: step: 348/466, loss: 0.051319487392902374 2023-01-24 04:17:56.878353: step: 350/466, loss: 0.13323046267032623 2023-01-24 04:17:57.664231: step: 352/466, loss: 0.05508129298686981 2023-01-24 04:17:58.446488: step: 354/466, loss: 0.013235564343631268 2023-01-24 04:17:59.189179: step: 356/466, loss: 0.05842670053243637 2023-01-24 04:17:59.952994: step: 358/466, loss: 0.18304871022701263 2023-01-24 04:18:00.716395: step: 360/466, loss: 0.09490270912647247 2023-01-24 04:18:01.370882: step: 362/466, loss: 0.05648095905780792 2023-01-24 04:18:02.129810: step: 364/466, loss: 0.03505036234855652 2023-01-24 04:18:02.885886: step: 366/466, loss: 0.03604874759912491 2023-01-24 04:18:03.638428: step: 368/466, loss: 0.03895943611860275 2023-01-24 04:18:04.373583: step: 370/466, loss: 0.17214235663414001 2023-01-24 04:18:05.056628: step: 372/466, loss: 0.011825804598629475 2023-01-24 04:18:05.784520: step: 374/466, loss: 0.05837767571210861 2023-01-24 04:18:06.545292: step: 376/466, loss: 0.004728924483060837 2023-01-24 04:18:07.353032: step: 378/466, loss: 0.07020552456378937 2023-01-24 04:18:08.145181: step: 380/466, loss: 0.19587011635303497 2023-01-24 04:18:08.881683: step: 382/466, loss: 0.0404064804315567 2023-01-24 04:18:09.611741: step: 384/466, loss: 0.014871107414364815 2023-01-24 04:18:10.338942: step: 386/466, loss: 0.2635861337184906 2023-01-24 04:18:11.090844: step: 388/466, loss: 0.08111178874969482 2023-01-24 04:18:11.913715: step: 390/466, loss: 0.016424495726823807 2023-01-24 04:18:12.636969: step: 392/466, loss: 0.15529052913188934 2023-01-24 04:18:13.417424: step: 394/466, loss: 0.16692110896110535 2023-01-24 04:18:14.077803: step: 396/466, loss: 0.12100377678871155 2023-01-24 04:18:14.836529: step: 398/466, loss: 0.014978073537349701 2023-01-24 04:18:15.570245: step: 400/466, loss: 0.004079705569893122 2023-01-24 04:18:16.345572: step: 402/466, loss: 0.06326240301132202 2023-01-24 04:18:17.155671: step: 404/466, loss: 0.07025929540395737 2023-01-24 04:18:17.990302: step: 406/466, loss: 0.06290563941001892 2023-01-24 04:18:18.762071: step: 408/466, loss: 0.010200410149991512 2023-01-24 04:18:19.499495: step: 410/466, loss: 0.12491553276777267 2023-01-24 04:18:20.297723: step: 412/466, loss: 0.06633555889129639 2023-01-24 04:18:21.036638: step: 414/466, loss: 0.0457879975438118 2023-01-24 04:18:21.812692: step: 416/466, loss: 0.20419169962406158 2023-01-24 04:18:22.550477: step: 418/466, loss: 0.16641490161418915 2023-01-24 04:18:23.386460: step: 420/466, loss: 0.08982834964990616 2023-01-24 04:18:24.105613: step: 422/466, loss: 0.17228271067142487 2023-01-24 04:18:24.855673: step: 424/466, loss: 0.06556040048599243 2023-01-24 04:18:25.613149: step: 426/466, loss: 0.0518675372004509 2023-01-24 04:18:26.441143: step: 428/466, loss: 0.13832581043243408 2023-01-24 04:18:27.259615: step: 430/466, loss: 0.09893655776977539 2023-01-24 04:18:28.099482: step: 432/466, loss: 0.023732980713248253 2023-01-24 04:18:28.835271: step: 434/466, loss: 0.06245775148272514 2023-01-24 04:18:29.667168: step: 436/466, loss: 0.0800282433629036 2023-01-24 04:18:30.415023: step: 438/466, loss: 0.06479812413454056 2023-01-24 04:18:31.127100: step: 440/466, loss: 0.010589199140667915 2023-01-24 04:18:31.848460: step: 442/466, loss: 0.22055262327194214 2023-01-24 04:18:32.546851: step: 444/466, loss: 0.04050662741065025 2023-01-24 04:18:33.242300: step: 446/466, loss: 0.04848470911383629 2023-01-24 04:18:33.983297: step: 448/466, loss: 0.040982309728860855 2023-01-24 04:18:34.731834: step: 450/466, loss: 0.03166522458195686 2023-01-24 04:18:35.493624: step: 452/466, loss: 0.08621339499950409 2023-01-24 04:18:36.258060: step: 454/466, loss: 0.043064236640930176 2023-01-24 04:18:37.007543: step: 456/466, loss: 0.07764985412359238 2023-01-24 04:18:37.707206: step: 458/466, loss: 0.04215572401881218 2023-01-24 04:18:38.475967: step: 460/466, loss: 0.04419616982340813 2023-01-24 04:18:39.215171: step: 462/466, loss: 0.012509040534496307 2023-01-24 04:18:39.940934: step: 464/466, loss: 0.10873904824256897 2023-01-24 04:18:40.709453: step: 466/466, loss: 0.04994974657893181 2023-01-24 04:18:41.544017: step: 468/466, loss: 0.05436602234840393 2023-01-24 04:18:42.242268: step: 470/466, loss: 0.008949813432991505 2023-01-24 04:18:43.010806: step: 472/466, loss: 0.9225003123283386 2023-01-24 04:18:43.793577: step: 474/466, loss: 0.3254028856754303 2023-01-24 04:18:44.571466: step: 476/466, loss: 0.06196141242980957 2023-01-24 04:18:45.340833: step: 478/466, loss: 0.04715636372566223 2023-01-24 04:18:46.116147: step: 480/466, loss: 0.022531533613801003 2023-01-24 04:18:46.889653: step: 482/466, loss: 0.01798596791923046 2023-01-24 04:18:47.760627: step: 484/466, loss: 0.076285719871521 2023-01-24 04:18:48.526765: step: 486/466, loss: 0.09098166972398758 2023-01-24 04:18:49.238346: step: 488/466, loss: 0.04622410237789154 2023-01-24 04:18:49.982124: step: 490/466, loss: 0.045328788459300995 2023-01-24 04:18:50.955208: step: 492/466, loss: 0.03675145283341408 2023-01-24 04:18:51.674657: step: 494/466, loss: 3.8558883666992188 2023-01-24 04:18:52.528248: step: 496/466, loss: 0.11598634719848633 2023-01-24 04:18:53.215877: step: 498/466, loss: 0.009622450917959213 2023-01-24 04:18:53.910177: step: 500/466, loss: 0.02720300666987896 2023-01-24 04:18:54.629834: step: 502/466, loss: 0.06930980086326599 2023-01-24 04:18:55.413529: step: 504/466, loss: 0.10974381119012833 2023-01-24 04:18:56.193816: step: 506/466, loss: 0.041853148490190506 2023-01-24 04:18:56.922376: step: 508/466, loss: 0.053615596145391464 2023-01-24 04:18:57.544132: step: 510/466, loss: 0.023178689181804657 2023-01-24 04:18:58.350585: step: 512/466, loss: 0.10109889507293701 2023-01-24 04:18:59.058752: step: 514/466, loss: 0.020574018359184265 2023-01-24 04:18:59.877887: step: 516/466, loss: 0.11134310066699982 2023-01-24 04:19:00.579920: step: 518/466, loss: 0.02680542692542076 2023-01-24 04:19:01.331629: step: 520/466, loss: 0.08171765506267548 2023-01-24 04:19:02.143893: step: 522/466, loss: 0.04899270087480545 2023-01-24 04:19:02.854473: step: 524/466, loss: 0.04399009793996811 2023-01-24 04:19:03.626679: step: 526/466, loss: 0.27386415004730225 2023-01-24 04:19:04.370636: step: 528/466, loss: 0.07502332329750061 2023-01-24 04:19:05.107739: step: 530/466, loss: 0.02962682582437992 2023-01-24 04:19:05.815798: step: 532/466, loss: 0.18936896324157715 2023-01-24 04:19:06.601389: step: 534/466, loss: 0.08553482592105865 2023-01-24 04:19:07.277958: step: 536/466, loss: 0.011363615281879902 2023-01-24 04:19:07.935374: step: 538/466, loss: 0.017759401351213455 2023-01-24 04:19:08.730189: step: 540/466, loss: 0.041266944259405136 2023-01-24 04:19:09.507534: step: 542/466, loss: 0.08004105091094971 2023-01-24 04:19:10.194843: step: 544/466, loss: 0.02674659714102745 2023-01-24 04:19:10.909712: step: 546/466, loss: 0.0036398719530552626 2023-01-24 04:19:11.616287: step: 548/466, loss: 0.027790717780590057 2023-01-24 04:19:12.444440: step: 550/466, loss: 0.06974704563617706 2023-01-24 04:19:13.213327: step: 552/466, loss: 0.010123740881681442 2023-01-24 04:19:13.932233: step: 554/466, loss: 0.03165018931031227 2023-01-24 04:19:14.693946: step: 556/466, loss: 0.07956918329000473 2023-01-24 04:19:15.465748: step: 558/466, loss: 0.12074112147092819 2023-01-24 04:19:16.273501: step: 560/466, loss: 0.16099034249782562 2023-01-24 04:19:17.090617: step: 562/466, loss: 0.05869884043931961 2023-01-24 04:19:17.818145: step: 564/466, loss: 0.011771907098591328 2023-01-24 04:19:18.528224: step: 566/466, loss: 0.004809739533811808 2023-01-24 04:19:19.263862: step: 568/466, loss: 0.06332944333553314 2023-01-24 04:19:20.015236: step: 570/466, loss: 0.26421257853507996 2023-01-24 04:19:20.746291: step: 572/466, loss: 0.817116916179657 2023-01-24 04:19:21.436637: step: 574/466, loss: 0.31787359714508057 2023-01-24 04:19:22.188702: step: 576/466, loss: 0.05923830345273018 2023-01-24 04:19:23.048050: step: 578/466, loss: 0.37814825773239136 2023-01-24 04:19:23.818337: step: 580/466, loss: 0.08397063612937927 2023-01-24 04:19:24.626612: step: 582/466, loss: 0.03459261357784271 2023-01-24 04:19:25.423819: step: 584/466, loss: 0.046183399856090546 2023-01-24 04:19:26.120367: step: 586/466, loss: 0.0778326541185379 2023-01-24 04:19:26.887049: step: 588/466, loss: 0.025441646575927734 2023-01-24 04:19:27.618027: step: 590/466, loss: 0.02456839382648468 2023-01-24 04:19:28.450925: step: 592/466, loss: 0.04389163479208946 2023-01-24 04:19:29.190944: step: 594/466, loss: 0.009534381330013275 2023-01-24 04:19:29.914538: step: 596/466, loss: 0.019304398447275162 2023-01-24 04:19:30.699998: step: 598/466, loss: 0.03234853595495224 2023-01-24 04:19:31.478767: step: 600/466, loss: 0.03297156095504761 2023-01-24 04:19:32.291426: step: 602/466, loss: 0.05493513494729996 2023-01-24 04:19:33.074471: step: 604/466, loss: 0.6842228770256042 2023-01-24 04:19:33.818378: step: 606/466, loss: 0.07774440199136734 2023-01-24 04:19:34.581664: step: 608/466, loss: 1.7763886451721191 2023-01-24 04:19:35.333947: step: 610/466, loss: 0.1671164333820343 2023-01-24 04:19:36.085356: step: 612/466, loss: 0.458658903837204 2023-01-24 04:19:36.908046: step: 614/466, loss: 0.11497402936220169 2023-01-24 04:19:37.686477: step: 616/466, loss: 0.1912817507982254 2023-01-24 04:19:38.393977: step: 618/466, loss: 0.09462592750787735 2023-01-24 04:19:39.212716: step: 620/466, loss: 0.06380286812782288 2023-01-24 04:19:39.932760: step: 622/466, loss: 0.028667034581303596 2023-01-24 04:19:40.685874: step: 624/466, loss: 0.016529444605112076 2023-01-24 04:19:41.466062: step: 626/466, loss: 0.025493420660495758 2023-01-24 04:19:42.218892: step: 628/466, loss: 0.13471835851669312 2023-01-24 04:19:42.954006: step: 630/466, loss: 0.07315074652433395 2023-01-24 04:19:43.720942: step: 632/466, loss: 0.030087953433394432 2023-01-24 04:19:44.508748: step: 634/466, loss: 0.06007075682282448 2023-01-24 04:19:45.232147: step: 636/466, loss: 0.03061388060450554 2023-01-24 04:19:45.955760: step: 638/466, loss: 0.04862212762236595 2023-01-24 04:19:46.712326: step: 640/466, loss: 0.028137506917119026 2023-01-24 04:19:47.428525: step: 642/466, loss: 0.04800652340054512 2023-01-24 04:19:48.119424: step: 644/466, loss: 0.01501480583101511 2023-01-24 04:19:48.871465: step: 646/466, loss: 0.05946161225438118 2023-01-24 04:19:49.601174: step: 648/466, loss: 0.078636534512043 2023-01-24 04:19:50.363208: step: 650/466, loss: 0.04473862797021866 2023-01-24 04:19:51.072289: step: 652/466, loss: 0.06858037412166595 2023-01-24 04:19:51.838376: step: 654/466, loss: 0.030643390491604805 2023-01-24 04:19:52.557356: step: 656/466, loss: 0.07060644775629044 2023-01-24 04:19:53.339622: step: 658/466, loss: 0.02271956019103527 2023-01-24 04:19:54.037673: step: 660/466, loss: 0.051643069833517075 2023-01-24 04:19:54.804123: step: 662/466, loss: 0.07489115744829178 2023-01-24 04:19:55.521711: step: 664/466, loss: 0.0777987614274025 2023-01-24 04:19:56.250566: step: 666/466, loss: 0.11620701104402542 2023-01-24 04:19:57.065931: step: 668/466, loss: 0.045370180159807205 2023-01-24 04:19:57.797207: step: 670/466, loss: 0.033944014459848404 2023-01-24 04:19:58.574394: step: 672/466, loss: 0.18008968234062195 2023-01-24 04:19:59.357079: step: 674/466, loss: 0.01612461917102337 2023-01-24 04:20:00.125844: step: 676/466, loss: 0.2847628593444824 2023-01-24 04:20:00.918879: step: 678/466, loss: 0.0479779876768589 2023-01-24 04:20:01.631578: step: 680/466, loss: 0.048540275543928146 2023-01-24 04:20:02.408477: step: 682/466, loss: 0.028347892686724663 2023-01-24 04:20:03.187326: step: 684/466, loss: 0.042811281979084015 2023-01-24 04:20:03.880519: step: 686/466, loss: 0.03289152681827545 2023-01-24 04:20:04.572722: step: 688/466, loss: 0.18194958567619324 2023-01-24 04:20:05.290856: step: 690/466, loss: 0.1148754134774208 2023-01-24 04:20:06.086089: step: 692/466, loss: 0.0545729398727417 2023-01-24 04:20:06.844562: step: 694/466, loss: 0.03039471060037613 2023-01-24 04:20:07.610983: step: 696/466, loss: 0.0326075479388237 2023-01-24 04:20:08.311931: step: 698/466, loss: 0.0679233968257904 2023-01-24 04:20:09.029829: step: 700/466, loss: 0.09583453088998795 2023-01-24 04:20:09.929597: step: 702/466, loss: 0.05406005680561066 2023-01-24 04:20:10.748190: step: 704/466, loss: 0.02857596054673195 2023-01-24 04:20:11.500374: step: 706/466, loss: 0.057948268949985504 2023-01-24 04:20:12.209754: step: 708/466, loss: 0.36460480093955994 2023-01-24 04:20:12.994068: step: 710/466, loss: 0.026436137035489082 2023-01-24 04:20:13.795241: step: 712/466, loss: 0.38908836245536804 2023-01-24 04:20:14.667440: step: 714/466, loss: 0.08434242010116577 2023-01-24 04:20:15.450740: step: 716/466, loss: 0.02646796405315399 2023-01-24 04:20:16.187034: step: 718/466, loss: 0.02969576232135296 2023-01-24 04:20:16.966197: step: 720/466, loss: 0.051515594124794006 2023-01-24 04:20:17.818769: step: 722/466, loss: 0.04458548128604889 2023-01-24 04:20:18.593670: step: 724/466, loss: 0.2114185094833374 2023-01-24 04:20:19.354908: step: 726/466, loss: 0.10188092291355133 2023-01-24 04:20:20.071104: step: 728/466, loss: 0.10047049075365067 2023-01-24 04:20:20.793524: step: 730/466, loss: 0.09574826061725616 2023-01-24 04:20:21.531117: step: 732/466, loss: 0.1061575785279274 2023-01-24 04:20:22.339303: step: 734/466, loss: 0.09384144842624664 2023-01-24 04:20:23.171873: step: 736/466, loss: 0.054589297622442245 2023-01-24 04:20:23.875252: step: 738/466, loss: 0.16012555360794067 2023-01-24 04:20:24.663368: step: 740/466, loss: 0.012316581793129444 2023-01-24 04:20:25.380562: step: 742/466, loss: 0.006048239301890135 2023-01-24 04:20:26.160126: step: 744/466, loss: 0.04163842648267746 2023-01-24 04:20:26.897628: step: 746/466, loss: 0.012698384933173656 2023-01-24 04:20:27.645319: step: 748/466, loss: 0.033502642065286636 2023-01-24 04:20:28.466482: step: 750/466, loss: 0.045448169112205505 2023-01-24 04:20:29.268681: step: 752/466, loss: 0.0731145441532135 2023-01-24 04:20:30.012304: step: 754/466, loss: 0.024190278723835945 2023-01-24 04:20:30.864404: step: 756/466, loss: 0.08338966220617294 2023-01-24 04:20:31.640540: step: 758/466, loss: 0.031140204519033432 2023-01-24 04:20:32.359029: step: 760/466, loss: 0.06121518090367317 2023-01-24 04:20:33.062601: step: 762/466, loss: 0.11387481540441513 2023-01-24 04:20:33.847238: step: 764/466, loss: 0.07723309099674225 2023-01-24 04:20:34.620390: step: 766/466, loss: 0.046022143214941025 2023-01-24 04:20:35.353489: step: 768/466, loss: 0.045637596398591995 2023-01-24 04:20:36.147914: step: 770/466, loss: 0.04150305688381195 2023-01-24 04:20:36.904877: step: 772/466, loss: 0.045610833913087845 2023-01-24 04:20:37.700690: step: 774/466, loss: 0.09559348225593567 2023-01-24 04:20:38.403934: step: 776/466, loss: 0.14263160526752472 2023-01-24 04:20:39.162094: step: 778/466, loss: 0.03592607378959656 2023-01-24 04:20:39.905019: step: 780/466, loss: 0.019835738465189934 2023-01-24 04:20:40.656942: step: 782/466, loss: 0.10670880228281021 2023-01-24 04:20:41.559774: step: 784/466, loss: 0.02934003621339798 2023-01-24 04:20:42.279894: step: 786/466, loss: 0.052224624902009964 2023-01-24 04:20:43.010920: step: 788/466, loss: 0.017788240686058998 2023-01-24 04:20:43.814407: step: 790/466, loss: 0.1800827831029892 2023-01-24 04:20:44.623453: step: 792/466, loss: 0.4770299196243286 2023-01-24 04:20:45.350695: step: 794/466, loss: 0.037891022861003876 2023-01-24 04:20:46.043450: step: 796/466, loss: 0.025547461584210396 2023-01-24 04:20:46.827796: step: 798/466, loss: 0.02444528415799141 2023-01-24 04:20:47.575622: step: 800/466, loss: 0.305006206035614 2023-01-24 04:20:48.318195: step: 802/466, loss: 0.024854356423020363 2023-01-24 04:20:49.130378: step: 804/466, loss: 0.040885668247938156 2023-01-24 04:20:49.866464: step: 806/466, loss: 4.361806392669678 2023-01-24 04:20:50.562767: step: 808/466, loss: 0.0032287875656038523 2023-01-24 04:20:51.402983: step: 810/466, loss: 0.1244574561715126 2023-01-24 04:20:52.158107: step: 812/466, loss: 0.021160701289772987 2023-01-24 04:20:52.845659: step: 814/466, loss: 0.04389666020870209 2023-01-24 04:20:53.612034: step: 816/466, loss: 0.14484992623329163 2023-01-24 04:20:54.365993: step: 818/466, loss: 0.09680794924497604 2023-01-24 04:20:55.112194: step: 820/466, loss: 0.1459362655878067 2023-01-24 04:20:55.895598: step: 822/466, loss: 0.4532628655433655 2023-01-24 04:20:56.579847: step: 824/466, loss: 0.05273761972784996 2023-01-24 04:20:57.317628: step: 826/466, loss: 0.037020210176706314 2023-01-24 04:20:58.030808: step: 828/466, loss: 0.11131815612316132 2023-01-24 04:20:58.764377: step: 830/466, loss: 0.028645120561122894 2023-01-24 04:20:59.520976: step: 832/466, loss: 0.10022107511758804 2023-01-24 04:21:00.310673: step: 834/466, loss: 0.10250432044267654 2023-01-24 04:21:01.038763: step: 836/466, loss: 0.005558273755013943 2023-01-24 04:21:01.859616: step: 838/466, loss: 0.06928959488868713 2023-01-24 04:21:02.582395: step: 840/466, loss: 0.4579852521419525 2023-01-24 04:21:03.394533: step: 842/466, loss: 0.0566575787961483 2023-01-24 04:21:04.222138: step: 844/466, loss: 0.014095243066549301 2023-01-24 04:21:04.997181: step: 846/466, loss: 0.041836559772491455 2023-01-24 04:21:05.736645: step: 848/466, loss: 0.08909980207681656 2023-01-24 04:21:06.504374: step: 850/466, loss: 0.032618314027786255 2023-01-24 04:21:07.260542: step: 852/466, loss: 0.20079179108142853 2023-01-24 04:21:07.989757: step: 854/466, loss: 0.04353923723101616 2023-01-24 04:21:08.752427: step: 856/466, loss: 0.08984728157520294 2023-01-24 04:21:09.445679: step: 858/466, loss: 0.05157919600605965 2023-01-24 04:21:10.117452: step: 860/466, loss: 0.027703218162059784 2023-01-24 04:21:11.020102: step: 862/466, loss: 0.13330939412117004 2023-01-24 04:21:11.773481: step: 864/466, loss: 0.03492619842290878 2023-01-24 04:21:12.502783: step: 866/466, loss: 0.013390794396400452 2023-01-24 04:21:13.306498: step: 868/466, loss: 0.007219783030450344 2023-01-24 04:21:14.066170: step: 870/466, loss: 0.6684727668762207 2023-01-24 04:21:14.858750: step: 872/466, loss: 0.028121741488575935 2023-01-24 04:21:15.612757: step: 874/466, loss: 0.042943116277456284 2023-01-24 04:21:16.411527: step: 876/466, loss: 0.1596052497625351 2023-01-24 04:21:17.195425: step: 878/466, loss: 0.05617586523294449 2023-01-24 04:21:17.960267: step: 880/466, loss: 0.03300650045275688 2023-01-24 04:21:18.705007: step: 882/466, loss: 0.02836972288787365 2023-01-24 04:21:19.489536: step: 884/466, loss: 0.0764867439866066 2023-01-24 04:21:20.247300: step: 886/466, loss: 0.3197813630104065 2023-01-24 04:21:21.059834: step: 888/466, loss: 0.08093675225973129 2023-01-24 04:21:21.843870: step: 890/466, loss: 0.08463520556688309 2023-01-24 04:21:22.616511: step: 892/466, loss: 0.025558089837431908 2023-01-24 04:21:23.308164: step: 894/466, loss: 0.01471740286797285 2023-01-24 04:21:24.127584: step: 896/466, loss: 0.0714501366019249 2023-01-24 04:21:24.852839: step: 898/466, loss: 0.08137260377407074 2023-01-24 04:21:25.615491: step: 900/466, loss: 0.08838675916194916 2023-01-24 04:21:26.343409: step: 902/466, loss: 0.014560588635504246 2023-01-24 04:21:27.173525: step: 904/466, loss: 0.1300331950187683 2023-01-24 04:21:27.969709: step: 906/466, loss: 0.13205008208751678 2023-01-24 04:21:28.739700: step: 908/466, loss: 0.033016860485076904 2023-01-24 04:21:29.549594: step: 910/466, loss: 0.04948754981160164 2023-01-24 04:21:30.287996: step: 912/466, loss: 0.01946226879954338 2023-01-24 04:21:31.072954: step: 914/466, loss: 0.13564474880695343 2023-01-24 04:21:31.953849: step: 916/466, loss: 0.09711972624063492 2023-01-24 04:21:32.732541: step: 918/466, loss: 0.10797549039125443 2023-01-24 04:21:33.508102: step: 920/466, loss: 0.0589141882956028 2023-01-24 04:21:34.239220: step: 922/466, loss: 0.24523958563804626 2023-01-24 04:21:35.021287: step: 924/466, loss: 0.09199422597885132 2023-01-24 04:21:35.788626: step: 926/466, loss: 0.05299725756049156 2023-01-24 04:21:36.524171: step: 928/466, loss: 0.22056227922439575 2023-01-24 04:21:37.365522: step: 930/466, loss: 0.03502621129155159 2023-01-24 04:21:38.134879: step: 932/466, loss: 0.011367511935532093 ================================================== Loss: 0.108 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33767258382642995, 'r': 0.32485768500948764, 'f1': 0.3311411992263056}, 'combined': 0.2439987783772778, 'epoch': 19} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.356016337858035, 'r': 0.295653706179683, 'f1': 0.32303939919065555}, 'combined': 0.1985510453562078, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3128359888259101, 'r': 0.32827002243022446, 'f1': 0.32036722559394126}, 'combined': 0.2360600609639567, 'epoch': 19} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.33803403995143155, 'r': 0.2973528870914757, 'f1': 0.3163911419536199}, 'combined': 0.19446479944466394, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34582504970178923, 'r': 0.33007590132827325, 'f1': 0.3377669902912621}, 'combined': 0.24888094021461418, 'epoch': 19} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36038315043942815, 'r': 0.2939803727200525, 'f1': 0.3238125877697768}, 'combined': 0.20000189244603864, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32421875, 'r': 0.29642857142857143, 'f1': 0.3097014925373134}, 'combined': 0.20646766169154224, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2727272727272727, 'r': 0.391304347826087, 'f1': 0.32142857142857145}, 'combined': 0.16071428571428573, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 19} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34582504970178923, 'r': 0.33007590132827325, 'f1': 0.3377669902912621}, 'combined': 0.24888094021461418, 'epoch': 19} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36038315043942815, 'r': 0.2939803727200525, 'f1': 0.3238125877697768}, 'combined': 0.20000189244603864, 'epoch': 19} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 19} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:24:31.935258: step: 2/466, loss: 0.018460728228092194 2023-01-24 04:24:32.655914: step: 4/466, loss: 0.03275569900870323 2023-01-24 04:24:33.473282: step: 6/466, loss: 0.03537336736917496 2023-01-24 04:24:34.346471: step: 8/466, loss: 0.19849158823490143 2023-01-24 04:24:35.066465: step: 10/466, loss: 0.05351646617054939 2023-01-24 04:24:35.800481: step: 12/466, loss: 0.032131731510162354 2023-01-24 04:24:36.561888: step: 14/466, loss: 0.014430488459765911 2023-01-24 04:24:37.268703: step: 16/466, loss: 0.08124786615371704 2023-01-24 04:24:37.957999: step: 18/466, loss: 0.09149978309869766 2023-01-24 04:24:38.595000: step: 20/466, loss: 0.021502234041690826 2023-01-24 04:24:39.426136: step: 22/466, loss: 0.014477944932878017 2023-01-24 04:24:40.137317: step: 24/466, loss: 0.04463953897356987 2023-01-24 04:24:40.838279: step: 26/466, loss: 0.07978744804859161 2023-01-24 04:24:41.542282: step: 28/466, loss: 0.022446228191256523 2023-01-24 04:24:42.306568: step: 30/466, loss: 0.06083933636546135 2023-01-24 04:24:43.023588: step: 32/466, loss: 0.03639407828450203 2023-01-24 04:24:43.805826: step: 34/466, loss: 0.05580973997712135 2023-01-24 04:24:44.543432: step: 36/466, loss: 0.027402512729167938 2023-01-24 04:24:45.325690: step: 38/466, loss: 0.04815574362874031 2023-01-24 04:24:46.100978: step: 40/466, loss: 0.008505039848387241 2023-01-24 04:24:46.877783: step: 42/466, loss: 0.011803867295384407 2023-01-24 04:24:47.678480: step: 44/466, loss: 0.0648321807384491 2023-01-24 04:24:48.381849: step: 46/466, loss: 0.06518565118312836 2023-01-24 04:24:49.145846: step: 48/466, loss: 0.4239426553249359 2023-01-24 04:24:49.849234: step: 50/466, loss: 0.11440339684486389 2023-01-24 04:24:50.573019: step: 52/466, loss: 0.026564881205558777 2023-01-24 04:24:51.391843: step: 54/466, loss: 0.1800045222043991 2023-01-24 04:24:52.224658: step: 56/466, loss: 0.08924289047718048 2023-01-24 04:24:52.986666: step: 58/466, loss: 0.01246184203773737 2023-01-24 04:24:53.777955: step: 60/466, loss: 0.12218394130468369 2023-01-24 04:24:54.551135: step: 62/466, loss: 0.08775953203439713 2023-01-24 04:24:55.300177: step: 64/466, loss: 0.06456584483385086 2023-01-24 04:24:56.048353: step: 66/466, loss: 0.02095860429108143 2023-01-24 04:24:56.768356: step: 68/466, loss: 0.29399341344833374 2023-01-24 04:24:57.498378: step: 70/466, loss: 0.06159337982535362 2023-01-24 04:24:58.331245: step: 72/466, loss: 0.0189979188144207 2023-01-24 04:24:59.191507: step: 74/466, loss: 0.01161477155983448 2023-01-24 04:24:59.956088: step: 76/466, loss: 0.06281650811433792 2023-01-24 04:25:00.668291: step: 78/466, loss: 0.05198509618639946 2023-01-24 04:25:01.408911: step: 80/466, loss: 0.029480332508683205 2023-01-24 04:25:02.214268: step: 82/466, loss: 0.03114878572523594 2023-01-24 04:25:02.982709: step: 84/466, loss: 0.17855964601039886 2023-01-24 04:25:03.772462: step: 86/466, loss: 2.124892234802246 2023-01-24 04:25:04.556813: step: 88/466, loss: 0.03497857227921486 2023-01-24 04:25:05.323514: step: 90/466, loss: 0.10798290371894836 2023-01-24 04:25:06.070319: step: 92/466, loss: 0.030730247497558594 2023-01-24 04:25:06.777393: step: 94/466, loss: 0.05837418511509895 2023-01-24 04:25:07.475154: step: 96/466, loss: 0.037210673093795776 2023-01-24 04:25:08.351885: step: 98/466, loss: 0.02605927363038063 2023-01-24 04:25:09.151076: step: 100/466, loss: 0.05797327309846878 2023-01-24 04:25:09.846601: step: 102/466, loss: 0.014021651819348335 2023-01-24 04:25:10.604111: step: 104/466, loss: 0.008650483563542366 2023-01-24 04:25:11.320952: step: 106/466, loss: 0.27503061294555664 2023-01-24 04:25:12.084524: step: 108/466, loss: 0.05865919962525368 2023-01-24 04:25:12.946880: step: 110/466, loss: 0.32320258021354675 2023-01-24 04:25:13.684027: step: 112/466, loss: 0.03165920823812485 2023-01-24 04:25:14.464220: step: 114/466, loss: 0.3183203637599945 2023-01-24 04:25:15.201022: step: 116/466, loss: 0.3369978368282318 2023-01-24 04:25:16.008991: step: 118/466, loss: 0.049105823040008545 2023-01-24 04:25:16.782844: step: 120/466, loss: 0.01705557107925415 2023-01-24 04:25:17.514838: step: 122/466, loss: 0.10555698722600937 2023-01-24 04:25:18.188109: step: 124/466, loss: 0.10458479821681976 2023-01-24 04:25:18.971178: step: 126/466, loss: 0.02696489356458187 2023-01-24 04:25:19.638683: step: 128/466, loss: 0.02288011461496353 2023-01-24 04:25:20.543144: step: 130/466, loss: 0.06803611665964127 2023-01-24 04:25:21.322115: step: 132/466, loss: 0.022408053278923035 2023-01-24 04:25:22.058075: step: 134/466, loss: 0.11165321618318558 2023-01-24 04:25:22.798012: step: 136/466, loss: 0.02976180799305439 2023-01-24 04:25:23.582369: step: 138/466, loss: 0.05110258609056473 2023-01-24 04:25:24.293008: step: 140/466, loss: 0.04810798540711403 2023-01-24 04:25:25.027229: step: 142/466, loss: 0.04758574068546295 2023-01-24 04:25:25.746361: step: 144/466, loss: 0.0897674486041069 2023-01-24 04:25:26.528407: step: 146/466, loss: 0.13839790225028992 2023-01-24 04:25:27.342597: step: 148/466, loss: 0.026050128042697906 2023-01-24 04:25:28.130533: step: 150/466, loss: 0.07832131534814835 2023-01-24 04:25:28.948791: step: 152/466, loss: 0.03038802742958069 2023-01-24 04:25:29.655330: step: 154/466, loss: 0.010046614333987236 2023-01-24 04:25:30.389885: step: 156/466, loss: 0.028194040060043335 2023-01-24 04:25:31.117071: step: 158/466, loss: 0.05797536298632622 2023-01-24 04:25:31.999425: step: 160/466, loss: 0.06366293877363205 2023-01-24 04:25:32.754347: step: 162/466, loss: 0.06145820766687393 2023-01-24 04:25:33.548906: step: 164/466, loss: 0.006274975370615721 2023-01-24 04:25:34.291731: step: 166/466, loss: 0.19345133006572723 2023-01-24 04:25:35.086014: step: 168/466, loss: 0.054684944450855255 2023-01-24 04:25:35.795931: step: 170/466, loss: 0.031118638813495636 2023-01-24 04:25:36.475724: step: 172/466, loss: 0.07022649049758911 2023-01-24 04:25:37.221560: step: 174/466, loss: 0.034614648669958115 2023-01-24 04:25:38.053311: step: 176/466, loss: 0.07210703939199448 2023-01-24 04:25:38.795605: step: 178/466, loss: 0.0653044581413269 2023-01-24 04:25:39.466208: step: 180/466, loss: 0.08951914310455322 2023-01-24 04:25:40.171629: step: 182/466, loss: 0.09185580909252167 2023-01-24 04:25:40.887630: step: 184/466, loss: 0.0031846435740590096 2023-01-24 04:25:41.640210: step: 186/466, loss: 0.06330131739377975 2023-01-24 04:25:42.378753: step: 188/466, loss: 0.011010921560227871 2023-01-24 04:25:43.180513: step: 190/466, loss: 0.018669040873646736 2023-01-24 04:25:43.942753: step: 192/466, loss: 0.03512444719672203 2023-01-24 04:25:44.776118: step: 194/466, loss: 0.05187104269862175 2023-01-24 04:25:45.582587: step: 196/466, loss: 0.062332749366760254 2023-01-24 04:25:46.344707: step: 198/466, loss: 0.46412545442581177 2023-01-24 04:25:47.123506: step: 200/466, loss: 0.02101816236972809 2023-01-24 04:25:47.892964: step: 202/466, loss: 0.10228997468948364 2023-01-24 04:25:48.616997: step: 204/466, loss: 0.2514619529247284 2023-01-24 04:25:49.465681: step: 206/466, loss: 0.09877415746450424 2023-01-24 04:25:50.257365: step: 208/466, loss: 0.013377824798226357 2023-01-24 04:25:51.013648: step: 210/466, loss: 0.057463813573122025 2023-01-24 04:25:51.671279: step: 212/466, loss: 0.01609615981578827 2023-01-24 04:25:52.338974: step: 214/466, loss: 0.019593212753534317 2023-01-24 04:25:52.969091: step: 216/466, loss: 0.034630268812179565 2023-01-24 04:25:53.692716: step: 218/466, loss: 0.6699469089508057 2023-01-24 04:25:54.477444: step: 220/466, loss: 0.041075363755226135 2023-01-24 04:25:55.311544: step: 222/466, loss: 0.0339871346950531 2023-01-24 04:25:56.051622: step: 224/466, loss: 0.09721852093935013 2023-01-24 04:25:56.765581: step: 226/466, loss: 0.138138547539711 2023-01-24 04:25:57.523188: step: 228/466, loss: 0.02130807936191559 2023-01-24 04:25:58.274197: step: 230/466, loss: 0.04031668230891228 2023-01-24 04:25:59.017975: step: 232/466, loss: 0.020303938537836075 2023-01-24 04:25:59.801735: step: 234/466, loss: 0.05183028429746628 2023-01-24 04:26:00.572292: step: 236/466, loss: 0.07668639719486237 2023-01-24 04:26:01.359704: step: 238/466, loss: 0.019458649680018425 2023-01-24 04:26:02.186962: step: 240/466, loss: 0.1946711540222168 2023-01-24 04:26:02.977368: step: 242/466, loss: 0.059943996369838715 2023-01-24 04:26:03.797858: step: 244/466, loss: 0.08493660390377045 2023-01-24 04:26:04.529402: step: 246/466, loss: 0.0389455184340477 2023-01-24 04:26:05.260574: step: 248/466, loss: 0.03665849566459656 2023-01-24 04:26:05.999013: step: 250/466, loss: 0.10476026684045792 2023-01-24 04:26:06.805087: step: 252/466, loss: 0.017195170745253563 2023-01-24 04:26:07.543211: step: 254/466, loss: 0.0854298323392868 2023-01-24 04:26:08.285307: step: 256/466, loss: 0.020137647166848183 2023-01-24 04:26:09.079442: step: 258/466, loss: 0.026926511898636818 2023-01-24 04:26:09.785097: step: 260/466, loss: 0.14978930354118347 2023-01-24 04:26:10.568090: step: 262/466, loss: 0.04770808294415474 2023-01-24 04:26:11.316425: step: 264/466, loss: 0.03563295304775238 2023-01-24 04:26:12.022975: step: 266/466, loss: 0.03312264755368233 2023-01-24 04:26:12.800354: step: 268/466, loss: 0.05192991718649864 2023-01-24 04:26:13.567808: step: 270/466, loss: 0.4052007496356964 2023-01-24 04:26:14.286166: step: 272/466, loss: 0.08925099670886993 2023-01-24 04:26:14.991749: step: 274/466, loss: 0.05085451528429985 2023-01-24 04:26:15.887942: step: 276/466, loss: 0.10984174907207489 2023-01-24 04:26:16.624657: step: 278/466, loss: 0.020902059972286224 2023-01-24 04:26:17.357726: step: 280/466, loss: 0.33469197154045105 2023-01-24 04:26:18.145207: step: 282/466, loss: 0.05572224780917168 2023-01-24 04:26:18.938048: step: 284/466, loss: 0.06708931177854538 2023-01-24 04:26:19.682550: step: 286/466, loss: 0.03186662867665291 2023-01-24 04:26:20.398359: step: 288/466, loss: 0.0212254598736763 2023-01-24 04:26:21.194977: step: 290/466, loss: 0.0902729481458664 2023-01-24 04:26:21.919651: step: 292/466, loss: 0.020895853638648987 2023-01-24 04:26:22.581808: step: 294/466, loss: 0.046829309314489365 2023-01-24 04:26:23.328528: step: 296/466, loss: 0.10319173336029053 2023-01-24 04:26:24.037543: step: 298/466, loss: 0.03693874925374985 2023-01-24 04:26:24.752693: step: 300/466, loss: 0.014299717731773853 2023-01-24 04:26:25.559561: step: 302/466, loss: 0.6914137005805969 2023-01-24 04:26:26.316515: step: 304/466, loss: 0.07307901233434677 2023-01-24 04:26:27.101818: step: 306/466, loss: 0.04094526544213295 2023-01-24 04:26:27.905037: step: 308/466, loss: 0.047497380524873734 2023-01-24 04:26:28.722142: step: 310/466, loss: 0.057354554533958435 2023-01-24 04:26:29.508164: step: 312/466, loss: 0.026758970692753792 2023-01-24 04:26:30.208285: step: 314/466, loss: 0.019884929060935974 2023-01-24 04:26:31.037581: step: 316/466, loss: 0.0628701001405716 2023-01-24 04:26:31.809390: step: 318/466, loss: 0.19064365327358246 2023-01-24 04:26:32.591232: step: 320/466, loss: 0.12521429359912872 2023-01-24 04:26:33.256595: step: 322/466, loss: 0.04952094703912735 2023-01-24 04:26:34.059980: step: 324/466, loss: 0.09516461938619614 2023-01-24 04:26:34.889191: step: 326/466, loss: 0.08458506315946579 2023-01-24 04:26:35.691569: step: 328/466, loss: 0.03529912978410721 2023-01-24 04:26:36.495421: step: 330/466, loss: 5.178069114685059 2023-01-24 04:26:37.246386: step: 332/466, loss: 0.21790748834609985 2023-01-24 04:26:37.953564: step: 334/466, loss: 0.01928175799548626 2023-01-24 04:26:38.665962: step: 336/466, loss: 0.004510574974119663 2023-01-24 04:26:39.381301: step: 338/466, loss: 0.029111620038747787 2023-01-24 04:26:40.161395: step: 340/466, loss: 0.0197187177836895 2023-01-24 04:26:40.919174: step: 342/466, loss: 0.06134937331080437 2023-01-24 04:26:41.769422: step: 344/466, loss: 0.03785282000899315 2023-01-24 04:26:42.476618: step: 346/466, loss: 0.011232880875468254 2023-01-24 04:26:43.255607: step: 348/466, loss: 0.0719815343618393 2023-01-24 04:26:44.059309: step: 350/466, loss: 0.08595134317874908 2023-01-24 04:26:44.831930: step: 352/466, loss: 0.04742708057165146 2023-01-24 04:26:45.589311: step: 354/466, loss: 0.19618232548236847 2023-01-24 04:26:46.335313: step: 356/466, loss: 0.11792128533124924 2023-01-24 04:26:47.069770: step: 358/466, loss: 0.05558709800243378 2023-01-24 04:26:47.783262: step: 360/466, loss: 0.046625006943941116 2023-01-24 04:26:48.544542: step: 362/466, loss: 0.024011583998799324 2023-01-24 04:26:49.251681: step: 364/466, loss: 0.20778243243694305 2023-01-24 04:26:50.036670: step: 366/466, loss: 0.8859660029411316 2023-01-24 04:26:50.854547: step: 368/466, loss: 0.25369954109191895 2023-01-24 04:26:51.635676: step: 370/466, loss: 0.003959276247769594 2023-01-24 04:26:52.408330: step: 372/466, loss: 0.05117206275463104 2023-01-24 04:26:53.225427: step: 374/466, loss: 0.038259174674749374 2023-01-24 04:26:53.999542: step: 376/466, loss: 0.03823187202215195 2023-01-24 04:26:54.834344: step: 378/466, loss: 0.1659523993730545 2023-01-24 04:26:55.671188: step: 380/466, loss: 0.02509915828704834 2023-01-24 04:26:56.403240: step: 382/466, loss: 0.06251364201307297 2023-01-24 04:26:57.150220: step: 384/466, loss: 0.02569531463086605 2023-01-24 04:26:57.894864: step: 386/466, loss: 0.10763271898031235 2023-01-24 04:26:58.820474: step: 388/466, loss: 0.1218181699514389 2023-01-24 04:26:59.513150: step: 390/466, loss: 0.10014048218727112 2023-01-24 04:27:00.310404: step: 392/466, loss: 0.032277580350637436 2023-01-24 04:27:01.084665: step: 394/466, loss: 0.025713779032230377 2023-01-24 04:27:01.952675: step: 396/466, loss: 0.037789396941661835 2023-01-24 04:27:02.668230: step: 398/466, loss: 0.002991467248648405 2023-01-24 04:27:03.398514: step: 400/466, loss: 0.06556622684001923 2023-01-24 04:27:04.167570: step: 402/466, loss: 0.2638910710811615 2023-01-24 04:27:04.894222: step: 404/466, loss: 0.043477851897478104 2023-01-24 04:27:05.741257: step: 406/466, loss: 0.12631016969680786 2023-01-24 04:27:06.451310: step: 408/466, loss: 0.009239846840500832 2023-01-24 04:27:07.288953: step: 410/466, loss: 0.26313164830207825 2023-01-24 04:27:08.062311: step: 412/466, loss: 0.029916265979409218 2023-01-24 04:27:08.973247: step: 414/466, loss: 0.042880747467279434 2023-01-24 04:27:09.848142: step: 416/466, loss: 0.04426199197769165 2023-01-24 04:27:10.558398: step: 418/466, loss: 0.0630863681435585 2023-01-24 04:27:11.309015: step: 420/466, loss: 0.7282978296279907 2023-01-24 04:27:12.070873: step: 422/466, loss: 0.0994003489613533 2023-01-24 04:27:12.795343: step: 424/466, loss: 0.08450376987457275 2023-01-24 04:27:13.641617: step: 426/466, loss: 0.003517021657899022 2023-01-24 04:27:14.426402: step: 428/466, loss: 0.02976216748356819 2023-01-24 04:27:15.201264: step: 430/466, loss: 0.015140297822654247 2023-01-24 04:27:15.962174: step: 432/466, loss: 0.04440704360604286 2023-01-24 04:27:16.690719: step: 434/466, loss: 0.031749606132507324 2023-01-24 04:27:17.398250: step: 436/466, loss: 0.04850994795560837 2023-01-24 04:27:18.123823: step: 438/466, loss: 0.02641472965478897 2023-01-24 04:27:18.821793: step: 440/466, loss: 0.00908383633941412 2023-01-24 04:27:19.619720: step: 442/466, loss: 0.06254004687070847 2023-01-24 04:27:20.349513: step: 444/466, loss: 0.04309402033686638 2023-01-24 04:27:21.165928: step: 446/466, loss: 0.9316014051437378 2023-01-24 04:27:21.922409: step: 448/466, loss: 0.059038687497377396 2023-01-24 04:27:22.690186: step: 450/466, loss: 0.08485215902328491 2023-01-24 04:27:23.431496: step: 452/466, loss: 0.0382041372358799 2023-01-24 04:27:24.222037: step: 454/466, loss: 0.03592300042510033 2023-01-24 04:27:25.010778: step: 456/466, loss: 0.06754646450281143 2023-01-24 04:27:25.778861: step: 458/466, loss: 0.004598780535161495 2023-01-24 04:27:26.637721: step: 460/466, loss: 0.04046473652124405 2023-01-24 04:27:27.321282: step: 462/466, loss: 0.20700432360172272 2023-01-24 04:27:28.096990: step: 464/466, loss: 0.009331752546131611 2023-01-24 04:27:28.827199: step: 466/466, loss: 0.038695015013217926 2023-01-24 04:27:29.571998: step: 468/466, loss: 0.03941889852285385 2023-01-24 04:27:30.333425: step: 470/466, loss: 0.08039289712905884 2023-01-24 04:27:31.108634: step: 472/466, loss: 0.06569116562604904 2023-01-24 04:27:31.914984: step: 474/466, loss: 0.026711666956543922 2023-01-24 04:27:32.804376: step: 476/466, loss: 0.07863642275333405 2023-01-24 04:27:33.624223: step: 478/466, loss: 0.018847770988941193 2023-01-24 04:27:34.379649: step: 480/466, loss: 0.0439763106405735 2023-01-24 04:27:35.175248: step: 482/466, loss: 0.029851358383893967 2023-01-24 04:27:35.872614: step: 484/466, loss: 0.007683632429689169 2023-01-24 04:27:36.657700: step: 486/466, loss: 0.14108704030513763 2023-01-24 04:27:37.526601: step: 488/466, loss: 0.03667457029223442 2023-01-24 04:27:38.237768: step: 490/466, loss: 0.03250245749950409 2023-01-24 04:27:38.923081: step: 492/466, loss: 0.06343529373407364 2023-01-24 04:27:39.697392: step: 494/466, loss: 0.0005856614443473518 2023-01-24 04:27:40.417853: step: 496/466, loss: 0.11331066489219666 2023-01-24 04:27:41.167259: step: 498/466, loss: 0.037126459181308746 2023-01-24 04:27:41.934582: step: 500/466, loss: 0.06286133080720901 2023-01-24 04:27:42.675474: step: 502/466, loss: 0.030627667903900146 2023-01-24 04:27:43.463376: step: 504/466, loss: 0.060949284583330154 2023-01-24 04:27:44.265446: step: 506/466, loss: 0.039407823234796524 2023-01-24 04:27:45.003102: step: 508/466, loss: 0.030075030401349068 2023-01-24 04:27:45.734439: step: 510/466, loss: 0.1829003244638443 2023-01-24 04:27:46.496396: step: 512/466, loss: 0.03227916359901428 2023-01-24 04:27:47.304252: step: 514/466, loss: 0.06386967748403549 2023-01-24 04:27:48.004265: step: 516/466, loss: 0.029427075758576393 2023-01-24 04:27:48.814587: step: 518/466, loss: 0.019630271941423416 2023-01-24 04:27:49.584863: step: 520/466, loss: 0.1718192994594574 2023-01-24 04:27:50.339568: step: 522/466, loss: 0.10603654384613037 2023-01-24 04:27:51.020419: step: 524/466, loss: 0.07974043488502502 2023-01-24 04:27:51.805893: step: 526/466, loss: 0.11766904592514038 2023-01-24 04:27:52.547670: step: 528/466, loss: 0.025251492857933044 2023-01-24 04:27:53.325513: step: 530/466, loss: 0.008425693958997726 2023-01-24 04:27:54.069615: step: 532/466, loss: 0.015584269538521767 2023-01-24 04:27:54.827812: step: 534/466, loss: 0.05043382942676544 2023-01-24 04:27:55.587617: step: 536/466, loss: 0.0699617862701416 2023-01-24 04:27:56.375680: step: 538/466, loss: 0.05954763665795326 2023-01-24 04:27:57.089864: step: 540/466, loss: 0.027190033346414566 2023-01-24 04:27:57.803876: step: 542/466, loss: 0.04364040866494179 2023-01-24 04:27:58.526076: step: 544/466, loss: 0.06381090730428696 2023-01-24 04:27:59.271039: step: 546/466, loss: 0.05105084925889969 2023-01-24 04:27:59.984559: step: 548/466, loss: 0.05020805820822716 2023-01-24 04:28:00.775404: step: 550/466, loss: 0.17512200772762299 2023-01-24 04:28:01.618950: step: 552/466, loss: 0.0026450727600604296 2023-01-24 04:28:02.490694: step: 554/466, loss: 0.13461250066757202 2023-01-24 04:28:03.246215: step: 556/466, loss: 0.033968906849622726 2023-01-24 04:28:04.033279: step: 558/466, loss: 0.037686340510845184 2023-01-24 04:28:04.820460: step: 560/466, loss: 0.05116521194577217 2023-01-24 04:28:05.570761: step: 562/466, loss: 0.1257532835006714 2023-01-24 04:28:06.311947: step: 564/466, loss: 0.07375410944223404 2023-01-24 04:28:07.012761: step: 566/466, loss: 0.06227536499500275 2023-01-24 04:28:07.762444: step: 568/466, loss: 0.043114885687828064 2023-01-24 04:28:08.524330: step: 570/466, loss: 0.08072521537542343 2023-01-24 04:28:09.294041: step: 572/466, loss: 0.02490549348294735 2023-01-24 04:28:10.001569: step: 574/466, loss: 0.057185281068086624 2023-01-24 04:28:10.705774: step: 576/466, loss: 0.021186070516705513 2023-01-24 04:28:11.477718: step: 578/466, loss: 0.03023500367999077 2023-01-24 04:28:12.173588: step: 580/466, loss: 0.08721121400594711 2023-01-24 04:28:12.925453: step: 582/466, loss: 0.20383666455745697 2023-01-24 04:28:13.679699: step: 584/466, loss: 0.03602638468146324 2023-01-24 04:28:14.424111: step: 586/466, loss: 0.09032157063484192 2023-01-24 04:28:15.239553: step: 588/466, loss: 0.04010477289557457 2023-01-24 04:28:16.027912: step: 590/466, loss: 0.10806169360876083 2023-01-24 04:28:16.761345: step: 592/466, loss: 0.33428919315338135 2023-01-24 04:28:17.563935: step: 594/466, loss: 0.003103163791820407 2023-01-24 04:28:18.273790: step: 596/466, loss: 0.057396624237298965 2023-01-24 04:28:19.027783: step: 598/466, loss: 0.03389997407793999 2023-01-24 04:28:19.815220: step: 600/466, loss: 0.1480042040348053 2023-01-24 04:28:20.529916: step: 602/466, loss: 0.11612707376480103 2023-01-24 04:28:21.336981: step: 604/466, loss: 0.14691177010536194 2023-01-24 04:28:22.132922: step: 606/466, loss: 0.05263170599937439 2023-01-24 04:28:22.796453: step: 608/466, loss: 0.026127617806196213 2023-01-24 04:28:23.499831: step: 610/466, loss: 0.009142450988292694 2023-01-24 04:28:24.298520: step: 612/466, loss: 0.13319078087806702 2023-01-24 04:28:25.038311: step: 614/466, loss: 0.027096861973404884 2023-01-24 04:28:25.741338: step: 616/466, loss: 0.10066401213407516 2023-01-24 04:28:26.552072: step: 618/466, loss: 0.25737836956977844 2023-01-24 04:28:27.288948: step: 620/466, loss: 0.01815767213702202 2023-01-24 04:28:28.184003: step: 622/466, loss: 0.039933137595653534 2023-01-24 04:28:28.944402: step: 624/466, loss: 0.008822445757687092 2023-01-24 04:28:29.787700: step: 626/466, loss: 0.04307998716831207 2023-01-24 04:28:30.568647: step: 628/466, loss: 0.04361351579427719 2023-01-24 04:28:31.273311: step: 630/466, loss: 0.06477247178554535 2023-01-24 04:28:32.076770: step: 632/466, loss: 0.09567868709564209 2023-01-24 04:28:32.781882: step: 634/466, loss: 0.05815883353352547 2023-01-24 04:28:33.510372: step: 636/466, loss: 0.021055176854133606 2023-01-24 04:28:34.323934: step: 638/466, loss: 0.07199005782604218 2023-01-24 04:28:35.093162: step: 640/466, loss: 0.03393526375293732 2023-01-24 04:28:35.856345: step: 642/466, loss: 0.1396375149488449 2023-01-24 04:28:36.626714: step: 644/466, loss: 0.05805453285574913 2023-01-24 04:28:37.325734: step: 646/466, loss: 0.00800328515470028 2023-01-24 04:28:38.108412: step: 648/466, loss: 0.12152507156133652 2023-01-24 04:28:38.820199: step: 650/466, loss: 0.20110099017620087 2023-01-24 04:28:39.681092: step: 652/466, loss: 0.08574999123811722 2023-01-24 04:28:40.459720: step: 654/466, loss: 0.02011111192405224 2023-01-24 04:28:41.230894: step: 656/466, loss: 0.07749201357364655 2023-01-24 04:28:42.024390: step: 658/466, loss: 0.12124801427125931 2023-01-24 04:28:42.838289: step: 660/466, loss: 0.03949256241321564 2023-01-24 04:28:43.767035: step: 662/466, loss: 0.010345552116632462 2023-01-24 04:28:44.468581: step: 664/466, loss: 0.021447787061333656 2023-01-24 04:28:45.237111: step: 666/466, loss: 0.08892843127250671 2023-01-24 04:28:46.003354: step: 668/466, loss: 0.058989591896533966 2023-01-24 04:28:46.717966: step: 670/466, loss: 0.016034310683608055 2023-01-24 04:28:47.431130: step: 672/466, loss: 0.01991070993244648 2023-01-24 04:28:48.372133: step: 674/466, loss: 0.05193669721484184 2023-01-24 04:28:49.197217: step: 676/466, loss: 0.0062385061755776405 2023-01-24 04:28:49.999624: step: 678/466, loss: 0.06463981419801712 2023-01-24 04:28:50.820976: step: 680/466, loss: 0.18236199021339417 2023-01-24 04:28:51.532868: step: 682/466, loss: 0.019208496436476707 2023-01-24 04:28:52.235813: step: 684/466, loss: 0.010285614989697933 2023-01-24 04:28:53.001104: step: 686/466, loss: 0.03297063335776329 2023-01-24 04:28:53.703826: step: 688/466, loss: 0.1760704517364502 2023-01-24 04:28:54.532123: step: 690/466, loss: 0.01781740039587021 2023-01-24 04:28:55.335051: step: 692/466, loss: 0.03579838573932648 2023-01-24 04:28:56.096676: step: 694/466, loss: 0.040287796407938004 2023-01-24 04:28:56.923473: step: 696/466, loss: 0.03944031521677971 2023-01-24 04:28:57.685374: step: 698/466, loss: 0.0013104267418384552 2023-01-24 04:28:58.460705: step: 700/466, loss: 0.17801056802272797 2023-01-24 04:28:59.268979: step: 702/466, loss: 0.0916200578212738 2023-01-24 04:29:00.144206: step: 704/466, loss: 0.033320989459753036 2023-01-24 04:29:00.919444: step: 706/466, loss: 0.04283035546541214 2023-01-24 04:29:01.612419: step: 708/466, loss: 0.1713336706161499 2023-01-24 04:29:02.397851: step: 710/466, loss: 0.016873905435204506 2023-01-24 04:29:03.173770: step: 712/466, loss: 0.05669533833861351 2023-01-24 04:29:03.921077: step: 714/466, loss: 0.2237488031387329 2023-01-24 04:29:04.686025: step: 716/466, loss: 0.05956115201115608 2023-01-24 04:29:05.462452: step: 718/466, loss: 0.45502448081970215 2023-01-24 04:29:06.253749: step: 720/466, loss: 0.06795157492160797 2023-01-24 04:29:07.061494: step: 722/466, loss: 0.12121880799531937 2023-01-24 04:29:07.768457: step: 724/466, loss: 0.05142972618341446 2023-01-24 04:29:08.620516: step: 726/466, loss: 0.08417578041553497 2023-01-24 04:29:09.389915: step: 728/466, loss: 0.1284528374671936 2023-01-24 04:29:10.119240: step: 730/466, loss: 0.09293889999389648 2023-01-24 04:29:10.865713: step: 732/466, loss: 0.06237662583589554 2023-01-24 04:29:11.590029: step: 734/466, loss: 0.08169272541999817 2023-01-24 04:29:12.318742: step: 736/466, loss: 0.015537315979599953 2023-01-24 04:29:13.106312: step: 738/466, loss: 0.018973803147673607 2023-01-24 04:29:13.864412: step: 740/466, loss: 0.019834930077195168 2023-01-24 04:29:14.601758: step: 742/466, loss: 0.024088917300105095 2023-01-24 04:29:15.411212: step: 744/466, loss: 0.10155860334634781 2023-01-24 04:29:16.195244: step: 746/466, loss: 0.024841653183102608 2023-01-24 04:29:16.968287: step: 748/466, loss: 0.03416226804256439 2023-01-24 04:29:17.782058: step: 750/466, loss: 0.0373273529112339 2023-01-24 04:29:18.456684: step: 752/466, loss: 0.03173748031258583 2023-01-24 04:29:19.238564: step: 754/466, loss: 0.035857170820236206 2023-01-24 04:29:19.948273: step: 756/466, loss: 0.0462789386510849 2023-01-24 04:29:20.713315: step: 758/466, loss: 0.2072424590587616 2023-01-24 04:29:21.483764: step: 760/466, loss: 0.15545901656150818 2023-01-24 04:29:22.269467: step: 762/466, loss: 0.09147053956985474 2023-01-24 04:29:23.097835: step: 764/466, loss: 0.1897769570350647 2023-01-24 04:29:23.866939: step: 766/466, loss: 0.040288910269737244 2023-01-24 04:29:24.639490: step: 768/466, loss: 0.014940326102077961 2023-01-24 04:29:25.337289: step: 770/466, loss: 0.037154488265514374 2023-01-24 04:29:26.083107: step: 772/466, loss: 0.052049510180950165 2023-01-24 04:29:26.838941: step: 774/466, loss: 0.1100940853357315 2023-01-24 04:29:27.625224: step: 776/466, loss: 0.033307034522295 2023-01-24 04:29:28.437204: step: 778/466, loss: 0.03242477402091026 2023-01-24 04:29:29.181633: step: 780/466, loss: 0.02020042948424816 2023-01-24 04:29:29.848444: step: 782/466, loss: 0.03318953886628151 2023-01-24 04:29:30.631840: step: 784/466, loss: 0.029286310076713562 2023-01-24 04:29:31.276610: step: 786/466, loss: 0.04063934460282326 2023-01-24 04:29:32.086834: step: 788/466, loss: 0.05453144386410713 2023-01-24 04:29:32.874911: step: 790/466, loss: 0.05044718086719513 2023-01-24 04:29:33.693377: step: 792/466, loss: 0.03858442232012749 2023-01-24 04:29:34.401737: step: 794/466, loss: 0.07941761612892151 2023-01-24 04:29:35.158399: step: 796/466, loss: 0.022421518340706825 2023-01-24 04:29:35.874357: step: 798/466, loss: 0.16522493958473206 2023-01-24 04:29:36.698687: step: 800/466, loss: 0.036643315106630325 2023-01-24 04:29:37.463371: step: 802/466, loss: 0.05247822403907776 2023-01-24 04:29:38.229022: step: 804/466, loss: 0.14290215075016022 2023-01-24 04:29:38.941289: step: 806/466, loss: 0.002410825341939926 2023-01-24 04:29:39.691249: step: 808/466, loss: 0.00896370504051447 2023-01-24 04:29:40.433863: step: 810/466, loss: 0.21436962485313416 2023-01-24 04:29:41.171693: step: 812/466, loss: 0.01603039540350437 2023-01-24 04:29:41.954911: step: 814/466, loss: 0.046455636620521545 2023-01-24 04:29:42.751814: step: 816/466, loss: 0.015781737864017487 2023-01-24 04:29:43.467928: step: 818/466, loss: 0.4075051248073578 2023-01-24 04:29:44.189570: step: 820/466, loss: 0.0431065633893013 2023-01-24 04:29:44.964652: step: 822/466, loss: 0.039836108684539795 2023-01-24 04:29:45.750814: step: 824/466, loss: 0.36340564489364624 2023-01-24 04:29:46.542716: step: 826/466, loss: 0.020049631595611572 2023-01-24 04:29:47.280476: step: 828/466, loss: 0.36558952927589417 2023-01-24 04:29:48.092552: step: 830/466, loss: 0.2860356867313385 2023-01-24 04:29:48.794378: step: 832/466, loss: 0.10677853226661682 2023-01-24 04:29:49.635101: step: 834/466, loss: 0.2021457999944687 2023-01-24 04:29:50.405400: step: 836/466, loss: 0.011159212328493595 2023-01-24 04:29:51.099861: step: 838/466, loss: 0.0147927301004529 2023-01-24 04:29:51.906221: step: 840/466, loss: 0.8715988993644714 2023-01-24 04:29:52.734306: step: 842/466, loss: 0.03797810524702072 2023-01-24 04:29:53.694699: step: 844/466, loss: 0.056683532893657684 2023-01-24 04:29:54.458488: step: 846/466, loss: 0.00796705111861229 2023-01-24 04:29:55.232682: step: 848/466, loss: 0.03010483831167221 2023-01-24 04:29:56.039123: step: 850/466, loss: 0.02800583280622959 2023-01-24 04:29:56.733549: step: 852/466, loss: 0.09175509959459305 2023-01-24 04:29:57.548910: step: 854/466, loss: 0.08602626621723175 2023-01-24 04:29:58.323016: step: 856/466, loss: 0.04435117170214653 2023-01-24 04:29:59.102670: step: 858/466, loss: 0.266827255487442 2023-01-24 04:29:59.809806: step: 860/466, loss: 0.09036989510059357 2023-01-24 04:30:00.561595: step: 862/466, loss: 0.06072517856955528 2023-01-24 04:30:01.371813: step: 864/466, loss: 0.08128470182418823 2023-01-24 04:30:02.135737: step: 866/466, loss: 0.2942475378513336 2023-01-24 04:30:02.854506: step: 868/466, loss: 0.07774436473846436 2023-01-24 04:30:03.597747: step: 870/466, loss: 0.04398656636476517 2023-01-24 04:30:04.424273: step: 872/466, loss: 0.03236667811870575 2023-01-24 04:30:05.181874: step: 874/466, loss: 0.013491833582520485 2023-01-24 04:30:05.928454: step: 876/466, loss: 0.186244398355484 2023-01-24 04:30:06.654714: step: 878/466, loss: 0.02830761857330799 2023-01-24 04:30:07.410013: step: 880/466, loss: 0.14868517220020294 2023-01-24 04:30:08.229457: step: 882/466, loss: 0.009692513383924961 2023-01-24 04:30:08.957153: step: 884/466, loss: 0.009559868834912777 2023-01-24 04:30:09.744118: step: 886/466, loss: 0.1469455510377884 2023-01-24 04:30:10.466764: step: 888/466, loss: 0.0737396776676178 2023-01-24 04:30:11.490986: step: 890/466, loss: 0.05260159447789192 2023-01-24 04:30:12.236399: step: 892/466, loss: 0.02190791442990303 2023-01-24 04:30:12.975143: step: 894/466, loss: 0.05156734585762024 2023-01-24 04:30:13.754075: step: 896/466, loss: 0.09620752185583115 2023-01-24 04:30:14.435058: step: 898/466, loss: 0.1586761176586151 2023-01-24 04:30:15.161537: step: 900/466, loss: 0.10566425323486328 2023-01-24 04:30:16.144181: step: 902/466, loss: 0.06891264021396637 2023-01-24 04:30:16.875508: step: 904/466, loss: 0.04123598709702492 2023-01-24 04:30:17.669597: step: 906/466, loss: 0.07150737196207047 2023-01-24 04:30:18.567301: step: 908/466, loss: 0.04209690913558006 2023-01-24 04:30:19.207198: step: 910/466, loss: 0.0065148635767400265 2023-01-24 04:30:19.992276: step: 912/466, loss: 0.041471704840660095 2023-01-24 04:30:20.688280: step: 914/466, loss: 0.01814841665327549 2023-01-24 04:30:21.433094: step: 916/466, loss: 0.04168889299035072 2023-01-24 04:30:22.263325: step: 918/466, loss: 0.13480067253112793 2023-01-24 04:30:23.000142: step: 920/466, loss: 0.09623857587575912 2023-01-24 04:30:23.712890: step: 922/466, loss: 0.08783978968858719 2023-01-24 04:30:24.525954: step: 924/466, loss: 0.7061190009117126 2023-01-24 04:30:25.340850: step: 926/466, loss: 0.1090499609708786 2023-01-24 04:30:26.006988: step: 928/466, loss: 0.06472326815128326 2023-01-24 04:30:26.834315: step: 930/466, loss: 0.3719378709793091 2023-01-24 04:30:27.737700: step: 932/466, loss: 0.028679050505161285 ================================================== Loss: 0.100 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.346552225778295, 'r': 0.32287882895093517, 'f1': 0.33429694078024136}, 'combined': 0.24632406162754625, 'epoch': 20} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3668523711289184, 'r': 0.2803845678818943, 'f1': 0.3178426241018723}, 'combined': 0.19535692993578493, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3230445396442159, 'r': 0.32856142931555926, 'f1': 0.325779629819943}, 'combined': 0.24004814828837906, 'epoch': 20} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3573976362322824, 'r': 0.2849270583480934, 'f1': 0.3170740842176469}, 'combined': 0.19488455908011468, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35016867116456435, 'r': 0.32359040390349686, 'f1': 0.3363553113553113}, 'combined': 0.24784075573549255, 'epoch': 20} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3743334936080548, 'r': 0.2870755128623297, 'f1': 0.32494864329880185}, 'combined': 0.2007035738022012, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2847222222222222, 'r': 0.44565217391304346, 'f1': 0.3474576271186441}, 'combined': 0.17372881355932204, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34582504970178923, 'r': 0.33007590132827325, 'f1': 0.3377669902912621}, 'combined': 0.24888094021461418, 'epoch': 19} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36038315043942815, 'r': 0.2939803727200525, 'f1': 0.3238125877697768}, 'combined': 0.20000189244603864, 'epoch': 19} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 19} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:33:11.622937: step: 2/466, loss: 0.02420053258538246 2023-01-24 04:33:12.360213: step: 4/466, loss: 0.021157976239919662 2023-01-24 04:33:13.086711: step: 6/466, loss: 0.027058003470301628 2023-01-24 04:33:13.783000: step: 8/466, loss: 0.025552958250045776 2023-01-24 04:33:14.500455: step: 10/466, loss: 0.017578069120645523 2023-01-24 04:33:15.242159: step: 12/466, loss: 0.02473308891057968 2023-01-24 04:33:16.002949: step: 14/466, loss: 0.054351478815078735 2023-01-24 04:33:16.734059: step: 16/466, loss: 0.015328841283917427 2023-01-24 04:33:17.494611: step: 18/466, loss: 0.05652279034256935 2023-01-24 04:33:18.386175: step: 20/466, loss: 0.0549527108669281 2023-01-24 04:33:19.115507: step: 22/466, loss: 0.03255992382764816 2023-01-24 04:33:19.904723: step: 24/466, loss: 0.014854073524475098 2023-01-24 04:33:20.607742: step: 26/466, loss: 1.2386393547058105 2023-01-24 04:33:21.453766: step: 28/466, loss: 0.21185410022735596 2023-01-24 04:33:22.143133: step: 30/466, loss: 0.03914286196231842 2023-01-24 04:33:22.900301: step: 32/466, loss: 0.010245121084153652 2023-01-24 04:33:23.587974: step: 34/466, loss: 0.028347767889499664 2023-01-24 04:33:24.342946: step: 36/466, loss: 0.03141717240214348 2023-01-24 04:33:25.126208: step: 38/466, loss: 0.017886755988001823 2023-01-24 04:33:25.844142: step: 40/466, loss: 0.003856037277728319 2023-01-24 04:33:26.573642: step: 42/466, loss: 0.0017376018222421408 2023-01-24 04:33:27.330336: step: 44/466, loss: 0.15095050632953644 2023-01-24 04:33:28.110324: step: 46/466, loss: 0.05372604727745056 2023-01-24 04:33:28.852805: step: 48/466, loss: 0.012925044633448124 2023-01-24 04:33:29.701895: step: 50/466, loss: 0.01901458576321602 2023-01-24 04:33:30.384984: step: 52/466, loss: 0.026867816224694252 2023-01-24 04:33:31.143013: step: 54/466, loss: 0.030041607096791267 2023-01-24 04:33:31.885860: step: 56/466, loss: 0.042561452835798264 2023-01-24 04:33:32.573366: step: 58/466, loss: 0.027939572930336 2023-01-24 04:33:33.383262: step: 60/466, loss: 0.09363628923892975 2023-01-24 04:33:34.205496: step: 62/466, loss: 0.031050391495227814 2023-01-24 04:33:34.981117: step: 64/466, loss: 0.006767976563423872 2023-01-24 04:33:35.785646: step: 66/466, loss: 0.036450713872909546 2023-01-24 04:33:36.590362: step: 68/466, loss: 0.00916550774127245 2023-01-24 04:33:37.326868: step: 70/466, loss: 0.023364635184407234 2023-01-24 04:33:38.117192: step: 72/466, loss: 0.006844291463494301 2023-01-24 04:33:38.827465: step: 74/466, loss: 0.0034316659439355135 2023-01-24 04:33:39.678197: step: 76/466, loss: 0.6681513786315918 2023-01-24 04:33:40.449539: step: 78/466, loss: 0.04171931743621826 2023-01-24 04:33:41.268467: step: 80/466, loss: 0.09284312278032303 2023-01-24 04:33:41.975452: step: 82/466, loss: 0.05913330242037773 2023-01-24 04:33:42.784469: step: 84/466, loss: 0.05039322376251221 2023-01-24 04:33:43.508853: step: 86/466, loss: 0.8253809809684753 2023-01-24 04:33:44.316235: step: 88/466, loss: 0.06900697201490402 2023-01-24 04:33:45.178457: step: 90/466, loss: 0.01916087418794632 2023-01-24 04:33:45.923448: step: 92/466, loss: 0.006514217238873243 2023-01-24 04:33:46.692131: step: 94/466, loss: 0.015011530369520187 2023-01-24 04:33:47.417220: step: 96/466, loss: 0.017279941588640213 2023-01-24 04:33:48.288915: step: 98/466, loss: 0.07186633348464966 2023-01-24 04:33:49.107183: step: 100/466, loss: 0.024932416155934334 2023-01-24 04:33:49.931446: step: 102/466, loss: 0.05825050175189972 2023-01-24 04:33:50.665506: step: 104/466, loss: 0.05546033754944801 2023-01-24 04:33:51.528851: step: 106/466, loss: 0.13620232045650482 2023-01-24 04:33:52.312753: step: 108/466, loss: 0.039917781949043274 2023-01-24 04:33:53.037319: step: 110/466, loss: 0.020082594826817513 2023-01-24 04:33:53.782672: step: 112/466, loss: 0.09066887199878693 2023-01-24 04:33:54.580039: step: 114/466, loss: 0.0343233123421669 2023-01-24 04:33:55.299991: step: 116/466, loss: 0.017364807426929474 2023-01-24 04:33:56.009424: step: 118/466, loss: 0.010822913609445095 2023-01-24 04:33:56.794941: step: 120/466, loss: 0.06861522793769836 2023-01-24 04:33:57.656995: step: 122/466, loss: 0.11362996697425842 2023-01-24 04:33:58.469211: step: 124/466, loss: 0.020435810089111328 2023-01-24 04:33:59.185073: step: 126/466, loss: 0.07974167913198471 2023-01-24 04:34:00.030988: step: 128/466, loss: 0.04215514659881592 2023-01-24 04:34:00.788852: step: 130/466, loss: 0.08239579200744629 2023-01-24 04:34:01.513048: step: 132/466, loss: 0.049038395285606384 2023-01-24 04:34:02.320564: step: 134/466, loss: 0.060617074370384216 2023-01-24 04:34:03.072190: step: 136/466, loss: 0.01857878267765045 2023-01-24 04:34:03.881550: step: 138/466, loss: 0.04355144873261452 2023-01-24 04:34:04.586875: step: 140/466, loss: 0.05773269385099411 2023-01-24 04:34:05.292324: step: 142/466, loss: 0.0333169624209404 2023-01-24 04:34:06.087463: step: 144/466, loss: 0.07793013006448746 2023-01-24 04:34:06.762962: step: 146/466, loss: 0.12647201120853424 2023-01-24 04:34:07.521792: step: 148/466, loss: 0.01892600767314434 2023-01-24 04:34:08.296296: step: 150/466, loss: 0.045988768339157104 2023-01-24 04:34:09.133666: step: 152/466, loss: 0.0888764038681984 2023-01-24 04:34:09.914536: step: 154/466, loss: 0.010339142754673958 2023-01-24 04:34:10.743232: step: 156/466, loss: 0.03397432714700699 2023-01-24 04:34:11.466679: step: 158/466, loss: 0.02980886958539486 2023-01-24 04:34:12.244209: step: 160/466, loss: 0.07075031101703644 2023-01-24 04:34:13.018743: step: 162/466, loss: 0.004570677876472473 2023-01-24 04:34:13.789562: step: 164/466, loss: 0.0559922493994236 2023-01-24 04:34:14.482317: step: 166/466, loss: 0.023972397670149803 2023-01-24 04:34:15.187974: step: 168/466, loss: 0.0040644872933626175 2023-01-24 04:34:15.901577: step: 170/466, loss: 0.06624700874090195 2023-01-24 04:34:16.650151: step: 172/466, loss: 0.08573484420776367 2023-01-24 04:34:17.437771: step: 174/466, loss: 0.02354901283979416 2023-01-24 04:34:18.129020: step: 176/466, loss: 0.0014065414434298873 2023-01-24 04:34:18.870991: step: 178/466, loss: 0.009613445028662682 2023-01-24 04:34:19.684026: step: 180/466, loss: 0.022372951731085777 2023-01-24 04:34:20.418299: step: 182/466, loss: 0.03555876389145851 2023-01-24 04:34:21.080753: step: 184/466, loss: 0.0734868124127388 2023-01-24 04:34:21.840739: step: 186/466, loss: 0.03425343707203865 2023-01-24 04:34:22.554734: step: 188/466, loss: 0.02357129566371441 2023-01-24 04:34:23.240778: step: 190/466, loss: 0.06736937165260315 2023-01-24 04:34:23.955197: step: 192/466, loss: 0.095945343375206 2023-01-24 04:34:24.814206: step: 194/466, loss: 0.10415996611118317 2023-01-24 04:34:25.503368: step: 196/466, loss: 0.402849942445755 2023-01-24 04:34:26.321234: step: 198/466, loss: 0.04138009622693062 2023-01-24 04:34:27.120395: step: 200/466, loss: 0.017961658537387848 2023-01-24 04:34:27.826780: step: 202/466, loss: 0.039096955209970474 2023-01-24 04:34:28.557137: step: 204/466, loss: 0.21321329474449158 2023-01-24 04:34:29.337246: step: 206/466, loss: 0.04853019863367081 2023-01-24 04:34:30.054698: step: 208/466, loss: 0.014309985563158989 2023-01-24 04:34:30.769299: step: 210/466, loss: 0.042209725826978683 2023-01-24 04:34:31.499412: step: 212/466, loss: 0.0016783431638032198 2023-01-24 04:34:32.265868: step: 214/466, loss: 0.006007462274283171 2023-01-24 04:34:33.041247: step: 216/466, loss: 8.656112670898438 2023-01-24 04:34:33.827158: step: 218/466, loss: 0.04152832552790642 2023-01-24 04:34:34.658507: step: 220/466, loss: 0.031420446932315826 2023-01-24 04:34:35.400743: step: 222/466, loss: 0.039263706654310226 2023-01-24 04:34:36.174117: step: 224/466, loss: 0.1400732547044754 2023-01-24 04:34:36.935783: step: 226/466, loss: 0.0901549756526947 2023-01-24 04:34:37.722225: step: 228/466, loss: 0.0026076301001012325 2023-01-24 04:34:38.495279: step: 230/466, loss: 0.16132469475269318 2023-01-24 04:34:39.231243: step: 232/466, loss: 0.002261529676616192 2023-01-24 04:34:39.981709: step: 234/466, loss: 0.018132373690605164 2023-01-24 04:34:40.735312: step: 236/466, loss: 0.01911015622317791 2023-01-24 04:34:41.521989: step: 238/466, loss: 0.07900919765233994 2023-01-24 04:34:42.276875: step: 240/466, loss: 0.02429911494255066 2023-01-24 04:34:43.088979: step: 242/466, loss: 0.0636911392211914 2023-01-24 04:34:43.778867: step: 244/466, loss: 0.032838374376297 2023-01-24 04:34:44.624109: step: 246/466, loss: 0.09503611922264099 2023-01-24 04:34:45.358352: step: 248/466, loss: 0.10877351462841034 2023-01-24 04:34:46.180181: step: 250/466, loss: 0.021549206227064133 2023-01-24 04:34:46.853876: step: 252/466, loss: 0.7020695805549622 2023-01-24 04:34:47.658710: step: 254/466, loss: 0.014177825301885605 2023-01-24 04:34:48.332088: step: 256/466, loss: 0.05610502511262894 2023-01-24 04:34:49.061399: step: 258/466, loss: 0.02227671630680561 2023-01-24 04:34:49.917106: step: 260/466, loss: 0.013494499027729034 2023-01-24 04:34:50.635359: step: 262/466, loss: 0.014789719134569168 2023-01-24 04:34:51.359308: step: 264/466, loss: 0.006463128607720137 2023-01-24 04:34:52.151172: step: 266/466, loss: 0.06511010974645615 2023-01-24 04:34:52.949415: step: 268/466, loss: 0.10073195397853851 2023-01-24 04:34:53.728627: step: 270/466, loss: 0.0873476192355156 2023-01-24 04:34:54.476835: step: 272/466, loss: 0.014236886985599995 2023-01-24 04:34:55.197588: step: 274/466, loss: 0.0363636277616024 2023-01-24 04:34:55.917607: step: 276/466, loss: 0.026089193299412727 2023-01-24 04:34:56.666134: step: 278/466, loss: 0.02656644769012928 2023-01-24 04:34:57.391907: step: 280/466, loss: 0.0171302892267704 2023-01-24 04:34:58.188024: step: 282/466, loss: 0.06511224061250687 2023-01-24 04:34:58.935337: step: 284/466, loss: 0.013560446910560131 2023-01-24 04:34:59.664053: step: 286/466, loss: 0.011944221332669258 2023-01-24 04:35:00.467146: step: 288/466, loss: 0.01988859660923481 2023-01-24 04:35:01.114961: step: 290/466, loss: 0.01531740091741085 2023-01-24 04:35:01.859263: step: 292/466, loss: 0.09434042870998383 2023-01-24 04:35:02.650133: step: 294/466, loss: 0.011777781881392002 2023-01-24 04:35:03.334059: step: 296/466, loss: 0.07179064303636551 2023-01-24 04:35:04.075404: step: 298/466, loss: 0.07993976771831512 2023-01-24 04:35:04.870811: step: 300/466, loss: 1.4223395586013794 2023-01-24 04:35:05.742622: step: 302/466, loss: 0.013697385787963867 2023-01-24 04:35:06.518466: step: 304/466, loss: 0.022314228117465973 2023-01-24 04:35:07.294299: step: 306/466, loss: 0.03576982766389847 2023-01-24 04:35:08.055609: step: 308/466, loss: 0.054183077067136765 2023-01-24 04:35:08.879203: step: 310/466, loss: 0.08912408351898193 2023-01-24 04:35:09.657452: step: 312/466, loss: 0.048739008605480194 2023-01-24 04:35:10.375304: step: 314/466, loss: 0.00755567429587245 2023-01-24 04:35:11.091921: step: 316/466, loss: 0.12594488263130188 2023-01-24 04:35:11.862028: step: 318/466, loss: 0.01600790023803711 2023-01-24 04:35:12.607510: step: 320/466, loss: 0.04415088891983032 2023-01-24 04:35:13.318969: step: 322/466, loss: 0.03756783530116081 2023-01-24 04:35:14.053825: step: 324/466, loss: 0.011490639299154282 2023-01-24 04:35:14.843405: step: 326/466, loss: 0.02899995446205139 2023-01-24 04:35:15.696128: step: 328/466, loss: 0.15357692539691925 2023-01-24 04:35:16.450215: step: 330/466, loss: 0.016842082142829895 2023-01-24 04:35:17.227825: step: 332/466, loss: 0.14622445404529572 2023-01-24 04:35:18.026566: step: 334/466, loss: 0.03253905102610588 2023-01-24 04:35:18.778622: step: 336/466, loss: 0.17871153354644775 2023-01-24 04:35:19.542529: step: 338/466, loss: 0.01588170975446701 2023-01-24 04:35:20.318584: step: 340/466, loss: 0.02543899230659008 2023-01-24 04:35:21.074510: step: 342/466, loss: 0.5410417914390564 2023-01-24 04:35:21.904299: step: 344/466, loss: 0.006712695118039846 2023-01-24 04:35:22.606075: step: 346/466, loss: 0.03204884007573128 2023-01-24 04:35:23.390676: step: 348/466, loss: 0.03173366189002991 2023-01-24 04:35:24.376699: step: 350/466, loss: 0.07975351065397263 2023-01-24 04:35:25.048839: step: 352/466, loss: 0.08007784187793732 2023-01-24 04:35:25.845873: step: 354/466, loss: 0.051084209233522415 2023-01-24 04:35:26.608003: step: 356/466, loss: 0.003182527609169483 2023-01-24 04:35:27.346200: step: 358/466, loss: 0.05374673381447792 2023-01-24 04:35:28.091170: step: 360/466, loss: 0.04119402915239334 2023-01-24 04:35:28.864748: step: 362/466, loss: 0.031509507447481155 2023-01-24 04:35:29.671515: step: 364/466, loss: 0.02087160013616085 2023-01-24 04:35:30.383241: step: 366/466, loss: 0.01806093193590641 2023-01-24 04:35:31.092246: step: 368/466, loss: 0.050688087940216064 2023-01-24 04:35:31.790275: step: 370/466, loss: 0.06753750890493393 2023-01-24 04:35:32.547848: step: 372/466, loss: 0.08971969038248062 2023-01-24 04:35:33.259556: step: 374/466, loss: 0.010876539163291454 2023-01-24 04:35:33.971327: step: 376/466, loss: 0.02585495449602604 2023-01-24 04:35:34.822784: step: 378/466, loss: 0.49682512879371643 2023-01-24 04:35:35.531882: step: 380/466, loss: 0.021598313003778458 2023-01-24 04:35:36.173489: step: 382/466, loss: 0.9013242721557617 2023-01-24 04:35:36.914540: step: 384/466, loss: 0.029887091368436813 2023-01-24 04:35:37.718950: step: 386/466, loss: 0.06689973920583725 2023-01-24 04:35:38.517704: step: 388/466, loss: 0.020853828638792038 2023-01-24 04:35:39.283407: step: 390/466, loss: 0.6124326586723328 2023-01-24 04:35:40.144125: step: 392/466, loss: 0.03974926099181175 2023-01-24 04:35:40.982609: step: 394/466, loss: 0.04044046252965927 2023-01-24 04:35:41.718027: step: 396/466, loss: 0.025991858914494514 2023-01-24 04:35:42.502522: step: 398/466, loss: 0.03772665187716484 2023-01-24 04:35:43.220482: step: 400/466, loss: 0.04145622253417969 2023-01-24 04:35:43.974373: step: 402/466, loss: 0.028987523168325424 2023-01-24 04:35:44.708062: step: 404/466, loss: 0.014267069287598133 2023-01-24 04:35:45.471590: step: 406/466, loss: 0.09248199313879013 2023-01-24 04:35:46.244931: step: 408/466, loss: 0.018743878230452538 2023-01-24 04:35:47.125114: step: 410/466, loss: 0.10698945820331573 2023-01-24 04:35:47.966822: step: 412/466, loss: 0.014625504612922668 2023-01-24 04:35:48.763553: step: 414/466, loss: 0.036891624331474304 2023-01-24 04:35:49.482383: step: 416/466, loss: 0.0296842772513628 2023-01-24 04:35:50.320957: step: 418/466, loss: 0.06785248965024948 2023-01-24 04:35:51.079222: step: 420/466, loss: 0.020969906821846962 2023-01-24 04:35:51.780738: step: 422/466, loss: 0.0571817010641098 2023-01-24 04:35:52.549951: step: 424/466, loss: 0.2317938655614853 2023-01-24 04:35:53.365781: step: 426/466, loss: 0.035724662244319916 2023-01-24 04:35:54.142966: step: 428/466, loss: 0.03899478167295456 2023-01-24 04:35:54.926011: step: 430/466, loss: 0.019177095964550972 2023-01-24 04:35:55.638845: step: 432/466, loss: 0.09212999790906906 2023-01-24 04:35:56.409227: step: 434/466, loss: 0.08950886875391006 2023-01-24 04:35:57.194964: step: 436/466, loss: 0.047341521829366684 2023-01-24 04:35:57.912104: step: 438/466, loss: 0.06883928179740906 2023-01-24 04:35:58.633557: step: 440/466, loss: 0.03817324712872505 2023-01-24 04:35:59.433433: step: 442/466, loss: 0.2911444902420044 2023-01-24 04:36:00.204097: step: 444/466, loss: 0.04138999804854393 2023-01-24 04:36:00.947601: step: 446/466, loss: 0.024322273209691048 2023-01-24 04:36:01.667032: step: 448/466, loss: 0.05159672722220421 2023-01-24 04:36:02.438556: step: 450/466, loss: 0.08194046467542648 2023-01-24 04:36:03.142775: step: 452/466, loss: 0.038597095757722855 2023-01-24 04:36:03.985570: step: 454/466, loss: 0.06234016641974449 2023-01-24 04:36:04.757063: step: 456/466, loss: 0.20821239054203033 2023-01-24 04:36:05.598870: step: 458/466, loss: 0.2597760558128357 2023-01-24 04:36:06.364343: step: 460/466, loss: 1.241673469543457 2023-01-24 04:36:07.106201: step: 462/466, loss: 0.13873669505119324 2023-01-24 04:36:07.899306: step: 464/466, loss: 0.07433667033910751 2023-01-24 04:36:08.577632: step: 466/466, loss: 0.052657343447208405 2023-01-24 04:36:09.367568: step: 468/466, loss: 0.09154356271028519 2023-01-24 04:36:10.110338: step: 470/466, loss: 0.10661379992961884 2023-01-24 04:36:10.879611: step: 472/466, loss: 0.17577272653579712 2023-01-24 04:36:11.615592: step: 474/466, loss: 0.04618869721889496 2023-01-24 04:36:12.427988: step: 476/466, loss: 0.05479388311505318 2023-01-24 04:36:13.095400: step: 478/466, loss: 0.05641501024365425 2023-01-24 04:36:13.805150: step: 480/466, loss: 0.01575690694153309 2023-01-24 04:36:14.539927: step: 482/466, loss: 0.16104817390441895 2023-01-24 04:36:15.450637: step: 484/466, loss: 0.010711174458265305 2023-01-24 04:36:16.146499: step: 486/466, loss: 0.046062011271715164 2023-01-24 04:36:16.937041: step: 488/466, loss: 0.06522417068481445 2023-01-24 04:36:17.667170: step: 490/466, loss: 0.020062437281012535 2023-01-24 04:36:18.473703: step: 492/466, loss: 0.037860166281461716 2023-01-24 04:36:19.241984: step: 494/466, loss: 0.09146386384963989 2023-01-24 04:36:19.987927: step: 496/466, loss: 0.01153822336345911 2023-01-24 04:36:20.676813: step: 498/466, loss: 0.16628001630306244 2023-01-24 04:36:21.473387: step: 500/466, loss: 0.08503858000040054 2023-01-24 04:36:22.207487: step: 502/466, loss: 0.025091633200645447 2023-01-24 04:36:22.987955: step: 504/466, loss: 0.3517615497112274 2023-01-24 04:36:23.745064: step: 506/466, loss: 0.01056545227766037 2023-01-24 04:36:24.476354: step: 508/466, loss: 0.023207852616906166 2023-01-24 04:36:25.246310: step: 510/466, loss: 0.47248417139053345 2023-01-24 04:36:25.985374: step: 512/466, loss: 0.058128539472818375 2023-01-24 04:36:26.734282: step: 514/466, loss: 0.09256549179553986 2023-01-24 04:36:27.484399: step: 516/466, loss: 0.06956575065851212 2023-01-24 04:36:28.179198: step: 518/466, loss: 0.01629825495183468 2023-01-24 04:36:28.974676: step: 520/466, loss: 0.044001027941703796 2023-01-24 04:36:29.661687: step: 522/466, loss: 0.023603804409503937 2023-01-24 04:36:30.401278: step: 524/466, loss: 0.09090606123209 2023-01-24 04:36:31.160448: step: 526/466, loss: 0.030994001775979996 2023-01-24 04:36:31.993880: step: 528/466, loss: 0.08065718412399292 2023-01-24 04:36:32.640828: step: 530/466, loss: 0.07034345716238022 2023-01-24 04:36:33.373678: step: 532/466, loss: 0.017793208360671997 2023-01-24 04:36:34.208182: step: 534/466, loss: 0.07686349004507065 2023-01-24 04:36:35.008856: step: 536/466, loss: 0.039163898676633835 2023-01-24 04:36:35.840930: step: 538/466, loss: 0.14436209201812744 2023-01-24 04:36:36.568624: step: 540/466, loss: 0.0816352590918541 2023-01-24 04:36:37.368981: step: 542/466, loss: 0.09122592955827713 2023-01-24 04:36:38.117624: step: 544/466, loss: 0.04344628378748894 2023-01-24 04:36:38.867828: step: 546/466, loss: 0.024878213182091713 2023-01-24 04:36:39.584739: step: 548/466, loss: 0.07749707251787186 2023-01-24 04:36:40.338917: step: 550/466, loss: 0.09194403886795044 2023-01-24 04:36:41.158899: step: 552/466, loss: 0.048549845814704895 2023-01-24 04:36:42.006465: step: 554/466, loss: 0.09204865992069244 2023-01-24 04:36:42.741429: step: 556/466, loss: 0.0338570736348629 2023-01-24 04:36:43.428923: step: 558/466, loss: 0.09505950659513474 2023-01-24 04:36:44.200860: step: 560/466, loss: 0.11021043360233307 2023-01-24 04:36:44.879287: step: 562/466, loss: 0.0074821156449615955 2023-01-24 04:36:45.671724: step: 564/466, loss: 0.0620780810713768 2023-01-24 04:36:46.435967: step: 566/466, loss: 0.04471885412931442 2023-01-24 04:36:47.234527: step: 568/466, loss: 0.062284309417009354 2023-01-24 04:36:48.019627: step: 570/466, loss: 0.014837083406746387 2023-01-24 04:36:48.739942: step: 572/466, loss: 0.10218457132577896 2023-01-24 04:36:49.474049: step: 574/466, loss: 0.013688577339053154 2023-01-24 04:36:50.130860: step: 576/466, loss: 0.05932674929499626 2023-01-24 04:36:50.930586: step: 578/466, loss: 0.015389678999781609 2023-01-24 04:36:51.626565: step: 580/466, loss: 0.20691397786140442 2023-01-24 04:36:52.325923: step: 582/466, loss: 0.008128570392727852 2023-01-24 04:36:53.038372: step: 584/466, loss: 0.13989268243312836 2023-01-24 04:36:53.755627: step: 586/466, loss: 0.06592102348804474 2023-01-24 04:36:54.401997: step: 588/466, loss: 0.020636077970266342 2023-01-24 04:36:55.098507: step: 590/466, loss: 0.045246824622154236 2023-01-24 04:36:55.817188: step: 592/466, loss: 0.018867220729589462 2023-01-24 04:36:56.530906: step: 594/466, loss: 0.022000886499881744 2023-01-24 04:36:57.233144: step: 596/466, loss: 0.03726113215088844 2023-01-24 04:36:58.000572: step: 598/466, loss: 0.008649222552776337 2023-01-24 04:36:58.775355: step: 600/466, loss: 0.027857929468154907 2023-01-24 04:36:59.556893: step: 602/466, loss: 0.061211489140987396 2023-01-24 04:37:00.383501: step: 604/466, loss: 0.01916958950459957 2023-01-24 04:37:01.100439: step: 606/466, loss: 0.060166411101818085 2023-01-24 04:37:01.892000: step: 608/466, loss: 0.0379047766327858 2023-01-24 04:37:02.717731: step: 610/466, loss: 0.03668516129255295 2023-01-24 04:37:03.537168: step: 612/466, loss: 0.020177414640784264 2023-01-24 04:37:04.254200: step: 614/466, loss: 0.03892548382282257 2023-01-24 04:37:04.978605: step: 616/466, loss: 0.058562684804201126 2023-01-24 04:37:05.679225: step: 618/466, loss: 0.023050406947731972 2023-01-24 04:37:06.478282: step: 620/466, loss: 0.08460335433483124 2023-01-24 04:37:07.216373: step: 622/466, loss: 0.011020708829164505 2023-01-24 04:37:08.066016: step: 624/466, loss: 0.017338331788778305 2023-01-24 04:37:08.827540: step: 626/466, loss: 0.034749772399663925 2023-01-24 04:37:09.539643: step: 628/466, loss: 0.011427835561335087 2023-01-24 04:37:10.269151: step: 630/466, loss: 0.016694651916623116 2023-01-24 04:37:11.040187: step: 632/466, loss: 0.004482876975089312 2023-01-24 04:37:11.896853: step: 634/466, loss: 0.08318014442920685 2023-01-24 04:37:12.579060: step: 636/466, loss: 0.05684254318475723 2023-01-24 04:37:13.414428: step: 638/466, loss: 0.09137509018182755 2023-01-24 04:37:14.138586: step: 640/466, loss: 0.09939432144165039 2023-01-24 04:37:14.936296: step: 642/466, loss: 0.02181725762784481 2023-01-24 04:37:15.705779: step: 644/466, loss: 0.015538212843239307 2023-01-24 04:37:16.477498: step: 646/466, loss: 0.026405729353427887 2023-01-24 04:37:17.182030: step: 648/466, loss: 0.15074358880519867 2023-01-24 04:37:17.944412: step: 650/466, loss: 0.03875117376446724 2023-01-24 04:37:18.656115: step: 652/466, loss: 0.10887783020734787 2023-01-24 04:37:19.376089: step: 654/466, loss: 0.0982695072889328 2023-01-24 04:37:20.183703: step: 656/466, loss: 0.137633815407753 2023-01-24 04:37:20.967376: step: 658/466, loss: 0.03798063471913338 2023-01-24 04:37:21.718745: step: 660/466, loss: 0.046758100390434265 2023-01-24 04:37:22.423324: step: 662/466, loss: 0.030941788107156754 2023-01-24 04:37:23.222349: step: 664/466, loss: 0.010282697156071663 2023-01-24 04:37:24.018238: step: 666/466, loss: 0.09982012957334518 2023-01-24 04:37:24.974205: step: 668/466, loss: 0.03435714170336723 2023-01-24 04:37:25.694675: step: 670/466, loss: 0.018653811886906624 2023-01-24 04:37:26.576078: step: 672/466, loss: 0.6183578372001648 2023-01-24 04:37:27.351736: step: 674/466, loss: 0.028816204518079758 2023-01-24 04:37:28.094214: step: 676/466, loss: 0.0988229289650917 2023-01-24 04:37:28.810381: step: 678/466, loss: 0.06322664022445679 2023-01-24 04:37:29.563908: step: 680/466, loss: 0.044840168207883835 2023-01-24 04:37:30.343371: step: 682/466, loss: 0.1099725216627121 2023-01-24 04:37:31.149916: step: 684/466, loss: 0.05679919198155403 2023-01-24 04:37:31.989736: step: 686/466, loss: 0.02459697611629963 2023-01-24 04:37:32.741967: step: 688/466, loss: 0.05390128120779991 2023-01-24 04:37:33.472298: step: 690/466, loss: 0.2060244232416153 2023-01-24 04:37:34.172944: step: 692/466, loss: 0.07413557171821594 2023-01-24 04:37:34.932348: step: 694/466, loss: 0.02973468042910099 2023-01-24 04:37:35.723773: step: 696/466, loss: 0.08305200934410095 2023-01-24 04:37:36.481995: step: 698/466, loss: 0.04088933765888214 2023-01-24 04:37:37.255768: step: 700/466, loss: 0.07874433696269989 2023-01-24 04:37:38.026391: step: 702/466, loss: 0.09399781376123428 2023-01-24 04:37:38.842465: step: 704/466, loss: 0.023996638134121895 2023-01-24 04:37:39.576055: step: 706/466, loss: 0.05157013610005379 2023-01-24 04:37:40.224956: step: 708/466, loss: 0.047712381929159164 2023-01-24 04:37:40.988101: step: 710/466, loss: 0.047248851507902145 2023-01-24 04:37:41.775694: step: 712/466, loss: 0.033434826880693436 2023-01-24 04:37:42.601386: step: 714/466, loss: 0.03782849386334419 2023-01-24 04:37:43.231865: step: 716/466, loss: 0.016999607905745506 2023-01-24 04:37:43.959862: step: 718/466, loss: 0.01766519993543625 2023-01-24 04:37:44.759845: step: 720/466, loss: 0.018042655661702156 2023-01-24 04:37:45.608019: step: 722/466, loss: 0.024905243888497353 2023-01-24 04:37:46.374887: step: 724/466, loss: 0.4071979224681854 2023-01-24 04:37:47.147702: step: 726/466, loss: 0.005354071501642466 2023-01-24 04:37:47.893093: step: 728/466, loss: 0.06616143137216568 2023-01-24 04:37:48.664827: step: 730/466, loss: 0.03915643319487572 2023-01-24 04:37:49.528849: step: 732/466, loss: 0.010733548551797867 2023-01-24 04:37:50.340901: step: 734/466, loss: 0.02929450199007988 2023-01-24 04:37:51.079063: step: 736/466, loss: 0.03881808742880821 2023-01-24 04:37:51.939202: step: 738/466, loss: 0.03280079364776611 2023-01-24 04:37:52.675061: step: 740/466, loss: 2.8601508140563965 2023-01-24 04:37:53.417783: step: 742/466, loss: 0.10068678855895996 2023-01-24 04:37:54.211893: step: 744/466, loss: 0.07240951806306839 2023-01-24 04:37:54.986094: step: 746/466, loss: 0.03611631318926811 2023-01-24 04:37:55.722232: step: 748/466, loss: 0.008190032094717026 2023-01-24 04:37:56.485615: step: 750/466, loss: 0.040565237402915955 2023-01-24 04:37:57.217467: step: 752/466, loss: 0.029234912246465683 2023-01-24 04:37:58.012977: step: 754/466, loss: 0.1493801325559616 2023-01-24 04:37:58.777582: step: 756/466, loss: 0.26373106241226196 2023-01-24 04:37:59.505863: step: 758/466, loss: 0.007217098958790302 2023-01-24 04:38:00.244266: step: 760/466, loss: 0.028150340542197227 2023-01-24 04:38:01.056441: step: 762/466, loss: 0.04375817999243736 2023-01-24 04:38:01.793189: step: 764/466, loss: 0.04241231828927994 2023-01-24 04:38:02.526267: step: 766/466, loss: 0.06087270751595497 2023-01-24 04:38:03.326669: step: 768/466, loss: 0.06415510922670364 2023-01-24 04:38:04.102621: step: 770/466, loss: 0.06608094274997711 2023-01-24 04:38:04.868171: step: 772/466, loss: 0.09900546818971634 2023-01-24 04:38:05.605861: step: 774/466, loss: 0.013609092682600021 2023-01-24 04:38:06.375148: step: 776/466, loss: 0.1549600213766098 2023-01-24 04:38:07.135077: step: 778/466, loss: 0.0728570744395256 2023-01-24 04:38:07.893366: step: 780/466, loss: 0.007868712767958641 2023-01-24 04:38:08.663507: step: 782/466, loss: 0.012212309055030346 2023-01-24 04:38:09.439534: step: 784/466, loss: 0.043318044394254684 2023-01-24 04:38:10.184021: step: 786/466, loss: 0.11056303232908249 2023-01-24 04:38:10.906918: step: 788/466, loss: 0.631340742111206 2023-01-24 04:38:11.619381: step: 790/466, loss: 0.01474701426923275 2023-01-24 04:38:12.503317: step: 792/466, loss: 0.10303998738527298 2023-01-24 04:38:13.172306: step: 794/466, loss: 0.02286025695502758 2023-01-24 04:38:13.892489: step: 796/466, loss: 0.025005998089909554 2023-01-24 04:38:14.673101: step: 798/466, loss: 0.03820018097758293 2023-01-24 04:38:15.513655: step: 800/466, loss: 0.2768549919128418 2023-01-24 04:38:16.263180: step: 802/466, loss: 0.07749795913696289 2023-01-24 04:38:16.999640: step: 804/466, loss: 0.05952690541744232 2023-01-24 04:38:17.692326: step: 806/466, loss: 0.08135931938886642 2023-01-24 04:38:18.480594: step: 808/466, loss: 0.08084471523761749 2023-01-24 04:38:19.212385: step: 810/466, loss: 0.06380957365036011 2023-01-24 04:38:20.017027: step: 812/466, loss: 0.1317671686410904 2023-01-24 04:38:20.833098: step: 814/466, loss: 0.08646216243505478 2023-01-24 04:38:21.578499: step: 816/466, loss: 0.07240697741508484 2023-01-24 04:38:22.285146: step: 818/466, loss: 0.07382383197546005 2023-01-24 04:38:23.098419: step: 820/466, loss: 0.0447627417743206 2023-01-24 04:38:23.819979: step: 822/466, loss: 0.01566709578037262 2023-01-24 04:38:24.634482: step: 824/466, loss: 0.04784432798624039 2023-01-24 04:38:25.476220: step: 826/466, loss: 0.051131147891283035 2023-01-24 04:38:26.260362: step: 828/466, loss: 0.06012752279639244 2023-01-24 04:38:26.947870: step: 830/466, loss: 0.014375496655702591 2023-01-24 04:38:27.685809: step: 832/466, loss: 0.0393557995557785 2023-01-24 04:38:28.440570: step: 834/466, loss: 0.0965733602643013 2023-01-24 04:38:29.198644: step: 836/466, loss: 0.11071842908859253 2023-01-24 04:38:29.966434: step: 838/466, loss: 0.03374619781970978 2023-01-24 04:38:30.682944: step: 840/466, loss: 0.08582701534032822 2023-01-24 04:38:31.391616: step: 842/466, loss: 0.08291257917881012 2023-01-24 04:38:32.178528: step: 844/466, loss: 0.25133976340293884 2023-01-24 04:38:33.003861: step: 846/466, loss: 0.09083317965269089 2023-01-24 04:38:33.771400: step: 848/466, loss: 0.04542336240410805 2023-01-24 04:38:34.613612: step: 850/466, loss: 0.2875499725341797 2023-01-24 04:38:35.334823: step: 852/466, loss: 0.10486269742250443 2023-01-24 04:38:36.134667: step: 854/466, loss: 0.03992059826850891 2023-01-24 04:38:36.884468: step: 856/466, loss: 0.018067266792058945 2023-01-24 04:38:37.596340: step: 858/466, loss: 0.0018403325229883194 2023-01-24 04:38:38.336191: step: 860/466, loss: 0.0541539341211319 2023-01-24 04:38:39.115430: step: 862/466, loss: 0.019118212163448334 2023-01-24 04:38:39.890247: step: 864/466, loss: 0.05498569458723068 2023-01-24 04:38:40.553312: step: 866/466, loss: 0.02778913825750351 2023-01-24 04:38:41.419103: step: 868/466, loss: 0.06923171132802963 2023-01-24 04:38:42.243143: step: 870/466, loss: 0.039038099348545074 2023-01-24 04:38:42.971673: step: 872/466, loss: 0.039004988968372345 2023-01-24 04:38:43.749409: step: 874/466, loss: 0.09032604098320007 2023-01-24 04:38:44.535968: step: 876/466, loss: 0.07266921550035477 2023-01-24 04:38:45.298447: step: 878/466, loss: 0.11244278401136398 2023-01-24 04:38:45.976452: step: 880/466, loss: 0.035093631595373154 2023-01-24 04:38:46.730236: step: 882/466, loss: 0.03499899059534073 2023-01-24 04:38:47.440041: step: 884/466, loss: 0.054025325924158096 2023-01-24 04:38:48.344047: step: 886/466, loss: 0.045914176851511 2023-01-24 04:38:49.131313: step: 888/466, loss: 0.013009753078222275 2023-01-24 04:38:49.936021: step: 890/466, loss: 0.050087813287973404 2023-01-24 04:38:50.710998: step: 892/466, loss: 0.05891994759440422 2023-01-24 04:38:51.481502: step: 894/466, loss: 0.04082118719816208 2023-01-24 04:38:52.241845: step: 896/466, loss: 0.03301764652132988 2023-01-24 04:38:52.980136: step: 898/466, loss: 0.06859487295150757 2023-01-24 04:38:53.740019: step: 900/466, loss: 0.04476600140333176 2023-01-24 04:38:54.491946: step: 902/466, loss: 0.06929872930049896 2023-01-24 04:38:55.241298: step: 904/466, loss: 0.46976327896118164 2023-01-24 04:38:55.997540: step: 906/466, loss: 0.04072031006217003 2023-01-24 04:38:56.771427: step: 908/466, loss: 0.010995362885296345 2023-01-24 04:38:57.517837: step: 910/466, loss: 0.05154285952448845 2023-01-24 04:38:58.407516: step: 912/466, loss: 0.03599536046385765 2023-01-24 04:38:59.159449: step: 914/466, loss: 0.0024055049289017916 2023-01-24 04:38:59.906873: step: 916/466, loss: 0.009881271980702877 2023-01-24 04:39:00.711145: step: 918/466, loss: 0.08806942403316498 2023-01-24 04:39:01.456288: step: 920/466, loss: 0.038152310997247696 2023-01-24 04:39:02.210870: step: 922/466, loss: 0.041223522275686264 2023-01-24 04:39:02.969300: step: 924/466, loss: 0.18694542348384857 2023-01-24 04:39:03.724233: step: 926/466, loss: 0.06713179498910904 2023-01-24 04:39:04.533649: step: 928/466, loss: 0.18050426244735718 2023-01-24 04:39:05.346335: step: 930/466, loss: 0.05860520899295807 2023-01-24 04:39:06.036161: step: 932/466, loss: 0.03831728547811508 ================================================== Loss: 0.103 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34212014719494716, 'r': 0.30057234943313194, 'f1': 0.3200032891944657}, 'combined': 0.23579189730118524, 'epoch': 21} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.374879651737115, 'r': 0.28245690022753217, 'f1': 0.32217086991911903}, 'combined': 0.19801721760882438, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31939744771426354, 'r': 0.30667003518675023, 'f1': 0.312904372784932}, 'combined': 0.23056111678889724, 'epoch': 21} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.36412763885302274, 'r': 0.28601029072538464, 'f1': 0.32037586829316345}, 'combined': 0.19691394831677364, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.355677210709229, 'r': 0.3084335584328608, 'f1': 0.3303749701099952}, 'combined': 0.24343418850210174, 'epoch': 21} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3831727223249184, 'r': 0.27918289102844096, 'f1': 0.3230146049199062}, 'combined': 0.19950902068582446, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3577586206896552, 'r': 0.29642857142857143, 'f1': 0.32421875}, 'combined': 0.21614583333333331, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2785714285714286, 'r': 0.42391304347826086, 'f1': 0.33620689655172414}, 'combined': 0.16810344827586207, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34582504970178923, 'r': 0.33007590132827325, 'f1': 0.3377669902912621}, 'combined': 0.24888094021461418, 'epoch': 19} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36038315043942815, 'r': 0.2939803727200525, 'f1': 0.3238125877697768}, 'combined': 0.20000189244603864, 'epoch': 19} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 19} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:41:48.891442: step: 2/466, loss: 0.01801205240190029 2023-01-24 04:41:49.632579: step: 4/466, loss: 0.009650541469454765 2023-01-24 04:41:50.399626: step: 6/466, loss: 0.07053247094154358 2023-01-24 04:41:51.108507: step: 8/466, loss: 0.001292285742238164 2023-01-24 04:41:51.852287: step: 10/466, loss: 0.03261404484510422 2023-01-24 04:41:52.590874: step: 12/466, loss: 0.05088101327419281 2023-01-24 04:41:53.255806: step: 14/466, loss: 0.004686241038143635 2023-01-24 04:41:53.955655: step: 16/466, loss: 0.057643257081508636 2023-01-24 04:41:54.698969: step: 18/466, loss: 0.025192510336637497 2023-01-24 04:41:55.447715: step: 20/466, loss: 0.03585965558886528 2023-01-24 04:41:56.225923: step: 22/466, loss: 0.00812239944934845 2023-01-24 04:41:56.985389: step: 24/466, loss: 0.1684376448392868 2023-01-24 04:41:57.714411: step: 26/466, loss: 0.012567630037665367 2023-01-24 04:41:58.503088: step: 28/466, loss: 0.008991257287561893 2023-01-24 04:41:59.271957: step: 30/466, loss: 0.010555324144661427 2023-01-24 04:42:00.074675: step: 32/466, loss: 0.04046061635017395 2023-01-24 04:42:00.820528: step: 34/466, loss: 0.02395005337893963 2023-01-24 04:42:01.544372: step: 36/466, loss: 0.45707467198371887 2023-01-24 04:42:02.257454: step: 38/466, loss: 0.042842619121074677 2023-01-24 04:42:03.110862: step: 40/466, loss: 0.04697068780660629 2023-01-24 04:42:03.875446: step: 42/466, loss: 0.02398432418704033 2023-01-24 04:42:04.582583: step: 44/466, loss: 0.035828251391649246 2023-01-24 04:42:05.351611: step: 46/466, loss: 0.015519676730036736 2023-01-24 04:42:06.207321: step: 48/466, loss: 0.02766352891921997 2023-01-24 04:42:06.930037: step: 50/466, loss: 0.04843959957361221 2023-01-24 04:42:07.663921: step: 52/466, loss: 0.04603329673409462 2023-01-24 04:42:08.530371: step: 54/466, loss: 0.05231574550271034 2023-01-24 04:42:09.330510: step: 56/466, loss: 0.0449846126139164 2023-01-24 04:42:10.132478: step: 58/466, loss: 0.05117674544453621 2023-01-24 04:42:10.922310: step: 60/466, loss: 0.025951247662305832 2023-01-24 04:42:11.712047: step: 62/466, loss: 0.2809740900993347 2023-01-24 04:42:12.402203: step: 64/466, loss: 0.001973114674910903 2023-01-24 04:42:13.168483: step: 66/466, loss: 0.028032252565026283 2023-01-24 04:42:13.957254: step: 68/466, loss: 0.24467821419239044 2023-01-24 04:42:14.801540: step: 70/466, loss: 0.033111125230789185 2023-01-24 04:42:15.513407: step: 72/466, loss: 0.028844568878412247 2023-01-24 04:42:16.237883: step: 74/466, loss: 0.02851303108036518 2023-01-24 04:42:17.031203: step: 76/466, loss: 0.08196083456277847 2023-01-24 04:42:17.736479: step: 78/466, loss: 0.03638365492224693 2023-01-24 04:42:18.503518: step: 80/466, loss: 0.01183218415826559 2023-01-24 04:42:19.264396: step: 82/466, loss: 0.014731865376234055 2023-01-24 04:42:19.985177: step: 84/466, loss: 0.037835411727428436 2023-01-24 04:42:20.758321: step: 86/466, loss: 0.01963892951607704 2023-01-24 04:42:21.450366: step: 88/466, loss: 0.12295688688755035 2023-01-24 04:42:22.197715: step: 90/466, loss: 0.07707997411489487 2023-01-24 04:42:22.879142: step: 92/466, loss: 0.06897678971290588 2023-01-24 04:42:23.780609: step: 94/466, loss: 0.049370914697647095 2023-01-24 04:42:24.507426: step: 96/466, loss: 0.06621725857257843 2023-01-24 04:42:25.260054: step: 98/466, loss: 0.0824403166770935 2023-01-24 04:42:26.052111: step: 100/466, loss: 0.0027513070963323116 2023-01-24 04:42:26.728250: step: 102/466, loss: 0.06183888390660286 2023-01-24 04:42:27.587607: step: 104/466, loss: 0.10273338854312897 2023-01-24 04:42:28.275794: step: 106/466, loss: 0.005924407858401537 2023-01-24 04:42:29.018711: step: 108/466, loss: 0.029296522960066795 2023-01-24 04:42:29.774319: step: 110/466, loss: 0.0434553362429142 2023-01-24 04:42:30.572403: step: 112/466, loss: 0.01007161196321249 2023-01-24 04:42:31.282958: step: 114/466, loss: 0.03057682141661644 2023-01-24 04:42:32.084331: step: 116/466, loss: 0.12594719231128693 2023-01-24 04:42:32.761014: step: 118/466, loss: 0.027035461738705635 2023-01-24 04:42:33.548248: step: 120/466, loss: 0.03666792809963226 2023-01-24 04:42:34.334423: step: 122/466, loss: 0.04613330587744713 2023-01-24 04:42:35.048153: step: 124/466, loss: 0.06358573585748672 2023-01-24 04:42:35.744777: step: 126/466, loss: 0.027978744357824326 2023-01-24 04:42:36.521044: step: 128/466, loss: 0.0762336328625679 2023-01-24 04:42:37.252542: step: 130/466, loss: 0.0326845683157444 2023-01-24 04:42:37.982062: step: 132/466, loss: 0.011000591330230236 2023-01-24 04:42:38.744228: step: 134/466, loss: 0.963161826133728 2023-01-24 04:42:39.413260: step: 136/466, loss: 0.15354757010936737 2023-01-24 04:42:40.116209: step: 138/466, loss: 0.1283196657896042 2023-01-24 04:42:40.821743: step: 140/466, loss: 0.008298342116177082 2023-01-24 04:42:41.491269: step: 142/466, loss: 0.03561526536941528 2023-01-24 04:42:42.188920: step: 144/466, loss: 0.0693802759051323 2023-01-24 04:42:43.009718: step: 146/466, loss: 0.05710234120488167 2023-01-24 04:42:43.763658: step: 148/466, loss: 0.0016815579729154706 2023-01-24 04:42:44.571837: step: 150/466, loss: 0.033275868743658066 2023-01-24 04:42:45.343668: step: 152/466, loss: 0.0874953642487526 2023-01-24 04:42:46.125254: step: 154/466, loss: 0.03488751873373985 2023-01-24 04:42:46.889107: step: 156/466, loss: 0.19325657188892365 2023-01-24 04:42:47.676820: step: 158/466, loss: 2.2976832389831543 2023-01-24 04:42:48.485423: step: 160/466, loss: 0.037470534443855286 2023-01-24 04:42:49.254220: step: 162/466, loss: 0.03140714764595032 2023-01-24 04:42:50.073206: step: 164/466, loss: 0.0779411792755127 2023-01-24 04:42:50.813383: step: 166/466, loss: 0.052832603454589844 2023-01-24 04:42:51.609436: step: 168/466, loss: 0.028959710150957108 2023-01-24 04:42:52.263896: step: 170/466, loss: 0.05092615261673927 2023-01-24 04:42:53.072906: step: 172/466, loss: 0.039884038269519806 2023-01-24 04:42:53.760448: step: 174/466, loss: 0.023970339447259903 2023-01-24 04:42:54.789324: step: 176/466, loss: 0.015574868768453598 2023-01-24 04:42:55.512213: step: 178/466, loss: 0.014533845707774162 2023-01-24 04:42:56.279683: step: 180/466, loss: 0.03421838954091072 2023-01-24 04:42:57.031582: step: 182/466, loss: 0.0036980807781219482 2023-01-24 04:42:57.855403: step: 184/466, loss: 0.10932143777608871 2023-01-24 04:42:58.592606: step: 186/466, loss: 0.09876124560832977 2023-01-24 04:42:59.348213: step: 188/466, loss: 0.03024807572364807 2023-01-24 04:43:00.121296: step: 190/466, loss: 0.04149920493364334 2023-01-24 04:43:00.865981: step: 192/466, loss: 0.025310944765806198 2023-01-24 04:43:01.598836: step: 194/466, loss: 0.02424207702279091 2023-01-24 04:43:02.465683: step: 196/466, loss: 0.04799162968993187 2023-01-24 04:43:03.197894: step: 198/466, loss: 0.017459945753216743 2023-01-24 04:43:03.953213: step: 200/466, loss: 0.010713450610637665 2023-01-24 04:43:04.734725: step: 202/466, loss: 0.06455336511135101 2023-01-24 04:43:05.469024: step: 204/466, loss: 0.0043992772698402405 2023-01-24 04:43:06.208539: step: 206/466, loss: 0.02925746887922287 2023-01-24 04:43:06.994458: step: 208/466, loss: 0.08175531029701233 2023-01-24 04:43:07.775474: step: 210/466, loss: 0.07611624896526337 2023-01-24 04:43:08.590025: step: 212/466, loss: 0.08095847815275192 2023-01-24 04:43:09.245268: step: 214/466, loss: 0.09799374639987946 2023-01-24 04:43:09.954239: step: 216/466, loss: 0.3005771338939667 2023-01-24 04:43:10.636073: step: 218/466, loss: 0.009296614676713943 2023-01-24 04:43:11.425186: step: 220/466, loss: 0.022954193875193596 2023-01-24 04:43:12.158319: step: 222/466, loss: 0.05615520477294922 2023-01-24 04:43:12.931965: step: 224/466, loss: 0.07679169625043869 2023-01-24 04:43:13.685371: step: 226/466, loss: 0.021268269047141075 2023-01-24 04:43:14.470779: step: 228/466, loss: 0.017214687541127205 2023-01-24 04:43:15.206039: step: 230/466, loss: 0.01152557972818613 2023-01-24 04:43:15.923285: step: 232/466, loss: 0.06563958525657654 2023-01-24 04:43:16.746232: step: 234/466, loss: 0.005681390408426523 2023-01-24 04:43:17.483137: step: 236/466, loss: 0.013118581846356392 2023-01-24 04:43:18.261130: step: 238/466, loss: 0.04298659414052963 2023-01-24 04:43:19.058780: step: 240/466, loss: 0.05200590193271637 2023-01-24 04:43:19.781088: step: 242/466, loss: 0.002855125116184354 2023-01-24 04:43:20.559342: step: 244/466, loss: 0.014720942825078964 2023-01-24 04:43:21.256846: step: 246/466, loss: 0.07755979895591736 2023-01-24 04:43:22.125591: step: 248/466, loss: 0.846014142036438 2023-01-24 04:43:22.871662: step: 250/466, loss: 0.030162004753947258 2023-01-24 04:43:23.646917: step: 252/466, loss: 0.0932496190071106 2023-01-24 04:43:24.360084: step: 254/466, loss: 0.06960610300302505 2023-01-24 04:43:25.092167: step: 256/466, loss: 0.016617776826024055 2023-01-24 04:43:25.824073: step: 258/466, loss: 0.0021575915161520243 2023-01-24 04:43:26.536040: step: 260/466, loss: 0.04989505559206009 2023-01-24 04:43:27.317272: step: 262/466, loss: 0.0046344357542693615 2023-01-24 04:43:28.047372: step: 264/466, loss: 0.008433621376752853 2023-01-24 04:43:28.826958: step: 266/466, loss: 0.021122919395565987 2023-01-24 04:43:29.594826: step: 268/466, loss: 0.07885141670703888 2023-01-24 04:43:30.296014: step: 270/466, loss: 0.05262148380279541 2023-01-24 04:43:31.057325: step: 272/466, loss: 0.0057984814047813416 2023-01-24 04:43:31.847371: step: 274/466, loss: 0.0534018948674202 2023-01-24 04:43:32.578996: step: 276/466, loss: 0.04419717565178871 2023-01-24 04:43:33.408148: step: 278/466, loss: 0.016852280125021935 2023-01-24 04:43:34.139308: step: 280/466, loss: 0.0230227243155241 2023-01-24 04:43:34.834074: step: 282/466, loss: 0.008835147134959698 2023-01-24 04:43:35.542918: step: 284/466, loss: 0.070997454226017 2023-01-24 04:43:36.191865: step: 286/466, loss: 0.014292260631918907 2023-01-24 04:43:36.872739: step: 288/466, loss: 0.00638886634260416 2023-01-24 04:43:37.675003: step: 290/466, loss: 0.08430958539247513 2023-01-24 04:43:38.441369: step: 292/466, loss: 0.12293495237827301 2023-01-24 04:43:39.277072: step: 294/466, loss: 0.04928538203239441 2023-01-24 04:43:40.006497: step: 296/466, loss: 0.051817622035741806 2023-01-24 04:43:40.775029: step: 298/466, loss: 0.07045499235391617 2023-01-24 04:43:41.509060: step: 300/466, loss: 0.03849097341299057 2023-01-24 04:43:42.288040: step: 302/466, loss: 0.032904352992773056 2023-01-24 04:43:42.963186: step: 304/466, loss: 0.14624471962451935 2023-01-24 04:43:43.593885: step: 306/466, loss: 0.006894540973007679 2023-01-24 04:43:44.397419: step: 308/466, loss: 0.021506957709789276 2023-01-24 04:43:45.194244: step: 310/466, loss: 0.03344562277197838 2023-01-24 04:43:45.964699: step: 312/466, loss: 0.0015532016986981034 2023-01-24 04:43:46.670665: step: 314/466, loss: 0.011609219945967197 2023-01-24 04:43:47.416346: step: 316/466, loss: 0.08411398530006409 2023-01-24 04:43:48.220149: step: 318/466, loss: 0.024583594873547554 2023-01-24 04:43:49.048956: step: 320/466, loss: 0.011126959696412086 2023-01-24 04:43:49.845027: step: 322/466, loss: 0.050845298916101456 2023-01-24 04:43:50.594337: step: 324/466, loss: 0.004713057540357113 2023-01-24 04:43:51.439714: step: 326/466, loss: 0.04157442972064018 2023-01-24 04:43:52.154872: step: 328/466, loss: 0.08843721449375153 2023-01-24 04:43:52.895234: step: 330/466, loss: 0.011852320283651352 2023-01-24 04:43:53.630518: step: 332/466, loss: 0.08430391550064087 2023-01-24 04:43:54.400573: step: 334/466, loss: 0.007539176847785711 2023-01-24 04:43:55.236198: step: 336/466, loss: 0.14938679337501526 2023-01-24 04:43:56.028125: step: 338/466, loss: 0.051123447716236115 2023-01-24 04:43:56.796359: step: 340/466, loss: 0.01445749681442976 2023-01-24 04:43:57.587434: step: 342/466, loss: 0.09141960740089417 2023-01-24 04:43:58.281381: step: 344/466, loss: 0.028653070330619812 2023-01-24 04:43:59.098968: step: 346/466, loss: 0.12943173944950104 2023-01-24 04:43:59.967802: step: 348/466, loss: 0.06502082943916321 2023-01-24 04:44:00.707559: step: 350/466, loss: 0.04192391782999039 2023-01-24 04:44:01.541018: step: 352/466, loss: 0.03628654032945633 2023-01-24 04:44:02.268758: step: 354/466, loss: 0.036534518003463745 2023-01-24 04:44:02.985779: step: 356/466, loss: 0.003242996521294117 2023-01-24 04:44:03.744560: step: 358/466, loss: 0.01773170195519924 2023-01-24 04:44:04.559033: step: 360/466, loss: 0.04168696328997612 2023-01-24 04:44:05.358608: step: 362/466, loss: 0.05508454144001007 2023-01-24 04:44:06.034156: step: 364/466, loss: 0.041718918830156326 2023-01-24 04:44:06.796583: step: 366/466, loss: 0.048134926706552505 2023-01-24 04:44:07.497505: step: 368/466, loss: 0.06911245733499527 2023-01-24 04:44:08.217829: step: 370/466, loss: 0.0356752835214138 2023-01-24 04:44:09.029086: step: 372/466, loss: 0.06430232524871826 2023-01-24 04:44:09.756694: step: 374/466, loss: 0.021607208997011185 2023-01-24 04:44:10.500187: step: 376/466, loss: 0.1649162322282791 2023-01-24 04:44:11.285136: step: 378/466, loss: 0.015220481902360916 2023-01-24 04:44:12.104169: step: 380/466, loss: 0.04231276363134384 2023-01-24 04:44:12.924506: step: 382/466, loss: 0.027485070750117302 2023-01-24 04:44:13.730360: step: 384/466, loss: 0.055276788771152496 2023-01-24 04:44:14.513923: step: 386/466, loss: 0.009534847922623158 2023-01-24 04:44:15.310252: step: 388/466, loss: 0.058055032044649124 2023-01-24 04:44:15.990074: step: 390/466, loss: 0.02162765897810459 2023-01-24 04:44:16.684118: step: 392/466, loss: 0.09184350818395615 2023-01-24 04:44:17.514910: step: 394/466, loss: 0.017593739554286003 2023-01-24 04:44:18.256012: step: 396/466, loss: 0.0031726297456771135 2023-01-24 04:44:19.036519: step: 398/466, loss: 0.017662903293967247 2023-01-24 04:44:19.851348: step: 400/466, loss: 0.05700105056166649 2023-01-24 04:44:20.639388: step: 402/466, loss: 0.00686702411621809 2023-01-24 04:44:21.453046: step: 404/466, loss: 0.02812480553984642 2023-01-24 04:44:22.241156: step: 406/466, loss: 0.03714694455265999 2023-01-24 04:44:23.031811: step: 408/466, loss: 0.004215636756271124 2023-01-24 04:44:23.819840: step: 410/466, loss: 0.01110562402755022 2023-01-24 04:44:24.643841: step: 412/466, loss: 0.07172445207834244 2023-01-24 04:44:25.344279: step: 414/466, loss: 0.024139823392033577 2023-01-24 04:44:26.141998: step: 416/466, loss: 0.05758264660835266 2023-01-24 04:44:26.875215: step: 418/466, loss: 0.020258810371160507 2023-01-24 04:44:27.623423: step: 420/466, loss: 0.060611262917518616 2023-01-24 04:44:28.382543: step: 422/466, loss: 0.03956054151058197 2023-01-24 04:44:29.182844: step: 424/466, loss: 0.02907363325357437 2023-01-24 04:44:30.022251: step: 426/466, loss: 0.05078652501106262 2023-01-24 04:44:30.760947: step: 428/466, loss: 0.01063844095915556 2023-01-24 04:44:31.508868: step: 430/466, loss: 0.004772020969539881 2023-01-24 04:44:32.257764: step: 432/466, loss: 0.019598014652729034 2023-01-24 04:44:32.943209: step: 434/466, loss: 0.0007601032848469913 2023-01-24 04:44:33.712027: step: 436/466, loss: 0.010198225267231464 2023-01-24 04:44:34.438655: step: 438/466, loss: 0.18594855070114136 2023-01-24 04:44:35.270810: step: 440/466, loss: 0.08964129537343979 2023-01-24 04:44:35.963854: step: 442/466, loss: 0.007389713078737259 2023-01-24 04:44:36.786653: step: 444/466, loss: 0.04020103067159653 2023-01-24 04:44:37.464626: step: 446/466, loss: 0.0469764843583107 2023-01-24 04:44:38.187685: step: 448/466, loss: 0.01508344803005457 2023-01-24 04:44:38.909344: step: 450/466, loss: 0.03385445475578308 2023-01-24 04:44:39.616039: step: 452/466, loss: 0.0008108800393529236 2023-01-24 04:44:40.363740: step: 454/466, loss: 0.06956829875707626 2023-01-24 04:44:41.200456: step: 456/466, loss: 0.017208613455295563 2023-01-24 04:44:42.016181: step: 458/466, loss: 0.06156182661652565 2023-01-24 04:44:42.888479: step: 460/466, loss: 0.02708219736814499 2023-01-24 04:44:43.693861: step: 462/466, loss: 0.026894785463809967 2023-01-24 04:44:44.476273: step: 464/466, loss: 0.3982813358306885 2023-01-24 04:44:45.247835: step: 466/466, loss: 0.07892703264951706 2023-01-24 04:44:46.034138: step: 468/466, loss: 0.02989260107278824 2023-01-24 04:44:46.807416: step: 470/466, loss: 0.01170523650944233 2023-01-24 04:44:47.506336: step: 472/466, loss: 0.03892693296074867 2023-01-24 04:44:48.272867: step: 474/466, loss: 0.05771756172180176 2023-01-24 04:44:48.974594: step: 476/466, loss: 0.17019394040107727 2023-01-24 04:44:49.754526: step: 478/466, loss: 0.07544895261526108 2023-01-24 04:44:50.504618: step: 480/466, loss: 0.1152661144733429 2023-01-24 04:44:51.339488: step: 482/466, loss: 0.021521558985114098 2023-01-24 04:44:52.102038: step: 484/466, loss: 0.06444356590509415 2023-01-24 04:44:52.787008: step: 486/466, loss: 0.0069647375494241714 2023-01-24 04:44:53.601766: step: 488/466, loss: 0.14533472061157227 2023-01-24 04:44:54.370784: step: 490/466, loss: 0.02806529402732849 2023-01-24 04:44:55.108387: step: 492/466, loss: 0.04729820415377617 2023-01-24 04:44:55.854775: step: 494/466, loss: 0.04835475608706474 2023-01-24 04:44:56.670331: step: 496/466, loss: 0.3793639540672302 2023-01-24 04:44:57.466845: step: 498/466, loss: 0.08105266094207764 2023-01-24 04:44:58.195422: step: 500/466, loss: 0.026257047429680824 2023-01-24 04:44:59.117223: step: 502/466, loss: 0.04633655026555061 2023-01-24 04:44:59.850233: step: 504/466, loss: 0.047211624681949615 2023-01-24 04:45:00.606046: step: 506/466, loss: 0.0601053461432457 2023-01-24 04:45:01.341705: step: 508/466, loss: 0.057245686650276184 2023-01-24 04:45:02.193741: step: 510/466, loss: 0.015812266618013382 2023-01-24 04:45:03.069424: step: 512/466, loss: 0.05898257717490196 2023-01-24 04:45:03.812369: step: 514/466, loss: 0.0006638577906414866 2023-01-24 04:45:04.487668: step: 516/466, loss: 0.001410032738931477 2023-01-24 04:45:05.254155: step: 518/466, loss: 0.0344972088932991 2023-01-24 04:45:05.956292: step: 520/466, loss: 0.004521101713180542 2023-01-24 04:45:06.665978: step: 522/466, loss: 0.2457745522260666 2023-01-24 04:45:07.409063: step: 524/466, loss: 0.3025051951408386 2023-01-24 04:45:08.180336: step: 526/466, loss: 0.02308790571987629 2023-01-24 04:45:08.978424: step: 528/466, loss: 0.03719723969697952 2023-01-24 04:45:09.728253: step: 530/466, loss: 0.1055179238319397 2023-01-24 04:45:10.434406: step: 532/466, loss: 0.03662850335240364 2023-01-24 04:45:11.108799: step: 534/466, loss: 0.7362351417541504 2023-01-24 04:45:11.851341: step: 536/466, loss: 0.16023731231689453 2023-01-24 04:45:12.578126: step: 538/466, loss: 0.07985933125019073 2023-01-24 04:45:13.386667: step: 540/466, loss: 0.006084410939365625 2023-01-24 04:45:14.102312: step: 542/466, loss: 0.056004833430051804 2023-01-24 04:45:14.847018: step: 544/466, loss: 0.0024009014014154673 2023-01-24 04:45:15.603014: step: 546/466, loss: 0.017100302502512932 2023-01-24 04:45:16.366168: step: 548/466, loss: 0.051608506590127945 2023-01-24 04:45:17.125051: step: 550/466, loss: 0.015401377342641354 2023-01-24 04:45:17.858943: step: 552/466, loss: 0.012614244595170021 2023-01-24 04:45:18.512793: step: 554/466, loss: 0.012515553273260593 2023-01-24 04:45:19.233635: step: 556/466, loss: 0.005670727230608463 2023-01-24 04:45:19.985321: step: 558/466, loss: 0.016312582418322563 2023-01-24 04:45:20.722341: step: 560/466, loss: 0.028784558176994324 2023-01-24 04:45:21.448490: step: 562/466, loss: 0.35075005888938904 2023-01-24 04:45:22.207556: step: 564/466, loss: 0.49358564615249634 2023-01-24 04:45:23.065707: step: 566/466, loss: 0.047686539590358734 2023-01-24 04:45:23.860608: step: 568/466, loss: 0.07998033612966537 2023-01-24 04:45:24.610371: step: 570/466, loss: 0.022427715361118317 2023-01-24 04:45:25.494267: step: 572/466, loss: 0.09979384392499924 2023-01-24 04:45:26.260050: step: 574/466, loss: 0.08363737165927887 2023-01-24 04:45:27.004939: step: 576/466, loss: 0.0687449499964714 2023-01-24 04:45:27.797137: step: 578/466, loss: 0.03900950402021408 2023-01-24 04:45:28.616877: step: 580/466, loss: 0.04187082126736641 2023-01-24 04:45:29.342094: step: 582/466, loss: 0.01600123941898346 2023-01-24 04:45:30.104683: step: 584/466, loss: 0.023255644366145134 2023-01-24 04:45:30.816103: step: 586/466, loss: 0.0417775884270668 2023-01-24 04:45:31.636633: step: 588/466, loss: 0.22014985978603363 2023-01-24 04:45:32.439846: step: 590/466, loss: 0.07747036218643188 2023-01-24 04:45:33.200986: step: 592/466, loss: 0.061081413179636 2023-01-24 04:45:33.936307: step: 594/466, loss: 0.04934444651007652 2023-01-24 04:45:34.693735: step: 596/466, loss: 0.01672072522342205 2023-01-24 04:45:35.454894: step: 598/466, loss: 0.14264918863773346 2023-01-24 04:45:36.227351: step: 600/466, loss: 0.014670551754534245 2023-01-24 04:45:36.984446: step: 602/466, loss: 0.09861317276954651 2023-01-24 04:45:37.779097: step: 604/466, loss: 0.07894841581583023 2023-01-24 04:45:38.541284: step: 606/466, loss: 0.09325024485588074 2023-01-24 04:45:39.336462: step: 608/466, loss: 0.13695885241031647 2023-01-24 04:45:40.113325: step: 610/466, loss: 0.033022522926330566 2023-01-24 04:45:40.927031: step: 612/466, loss: 0.01769273914396763 2023-01-24 04:45:41.724128: step: 614/466, loss: 0.05903501808643341 2023-01-24 04:45:42.458505: step: 616/466, loss: 0.03772374242544174 2023-01-24 04:45:43.225410: step: 618/466, loss: 0.13912038505077362 2023-01-24 04:45:44.047769: step: 620/466, loss: 0.0425943098962307 2023-01-24 04:45:44.758155: step: 622/466, loss: 0.0020012203603982925 2023-01-24 04:45:45.535933: step: 624/466, loss: 0.10077203065156937 2023-01-24 04:45:46.328641: step: 626/466, loss: 0.1686987429857254 2023-01-24 04:45:47.009641: step: 628/466, loss: 0.011166021227836609 2023-01-24 04:45:47.807689: step: 630/466, loss: 0.8890079259872437 2023-01-24 04:45:48.541403: step: 632/466, loss: 0.04895270988345146 2023-01-24 04:45:49.319207: step: 634/466, loss: 0.004432227462530136 2023-01-24 04:45:50.080549: step: 636/466, loss: 0.01525797601789236 2023-01-24 04:45:50.882622: step: 638/466, loss: 0.08175188302993774 2023-01-24 04:45:51.625505: step: 640/466, loss: 0.04717142507433891 2023-01-24 04:45:52.448983: step: 642/466, loss: 0.15646132826805115 2023-01-24 04:45:53.210484: step: 644/466, loss: 0.01614035665988922 2023-01-24 04:45:54.032467: step: 646/466, loss: 0.029348144307732582 2023-01-24 04:45:54.822247: step: 648/466, loss: 0.040207263082265854 2023-01-24 04:45:55.603180: step: 650/466, loss: 0.004076416604220867 2023-01-24 04:45:56.331394: step: 652/466, loss: 0.024327151477336884 2023-01-24 04:45:57.283654: step: 654/466, loss: 0.04616496339440346 2023-01-24 04:45:58.056788: step: 656/466, loss: 0.08406134694814682 2023-01-24 04:45:58.837439: step: 658/466, loss: 0.04835755378007889 2023-01-24 04:45:59.536922: step: 660/466, loss: 0.017609048634767532 2023-01-24 04:46:00.307539: step: 662/466, loss: 0.011756598949432373 2023-01-24 04:46:01.024667: step: 664/466, loss: 0.013616996817290783 2023-01-24 04:46:01.729946: step: 666/466, loss: 0.0014534511137753725 2023-01-24 04:46:02.511368: step: 668/466, loss: 0.021534455940127373 2023-01-24 04:46:03.302159: step: 670/466, loss: 0.057125575840473175 2023-01-24 04:46:04.133121: step: 672/466, loss: 0.1229112297296524 2023-01-24 04:46:04.903567: step: 674/466, loss: 0.05047660693526268 2023-01-24 04:46:05.620978: step: 676/466, loss: 0.002109276596456766 2023-01-24 04:46:06.449216: step: 678/466, loss: 0.03988087549805641 2023-01-24 04:46:07.262282: step: 680/466, loss: 0.022907430306077003 2023-01-24 04:46:08.025461: step: 682/466, loss: 0.008905788883566856 2023-01-24 04:46:08.736886: step: 684/466, loss: 0.03231525421142578 2023-01-24 04:46:09.542660: step: 686/466, loss: 0.010490043088793755 2023-01-24 04:46:10.268816: step: 688/466, loss: 0.053193338215351105 2023-01-24 04:46:11.037327: step: 690/466, loss: 0.09256144613027573 2023-01-24 04:46:11.784722: step: 692/466, loss: 0.01077374629676342 2023-01-24 04:46:12.549573: step: 694/466, loss: 0.02683631144464016 2023-01-24 04:46:13.244241: step: 696/466, loss: 0.15495428442955017 2023-01-24 04:46:13.925441: step: 698/466, loss: 0.23386859893798828 2023-01-24 04:46:14.726337: step: 700/466, loss: 0.053017597645521164 2023-01-24 04:46:15.519441: step: 702/466, loss: 0.001763337291777134 2023-01-24 04:46:16.406732: step: 704/466, loss: 0.05142718553543091 2023-01-24 04:46:17.191580: step: 706/466, loss: 0.007640931289643049 2023-01-24 04:46:18.029280: step: 708/466, loss: 0.006928376853466034 2023-01-24 04:46:18.788769: step: 710/466, loss: 1.302974820137024 2023-01-24 04:46:19.568328: step: 712/466, loss: 0.14583726227283478 2023-01-24 04:46:20.300582: step: 714/466, loss: 0.08601471781730652 2023-01-24 04:46:21.041808: step: 716/466, loss: 0.04965193197131157 2023-01-24 04:46:21.798223: step: 718/466, loss: 0.03268514946103096 2023-01-24 04:46:22.600138: step: 720/466, loss: 0.044559597969055176 2023-01-24 04:46:23.303197: step: 722/466, loss: 0.036287739872932434 2023-01-24 04:46:24.005070: step: 724/466, loss: 0.17567983269691467 2023-01-24 04:46:24.724402: step: 726/466, loss: 0.08489609509706497 2023-01-24 04:46:25.598029: step: 728/466, loss: 0.06397829204797745 2023-01-24 04:46:26.383821: step: 730/466, loss: 0.08759113401174545 2023-01-24 04:46:27.155259: step: 732/466, loss: 0.03365446254611015 2023-01-24 04:46:27.925764: step: 734/466, loss: 0.09580977261066437 2023-01-24 04:46:28.631663: step: 736/466, loss: 0.0405992791056633 2023-01-24 04:46:29.329209: step: 738/466, loss: 0.03734927996993065 2023-01-24 04:46:30.020221: step: 740/466, loss: 0.033360805362463 2023-01-24 04:46:30.889953: step: 742/466, loss: 0.007269763853400946 2023-01-24 04:46:31.639229: step: 744/466, loss: 0.01740163378417492 2023-01-24 04:46:32.449337: step: 746/466, loss: 0.19998213648796082 2023-01-24 04:46:33.248396: step: 748/466, loss: 0.03510475531220436 2023-01-24 04:46:33.943981: step: 750/466, loss: 0.03854229673743248 2023-01-24 04:46:34.721440: step: 752/466, loss: 0.0308972354978323 2023-01-24 04:46:35.506859: step: 754/466, loss: 0.10259910672903061 2023-01-24 04:46:36.309840: step: 756/466, loss: 0.05756578966975212 2023-01-24 04:46:37.074295: step: 758/466, loss: 0.1863379031419754 2023-01-24 04:46:37.841540: step: 760/466, loss: 0.048143643885850906 2023-01-24 04:46:38.613078: step: 762/466, loss: 0.0705944150686264 2023-01-24 04:46:39.378645: step: 764/466, loss: 0.0309490617364645 2023-01-24 04:46:40.135528: step: 766/466, loss: 0.029179180040955544 2023-01-24 04:46:40.889527: step: 768/466, loss: 0.10912593454122543 2023-01-24 04:46:41.660212: step: 770/466, loss: 0.07420755177736282 2023-01-24 04:46:42.316115: step: 772/466, loss: 0.00907058548182249 2023-01-24 04:46:43.093260: step: 774/466, loss: 0.042753975838422775 2023-01-24 04:46:43.854679: step: 776/466, loss: 0.12360195070505142 2023-01-24 04:46:44.675005: step: 778/466, loss: 0.05424632504582405 2023-01-24 04:46:45.417209: step: 780/466, loss: 0.2799844443798065 2023-01-24 04:46:46.205609: step: 782/466, loss: 0.02033020369708538 2023-01-24 04:46:46.960338: step: 784/466, loss: 0.09192941337823868 2023-01-24 04:46:47.685027: step: 786/466, loss: 0.5942068696022034 2023-01-24 04:46:48.426393: step: 788/466, loss: 0.20447811484336853 2023-01-24 04:46:49.118411: step: 790/466, loss: 0.021643927320837975 2023-01-24 04:46:49.831589: step: 792/466, loss: 0.06576592475175858 2023-01-24 04:46:50.704886: step: 794/466, loss: 0.047535400837659836 2023-01-24 04:46:51.487352: step: 796/466, loss: 0.009031837806105614 2023-01-24 04:46:52.221158: step: 798/466, loss: 0.12657393515110016 2023-01-24 04:46:52.978859: step: 800/466, loss: 0.013000398874282837 2023-01-24 04:46:53.876343: step: 802/466, loss: 0.03871172294020653 2023-01-24 04:46:54.589554: step: 804/466, loss: 0.045810580253601074 2023-01-24 04:46:55.420184: step: 806/466, loss: 0.03326374664902687 2023-01-24 04:46:56.298380: step: 808/466, loss: 0.009571857750415802 2023-01-24 04:46:57.151413: step: 810/466, loss: 0.02861849032342434 2023-01-24 04:46:57.876350: step: 812/466, loss: 0.01981373131275177 2023-01-24 04:46:58.617959: step: 814/466, loss: 0.004079751670360565 2023-01-24 04:46:59.276812: step: 816/466, loss: 0.008983705192804337 2023-01-24 04:47:00.047512: step: 818/466, loss: 0.02426239661872387 2023-01-24 04:47:00.849928: step: 820/466, loss: 0.05498094856739044 2023-01-24 04:47:01.678661: step: 822/466, loss: 0.04358714073896408 2023-01-24 04:47:02.466121: step: 824/466, loss: 0.23724323511123657 2023-01-24 04:47:03.209947: step: 826/466, loss: 0.10743313282728195 2023-01-24 04:47:03.956683: step: 828/466, loss: 0.13279399275779724 2023-01-24 04:47:04.766177: step: 830/466, loss: 0.05824432894587517 2023-01-24 04:47:05.478580: step: 832/466, loss: 0.41754406690597534 2023-01-24 04:47:06.318458: step: 834/466, loss: 0.04348764568567276 2023-01-24 04:47:07.260228: step: 836/466, loss: 0.20859596133232117 2023-01-24 04:47:07.921338: step: 838/466, loss: 0.02455979771912098 2023-01-24 04:47:08.699841: step: 840/466, loss: 0.10641927272081375 2023-01-24 04:47:09.487606: step: 842/466, loss: 0.04542750120162964 2023-01-24 04:47:10.168935: step: 844/466, loss: 0.0511869378387928 2023-01-24 04:47:10.937765: step: 846/466, loss: 0.009238695725798607 2023-01-24 04:47:11.720759: step: 848/466, loss: 0.04486138001084328 2023-01-24 04:47:12.420066: step: 850/466, loss: 0.05206843838095665 2023-01-24 04:47:13.226498: step: 852/466, loss: 0.07903794199228287 2023-01-24 04:47:14.025317: step: 854/466, loss: 0.013895289972424507 2023-01-24 04:47:14.840909: step: 856/466, loss: 0.02503076009452343 2023-01-24 04:47:15.553200: step: 858/466, loss: 0.876710832118988 2023-01-24 04:47:16.314230: step: 860/466, loss: 0.048670414835214615 2023-01-24 04:47:17.155243: step: 862/466, loss: 0.021664870902895927 2023-01-24 04:47:17.881202: step: 864/466, loss: 0.03425924852490425 2023-01-24 04:47:18.670810: step: 866/466, loss: 0.025804908946156502 2023-01-24 04:47:19.380494: step: 868/466, loss: 0.03486303240060806 2023-01-24 04:47:20.083534: step: 870/466, loss: 0.01872149109840393 2023-01-24 04:47:20.801873: step: 872/466, loss: 0.03468276932835579 2023-01-24 04:47:21.553369: step: 874/466, loss: 0.03930312767624855 2023-01-24 04:47:22.234145: step: 876/466, loss: 0.014858567155897617 2023-01-24 04:47:22.979206: step: 878/466, loss: 0.06579507142305374 2023-01-24 04:47:23.747052: step: 880/466, loss: 0.060320112854242325 2023-01-24 04:47:24.448561: step: 882/466, loss: 0.004157866816967726 2023-01-24 04:47:25.197769: step: 884/466, loss: 0.11918888986110687 2023-01-24 04:47:25.904607: step: 886/466, loss: 0.09611214697360992 2023-01-24 04:47:26.647235: step: 888/466, loss: 0.03245295584201813 2023-01-24 04:47:27.371575: step: 890/466, loss: 0.03113154135644436 2023-01-24 04:47:28.232474: step: 892/466, loss: 0.028303897008299828 2023-01-24 04:47:28.908759: step: 894/466, loss: 0.09134092926979065 2023-01-24 04:47:29.755847: step: 896/466, loss: 0.05562019348144531 2023-01-24 04:47:30.499486: step: 898/466, loss: 0.019667336717247963 2023-01-24 04:47:31.316757: step: 900/466, loss: 0.03709959238767624 2023-01-24 04:47:32.116337: step: 902/466, loss: 0.02994011528789997 2023-01-24 04:47:32.866797: step: 904/466, loss: 0.006237436085939407 2023-01-24 04:47:33.659656: step: 906/466, loss: 0.014765221625566483 2023-01-24 04:47:34.538974: step: 908/466, loss: 0.0391659215092659 2023-01-24 04:47:35.253745: step: 910/466, loss: 0.6814891695976257 2023-01-24 04:47:36.049328: step: 912/466, loss: 0.10867901891469955 2023-01-24 04:47:36.882753: step: 914/466, loss: 0.10372887551784515 2023-01-24 04:47:37.740151: step: 916/466, loss: 0.0682687982916832 2023-01-24 04:47:38.533378: step: 918/466, loss: 0.021598655730485916 2023-01-24 04:47:39.242685: step: 920/466, loss: 0.004696763586252928 2023-01-24 04:47:40.038943: step: 922/466, loss: 0.09874019771814346 2023-01-24 04:47:40.769425: step: 924/466, loss: 0.029687268659472466 2023-01-24 04:47:41.491967: step: 926/466, loss: 0.04235919564962387 2023-01-24 04:47:42.256040: step: 928/466, loss: 0.008453264832496643 2023-01-24 04:47:43.082225: step: 930/466, loss: 0.049025628715753555 2023-01-24 04:47:43.878116: step: 932/466, loss: 0.11063175648450851 ================================================== Loss: 0.075 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3564325880305262, 'r': 0.32802619581556963, 'f1': 0.34163993121503006}, 'combined': 0.2517346861584432, 'epoch': 22} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.371123885128413, 'r': 0.28501029100337866, 'f1': 0.3224161470214519}, 'combined': 0.19816797329123384, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3309914853791984, 'r': 0.3360160240566815, 'f1': 0.3334848299018289}, 'combined': 0.24572566413818972, 'epoch': 22} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3582549072884337, 'r': 0.28753445805747013, 'f1': 0.3190223814182306}, 'combined': 0.1960820490668149, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3631336189583612, 'r': 0.33419317873777077, 'f1': 0.3480628561162158}, 'combined': 0.25646736766458006, 'epoch': 22} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3703569458849697, 'r': 0.2825733791267675, 'f1': 0.32056410821446835}, 'combined': 0.19799547860305403, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32421875, 'r': 0.29642857142857143, 'f1': 0.3097014925373134}, 'combined': 0.20646766169154224, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2571428571428571, 'r': 0.391304347826087, 'f1': 0.3103448275862069}, 'combined': 0.15517241379310345, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3507926837520978, 'r': 0.33814550919557057, 'f1': 0.34435301129674534}, 'combined': 0.2537337977976018, 'epoch': 13} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3836009211945402, 'r': 0.271246405281408, 'f1': 0.3177851286241064}, 'combined': 0.19532159125188978, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3346774193548387, 'r': 0.29642857142857143, 'f1': 0.3143939393939394}, 'combined': 0.20959595959595959, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34582504970178923, 'r': 0.33007590132827325, 'f1': 0.3377669902912621}, 'combined': 0.24888094021461418, 'epoch': 19} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36038315043942815, 'r': 0.2939803727200525, 'f1': 0.3238125877697768}, 'combined': 0.20000189244603864, 'epoch': 19} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 19} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:50:27.973809: step: 2/466, loss: 0.03882645443081856 2023-01-24 04:50:28.672206: step: 4/466, loss: 0.013151943683624268 2023-01-24 04:50:29.433492: step: 6/466, loss: 0.009563574567437172 2023-01-24 04:50:30.219663: step: 8/466, loss: 0.008155950345098972 2023-01-24 04:50:30.956411: step: 10/466, loss: 0.020659472793340683 2023-01-24 04:50:31.801053: step: 12/466, loss: 0.29554811120033264 2023-01-24 04:50:32.625013: step: 14/466, loss: 0.023778563365340233 2023-01-24 04:50:33.367621: step: 16/466, loss: 0.0215211883187294 2023-01-24 04:50:34.098610: step: 18/466, loss: 0.023544669151306152 2023-01-24 04:50:34.791681: step: 20/466, loss: 0.01035761646926403 2023-01-24 04:50:35.607573: step: 22/466, loss: 0.010739500634372234 2023-01-24 04:50:36.342131: step: 24/466, loss: 0.0321514792740345 2023-01-24 04:50:37.070739: step: 26/466, loss: 0.019760802388191223 2023-01-24 04:50:37.745783: step: 28/466, loss: 0.020527001470327377 2023-01-24 04:50:38.472033: step: 30/466, loss: 0.011523504741489887 2023-01-24 04:50:39.175204: step: 32/466, loss: 0.07324656844139099 2023-01-24 04:50:39.915308: step: 34/466, loss: 0.1542568802833557 2023-01-24 04:50:40.557805: step: 36/466, loss: 0.0032089881133288145 2023-01-24 04:50:41.253692: step: 38/466, loss: 0.008545069955289364 2023-01-24 04:50:42.057597: step: 40/466, loss: 0.028171386569738388 2023-01-24 04:50:42.840420: step: 42/466, loss: 0.02147684618830681 2023-01-24 04:50:43.522372: step: 44/466, loss: 0.020654737949371338 2023-01-24 04:50:44.250660: step: 46/466, loss: 0.012604920193552971 2023-01-24 04:50:45.060324: step: 48/466, loss: 0.14834651350975037 2023-01-24 04:50:45.798795: step: 50/466, loss: 0.05847810581326485 2023-01-24 04:50:46.515218: step: 52/466, loss: 0.14463350176811218 2023-01-24 04:50:47.275329: step: 54/466, loss: 0.01718887686729431 2023-01-24 04:50:48.011897: step: 56/466, loss: 0.02093418687582016 2023-01-24 04:50:48.731647: step: 58/466, loss: 0.11011721938848495 2023-01-24 04:50:49.485892: step: 60/466, loss: 0.01010575145483017 2023-01-24 04:50:50.244543: step: 62/466, loss: 0.0032435916364192963 2023-01-24 04:50:50.944158: step: 64/466, loss: 0.036733150482177734 2023-01-24 04:50:51.669327: step: 66/466, loss: 0.6481682062149048 2023-01-24 04:50:52.459368: step: 68/466, loss: 0.017467249184846878 2023-01-24 04:50:53.234275: step: 70/466, loss: 0.009255862794816494 2023-01-24 04:50:54.097004: step: 72/466, loss: 0.025226807221770287 2023-01-24 04:50:54.866466: step: 74/466, loss: 0.02177729830145836 2023-01-24 04:50:55.600256: step: 76/466, loss: 0.008818302303552628 2023-01-24 04:50:56.453928: step: 78/466, loss: 0.010397948324680328 2023-01-24 04:50:57.145427: step: 80/466, loss: 0.04460631310939789 2023-01-24 04:50:57.900470: step: 82/466, loss: 0.05118514597415924 2023-01-24 04:50:58.626197: step: 84/466, loss: 0.006314792670309544 2023-01-24 04:50:59.376748: step: 86/466, loss: 0.03415378928184509 2023-01-24 04:51:00.070419: step: 88/466, loss: 0.0070980386808514595 2023-01-24 04:51:00.711048: step: 90/466, loss: 0.023878734558820724 2023-01-24 04:51:01.482498: step: 92/466, loss: 0.05339059233665466 2023-01-24 04:51:02.313596: step: 94/466, loss: 0.02996997907757759 2023-01-24 04:51:03.102640: step: 96/466, loss: 0.01937534101307392 2023-01-24 04:51:03.866547: step: 98/466, loss: 0.07422344386577606 2023-01-24 04:51:04.677676: step: 100/466, loss: 0.6109414100646973 2023-01-24 04:51:05.407248: step: 102/466, loss: 0.03483252227306366 2023-01-24 04:51:06.108879: step: 104/466, loss: 0.10521746426820755 2023-01-24 04:51:06.885375: step: 106/466, loss: 0.014691787771880627 2023-01-24 04:51:07.661256: step: 108/466, loss: 0.02977118454873562 2023-01-24 04:51:08.375514: step: 110/466, loss: 0.202758327126503 2023-01-24 04:51:09.177844: step: 112/466, loss: 0.024275628849864006 2023-01-24 04:51:09.988779: step: 114/466, loss: 0.4476814270019531 2023-01-24 04:51:10.729997: step: 116/466, loss: 0.012520572170615196 2023-01-24 04:51:11.413168: step: 118/466, loss: 0.04074344038963318 2023-01-24 04:51:12.172801: step: 120/466, loss: 0.08878043293952942 2023-01-24 04:51:12.981001: step: 122/466, loss: 0.07741080969572067 2023-01-24 04:51:13.744995: step: 124/466, loss: 0.014786334708333015 2023-01-24 04:51:14.466857: step: 126/466, loss: 0.09789146482944489 2023-01-24 04:51:15.205902: step: 128/466, loss: 0.018605459481477737 2023-01-24 04:51:16.003611: step: 130/466, loss: 0.045480720698833466 2023-01-24 04:51:16.806873: step: 132/466, loss: 0.0738578662276268 2023-01-24 04:51:17.603952: step: 134/466, loss: 0.02474837936460972 2023-01-24 04:51:18.329086: step: 136/466, loss: 0.02559565380215645 2023-01-24 04:51:19.004117: step: 138/466, loss: 0.08189298957586288 2023-01-24 04:51:19.808271: step: 140/466, loss: 0.09571664035320282 2023-01-24 04:51:20.549455: step: 142/466, loss: 0.013432195410132408 2023-01-24 04:51:21.400750: step: 144/466, loss: 0.028093885630369186 2023-01-24 04:51:22.112549: step: 146/466, loss: 0.01979084312915802 2023-01-24 04:51:22.928056: step: 148/466, loss: 0.07469996809959412 2023-01-24 04:51:23.668562: step: 150/466, loss: 0.027421394363045692 2023-01-24 04:51:24.433037: step: 152/466, loss: 0.03365630656480789 2023-01-24 04:51:25.143401: step: 154/466, loss: 0.035850051790475845 2023-01-24 04:51:25.885503: step: 156/466, loss: 0.0052161808125674725 2023-01-24 04:51:26.674556: step: 158/466, loss: 0.18874676525592804 2023-01-24 04:51:27.400066: step: 160/466, loss: 0.10076171159744263 2023-01-24 04:51:28.216301: step: 162/466, loss: 0.07173901796340942 2023-01-24 04:51:28.949684: step: 164/466, loss: 0.003141289809718728 2023-01-24 04:51:29.659563: step: 166/466, loss: 0.10055757313966751 2023-01-24 04:51:30.409260: step: 168/466, loss: 0.006264520343393087 2023-01-24 04:51:31.187405: step: 170/466, loss: 0.07405374199151993 2023-01-24 04:51:31.945647: step: 172/466, loss: 0.022403467446565628 2023-01-24 04:51:32.661731: step: 174/466, loss: 0.14631879329681396 2023-01-24 04:51:33.346482: step: 176/466, loss: 0.015352722257375717 2023-01-24 04:51:34.135562: step: 178/466, loss: 0.07186131924390793 2023-01-24 04:51:34.828841: step: 180/466, loss: 0.04182245582342148 2023-01-24 04:51:35.621373: step: 182/466, loss: 0.012946860864758492 2023-01-24 04:51:36.375815: step: 184/466, loss: 0.05952528491616249 2023-01-24 04:51:37.153319: step: 186/466, loss: 0.02963799051940441 2023-01-24 04:51:37.902346: step: 188/466, loss: 0.0191253200173378 2023-01-24 04:51:38.661633: step: 190/466, loss: 0.02037331834435463 2023-01-24 04:51:39.428110: step: 192/466, loss: 0.026460129767656326 2023-01-24 04:51:40.118138: step: 194/466, loss: 0.038798943161964417 2023-01-24 04:51:40.800129: step: 196/466, loss: 0.033809561282396317 2023-01-24 04:51:41.517553: step: 198/466, loss: 0.015106773935258389 2023-01-24 04:51:42.265442: step: 200/466, loss: 0.02011779323220253 2023-01-24 04:51:43.107936: step: 202/466, loss: 0.011515239253640175 2023-01-24 04:51:43.894348: step: 204/466, loss: 0.014304769225418568 2023-01-24 04:51:44.598545: step: 206/466, loss: 0.04641815647482872 2023-01-24 04:51:45.376139: step: 208/466, loss: 0.012536582536995411 2023-01-24 04:51:46.173664: step: 210/466, loss: 0.005806042347103357 2023-01-24 04:51:46.909092: step: 212/466, loss: 0.0254743043333292 2023-01-24 04:51:47.649132: step: 214/466, loss: 0.06325501203536987 2023-01-24 04:51:48.402040: step: 216/466, loss: 0.0032372265122830868 2023-01-24 04:51:49.193399: step: 218/466, loss: 0.011696635745465755 2023-01-24 04:51:49.926919: step: 220/466, loss: 0.09282013028860092 2023-01-24 04:51:50.626033: step: 222/466, loss: 0.034505460411310196 2023-01-24 04:51:51.379258: step: 224/466, loss: 0.31562793254852295 2023-01-24 04:51:52.106343: step: 226/466, loss: 0.0380619652569294 2023-01-24 04:51:52.913115: step: 228/466, loss: 0.03315262496471405 2023-01-24 04:51:53.696547: step: 230/466, loss: 0.1067478284239769 2023-01-24 04:51:54.502418: step: 232/466, loss: 0.03125142306089401 2023-01-24 04:51:55.326681: step: 234/466, loss: 0.14881651103496552 2023-01-24 04:51:56.063282: step: 236/466, loss: 0.04718930646777153 2023-01-24 04:51:56.841440: step: 238/466, loss: 0.07604897022247314 2023-01-24 04:51:57.540555: step: 240/466, loss: 0.06238268315792084 2023-01-24 04:51:58.209053: step: 242/466, loss: 0.006522961892187595 2023-01-24 04:51:58.937431: step: 244/466, loss: 0.3242731988430023 2023-01-24 04:51:59.676146: step: 246/466, loss: 0.01007294375449419 2023-01-24 04:52:00.433661: step: 248/466, loss: 0.0399547815322876 2023-01-24 04:52:01.150477: step: 250/466, loss: 0.03402939811348915 2023-01-24 04:52:02.002490: step: 252/466, loss: 0.0879935547709465 2023-01-24 04:52:02.729778: step: 254/466, loss: 0.013537813909351826 2023-01-24 04:52:03.505909: step: 256/466, loss: 0.03015812113881111 2023-01-24 04:52:04.349948: step: 258/466, loss: 0.0662144348025322 2023-01-24 04:52:05.078824: step: 260/466, loss: 0.019768787547945976 2023-01-24 04:52:05.857091: step: 262/466, loss: 0.044151514768600464 2023-01-24 04:52:06.568375: step: 264/466, loss: 0.008428049273788929 2023-01-24 04:52:07.200185: step: 266/466, loss: 0.028973642736673355 2023-01-24 04:52:07.972073: step: 268/466, loss: 0.04836282134056091 2023-01-24 04:52:08.878477: step: 270/466, loss: 0.042037785053253174 2023-01-24 04:52:09.599445: step: 272/466, loss: 0.022262291982769966 2023-01-24 04:52:10.409974: step: 274/466, loss: 0.09610701352357864 2023-01-24 04:52:11.211264: step: 276/466, loss: 0.0048899780958890915 2023-01-24 04:52:12.153543: step: 278/466, loss: 0.034485138952732086 2023-01-24 04:52:12.953595: step: 280/466, loss: 0.027752559632062912 2023-01-24 04:52:13.776768: step: 282/466, loss: 0.09731708467006683 2023-01-24 04:52:14.491428: step: 284/466, loss: 0.008839517831802368 2023-01-24 04:52:15.224040: step: 286/466, loss: 0.01897738315165043 2023-01-24 04:52:16.023768: step: 288/466, loss: 0.021372944116592407 2023-01-24 04:52:16.781817: step: 290/466, loss: 0.0011967141181230545 2023-01-24 04:52:17.599713: step: 292/466, loss: 0.002933601150289178 2023-01-24 04:52:18.410754: step: 294/466, loss: 0.07908368110656738 2023-01-24 04:52:19.193284: step: 296/466, loss: 0.027145352214574814 2023-01-24 04:52:19.977318: step: 298/466, loss: 0.009775962680578232 2023-01-24 04:52:20.856723: step: 300/466, loss: 0.05080359801650047 2023-01-24 04:52:21.594083: step: 302/466, loss: 0.03475033491849899 2023-01-24 04:52:22.294458: step: 304/466, loss: 0.008489892818033695 2023-01-24 04:52:23.018350: step: 306/466, loss: 0.016335856169462204 2023-01-24 04:52:23.867388: step: 308/466, loss: 0.02795318141579628 2023-01-24 04:52:24.619730: step: 310/466, loss: 0.03940548375248909 2023-01-24 04:52:25.360715: step: 312/466, loss: 0.03053591586649418 2023-01-24 04:52:26.107992: step: 314/466, loss: 0.01822226122021675 2023-01-24 04:52:26.868238: step: 316/466, loss: 0.0020739452447742224 2023-01-24 04:52:27.626183: step: 318/466, loss: 0.01575908437371254 2023-01-24 04:52:28.549715: step: 320/466, loss: 0.3569834232330322 2023-01-24 04:52:29.371633: step: 322/466, loss: 0.038145650178194046 2023-01-24 04:52:30.066433: step: 324/466, loss: 0.0722237229347229 2023-01-24 04:52:30.855784: step: 326/466, loss: 0.005113533232361078 2023-01-24 04:52:31.601402: step: 328/466, loss: 0.013926339335739613 2023-01-24 04:52:32.363387: step: 330/466, loss: 0.17054429650306702 2023-01-24 04:52:33.130790: step: 332/466, loss: 0.04146190360188484 2023-01-24 04:52:33.982138: step: 334/466, loss: 0.1249271035194397 2023-01-24 04:52:34.739351: step: 336/466, loss: 0.00410437723621726 2023-01-24 04:52:35.488543: step: 338/466, loss: 0.0191140566021204 2023-01-24 04:52:36.220424: step: 340/466, loss: 0.028116457164287567 2023-01-24 04:52:36.973626: step: 342/466, loss: 0.09836491197347641 2023-01-24 04:52:37.765051: step: 344/466, loss: 0.036641448736190796 2023-01-24 04:52:38.562622: step: 346/466, loss: 0.11809373646974564 2023-01-24 04:52:39.313343: step: 348/466, loss: 0.013707575388252735 2023-01-24 04:52:40.064120: step: 350/466, loss: 0.0044649383053183556 2023-01-24 04:52:40.892245: step: 352/466, loss: 0.03884003683924675 2023-01-24 04:52:41.623775: step: 354/466, loss: 0.058105651289224625 2023-01-24 04:52:42.368340: step: 356/466, loss: 0.0760202631354332 2023-01-24 04:52:43.114857: step: 358/466, loss: 0.0017490936443209648 2023-01-24 04:52:43.835549: step: 360/466, loss: 0.06289663910865784 2023-01-24 04:52:44.657609: step: 362/466, loss: 0.13581669330596924 2023-01-24 04:52:45.400995: step: 364/466, loss: 0.010210997425019741 2023-01-24 04:52:46.178634: step: 366/466, loss: 0.0055528851225972176 2023-01-24 04:52:46.830912: step: 368/466, loss: 0.024322085082530975 2023-01-24 04:52:47.570004: step: 370/466, loss: 0.07646214962005615 2023-01-24 04:52:48.342511: step: 372/466, loss: 0.01779305562376976 2023-01-24 04:52:49.083637: step: 374/466, loss: 0.6838045120239258 2023-01-24 04:52:49.850139: step: 376/466, loss: 0.015555030666291714 2023-01-24 04:52:50.687482: step: 378/466, loss: 0.005967474076896906 2023-01-24 04:52:51.517430: step: 380/466, loss: 0.06998220831155777 2023-01-24 04:52:52.485468: step: 382/466, loss: 0.0267830528318882 2023-01-24 04:52:53.235403: step: 384/466, loss: 0.09789497405290604 2023-01-24 04:52:53.991293: step: 386/466, loss: 0.025804026052355766 2023-01-24 04:52:54.736503: step: 388/466, loss: 0.009077346883714199 2023-01-24 04:52:55.490627: step: 390/466, loss: 0.003902832977473736 2023-01-24 04:52:56.267869: step: 392/466, loss: 0.009882147423923016 2023-01-24 04:52:56.999220: step: 394/466, loss: 0.012400878593325615 2023-01-24 04:52:57.741026: step: 396/466, loss: 0.006915054749697447 2023-01-24 04:52:58.522946: step: 398/466, loss: 0.029605450108647346 2023-01-24 04:52:59.484322: step: 400/466, loss: 0.020665552467107773 2023-01-24 04:53:00.255875: step: 402/466, loss: 0.04576048627495766 2023-01-24 04:53:01.015911: step: 404/466, loss: 0.029126698151230812 2023-01-24 04:53:01.809863: step: 406/466, loss: 0.05250953882932663 2023-01-24 04:53:02.569676: step: 408/466, loss: 0.09095323085784912 2023-01-24 04:53:03.291886: step: 410/466, loss: 0.37631654739379883 2023-01-24 04:53:03.991213: step: 412/466, loss: 0.03619658201932907 2023-01-24 04:53:04.750279: step: 414/466, loss: 0.15473692119121552 2023-01-24 04:53:05.523306: step: 416/466, loss: 0.06378611922264099 2023-01-24 04:53:06.207998: step: 418/466, loss: 0.027144750580191612 2023-01-24 04:53:07.007103: step: 420/466, loss: 0.06826693564653397 2023-01-24 04:53:07.769387: step: 422/466, loss: 0.7054346203804016 2023-01-24 04:53:08.459598: step: 424/466, loss: 0.010629300028085709 2023-01-24 04:53:09.172243: step: 426/466, loss: 0.018608879297971725 2023-01-24 04:53:09.910867: step: 428/466, loss: 0.034134648740291595 2023-01-24 04:53:10.661417: step: 430/466, loss: 0.025236694142222404 2023-01-24 04:53:11.391735: step: 432/466, loss: 0.06735636293888092 2023-01-24 04:53:12.170131: step: 434/466, loss: 0.04542417451739311 2023-01-24 04:53:12.980806: step: 436/466, loss: 0.11845054477453232 2023-01-24 04:53:13.713106: step: 438/466, loss: 0.034908097237348557 2023-01-24 04:53:14.425292: step: 440/466, loss: 0.01451411284506321 2023-01-24 04:53:15.208520: step: 442/466, loss: 0.2690739631652832 2023-01-24 04:53:16.037289: step: 444/466, loss: 0.037572916597127914 2023-01-24 04:53:16.805681: step: 446/466, loss: 0.05552734434604645 2023-01-24 04:53:17.567128: step: 448/466, loss: 0.052332472056150436 2023-01-24 04:53:18.394380: step: 450/466, loss: 0.023011988028883934 2023-01-24 04:53:19.168017: step: 452/466, loss: 0.024651074782013893 2023-01-24 04:53:19.963142: step: 454/466, loss: 0.19281929731369019 2023-01-24 04:53:20.761559: step: 456/466, loss: 0.013870988972485065 2023-01-24 04:53:21.540729: step: 458/466, loss: 0.01165796909481287 2023-01-24 04:53:22.243829: step: 460/466, loss: 0.015610731206834316 2023-01-24 04:53:22.986219: step: 462/466, loss: 0.021320592612028122 2023-01-24 04:53:23.758503: step: 464/466, loss: 0.01695173606276512 2023-01-24 04:53:24.497542: step: 466/466, loss: 0.012832703068852425 2023-01-24 04:53:25.244171: step: 468/466, loss: 0.03932918980717659 2023-01-24 04:53:25.954488: step: 470/466, loss: 0.011094323359429836 2023-01-24 04:53:26.749570: step: 472/466, loss: 0.03536432236433029 2023-01-24 04:53:27.528043: step: 474/466, loss: 0.013472940772771835 2023-01-24 04:53:28.367458: step: 476/466, loss: 0.0204615518450737 2023-01-24 04:53:29.228044: step: 478/466, loss: 0.011578397825360298 2023-01-24 04:53:29.954736: step: 480/466, loss: 0.004967503249645233 2023-01-24 04:53:30.613261: step: 482/466, loss: 0.020800841972231865 2023-01-24 04:53:31.351571: step: 484/466, loss: 0.027728581801056862 2023-01-24 04:53:32.170054: step: 486/466, loss: 0.026497885584831238 2023-01-24 04:53:32.943895: step: 488/466, loss: 0.1051311269402504 2023-01-24 04:53:33.661039: step: 490/466, loss: 0.0763152688741684 2023-01-24 04:53:34.504479: step: 492/466, loss: 0.23005081713199615 2023-01-24 04:53:35.284520: step: 494/466, loss: 0.011912272311747074 2023-01-24 04:53:36.010825: step: 496/466, loss: 0.010407421737909317 2023-01-24 04:53:36.724996: step: 498/466, loss: 0.0014458999503403902 2023-01-24 04:53:37.430442: step: 500/466, loss: 0.12212016433477402 2023-01-24 04:53:38.207215: step: 502/466, loss: 0.024932388216257095 2023-01-24 04:53:38.914012: step: 504/466, loss: 0.05399378016591072 2023-01-24 04:53:39.631134: step: 506/466, loss: 0.047883279621601105 2023-01-24 04:53:40.389214: step: 508/466, loss: 0.02004711516201496 2023-01-24 04:53:41.153934: step: 510/466, loss: 0.00319908419623971 2023-01-24 04:53:41.915100: step: 512/466, loss: 1.257390022277832 2023-01-24 04:53:42.646056: step: 514/466, loss: 0.07255180180072784 2023-01-24 04:53:43.323142: step: 516/466, loss: 0.009753470309078693 2023-01-24 04:53:44.086723: step: 518/466, loss: 0.09241458028554916 2023-01-24 04:53:44.816642: step: 520/466, loss: 0.024641767144203186 2023-01-24 04:53:45.530477: step: 522/466, loss: 0.028830142691731453 2023-01-24 04:53:46.293023: step: 524/466, loss: 0.0035950529854744673 2023-01-24 04:53:47.093421: step: 526/466, loss: 0.02892039716243744 2023-01-24 04:53:47.868676: step: 528/466, loss: 0.24303272366523743 2023-01-24 04:53:48.606389: step: 530/466, loss: 0.2736338973045349 2023-01-24 04:53:49.399315: step: 532/466, loss: 0.038426872342824936 2023-01-24 04:53:50.166183: step: 534/466, loss: 0.017501261085271835 2023-01-24 04:53:50.924882: step: 536/466, loss: 0.14486098289489746 2023-01-24 04:53:51.657083: step: 538/466, loss: 0.0013692817883566022 2023-01-24 04:53:52.438888: step: 540/466, loss: 0.09689339995384216 2023-01-24 04:53:53.205436: step: 542/466, loss: 0.03417300805449486 2023-01-24 04:53:53.964280: step: 544/466, loss: 0.023501494899392128 2023-01-24 04:53:54.666878: step: 546/466, loss: 0.00033460595295764506 2023-01-24 04:53:55.409257: step: 548/466, loss: 0.03630896285176277 2023-01-24 04:53:56.310875: step: 550/466, loss: 0.10656667500734329 2023-01-24 04:53:57.101503: step: 552/466, loss: 0.01857171766459942 2023-01-24 04:53:57.801789: step: 554/466, loss: 0.004209447186440229 2023-01-24 04:53:58.557462: step: 556/466, loss: 0.038103025406599045 2023-01-24 04:53:59.299192: step: 558/466, loss: 0.0565560944378376 2023-01-24 04:54:00.066815: step: 560/466, loss: 0.024387158453464508 2023-01-24 04:54:00.794026: step: 562/466, loss: 0.1290791779756546 2023-01-24 04:54:01.540187: step: 564/466, loss: 0.20599617063999176 2023-01-24 04:54:02.392909: step: 566/466, loss: 0.022683100774884224 2023-01-24 04:54:03.213241: step: 568/466, loss: 0.020000936463475227 2023-01-24 04:54:03.977111: step: 570/466, loss: 0.0779845118522644 2023-01-24 04:54:04.717608: step: 572/466, loss: 0.00268647656776011 2023-01-24 04:54:05.449821: step: 574/466, loss: 0.07933858782052994 2023-01-24 04:54:06.297077: step: 576/466, loss: 0.03418390080332756 2023-01-24 04:54:07.065760: step: 578/466, loss: 0.06472688168287277 2023-01-24 04:54:07.906471: step: 580/466, loss: 0.020349211990833282 2023-01-24 04:54:08.612877: step: 582/466, loss: 0.041946232318878174 2023-01-24 04:54:09.351083: step: 584/466, loss: 0.02541203796863556 2023-01-24 04:54:10.065481: step: 586/466, loss: 0.02173200063407421 2023-01-24 04:54:10.773557: step: 588/466, loss: 0.034484103322029114 2023-01-24 04:54:11.489304: step: 590/466, loss: 0.03421414643526077 2023-01-24 04:54:12.270587: step: 592/466, loss: 0.029985573142766953 2023-01-24 04:54:13.112019: step: 594/466, loss: 0.03662244975566864 2023-01-24 04:54:13.880378: step: 596/466, loss: 0.14605748653411865 2023-01-24 04:54:14.616460: step: 598/466, loss: 0.014268821105360985 2023-01-24 04:54:15.339992: step: 600/466, loss: 0.015629857778549194 2023-01-24 04:54:16.123961: step: 602/466, loss: 0.0037779496051371098 2023-01-24 04:54:16.880023: step: 604/466, loss: 0.0434369295835495 2023-01-24 04:54:17.694497: step: 606/466, loss: 0.05154235288500786 2023-01-24 04:54:18.383438: step: 608/466, loss: 0.05169665068387985 2023-01-24 04:54:19.197706: step: 610/466, loss: 0.12863320112228394 2023-01-24 04:54:19.979430: step: 612/466, loss: 0.0178390946239233 2023-01-24 04:54:20.823819: step: 614/466, loss: 0.2360614687204361 2023-01-24 04:54:21.585901: step: 616/466, loss: 0.017110150307416916 2023-01-24 04:54:22.407456: step: 618/466, loss: 1.5750479698181152 2023-01-24 04:54:23.151146: step: 620/466, loss: 0.0319860503077507 2023-01-24 04:54:23.965719: step: 622/466, loss: 0.03700065240263939 2023-01-24 04:54:24.743566: step: 624/466, loss: 0.023093944415450096 2023-01-24 04:54:25.513901: step: 626/466, loss: 0.03461216017603874 2023-01-24 04:54:26.290000: step: 628/466, loss: 4.815478801727295 2023-01-24 04:54:27.024065: step: 630/466, loss: 0.005494902841746807 2023-01-24 04:54:27.763313: step: 632/466, loss: 0.08448342978954315 2023-01-24 04:54:28.442946: step: 634/466, loss: 0.015122022479772568 2023-01-24 04:54:29.155127: step: 636/466, loss: 0.054206009954214096 2023-01-24 04:54:29.922284: step: 638/466, loss: 0.07040295749902725 2023-01-24 04:54:30.669331: step: 640/466, loss: 0.009515570476651192 2023-01-24 04:54:31.549737: step: 642/466, loss: 0.24127714335918427 2023-01-24 04:54:32.355513: step: 644/466, loss: 0.06542390584945679 2023-01-24 04:54:33.076013: step: 646/466, loss: 0.13807275891304016 2023-01-24 04:54:33.825122: step: 648/466, loss: 0.05299195647239685 2023-01-24 04:54:34.477104: step: 650/466, loss: 0.00545166851952672 2023-01-24 04:54:35.197078: step: 652/466, loss: 0.055834099650382996 2023-01-24 04:54:36.038836: step: 654/466, loss: 0.03985896334052086 2023-01-24 04:54:36.699577: step: 656/466, loss: 0.05284273624420166 2023-01-24 04:54:37.439048: step: 658/466, loss: 0.06045251339673996 2023-01-24 04:54:38.146111: step: 660/466, loss: 0.0345003679394722 2023-01-24 04:54:38.947423: step: 662/466, loss: 0.06754326075315475 2023-01-24 04:54:39.698428: step: 664/466, loss: 0.20084981620311737 2023-01-24 04:54:40.413818: step: 666/466, loss: 0.44011354446411133 2023-01-24 04:54:41.200950: step: 668/466, loss: 0.012255952693521976 2023-01-24 04:54:41.954197: step: 670/466, loss: 0.048434365540742874 2023-01-24 04:54:42.736591: step: 672/466, loss: 0.01924644410610199 2023-01-24 04:54:43.488252: step: 674/466, loss: 0.03586205840110779 2023-01-24 04:54:44.203234: step: 676/466, loss: 0.019450347870588303 2023-01-24 04:54:44.974681: step: 678/466, loss: 0.04268264025449753 2023-01-24 04:54:45.766135: step: 680/466, loss: 0.06356403231620789 2023-01-24 04:54:46.493010: step: 682/466, loss: 0.06838064640760422 2023-01-24 04:54:47.241362: step: 684/466, loss: 0.033811304718256 2023-01-24 04:54:48.076032: step: 686/466, loss: 0.020641760900616646 2023-01-24 04:54:48.836737: step: 688/466, loss: 0.03898869827389717 2023-01-24 04:54:49.523357: step: 690/466, loss: 0.06437399983406067 2023-01-24 04:54:50.250000: step: 692/466, loss: 0.03513655439019203 2023-01-24 04:54:51.131305: step: 694/466, loss: 0.028021251782774925 2023-01-24 04:54:51.860935: step: 696/466, loss: 0.032173994928598404 2023-01-24 04:54:52.631109: step: 698/466, loss: 0.144153892993927 2023-01-24 04:54:53.475197: step: 700/466, loss: 2.666944980621338 2023-01-24 04:54:54.257625: step: 702/466, loss: 0.03742769733071327 2023-01-24 04:54:55.012683: step: 704/466, loss: 0.007199693471193314 2023-01-24 04:54:55.831355: step: 706/466, loss: 0.04252056032419205 2023-01-24 04:54:56.582036: step: 708/466, loss: 0.030529698356986046 2023-01-24 04:54:57.332640: step: 710/466, loss: 0.0010512126609683037 2023-01-24 04:54:58.077527: step: 712/466, loss: 0.005386349279433489 2023-01-24 04:54:58.821747: step: 714/466, loss: 0.056022871285676956 2023-01-24 04:54:59.485228: step: 716/466, loss: 0.0016162166139110923 2023-01-24 04:55:00.232195: step: 718/466, loss: 0.0325670950114727 2023-01-24 04:55:00.936488: step: 720/466, loss: 0.04420344531536102 2023-01-24 04:55:01.668953: step: 722/466, loss: 0.10789467394351959 2023-01-24 04:55:02.461167: step: 724/466, loss: 0.03549986705183983 2023-01-24 04:55:03.254545: step: 726/466, loss: 0.08880554139614105 2023-01-24 04:55:03.995198: step: 728/466, loss: 0.11527568101882935 2023-01-24 04:55:04.694455: step: 730/466, loss: 0.02176312729716301 2023-01-24 04:55:05.463546: step: 732/466, loss: 0.5555728077888489 2023-01-24 04:55:06.248954: step: 734/466, loss: 0.3058893382549286 2023-01-24 04:55:07.013241: step: 736/466, loss: 0.037982527166604996 2023-01-24 04:55:07.738230: step: 738/466, loss: 0.07793950289487839 2023-01-24 04:55:08.490009: step: 740/466, loss: 0.031146274879574776 2023-01-24 04:55:09.269697: step: 742/466, loss: 0.049540840089321136 2023-01-24 04:55:09.979509: step: 744/466, loss: 0.0020056506618857384 2023-01-24 04:55:10.741691: step: 746/466, loss: 0.10663189738988876 2023-01-24 04:55:11.501807: step: 748/466, loss: 0.004785729572176933 2023-01-24 04:55:12.362249: step: 750/466, loss: 0.03479137644171715 2023-01-24 04:55:13.237849: step: 752/466, loss: 0.06022670120000839 2023-01-24 04:55:14.039979: step: 754/466, loss: 0.09813099354505539 2023-01-24 04:55:14.763626: step: 756/466, loss: 0.05356891453266144 2023-01-24 04:55:15.504877: step: 758/466, loss: 0.016268325969576836 2023-01-24 04:55:16.308451: step: 760/466, loss: 0.06473978608846664 2023-01-24 04:55:17.084073: step: 762/466, loss: 0.06507152318954468 2023-01-24 04:55:17.911609: step: 764/466, loss: 0.030582893639802933 2023-01-24 04:55:18.680063: step: 766/466, loss: 0.3056143522262573 2023-01-24 04:55:19.546108: step: 768/466, loss: 0.034017164260149 2023-01-24 04:55:20.359762: step: 770/466, loss: 0.05874023959040642 2023-01-24 04:55:21.122019: step: 772/466, loss: 0.07668693363666534 2023-01-24 04:55:21.864763: step: 774/466, loss: 0.024750353768467903 2023-01-24 04:55:22.665522: step: 776/466, loss: 0.01619192771613598 2023-01-24 04:55:23.495101: step: 778/466, loss: 0.10239271819591522 2023-01-24 04:55:24.216254: step: 780/466, loss: 0.035204604268074036 2023-01-24 04:55:24.973220: step: 782/466, loss: 0.03336874395608902 2023-01-24 04:55:25.767580: step: 784/466, loss: 0.03149819001555443 2023-01-24 04:55:26.490071: step: 786/466, loss: 0.03700670972466469 2023-01-24 04:55:27.295857: step: 788/466, loss: 0.07515472173690796 2023-01-24 04:55:28.082186: step: 790/466, loss: 0.016940707340836525 2023-01-24 04:55:28.825008: step: 792/466, loss: 0.052269306033849716 2023-01-24 04:55:29.624363: step: 794/466, loss: 0.05127988010644913 2023-01-24 04:55:30.367898: step: 796/466, loss: 0.021262122318148613 2023-01-24 04:55:31.065915: step: 798/466, loss: 0.004020551685243845 2023-01-24 04:55:31.774844: step: 800/466, loss: 0.006356806959956884 2023-01-24 04:55:32.594342: step: 802/466, loss: 0.06390430778265 2023-01-24 04:55:33.447651: step: 804/466, loss: 0.03449377417564392 2023-01-24 04:55:34.261419: step: 806/466, loss: 0.05775659158825874 2023-01-24 04:55:35.049403: step: 808/466, loss: 1.9378232955932617 2023-01-24 04:55:35.776349: step: 810/466, loss: 0.028914159163832664 2023-01-24 04:55:36.592112: step: 812/466, loss: 0.08311357349157333 2023-01-24 04:55:37.438364: step: 814/466, loss: 0.0034366236068308353 2023-01-24 04:55:38.177666: step: 816/466, loss: 0.024715717881917953 2023-01-24 04:55:38.995782: step: 818/466, loss: 0.05565216392278671 2023-01-24 04:55:39.813011: step: 820/466, loss: 0.1202254593372345 2023-01-24 04:55:40.617868: step: 822/466, loss: 0.02393370307981968 2023-01-24 04:55:41.355017: step: 824/466, loss: 0.028989041224122047 2023-01-24 04:55:42.061601: step: 826/466, loss: 0.01382619421929121 2023-01-24 04:55:42.872908: step: 828/466, loss: 0.05490459129214287 2023-01-24 04:55:43.644304: step: 830/466, loss: 0.043450355529785156 2023-01-24 04:55:44.454670: step: 832/466, loss: 0.015542100183665752 2023-01-24 04:55:45.160151: step: 834/466, loss: 0.0452614389359951 2023-01-24 04:55:46.000309: step: 836/466, loss: 0.030005039647221565 2023-01-24 04:55:46.726653: step: 838/466, loss: 0.008008835837244987 2023-01-24 04:55:47.461919: step: 840/466, loss: 0.02006283588707447 2023-01-24 04:55:48.175478: step: 842/466, loss: 0.057198040187358856 2023-01-24 04:55:48.933310: step: 844/466, loss: 0.12752194702625275 2023-01-24 04:55:49.623618: step: 846/466, loss: 0.014229381456971169 2023-01-24 04:55:50.352423: step: 848/466, loss: 0.014348013326525688 2023-01-24 04:55:51.188555: step: 850/466, loss: 0.03122992441058159 2023-01-24 04:55:51.966175: step: 852/466, loss: 0.06689751148223877 2023-01-24 04:55:52.664285: step: 854/466, loss: 0.08557271957397461 2023-01-24 04:55:53.440392: step: 856/466, loss: 0.07315085828304291 2023-01-24 04:55:54.169653: step: 858/466, loss: 0.011412195861339569 2023-01-24 04:55:54.923776: step: 860/466, loss: 0.18121466040611267 2023-01-24 04:55:55.704865: step: 862/466, loss: 0.047808241099119186 2023-01-24 04:55:56.509083: step: 864/466, loss: 0.039932798594236374 2023-01-24 04:55:57.305883: step: 866/466, loss: 0.06648283451795578 2023-01-24 04:55:58.078326: step: 868/466, loss: 0.03634491562843323 2023-01-24 04:55:58.862677: step: 870/466, loss: 0.040794167667627335 2023-01-24 04:55:59.607747: step: 872/466, loss: 0.03335542976856232 2023-01-24 04:56:00.389676: step: 874/466, loss: 0.3126262128353119 2023-01-24 04:56:01.110517: step: 876/466, loss: 0.006511006038635969 2023-01-24 04:56:01.928304: step: 878/466, loss: 0.0005270981346257031 2023-01-24 04:56:02.649739: step: 880/466, loss: 0.03149344399571419 2023-01-24 04:56:03.524925: step: 882/466, loss: 0.1612115204334259 2023-01-24 04:56:04.354610: step: 884/466, loss: 0.02418387308716774 2023-01-24 04:56:05.110660: step: 886/466, loss: 0.04643942415714264 2023-01-24 04:56:05.801826: step: 888/466, loss: 0.07254820317029953 2023-01-24 04:56:06.532583: step: 890/466, loss: 0.024736449122428894 2023-01-24 04:56:07.294705: step: 892/466, loss: 0.015713006258010864 2023-01-24 04:56:08.072029: step: 894/466, loss: 0.002715908456593752 2023-01-24 04:56:08.820566: step: 896/466, loss: 0.06559024006128311 2023-01-24 04:56:09.578726: step: 898/466, loss: 0.029943106696009636 2023-01-24 04:56:10.424234: step: 900/466, loss: 0.01815328374505043 2023-01-24 04:56:11.267180: step: 902/466, loss: 0.030919160693883896 2023-01-24 04:56:11.992070: step: 904/466, loss: 0.0003961712936870754 2023-01-24 04:56:12.778404: step: 906/466, loss: 0.007957972586154938 2023-01-24 04:56:13.530689: step: 908/466, loss: 0.3450794517993927 2023-01-24 04:56:14.267551: step: 910/466, loss: 0.04624255374073982 2023-01-24 04:56:15.029552: step: 912/466, loss: 0.03033428080379963 2023-01-24 04:56:15.839405: step: 914/466, loss: 0.11813104897737503 2023-01-24 04:56:16.609356: step: 916/466, loss: 0.04941783472895622 2023-01-24 04:56:17.383918: step: 918/466, loss: 0.02447052113711834 2023-01-24 04:56:18.184469: step: 920/466, loss: 0.05541486293077469 2023-01-24 04:56:19.001166: step: 922/466, loss: 0.04049112647771835 2023-01-24 04:56:19.781221: step: 924/466, loss: 0.014253446832299232 2023-01-24 04:56:20.587543: step: 926/466, loss: 0.190837562084198 2023-01-24 04:56:21.371946: step: 928/466, loss: 0.06502583622932434 2023-01-24 04:56:22.166521: step: 930/466, loss: 0.018064746633172035 2023-01-24 04:56:22.937766: step: 932/466, loss: 0.1262487918138504 ================================================== Loss: 0.085 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3514281426856457, 'r': 0.3267548195748888, 'f1': 0.33864265470199884}, 'combined': 0.24952616662252544, 'epoch': 23} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3841715799217045, 'r': 0.28937599526569946, 'f1': 0.3301029871919831}, 'combined': 0.20289256773751158, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3215687839480539, 'r': 0.3288910332979147, 'f1': 0.3251886952120095}, 'combined': 0.23961272278779647, 'epoch': 23} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.36438282580265113, 'r': 0.29112735302430187, 'f1': 0.32366181636805813}, 'combined': 0.19893360420670889, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3612144323848636, 'r': 0.33379777717538633, 'f1': 0.34696534234995774}, 'combined': 0.2556586733104952, 'epoch': 23} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3825846027224074, 'r': 0.2901680623245272, 'f1': 0.3300286676364637}, 'combined': 0.20384123589310998, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3642241379310345, 'r': 0.30178571428571427, 'f1': 0.33007812499999994}, 'combined': 0.2200520833333333, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.21212121212121213, 'r': 0.30434782608695654, 'f1': 0.25}, 'combined': 0.125, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 23} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3514281426856457, 'r': 0.3267548195748888, 'f1': 0.33864265470199884}, 'combined': 0.24952616662252544, 'epoch': 23} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3841715799217045, 'r': 0.28937599526569946, 'f1': 0.3301029871919831}, 'combined': 0.20289256773751158, 'epoch': 23} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3642241379310345, 'r': 0.30178571428571427, 'f1': 0.33007812499999994}, 'combined': 0.2200520833333333, 'epoch': 23} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34582504970178923, 'r': 0.33007590132827325, 'f1': 0.3377669902912621}, 'combined': 0.24888094021461418, 'epoch': 19} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36038315043942815, 'r': 0.2939803727200525, 'f1': 0.3238125877697768}, 'combined': 0.20000189244603864, 'epoch': 19} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 19} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:59:13.471123: step: 2/466, loss: 0.012442237697541714 2023-01-24 04:59:14.164322: step: 4/466, loss: 0.022447844967246056 2023-01-24 04:59:14.923080: step: 6/466, loss: 0.16383084654808044 2023-01-24 04:59:15.704953: step: 8/466, loss: 0.06788397580385208 2023-01-24 04:59:16.458226: step: 10/466, loss: 0.012289896607398987 2023-01-24 04:59:17.221836: step: 12/466, loss: 0.030877836048603058 2023-01-24 04:59:17.996022: step: 14/466, loss: 0.02345413900911808 2023-01-24 04:59:18.722138: step: 16/466, loss: 0.00726708397269249 2023-01-24 04:59:19.492742: step: 18/466, loss: 0.005459288135170937 2023-01-24 04:59:20.250874: step: 20/466, loss: 0.01712528057396412 2023-01-24 04:59:20.991676: step: 22/466, loss: 0.007801868487149477 2023-01-24 04:59:21.824813: step: 24/466, loss: 0.01762966439127922 2023-01-24 04:59:22.563939: step: 26/466, loss: 0.015811754390597343 2023-01-24 04:59:23.374666: step: 28/466, loss: 0.03606037050485611 2023-01-24 04:59:24.223878: step: 30/466, loss: 0.01512200478464365 2023-01-24 04:59:24.974838: step: 32/466, loss: 0.004144949372857809 2023-01-24 04:59:25.803798: step: 34/466, loss: 0.019165851175785065 2023-01-24 04:59:26.566500: step: 36/466, loss: 0.01260423380881548 2023-01-24 04:59:27.293213: step: 38/466, loss: 0.01843561790883541 2023-01-24 04:59:28.069636: step: 40/466, loss: 0.08869102597236633 2023-01-24 04:59:28.737786: step: 42/466, loss: 0.008993967436254025 2023-01-24 04:59:29.479677: step: 44/466, loss: 0.010097336955368519 2023-01-24 04:59:30.381100: step: 46/466, loss: 0.195277601480484 2023-01-24 04:59:31.172804: step: 48/466, loss: 0.02338215708732605 2023-01-24 04:59:31.998096: step: 50/466, loss: 0.0014439165825024247 2023-01-24 04:59:32.767880: step: 52/466, loss: 0.023032324388623238 2023-01-24 04:59:33.451070: step: 54/466, loss: 0.02563190460205078 2023-01-24 04:59:34.158720: step: 56/466, loss: 0.03805235028266907 2023-01-24 04:59:35.204052: step: 58/466, loss: 0.07855221629142761 2023-01-24 04:59:35.939155: step: 60/466, loss: 0.025932323187589645 2023-01-24 04:59:36.660892: step: 62/466, loss: 0.10620784759521484 2023-01-24 04:59:37.431973: step: 64/466, loss: 0.0058013866655528545 2023-01-24 04:59:38.236673: step: 66/466, loss: 0.3046168386936188 2023-01-24 04:59:38.954856: step: 68/466, loss: 0.01990501582622528 2023-01-24 04:59:39.680493: step: 70/466, loss: 0.05764950439333916 2023-01-24 04:59:40.506627: step: 72/466, loss: 0.030430598184466362 2023-01-24 04:59:41.244285: step: 74/466, loss: 0.009992522187530994 2023-01-24 04:59:42.105893: step: 76/466, loss: 0.11593419313430786 2023-01-24 04:59:42.894612: step: 78/466, loss: 0.04402603954076767 2023-01-24 04:59:43.664142: step: 80/466, loss: 0.006121122743934393 2023-01-24 04:59:44.399303: step: 82/466, loss: 0.0049262214452028275 2023-01-24 04:59:45.196680: step: 84/466, loss: 0.0043501779437065125 2023-01-24 04:59:45.904984: step: 86/466, loss: 0.010162044316530228 2023-01-24 04:59:46.628079: step: 88/466, loss: 0.0474172867834568 2023-01-24 04:59:47.250538: step: 90/466, loss: 0.015527402982115746 2023-01-24 04:59:48.032453: step: 92/466, loss: 0.03818926587700844 2023-01-24 04:59:48.884936: step: 94/466, loss: 0.023612646386027336 2023-01-24 04:59:49.650440: step: 96/466, loss: 0.07392007857561111 2023-01-24 04:59:50.451262: step: 98/466, loss: 0.0026107749436050653 2023-01-24 04:59:51.235202: step: 100/466, loss: 0.0592830553650856 2023-01-24 04:59:52.037817: step: 102/466, loss: 0.026403101161122322 2023-01-24 04:59:52.853766: step: 104/466, loss: 0.13619732856750488 2023-01-24 04:59:53.610310: step: 106/466, loss: 0.021160613745450974 2023-01-24 04:59:54.285590: step: 108/466, loss: 0.3664078414440155 2023-01-24 04:59:55.035455: step: 110/466, loss: 0.030345715582370758 2023-01-24 04:59:55.785563: step: 112/466, loss: 0.0480051189661026 2023-01-24 04:59:56.534427: step: 114/466, loss: 0.028134595602750778 2023-01-24 04:59:57.371955: step: 116/466, loss: 0.032205626368522644 2023-01-24 04:59:58.134454: step: 118/466, loss: 0.062361881136894226 2023-01-24 04:59:58.990738: step: 120/466, loss: 0.05839879810810089 2023-01-24 04:59:59.800110: step: 122/466, loss: 0.017225315794348717 2023-01-24 05:00:00.540485: step: 124/466, loss: 0.02745700441300869 2023-01-24 05:00:01.286869: step: 126/466, loss: 0.06077186018228531 2023-01-24 05:00:02.110977: step: 128/466, loss: 0.03275851905345917 2023-01-24 05:00:02.861968: step: 130/466, loss: 0.014129195362329483 2023-01-24 05:00:03.581033: step: 132/466, loss: 0.018745819106698036 2023-01-24 05:00:04.352843: step: 134/466, loss: 0.008557483553886414 2023-01-24 05:00:05.071807: step: 136/466, loss: 0.013914437033236027 2023-01-24 05:00:05.951926: step: 138/466, loss: 0.06996177136898041 2023-01-24 05:00:06.632256: step: 140/466, loss: 0.01704161800444126 2023-01-24 05:00:07.362472: step: 142/466, loss: 0.0320325568318367 2023-01-24 05:00:08.184504: step: 144/466, loss: 0.34453633427619934 2023-01-24 05:00:08.944328: step: 146/466, loss: 0.23159049451351166 2023-01-24 05:00:09.595376: step: 148/466, loss: 0.00032263854518532753 2023-01-24 05:00:10.298054: step: 150/466, loss: 0.3419460356235504 2023-01-24 05:00:11.139260: step: 152/466, loss: 0.15512454509735107 2023-01-24 05:00:11.872619: step: 154/466, loss: 0.05583605542778969 2023-01-24 05:00:12.606329: step: 156/466, loss: 0.0402413085103035 2023-01-24 05:00:13.373593: step: 158/466, loss: 0.021404564380645752 2023-01-24 05:00:14.132553: step: 160/466, loss: 0.7854284048080444 2023-01-24 05:00:14.884332: step: 162/466, loss: 0.03168287128210068 2023-01-24 05:00:15.596915: step: 164/466, loss: 0.038331855088472366 2023-01-24 05:00:16.293956: step: 166/466, loss: 0.0038885714020580053 2023-01-24 05:00:16.926028: step: 168/466, loss: 0.01354452408850193 2023-01-24 05:00:17.661272: step: 170/466, loss: 0.413261353969574 2023-01-24 05:00:18.485254: step: 172/466, loss: 0.016408804804086685 2023-01-24 05:00:19.257954: step: 174/466, loss: 0.028176261112093925 2023-01-24 05:00:20.116565: step: 176/466, loss: 0.039729684591293335 2023-01-24 05:00:20.978555: step: 178/466, loss: 0.02926718443632126 2023-01-24 05:00:21.865864: step: 180/466, loss: 0.003973286598920822 2023-01-24 05:00:22.532208: step: 182/466, loss: 0.014472606591880322 2023-01-24 05:00:23.327028: step: 184/466, loss: 0.0029143274296075106 2023-01-24 05:00:23.957694: step: 186/466, loss: 0.00688170874491334 2023-01-24 05:00:24.630628: step: 188/466, loss: 0.004136112052947283 2023-01-24 05:00:25.336515: step: 190/466, loss: 0.09447738528251648 2023-01-24 05:00:26.025061: step: 192/466, loss: 0.0017223696922883391 2023-01-24 05:00:26.696669: step: 194/466, loss: 0.017475929111242294 2023-01-24 05:00:27.659103: step: 196/466, loss: 0.2981652021408081 2023-01-24 05:00:28.445071: step: 198/466, loss: 0.05993760749697685 2023-01-24 05:00:29.152528: step: 200/466, loss: 0.011921750381588936 2023-01-24 05:00:29.823619: step: 202/466, loss: 0.008036823011934757 2023-01-24 05:00:30.593358: step: 204/466, loss: 0.0012072388781234622 2023-01-24 05:00:31.361015: step: 206/466, loss: 0.3928010165691376 2023-01-24 05:00:32.169805: step: 208/466, loss: 0.034359339624643326 2023-01-24 05:00:32.891996: step: 210/466, loss: 0.03412700816988945 2023-01-24 05:00:33.700096: step: 212/466, loss: 0.035888466984033585 2023-01-24 05:00:34.476160: step: 214/466, loss: 0.028900718316435814 2023-01-24 05:00:35.218101: step: 216/466, loss: 0.060817234218120575 2023-01-24 05:00:36.027717: step: 218/466, loss: 0.047735925763845444 2023-01-24 05:00:36.798191: step: 220/466, loss: 0.0510413721203804 2023-01-24 05:00:37.575535: step: 222/466, loss: 0.014088866300880909 2023-01-24 05:00:38.334851: step: 224/466, loss: 0.09515645354986191 2023-01-24 05:00:39.158507: step: 226/466, loss: 0.01028447411954403 2023-01-24 05:00:39.892354: step: 228/466, loss: 0.03016878291964531 2023-01-24 05:00:40.627955: step: 230/466, loss: 0.008594774641096592 2023-01-24 05:00:41.333396: step: 232/466, loss: 0.02591010369360447 2023-01-24 05:00:42.085949: step: 234/466, loss: 0.006289259530603886 2023-01-24 05:00:42.922489: step: 236/466, loss: 0.06279350072145462 2023-01-24 05:00:43.771835: step: 238/466, loss: 0.0070212590508162975 2023-01-24 05:00:44.452745: step: 240/466, loss: 0.010945099405944347 2023-01-24 05:00:45.206497: step: 242/466, loss: 0.012712801806628704 2023-01-24 05:00:45.937416: step: 244/466, loss: 0.007867317646741867 2023-01-24 05:00:46.762533: step: 246/466, loss: 0.0334496907889843 2023-01-24 05:00:47.560185: step: 248/466, loss: 0.004537621047347784 2023-01-24 05:00:48.388215: step: 250/466, loss: 0.03142962604761124 2023-01-24 05:00:49.053079: step: 252/466, loss: 0.0010505338432267308 2023-01-24 05:00:49.843325: step: 254/466, loss: 0.017321258783340454 2023-01-24 05:00:50.583108: step: 256/466, loss: 0.14158202707767487 2023-01-24 05:00:51.363358: step: 258/466, loss: 0.047040604054927826 2023-01-24 05:00:52.072364: step: 260/466, loss: 0.047133516520261765 2023-01-24 05:00:52.763878: step: 262/466, loss: 0.03368383273482323 2023-01-24 05:00:53.509219: step: 264/466, loss: 0.030273066833615303 2023-01-24 05:00:54.326522: step: 266/466, loss: 0.0032059112563729286 2023-01-24 05:00:55.058772: step: 268/466, loss: 0.004067208617925644 2023-01-24 05:00:55.813386: step: 270/466, loss: 0.012437507510185242 2023-01-24 05:00:56.655723: step: 272/466, loss: 0.12539102137088776 2023-01-24 05:00:57.429504: step: 274/466, loss: 0.02563825249671936 2023-01-24 05:00:58.168000: step: 276/466, loss: 0.0011749654076993465 2023-01-24 05:00:58.903189: step: 278/466, loss: 0.014036266133189201 2023-01-24 05:00:59.574145: step: 280/466, loss: 0.026236526668071747 2023-01-24 05:01:00.276917: step: 282/466, loss: 1.3633739948272705 2023-01-24 05:01:01.060198: step: 284/466, loss: 0.018139546737074852 2023-01-24 05:01:01.860690: step: 286/466, loss: 0.0016358124557882547 2023-01-24 05:01:02.603688: step: 288/466, loss: 0.09358922392129898 2023-01-24 05:01:03.344200: step: 290/466, loss: 0.022072920575737953 2023-01-24 05:01:04.207673: step: 292/466, loss: 0.008230620995163918 2023-01-24 05:01:04.964571: step: 294/466, loss: 0.03175661712884903 2023-01-24 05:01:05.657505: step: 296/466, loss: 0.0832146480679512 2023-01-24 05:01:06.372776: step: 298/466, loss: 0.006687816698104143 2023-01-24 05:01:07.086738: step: 300/466, loss: 0.00434772577136755 2023-01-24 05:01:07.768335: step: 302/466, loss: 0.1016339585185051 2023-01-24 05:01:08.447113: step: 304/466, loss: 0.0026131209451705217 2023-01-24 05:01:09.230655: step: 306/466, loss: 0.04954265058040619 2023-01-24 05:01:09.940904: step: 308/466, loss: 0.024544520303606987 2023-01-24 05:01:10.746171: step: 310/466, loss: 0.023840585723519325 2023-01-24 05:01:11.554407: step: 312/466, loss: 0.1760261058807373 2023-01-24 05:01:12.261283: step: 314/466, loss: 0.09610689431428909 2023-01-24 05:01:13.103268: step: 316/466, loss: 0.0886836126446724 2023-01-24 05:01:13.951842: step: 318/466, loss: 0.019128063693642616 2023-01-24 05:01:14.746903: step: 320/466, loss: 0.1219346672296524 2023-01-24 05:01:15.463817: step: 322/466, loss: 0.0004176282382104546 2023-01-24 05:01:16.250096: step: 324/466, loss: 0.01580335572361946 2023-01-24 05:01:16.960005: step: 326/466, loss: 0.0009826215682551265 2023-01-24 05:01:17.694352: step: 328/466, loss: 0.024065284058451653 2023-01-24 05:01:18.449115: step: 330/466, loss: 0.0028311621863394976 2023-01-24 05:01:19.239327: step: 332/466, loss: 0.04555172100663185 2023-01-24 05:01:20.028466: step: 334/466, loss: 0.12674173712730408 2023-01-24 05:01:20.767683: step: 336/466, loss: 0.03965069726109505 2023-01-24 05:01:21.569051: step: 338/466, loss: 0.03905881196260452 2023-01-24 05:01:22.290334: step: 340/466, loss: 0.011771049350500107 2023-01-24 05:01:22.949399: step: 342/466, loss: 0.0562756285071373 2023-01-24 05:01:23.691072: step: 344/466, loss: 0.01050628162920475 2023-01-24 05:01:24.480494: step: 346/466, loss: 0.15805859863758087 2023-01-24 05:01:25.236839: step: 348/466, loss: 0.030752331018447876 2023-01-24 05:01:26.008701: step: 350/466, loss: 0.016457442194223404 2023-01-24 05:01:26.882661: step: 352/466, loss: 0.024749215692281723 2023-01-24 05:01:27.690249: step: 354/466, loss: 0.03467196226119995 2023-01-24 05:01:28.410003: step: 356/466, loss: 0.055692195892333984 2023-01-24 05:01:29.178707: step: 358/466, loss: 0.05298139527440071 2023-01-24 05:01:29.875147: step: 360/466, loss: 0.020247722044587135 2023-01-24 05:01:30.665073: step: 362/466, loss: 0.009396249428391457 2023-01-24 05:01:31.431927: step: 364/466, loss: 0.022053968161344528 2023-01-24 05:01:32.129940: step: 366/466, loss: 0.021803874522447586 2023-01-24 05:01:32.939606: step: 368/466, loss: 0.02070603333413601 2023-01-24 05:01:33.629987: step: 370/466, loss: 0.013286711648106575 2023-01-24 05:01:34.295784: step: 372/466, loss: 0.012056940235197544 2023-01-24 05:01:35.073968: step: 374/466, loss: 0.17006126046180725 2023-01-24 05:01:35.842992: step: 376/466, loss: 0.026832759380340576 2023-01-24 05:01:36.599103: step: 378/466, loss: 0.010535070672631264 2023-01-24 05:01:37.370422: step: 380/466, loss: 0.02465672977268696 2023-01-24 05:01:38.156585: step: 382/466, loss: 0.031762540340423584 2023-01-24 05:01:38.898591: step: 384/466, loss: 0.017145009711384773 2023-01-24 05:01:39.699133: step: 386/466, loss: 0.024914514273405075 2023-01-24 05:01:40.432695: step: 388/466, loss: 0.036123715341091156 2023-01-24 05:01:41.162823: step: 390/466, loss: 0.009714765474200249 2023-01-24 05:01:41.934482: step: 392/466, loss: 0.017705464735627174 2023-01-24 05:01:42.687567: step: 394/466, loss: 0.02804849110543728 2023-01-24 05:01:43.548271: step: 396/466, loss: 0.05484087020158768 2023-01-24 05:01:44.230814: step: 398/466, loss: 0.03912174701690674 2023-01-24 05:01:44.991013: step: 400/466, loss: 0.020371561869978905 2023-01-24 05:01:45.768507: step: 402/466, loss: 0.02459416352212429 2023-01-24 05:01:46.510567: step: 404/466, loss: 0.006846928503364325 2023-01-24 05:01:47.279749: step: 406/466, loss: 0.007404484786093235 2023-01-24 05:01:47.997794: step: 408/466, loss: 0.022011689841747284 2023-01-24 05:01:48.730466: step: 410/466, loss: 0.0031958348117768764 2023-01-24 05:01:49.482669: step: 412/466, loss: 0.012686365284025669 2023-01-24 05:01:50.255336: step: 414/466, loss: 0.03906247019767761 2023-01-24 05:01:51.000415: step: 416/466, loss: 0.15825800597667694 2023-01-24 05:01:51.856336: step: 418/466, loss: 1.7678616046905518 2023-01-24 05:01:52.583753: step: 420/466, loss: 0.009127304889261723 2023-01-24 05:01:53.345839: step: 422/466, loss: 0.029808560386300087 2023-01-24 05:01:54.134527: step: 424/466, loss: 0.05525938421487808 2023-01-24 05:01:54.927134: step: 426/466, loss: 0.026894461363554 2023-01-24 05:01:55.719240: step: 428/466, loss: 0.1667776256799698 2023-01-24 05:01:56.420524: step: 430/466, loss: 0.02654326520860195 2023-01-24 05:01:57.144808: step: 432/466, loss: 0.004692245740443468 2023-01-24 05:01:57.937823: step: 434/466, loss: 0.053976550698280334 2023-01-24 05:01:58.761640: step: 436/466, loss: 0.03274490311741829 2023-01-24 05:01:59.465380: step: 438/466, loss: 0.026439087465405464 2023-01-24 05:02:00.245083: step: 440/466, loss: 0.056312721222639084 2023-01-24 05:02:00.905635: step: 442/466, loss: 0.025374772027134895 2023-01-24 05:02:01.680718: step: 444/466, loss: 0.0024430155754089355 2023-01-24 05:02:02.433064: step: 446/466, loss: 0.012750299647450447 2023-01-24 05:02:03.208791: step: 448/466, loss: 0.31890374422073364 2023-01-24 05:02:03.963686: step: 450/466, loss: 0.0671519860625267 2023-01-24 05:02:04.781055: step: 452/466, loss: 0.07847802340984344 2023-01-24 05:02:05.546933: step: 454/466, loss: 0.04167680814862251 2023-01-24 05:02:06.250857: step: 456/466, loss: 0.010705829598009586 2023-01-24 05:02:07.024633: step: 458/466, loss: 0.053796254098415375 2023-01-24 05:02:07.728752: step: 460/466, loss: 0.00658042635768652 2023-01-24 05:02:08.514358: step: 462/466, loss: 0.017308924347162247 2023-01-24 05:02:09.256097: step: 464/466, loss: 0.034351151436567307 2023-01-24 05:02:10.027231: step: 466/466, loss: 0.00565611245110631 2023-01-24 05:02:10.821782: step: 468/466, loss: 0.061411306262016296 2023-01-24 05:02:11.551532: step: 470/466, loss: 0.002887856913730502 2023-01-24 05:02:12.264198: step: 472/466, loss: 0.04241650179028511 2023-01-24 05:02:13.011300: step: 474/466, loss: 0.04883456975221634 2023-01-24 05:02:13.750004: step: 476/466, loss: 0.012651508674025536 2023-01-24 05:02:14.479746: step: 478/466, loss: 0.012203642167150974 2023-01-24 05:02:15.297679: step: 480/466, loss: 0.024727782234549522 2023-01-24 05:02:15.948596: step: 482/466, loss: 0.012281586416065693 2023-01-24 05:02:16.653508: step: 484/466, loss: 0.0012142674531787634 2023-01-24 05:02:17.392244: step: 486/466, loss: 0.06945552676916122 2023-01-24 05:02:18.109315: step: 488/466, loss: 0.04210897535085678 2023-01-24 05:02:18.935380: step: 490/466, loss: 0.3636569678783417 2023-01-24 05:02:19.691840: step: 492/466, loss: 0.027368493378162384 2023-01-24 05:02:20.404532: step: 494/466, loss: 0.012000566348433495 2023-01-24 05:02:21.164060: step: 496/466, loss: 0.0015399146359413862 2023-01-24 05:02:21.834699: step: 498/466, loss: 0.09635155647993088 2023-01-24 05:02:22.551212: step: 500/466, loss: 0.060200709849596024 2023-01-24 05:02:23.264051: step: 502/466, loss: 0.03938752040266991 2023-01-24 05:02:23.996368: step: 504/466, loss: 0.006137209013104439 2023-01-24 05:02:24.849472: step: 506/466, loss: 0.04328594356775284 2023-01-24 05:02:25.615033: step: 508/466, loss: 0.05471671745181084 2023-01-24 05:02:26.401741: step: 510/466, loss: 0.07688561826944351 2023-01-24 05:02:27.185205: step: 512/466, loss: 0.05143206939101219 2023-01-24 05:02:27.982213: step: 514/466, loss: 0.023206721991300583 2023-01-24 05:02:28.823292: step: 516/466, loss: 0.9250187277793884 2023-01-24 05:02:29.707600: step: 518/466, loss: 0.017074864357709885 2023-01-24 05:02:30.519028: step: 520/466, loss: 0.005635548382997513 2023-01-24 05:02:31.318039: step: 522/466, loss: 0.021923230960965157 2023-01-24 05:02:32.128547: step: 524/466, loss: 0.023732662200927734 2023-01-24 05:02:32.822008: step: 526/466, loss: 0.05662060156464577 2023-01-24 05:02:33.613380: step: 528/466, loss: 0.010736081749200821 2023-01-24 05:02:34.391629: step: 530/466, loss: 0.05972069129347801 2023-01-24 05:02:35.089371: step: 532/466, loss: 0.012271245010197163 2023-01-24 05:02:35.782193: step: 534/466, loss: 0.011294533498585224 2023-01-24 05:02:36.571283: step: 536/466, loss: 0.011247357353568077 2023-01-24 05:02:37.299665: step: 538/466, loss: 0.06178859621286392 2023-01-24 05:02:38.021114: step: 540/466, loss: 0.14391258358955383 2023-01-24 05:02:38.807751: step: 542/466, loss: 0.05927535891532898 2023-01-24 05:02:39.552435: step: 544/466, loss: 0.5981643795967102 2023-01-24 05:02:40.432233: step: 546/466, loss: 0.017532022669911385 2023-01-24 05:02:41.196174: step: 548/466, loss: 0.029608314856886864 2023-01-24 05:02:41.920650: step: 550/466, loss: 0.03293095901608467 2023-01-24 05:02:42.639359: step: 552/466, loss: 0.07986298203468323 2023-01-24 05:02:43.376617: step: 554/466, loss: 0.07265043258666992 2023-01-24 05:02:44.153641: step: 556/466, loss: 0.07248475402593613 2023-01-24 05:02:45.145921: step: 558/466, loss: 0.011308040469884872 2023-01-24 05:02:45.894892: step: 560/466, loss: 0.020601406693458557 2023-01-24 05:02:46.703958: step: 562/466, loss: 0.08716525137424469 2023-01-24 05:02:47.378698: step: 564/466, loss: 0.00024699614732526243 2023-01-24 05:02:48.104377: step: 566/466, loss: 0.057305797934532166 2023-01-24 05:02:48.849765: step: 568/466, loss: 0.029795540496706963 2023-01-24 05:02:49.523761: step: 570/466, loss: 0.03908243775367737 2023-01-24 05:02:50.279074: step: 572/466, loss: 0.016558783128857613 2023-01-24 05:02:51.092423: step: 574/466, loss: 0.6643418073654175 2023-01-24 05:02:51.918580: step: 576/466, loss: 0.10121889412403107 2023-01-24 05:02:52.752918: step: 578/466, loss: 0.021568842232227325 2023-01-24 05:02:53.530237: step: 580/466, loss: 0.09610091149806976 2023-01-24 05:02:54.284298: step: 582/466, loss: 0.0005419608787633479 2023-01-24 05:02:55.073627: step: 584/466, loss: 0.004922699648886919 2023-01-24 05:02:55.818399: step: 586/466, loss: 0.04917950928211212 2023-01-24 05:02:56.575562: step: 588/466, loss: 0.03489411249756813 2023-01-24 05:02:57.324935: step: 590/466, loss: 0.0357198603451252 2023-01-24 05:02:58.004459: step: 592/466, loss: 0.003807668574154377 2023-01-24 05:02:58.697094: step: 594/466, loss: 0.03101886436343193 2023-01-24 05:02:59.402713: step: 596/466, loss: 0.007635398767888546 2023-01-24 05:03:00.267590: step: 598/466, loss: 0.08052492141723633 2023-01-24 05:03:01.037673: step: 600/466, loss: 0.0572916604578495 2023-01-24 05:03:01.756136: step: 602/466, loss: 0.0198881383985281 2023-01-24 05:03:02.593882: step: 604/466, loss: 0.04244111105799675 2023-01-24 05:03:03.388277: step: 606/466, loss: 0.014936062507331371 2023-01-24 05:03:04.130551: step: 608/466, loss: 0.007002471946179867 2023-01-24 05:03:04.938896: step: 610/466, loss: 0.015491640195250511 2023-01-24 05:03:05.721979: step: 612/466, loss: 0.02755819819867611 2023-01-24 05:03:06.451321: step: 614/466, loss: 0.0076740216463804245 2023-01-24 05:03:07.267568: step: 616/466, loss: 0.02576759085059166 2023-01-24 05:03:07.988184: step: 618/466, loss: 0.026631657034158707 2023-01-24 05:03:08.766213: step: 620/466, loss: 0.0049104285426437855 2023-01-24 05:03:09.507049: step: 622/466, loss: 0.0017827756237238646 2023-01-24 05:03:10.279370: step: 624/466, loss: 0.039846137166023254 2023-01-24 05:03:11.071714: step: 626/466, loss: 0.0736478939652443 2023-01-24 05:03:11.808935: step: 628/466, loss: 0.026989759877324104 2023-01-24 05:03:12.608297: step: 630/466, loss: 0.0672062411904335 2023-01-24 05:03:13.348320: step: 632/466, loss: 0.03965132683515549 2023-01-24 05:03:14.086360: step: 634/466, loss: 0.047246597707271576 2023-01-24 05:03:14.850604: step: 636/466, loss: 0.012512200511991978 2023-01-24 05:03:15.641473: step: 638/466, loss: 0.07491574436426163 2023-01-24 05:03:16.450587: step: 640/466, loss: 0.21360142529010773 2023-01-24 05:03:17.200014: step: 642/466, loss: 0.021787557750940323 2023-01-24 05:03:17.922969: step: 644/466, loss: 0.08531290292739868 2023-01-24 05:03:18.753595: step: 646/466, loss: 0.09931932389736176 2023-01-24 05:03:19.524415: step: 648/466, loss: 0.04829653725028038 2023-01-24 05:03:20.278302: step: 650/466, loss: 0.028179535642266273 2023-01-24 05:03:21.052088: step: 652/466, loss: 0.06829191744327545 2023-01-24 05:03:21.887427: step: 654/466, loss: 0.007168058305978775 2023-01-24 05:03:22.672690: step: 656/466, loss: 0.01279025711119175 2023-01-24 05:03:23.396938: step: 658/466, loss: 0.0425628125667572 2023-01-24 05:03:24.199135: step: 660/466, loss: 0.025735652074217796 2023-01-24 05:03:25.009405: step: 662/466, loss: 0.03173866868019104 2023-01-24 05:03:25.790722: step: 664/466, loss: 0.03260715678334236 2023-01-24 05:03:26.500711: step: 666/466, loss: 0.02390960231423378 2023-01-24 05:03:27.284538: step: 668/466, loss: 0.02179085463285446 2023-01-24 05:03:28.093933: step: 670/466, loss: 0.014968041330575943 2023-01-24 05:03:28.802342: step: 672/466, loss: 0.02386454865336418 2023-01-24 05:03:29.477636: step: 674/466, loss: 0.10030235350131989 2023-01-24 05:03:30.209529: step: 676/466, loss: 0.008777577430009842 2023-01-24 05:03:31.000436: step: 678/466, loss: 0.028794730082154274 2023-01-24 05:03:31.757203: step: 680/466, loss: 0.021851878613233566 2023-01-24 05:03:32.558237: step: 682/466, loss: 0.020717613399028778 2023-01-24 05:03:33.303707: step: 684/466, loss: 0.026879925280809402 2023-01-24 05:03:34.147267: step: 686/466, loss: 0.029374847188591957 2023-01-24 05:03:34.943444: step: 688/466, loss: 0.0072135343216359615 2023-01-24 05:03:35.724017: step: 690/466, loss: 0.049425311386585236 2023-01-24 05:03:36.530624: step: 692/466, loss: 0.02336275950074196 2023-01-24 05:03:37.290888: step: 694/466, loss: 0.43844372034072876 2023-01-24 05:03:37.993790: step: 696/466, loss: 0.004980398342013359 2023-01-24 05:03:38.775119: step: 698/466, loss: 0.03083825670182705 2023-01-24 05:03:39.566782: step: 700/466, loss: 0.014827440492808819 2023-01-24 05:03:40.293633: step: 702/466, loss: 0.03472888842225075 2023-01-24 05:03:41.061123: step: 704/466, loss: 0.11552385240793228 2023-01-24 05:03:41.910369: step: 706/466, loss: 0.012255324050784111 2023-01-24 05:03:42.615956: step: 708/466, loss: 0.008407890796661377 2023-01-24 05:03:43.360466: step: 710/466, loss: 0.07686987519264221 2023-01-24 05:03:44.165345: step: 712/466, loss: 0.03082488477230072 2023-01-24 05:03:44.880249: step: 714/466, loss: 0.02362615428864956 2023-01-24 05:03:45.636988: step: 716/466, loss: 0.04834354668855667 2023-01-24 05:03:46.482740: step: 718/466, loss: 0.012062999419867992 2023-01-24 05:03:47.232309: step: 720/466, loss: 0.02275286428630352 2023-01-24 05:03:47.957292: step: 722/466, loss: 0.15338103473186493 2023-01-24 05:03:48.641743: step: 724/466, loss: 0.013424837961792946 2023-01-24 05:03:49.449953: step: 726/466, loss: 0.09174531698226929 2023-01-24 05:03:50.265473: step: 728/466, loss: 0.05888718366622925 2023-01-24 05:03:51.022510: step: 730/466, loss: 0.014505615457892418 2023-01-24 05:03:51.885585: step: 732/466, loss: 0.01789144054055214 2023-01-24 05:03:52.719112: step: 734/466, loss: 0.011775941587984562 2023-01-24 05:03:53.440745: step: 736/466, loss: 0.000976826879195869 2023-01-24 05:03:54.154423: step: 738/466, loss: 0.00819521676748991 2023-01-24 05:03:54.986748: step: 740/466, loss: 0.37824034690856934 2023-01-24 05:03:55.804833: step: 742/466, loss: 0.008182219229638577 2023-01-24 05:03:56.610908: step: 744/466, loss: 0.002007798058912158 2023-01-24 05:03:57.379335: step: 746/466, loss: 0.0034525133669376373 2023-01-24 05:03:58.076720: step: 748/466, loss: 0.011354020796716213 2023-01-24 05:03:58.831688: step: 750/466, loss: 0.010496980510652065 2023-01-24 05:03:59.615263: step: 752/466, loss: 0.040494054555892944 2023-01-24 05:04:00.433943: step: 754/466, loss: 0.02089555747807026 2023-01-24 05:04:01.225887: step: 756/466, loss: 0.04060179740190506 2023-01-24 05:04:02.123519: step: 758/466, loss: 0.04249696433544159 2023-01-24 05:04:02.886815: step: 760/466, loss: 0.017529740929603577 2023-01-24 05:04:03.680063: step: 762/466, loss: 0.03140799328684807 2023-01-24 05:04:04.371745: step: 764/466, loss: 0.026097454130649567 2023-01-24 05:04:05.142207: step: 766/466, loss: 0.008137117139995098 2023-01-24 05:04:05.899590: step: 768/466, loss: 0.03194596618413925 2023-01-24 05:04:06.712735: step: 770/466, loss: 0.03512399643659592 2023-01-24 05:04:07.444483: step: 772/466, loss: 0.08083723485469818 2023-01-24 05:04:08.271857: step: 774/466, loss: 0.10024670511484146 2023-01-24 05:04:09.130951: step: 776/466, loss: 0.01750977709889412 2023-01-24 05:04:09.852969: step: 778/466, loss: 0.0022526816464960575 2023-01-24 05:04:10.522698: step: 780/466, loss: 0.002027718350291252 2023-01-24 05:04:11.255974: step: 782/466, loss: 0.015744207426905632 2023-01-24 05:04:11.985163: step: 784/466, loss: 0.0060505992732942104 2023-01-24 05:04:12.619857: step: 786/466, loss: 0.0058455681428313255 2023-01-24 05:04:13.404580: step: 788/466, loss: 0.0794205516576767 2023-01-24 05:04:14.096311: step: 790/466, loss: 0.05062644183635712 2023-01-24 05:04:14.857781: step: 792/466, loss: 0.10680859535932541 2023-01-24 05:04:15.689242: step: 794/466, loss: 0.4264127016067505 2023-01-24 05:04:16.451913: step: 796/466, loss: 0.04735902324318886 2023-01-24 05:04:17.258623: step: 798/466, loss: 0.29138079285621643 2023-01-24 05:04:17.999384: step: 800/466, loss: 0.06728032231330872 2023-01-24 05:04:18.826408: step: 802/466, loss: 0.0150164058431983 2023-01-24 05:04:19.638300: step: 804/466, loss: 0.03291695564985275 2023-01-24 05:04:20.447547: step: 806/466, loss: 0.030138498172163963 2023-01-24 05:04:21.102443: step: 808/466, loss: 0.022056685760617256 2023-01-24 05:04:21.819206: step: 810/466, loss: 0.0335700586438179 2023-01-24 05:04:22.619311: step: 812/466, loss: 0.028037378564476967 2023-01-24 05:04:23.360201: step: 814/466, loss: 0.20931771397590637 2023-01-24 05:04:24.185625: step: 816/466, loss: 0.15887659788131714 2023-01-24 05:04:24.972380: step: 818/466, loss: 0.05127459764480591 2023-01-24 05:04:25.778200: step: 820/466, loss: 0.025693120434880257 2023-01-24 05:04:26.528641: step: 822/466, loss: 0.05340283364057541 2023-01-24 05:04:27.343382: step: 824/466, loss: 0.08578217029571533 2023-01-24 05:04:28.151548: step: 826/466, loss: 0.15850238502025604 2023-01-24 05:04:28.888071: step: 828/466, loss: 0.0478825606405735 2023-01-24 05:04:29.663689: step: 830/466, loss: 0.013694366440176964 2023-01-24 05:04:30.411972: step: 832/466, loss: 0.205677792429924 2023-01-24 05:04:31.185070: step: 834/466, loss: 0.20604534447193146 2023-01-24 05:04:31.996799: step: 836/466, loss: 0.0973101332783699 2023-01-24 05:04:32.845916: step: 838/466, loss: 0.1259995847940445 2023-01-24 05:04:33.559228: step: 840/466, loss: 0.006384965963661671 2023-01-24 05:04:34.319109: step: 842/466, loss: 0.22864051163196564 2023-01-24 05:04:35.150556: step: 844/466, loss: 0.015518547967076302 2023-01-24 05:04:35.871820: step: 846/466, loss: 0.02271226979792118 2023-01-24 05:04:36.624365: step: 848/466, loss: 0.11442562937736511 2023-01-24 05:04:37.307447: step: 850/466, loss: 0.08857929706573486 2023-01-24 05:04:38.057231: step: 852/466, loss: 0.021136639639735222 2023-01-24 05:04:38.845593: step: 854/466, loss: 0.02740650251507759 2023-01-24 05:04:39.538608: step: 856/466, loss: 0.02360440418124199 2023-01-24 05:04:40.310957: step: 858/466, loss: 0.09424004703760147 2023-01-24 05:04:41.068112: step: 860/466, loss: 0.006110194604843855 2023-01-24 05:04:41.841486: step: 862/466, loss: 0.04518682882189751 2023-01-24 05:04:42.625863: step: 864/466, loss: 0.03704385831952095 2023-01-24 05:04:43.416018: step: 866/466, loss: 0.024167396128177643 2023-01-24 05:04:44.202910: step: 868/466, loss: 0.04980127885937691 2023-01-24 05:04:44.934105: step: 870/466, loss: 0.003607484046369791 2023-01-24 05:04:45.674858: step: 872/466, loss: 0.03922824189066887 2023-01-24 05:04:46.434557: step: 874/466, loss: 0.03403126075863838 2023-01-24 05:04:47.215893: step: 876/466, loss: 0.02481072209775448 2023-01-24 05:04:47.932526: step: 878/466, loss: 0.012723129242658615 2023-01-24 05:04:48.757779: step: 880/466, loss: 0.023607250303030014 2023-01-24 05:04:49.486606: step: 882/466, loss: 0.01605868898332119 2023-01-24 05:04:50.146604: step: 884/466, loss: 0.017645277082920074 2023-01-24 05:04:50.909151: step: 886/466, loss: 0.02243782766163349 2023-01-24 05:04:51.644684: step: 888/466, loss: 0.40102073550224304 2023-01-24 05:04:52.369391: step: 890/466, loss: 0.03958764299750328 2023-01-24 05:04:53.105420: step: 892/466, loss: 0.006702665239572525 2023-01-24 05:04:53.860439: step: 894/466, loss: 0.025415126234292984 2023-01-24 05:04:54.594617: step: 896/466, loss: 0.13924889266490936 2023-01-24 05:04:55.386064: step: 898/466, loss: 0.00555342948064208 2023-01-24 05:04:56.128957: step: 900/466, loss: 0.012884487397968769 2023-01-24 05:04:56.839804: step: 902/466, loss: 1.2119906386942603e-05 2023-01-24 05:04:57.569469: step: 904/466, loss: 0.25289541482925415 2023-01-24 05:04:58.312381: step: 906/466, loss: 0.023039881139993668 2023-01-24 05:04:59.091478: step: 908/466, loss: 0.027683792635798454 2023-01-24 05:04:59.762105: step: 910/466, loss: 0.07846502214670181 2023-01-24 05:05:00.453746: step: 912/466, loss: 0.003875490976497531 2023-01-24 05:05:01.178196: step: 914/466, loss: 0.0930880457162857 2023-01-24 05:05:02.155430: step: 916/466, loss: 0.011779635213315487 2023-01-24 05:05:02.950071: step: 918/466, loss: 0.039599090814590454 2023-01-24 05:05:03.670173: step: 920/466, loss: 0.002111016307026148 2023-01-24 05:05:04.425185: step: 922/466, loss: 0.007790920324623585 2023-01-24 05:05:05.175056: step: 924/466, loss: 0.0693824514746666 2023-01-24 05:05:05.987601: step: 926/466, loss: 0.13453641533851624 2023-01-24 05:05:06.788694: step: 928/466, loss: 0.16406899690628052 2023-01-24 05:05:07.578870: step: 930/466, loss: 0.04843205586075783 2023-01-24 05:05:08.250952: step: 932/466, loss: 0.07312513887882233 ================================================== Loss: 0.062 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3423491591025188, 'r': 0.32415982996614207, 'f1': 0.33300629706073465}, 'combined': 0.24537306099212025, 'epoch': 24} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3689822347533593, 'r': 0.28592129879156414, 'f1': 0.3221844879065918}, 'combined': 0.1980255876889296, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3307790044922398, 'r': 0.34270462325002454, 'f1': 0.33663622824373335}, 'combined': 0.2480477471269614, 'epoch': 24} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35042971162326153, 'r': 0.2839270193827986, 'f1': 0.3136924656464812}, 'combined': 0.19280610083637378, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35474007464701635, 'r': 0.33454614250392245, 'f1': 0.34434729902259203}, 'combined': 0.25372958875348883, 'epoch': 24} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37272456448848196, 'r': 0.2872076730690467, 'f1': 0.32442529329559794}, 'combined': 0.200380328211987, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32575757575757575, 'r': 0.30714285714285716, 'f1': 0.31617647058823534}, 'combined': 0.21078431372549022, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.358695652173913, 'f1': 0.2946428571428571}, 'combined': 0.14732142857142855, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.13793103448275862, 'f1': 0.2162162162162162}, 'combined': 0.14414414414414412, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3514281426856457, 'r': 0.3267548195748888, 'f1': 0.33864265470199884}, 'combined': 0.24952616662252544, 'epoch': 23} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3841715799217045, 'r': 0.28937599526569946, 'f1': 0.3301029871919831}, 'combined': 0.20289256773751158, 'epoch': 23} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3642241379310345, 'r': 0.30178571428571427, 'f1': 0.33007812499999994}, 'combined': 0.2200520833333333, 'epoch': 23} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34582504970178923, 'r': 0.33007590132827325, 'f1': 0.3377669902912621}, 'combined': 0.24888094021461418, 'epoch': 19} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36038315043942815, 'r': 0.2939803727200525, 'f1': 0.3238125877697768}, 'combined': 0.20000189244603864, 'epoch': 19} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 19} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:07:52.186149: step: 2/466, loss: 0.0048638260923326015 2023-01-24 05:07:52.879020: step: 4/466, loss: 1.9489420652389526 2023-01-24 05:07:53.638372: step: 6/466, loss: 0.01325779128819704 2023-01-24 05:07:54.546156: step: 8/466, loss: 0.06886614114046097 2023-01-24 05:07:55.205059: step: 10/466, loss: 0.0005495556979440153 2023-01-24 05:07:55.931177: step: 12/466, loss: 0.0020770521368831396 2023-01-24 05:07:56.741291: step: 14/466, loss: 0.009379208087921143 2023-01-24 05:07:57.510894: step: 16/466, loss: 0.044607535004615784 2023-01-24 05:07:58.267982: step: 18/466, loss: 0.05780674144625664 2023-01-24 05:07:58.981848: step: 20/466, loss: 0.0162581168115139 2023-01-24 05:07:59.743436: step: 22/466, loss: 0.008655939251184464 2023-01-24 05:08:00.728397: step: 24/466, loss: 0.0031684592831879854 2023-01-24 05:08:01.453250: step: 26/466, loss: 0.01678566262125969 2023-01-24 05:08:02.245404: step: 28/466, loss: 0.013557526282966137 2023-01-24 05:08:02.963072: step: 30/466, loss: 0.0748433992266655 2023-01-24 05:08:03.649640: step: 32/466, loss: 0.009114764630794525 2023-01-24 05:08:04.457490: step: 34/466, loss: 0.02550913766026497 2023-01-24 05:08:05.141222: step: 36/466, loss: 0.004271187819540501 2023-01-24 05:08:05.890249: step: 38/466, loss: 0.009756631217896938 2023-01-24 05:08:06.622619: step: 40/466, loss: 0.044250279664993286 2023-01-24 05:08:07.399131: step: 42/466, loss: 0.030406568199396133 2023-01-24 05:08:08.045160: step: 44/466, loss: 0.04159717634320259 2023-01-24 05:08:08.724902: step: 46/466, loss: 0.03348518908023834 2023-01-24 05:08:09.534833: step: 48/466, loss: 0.0271090529859066 2023-01-24 05:08:10.253287: step: 50/466, loss: 0.04847847297787666 2023-01-24 05:08:10.995550: step: 52/466, loss: 0.003952601924538612 2023-01-24 05:08:11.704303: step: 54/466, loss: 0.0021677513141185045 2023-01-24 05:08:12.428877: step: 56/466, loss: 0.004650316201150417 2023-01-24 05:08:13.169262: step: 58/466, loss: 0.13764320313930511 2023-01-24 05:08:13.910225: step: 60/466, loss: 0.023560674861073494 2023-01-24 05:08:14.755887: step: 62/466, loss: 0.01893126778304577 2023-01-24 05:08:15.530993: step: 64/466, loss: 0.1631479263305664 2023-01-24 05:08:16.272394: step: 66/466, loss: 0.004469368141144514 2023-01-24 05:08:16.969523: step: 68/466, loss: 0.05100572481751442 2023-01-24 05:08:17.689874: step: 70/466, loss: 0.0174653809517622 2023-01-24 05:08:18.466150: step: 72/466, loss: 0.01131537463515997 2023-01-24 05:08:19.156477: step: 74/466, loss: 0.014669515192508698 2023-01-24 05:08:19.979883: step: 76/466, loss: 0.39461952447891235 2023-01-24 05:08:20.640072: step: 78/466, loss: 0.004725391045212746 2023-01-24 05:08:21.412869: step: 80/466, loss: 0.16633807122707367 2023-01-24 05:08:22.141827: step: 82/466, loss: 0.03558366745710373 2023-01-24 05:08:22.874377: step: 84/466, loss: 0.03334732726216316 2023-01-24 05:08:23.648433: step: 86/466, loss: 0.01573135145008564 2023-01-24 05:08:24.391413: step: 88/466, loss: 0.01893058978021145 2023-01-24 05:08:25.111186: step: 90/466, loss: 0.05342453345656395 2023-01-24 05:08:25.855878: step: 92/466, loss: 0.004875754471868277 2023-01-24 05:08:26.596587: step: 94/466, loss: 0.02153726853430271 2023-01-24 05:08:27.347215: step: 96/466, loss: 0.021911753341555595 2023-01-24 05:08:28.151190: step: 98/466, loss: 0.001786972163245082 2023-01-24 05:08:28.831062: step: 100/466, loss: 0.0024792884942144156 2023-01-24 05:08:29.637717: step: 102/466, loss: 0.008267298340797424 2023-01-24 05:08:30.532086: step: 104/466, loss: 0.011938805691897869 2023-01-24 05:08:31.217799: step: 106/466, loss: 0.01350637711584568 2023-01-24 05:08:31.985715: step: 108/466, loss: 0.014575090259313583 2023-01-24 05:08:32.748927: step: 110/466, loss: 0.007169341668486595 2023-01-24 05:08:33.554605: step: 112/466, loss: 0.010071353055536747 2023-01-24 05:08:34.282105: step: 114/466, loss: 0.01568801887333393 2023-01-24 05:08:35.104439: step: 116/466, loss: 0.00023800335475243628 2023-01-24 05:08:35.829264: step: 118/466, loss: 0.0017856850754469633 2023-01-24 05:08:36.668982: step: 120/466, loss: 0.015911098569631577 2023-01-24 05:08:37.467677: step: 122/466, loss: 0.005675219465047121 2023-01-24 05:08:38.187939: step: 124/466, loss: 0.001905403914861381 2023-01-24 05:08:38.893138: step: 126/466, loss: 0.03077961876988411 2023-01-24 05:08:39.644847: step: 128/466, loss: 0.024013573303818703 2023-01-24 05:08:40.358959: step: 130/466, loss: 0.010623539797961712 2023-01-24 05:08:41.131513: step: 132/466, loss: 0.018636681139469147 2023-01-24 05:08:41.903556: step: 134/466, loss: 0.05404209718108177 2023-01-24 05:08:42.702312: step: 136/466, loss: 0.1491851508617401 2023-01-24 05:08:43.399317: step: 138/466, loss: 0.13492342829704285 2023-01-24 05:08:44.201687: step: 140/466, loss: 0.04337337613105774 2023-01-24 05:08:45.002541: step: 142/466, loss: 0.021991444751620293 2023-01-24 05:08:45.746906: step: 144/466, loss: 0.02849423885345459 2023-01-24 05:08:46.502000: step: 146/466, loss: 0.048355210572481155 2023-01-24 05:08:47.218096: step: 148/466, loss: 0.011831236071884632 2023-01-24 05:08:47.961866: step: 150/466, loss: 0.026754720136523247 2023-01-24 05:08:48.673607: step: 152/466, loss: 0.003418155713006854 2023-01-24 05:08:49.405788: step: 154/466, loss: 0.0065348828211426735 2023-01-24 05:08:50.255273: step: 156/466, loss: 0.034028515219688416 2023-01-24 05:08:51.043255: step: 158/466, loss: 0.00662602111697197 2023-01-24 05:08:51.729349: step: 160/466, loss: 0.12418445199728012 2023-01-24 05:08:52.517766: step: 162/466, loss: 0.006915715057402849 2023-01-24 05:08:53.362419: step: 164/466, loss: 0.01204296387732029 2023-01-24 05:08:54.169740: step: 166/466, loss: 0.04335113987326622 2023-01-24 05:08:54.900041: step: 168/466, loss: 0.06921584904193878 2023-01-24 05:08:55.716936: step: 170/466, loss: 0.002505009062588215 2023-01-24 05:08:56.505523: step: 172/466, loss: 0.02193089760839939 2023-01-24 05:08:57.256506: step: 174/466, loss: 0.001341413240879774 2023-01-24 05:08:57.940183: step: 176/466, loss: 0.029483824968338013 2023-01-24 05:08:58.680302: step: 178/466, loss: 0.0035928364377468824 2023-01-24 05:08:59.481310: step: 180/466, loss: 0.0035043051466345787 2023-01-24 05:09:00.308522: step: 182/466, loss: 0.01978055201470852 2023-01-24 05:09:01.180085: step: 184/466, loss: 0.2719780504703522 2023-01-24 05:09:01.951869: step: 186/466, loss: 0.01555517129600048 2023-01-24 05:09:02.831353: step: 188/466, loss: 0.02810639515519142 2023-01-24 05:09:03.615504: step: 190/466, loss: 0.01859775185585022 2023-01-24 05:09:04.385290: step: 192/466, loss: 0.024280639365315437 2023-01-24 05:09:05.142155: step: 194/466, loss: 0.000761770352255553 2023-01-24 05:09:05.943829: step: 196/466, loss: 0.08165077865123749 2023-01-24 05:09:06.657953: step: 198/466, loss: 0.0318489633500576 2023-01-24 05:09:07.354337: step: 200/466, loss: 0.0034272021148353815 2023-01-24 05:09:08.111840: step: 202/466, loss: 0.006649025250226259 2023-01-24 05:09:08.919497: step: 204/466, loss: 0.028749840334057808 2023-01-24 05:09:09.737582: step: 206/466, loss: 0.020333321765065193 2023-01-24 05:09:10.493224: step: 208/466, loss: 0.015936290845274925 2023-01-24 05:09:11.178119: step: 210/466, loss: 0.37533944845199585 2023-01-24 05:09:11.992433: step: 212/466, loss: 0.03870442137122154 2023-01-24 05:09:12.759400: step: 214/466, loss: 0.07513385266065598 2023-01-24 05:09:13.537214: step: 216/466, loss: 0.035680994391441345 2023-01-24 05:09:14.238314: step: 218/466, loss: 0.044503144919872284 2023-01-24 05:09:15.076850: step: 220/466, loss: 0.011080436408519745 2023-01-24 05:09:15.879029: step: 222/466, loss: 0.08355198055505753 2023-01-24 05:09:16.655297: step: 224/466, loss: 0.005143460351973772 2023-01-24 05:09:17.374410: step: 226/466, loss: 0.05669359862804413 2023-01-24 05:09:18.143866: step: 228/466, loss: 0.007362133823335171 2023-01-24 05:09:18.911217: step: 230/466, loss: 0.1185983419418335 2023-01-24 05:09:19.681532: step: 232/466, loss: 0.019225867465138435 2023-01-24 05:09:20.437645: step: 234/466, loss: 0.0384087935090065 2023-01-24 05:09:21.197182: step: 236/466, loss: 0.009993866086006165 2023-01-24 05:09:21.979912: step: 238/466, loss: 0.007030665874481201 2023-01-24 05:09:22.687970: step: 240/466, loss: 0.002258468419313431 2023-01-24 05:09:23.552567: step: 242/466, loss: 0.005683743394911289 2023-01-24 05:09:24.311315: step: 244/466, loss: 0.0037180185317993164 2023-01-24 05:09:25.122352: step: 246/466, loss: 0.07294444739818573 2023-01-24 05:09:25.786487: step: 248/466, loss: 0.002524001756682992 2023-01-24 05:09:26.541953: step: 250/466, loss: 0.017223449423909187 2023-01-24 05:09:27.258395: step: 252/466, loss: 0.03425934538245201 2023-01-24 05:09:28.093100: step: 254/466, loss: 0.0761067345738411 2023-01-24 05:09:28.885458: step: 256/466, loss: 0.0428534634411335 2023-01-24 05:09:29.583733: step: 258/466, loss: 0.0031506107188761234 2023-01-24 05:09:30.481117: step: 260/466, loss: 0.04430555924773216 2023-01-24 05:09:31.207041: step: 262/466, loss: 0.04317544400691986 2023-01-24 05:09:32.005891: step: 264/466, loss: 0.018620461225509644 2023-01-24 05:09:32.805952: step: 266/466, loss: 0.015939462929964066 2023-01-24 05:09:33.518933: step: 268/466, loss: 0.013469705358147621 2023-01-24 05:09:34.289927: step: 270/466, loss: 0.031078225001692772 2023-01-24 05:09:35.095372: step: 272/466, loss: 0.038427598774433136 2023-01-24 05:09:35.881465: step: 274/466, loss: 0.001793315983377397 2023-01-24 05:09:36.639544: step: 276/466, loss: 0.010245811194181442 2023-01-24 05:09:37.335070: step: 278/466, loss: 0.009740750305354595 2023-01-24 05:09:38.058698: step: 280/466, loss: 0.032210443168878555 2023-01-24 05:09:38.904890: step: 282/466, loss: 0.30516213178634644 2023-01-24 05:09:39.666453: step: 284/466, loss: 0.05225621536374092 2023-01-24 05:09:40.437652: step: 286/466, loss: 0.0587480403482914 2023-01-24 05:09:41.195876: step: 288/466, loss: 0.004880858585238457 2023-01-24 05:09:41.965446: step: 290/466, loss: 0.001988980220630765 2023-01-24 05:09:42.643930: step: 292/466, loss: 0.00037727158633060753 2023-01-24 05:09:43.419431: step: 294/466, loss: 0.010188672691583633 2023-01-24 05:09:44.161227: step: 296/466, loss: 0.023995572701096535 2023-01-24 05:09:44.873764: step: 298/466, loss: 0.4974815547466278 2023-01-24 05:09:45.688649: step: 300/466, loss: 0.004259779583662748 2023-01-24 05:09:46.428780: step: 302/466, loss: 0.025984996929764748 2023-01-24 05:09:47.146615: step: 304/466, loss: 0.08418071269989014 2023-01-24 05:09:47.912193: step: 306/466, loss: 0.06732352077960968 2023-01-24 05:09:48.679922: step: 308/466, loss: 0.028239449486136436 2023-01-24 05:09:49.397894: step: 310/466, loss: 0.011483085341751575 2023-01-24 05:09:50.169366: step: 312/466, loss: 0.07952386140823364 2023-01-24 05:09:50.910636: step: 314/466, loss: 0.19841830432415009 2023-01-24 05:09:51.641282: step: 316/466, loss: 0.006301587913185358 2023-01-24 05:09:52.354168: step: 318/466, loss: 0.018144994974136353 2023-01-24 05:09:53.068445: step: 320/466, loss: 0.059368591755628586 2023-01-24 05:09:53.744108: step: 322/466, loss: 0.001227022847160697 2023-01-24 05:09:54.491690: step: 324/466, loss: 0.03310411050915718 2023-01-24 05:09:55.199668: step: 326/466, loss: 0.012070560827851295 2023-01-24 05:09:55.939895: step: 328/466, loss: 0.369498610496521 2023-01-24 05:09:56.694295: step: 330/466, loss: 0.05131317675113678 2023-01-24 05:09:57.447505: step: 332/466, loss: 0.012179000303149223 2023-01-24 05:09:58.261339: step: 334/466, loss: 0.011431191116571426 2023-01-24 05:09:59.110241: step: 336/466, loss: 0.09756392985582352 2023-01-24 05:09:59.852254: step: 338/466, loss: 0.3518053889274597 2023-01-24 05:10:00.616122: step: 340/466, loss: 0.0009419164853170514 2023-01-24 05:10:01.396513: step: 342/466, loss: 0.003747928887605667 2023-01-24 05:10:02.196406: step: 344/466, loss: 0.0055611394345760345 2023-01-24 05:10:02.958818: step: 346/466, loss: 0.010563087649643421 2023-01-24 05:10:03.699512: step: 348/466, loss: 0.03362543135881424 2023-01-24 05:10:04.488815: step: 350/466, loss: 0.0033438573591411114 2023-01-24 05:10:05.206745: step: 352/466, loss: 0.050573818385601044 2023-01-24 05:10:05.949870: step: 354/466, loss: 0.22646935284137726 2023-01-24 05:10:06.710586: step: 356/466, loss: 0.026994843035936356 2023-01-24 05:10:07.513037: step: 358/466, loss: 0.027258573099970818 2023-01-24 05:10:08.190303: step: 360/466, loss: 0.027738217264413834 2023-01-24 05:10:08.907322: step: 362/466, loss: 0.008679354563355446 2023-01-24 05:10:09.626832: step: 364/466, loss: 0.025107435882091522 2023-01-24 05:10:10.372177: step: 366/466, loss: 0.575447142124176 2023-01-24 05:10:11.114924: step: 368/466, loss: 0.018051810562610626 2023-01-24 05:10:12.028870: step: 370/466, loss: 0.009606994688510895 2023-01-24 05:10:12.828191: step: 372/466, loss: 0.10423516482114792 2023-01-24 05:10:13.586797: step: 374/466, loss: 0.0657115951180458 2023-01-24 05:10:14.413688: step: 376/466, loss: 0.002465600613504648 2023-01-24 05:10:15.241383: step: 378/466, loss: 0.0224370826035738 2023-01-24 05:10:16.044419: step: 380/466, loss: 0.05165081098675728 2023-01-24 05:10:16.847881: step: 382/466, loss: 0.037007659673690796 2023-01-24 05:10:17.699822: step: 384/466, loss: 0.020858481526374817 2023-01-24 05:10:18.448384: step: 386/466, loss: 0.012187846004962921 2023-01-24 05:10:19.153653: step: 388/466, loss: 0.020809736102819443 2023-01-24 05:10:19.947272: step: 390/466, loss: 0.015969304367899895 2023-01-24 05:10:20.741740: step: 392/466, loss: 0.0657949447631836 2023-01-24 05:10:21.529779: step: 394/466, loss: 0.005116751417517662 2023-01-24 05:10:22.252967: step: 396/466, loss: 0.012202229350805283 2023-01-24 05:10:22.981184: step: 398/466, loss: 0.02426939085125923 2023-01-24 05:10:23.694561: step: 400/466, loss: 0.0019004530040547252 2023-01-24 05:10:24.412902: step: 402/466, loss: 0.04898487403988838 2023-01-24 05:10:25.102461: step: 404/466, loss: 0.13768237829208374 2023-01-24 05:10:25.980811: step: 406/466, loss: 0.040279969573020935 2023-01-24 05:10:26.693837: step: 408/466, loss: 0.020777981728315353 2023-01-24 05:10:27.382813: step: 410/466, loss: 0.003094709012657404 2023-01-24 05:10:28.140652: step: 412/466, loss: 0.04134169965982437 2023-01-24 05:10:28.989040: step: 414/466, loss: 0.051051847636699677 2023-01-24 05:10:29.772062: step: 416/466, loss: 0.032947517931461334 2023-01-24 05:10:30.569879: step: 418/466, loss: 0.022777795791625977 2023-01-24 05:10:31.337659: step: 420/466, loss: 0.03082258068025112 2023-01-24 05:10:32.171706: step: 422/466, loss: 0.017670484259724617 2023-01-24 05:10:32.866748: step: 424/466, loss: 0.05445479974150658 2023-01-24 05:10:33.625364: step: 426/466, loss: 0.02118140459060669 2023-01-24 05:10:34.307884: step: 428/466, loss: 0.037338707596063614 2023-01-24 05:10:35.068322: step: 430/466, loss: 0.05138538032770157 2023-01-24 05:10:35.801998: step: 432/466, loss: 0.04034169390797615 2023-01-24 05:10:36.700394: step: 434/466, loss: 0.19528257846832275 2023-01-24 05:10:37.428164: step: 436/466, loss: 0.016089381650090218 2023-01-24 05:10:38.177549: step: 438/466, loss: 0.015455513261258602 2023-01-24 05:10:38.901441: step: 440/466, loss: 0.01979210413992405 2023-01-24 05:10:39.585067: step: 442/466, loss: 0.004141667392104864 2023-01-24 05:10:40.393350: step: 444/466, loss: 0.040368158370256424 2023-01-24 05:10:41.137451: step: 446/466, loss: 0.030878448858857155 2023-01-24 05:10:41.904629: step: 448/466, loss: 0.014130041003227234 2023-01-24 05:10:42.718525: step: 450/466, loss: 0.043796684592962265 2023-01-24 05:10:43.524034: step: 452/466, loss: 0.1610839068889618 2023-01-24 05:10:44.269185: step: 454/466, loss: 0.013636937364935875 2023-01-24 05:10:45.046188: step: 456/466, loss: 0.06379681825637817 2023-01-24 05:10:45.907114: step: 458/466, loss: 0.0052261208184063435 2023-01-24 05:10:46.805848: step: 460/466, loss: 0.6101828217506409 2023-01-24 05:10:47.581369: step: 462/466, loss: 0.03779454901814461 2023-01-24 05:10:48.376379: step: 464/466, loss: 0.03427768871188164 2023-01-24 05:10:49.141295: step: 466/466, loss: 0.16591612994670868 2023-01-24 05:10:49.917663: step: 468/466, loss: 0.03611454367637634 2023-01-24 05:10:50.643121: step: 470/466, loss: 0.033405475318431854 2023-01-24 05:10:51.437742: step: 472/466, loss: 0.05486998334527016 2023-01-24 05:10:52.277471: step: 474/466, loss: 0.05461619049310684 2023-01-24 05:10:53.027585: step: 476/466, loss: 0.33534881472587585 2023-01-24 05:10:53.800796: step: 478/466, loss: 0.020561659708619118 2023-01-24 05:10:54.568593: step: 480/466, loss: 0.008116367273032665 2023-01-24 05:10:55.356643: step: 482/466, loss: 0.22204464673995972 2023-01-24 05:10:56.083288: step: 484/466, loss: 0.01090270560234785 2023-01-24 05:10:56.729635: step: 486/466, loss: 0.004103204235434532 2023-01-24 05:10:57.471049: step: 488/466, loss: 0.005206770729273558 2023-01-24 05:10:58.179527: step: 490/466, loss: 0.00061570800608024 2023-01-24 05:10:58.899042: step: 492/466, loss: 0.014579207636415958 2023-01-24 05:10:59.748551: step: 494/466, loss: 0.08810116350650787 2023-01-24 05:11:00.548520: step: 496/466, loss: 0.058866944164037704 2023-01-24 05:11:01.350096: step: 498/466, loss: 0.04130866751074791 2023-01-24 05:11:02.074526: step: 500/466, loss: 0.032959774136543274 2023-01-24 05:11:02.794923: step: 502/466, loss: 0.026765989139676094 2023-01-24 05:11:03.465182: step: 504/466, loss: 0.042060643434524536 2023-01-24 05:11:04.231476: step: 506/466, loss: 0.015514878556132317 2023-01-24 05:11:04.992804: step: 508/466, loss: 0.009979259222745895 2023-01-24 05:11:05.722377: step: 510/466, loss: 0.08473718911409378 2023-01-24 05:11:06.461502: step: 512/466, loss: 0.010136888362467289 2023-01-24 05:11:07.247883: step: 514/466, loss: 0.15655040740966797 2023-01-24 05:11:08.035840: step: 516/466, loss: 0.009329917840659618 2023-01-24 05:11:08.739827: step: 518/466, loss: 0.049568429589271545 2023-01-24 05:11:09.567724: step: 520/466, loss: 0.04900914058089256 2023-01-24 05:11:10.309276: step: 522/466, loss: 0.0602293498814106 2023-01-24 05:11:11.195675: step: 524/466, loss: 0.031564585864543915 2023-01-24 05:11:11.924118: step: 526/466, loss: 0.015625080093741417 2023-01-24 05:11:12.641023: step: 528/466, loss: 0.822575569152832 2023-01-24 05:11:13.379151: step: 530/466, loss: 0.02063736692070961 2023-01-24 05:11:14.102927: step: 532/466, loss: 0.023112384602427483 2023-01-24 05:11:14.852560: step: 534/466, loss: 0.013427951373159885 2023-01-24 05:11:15.747464: step: 536/466, loss: 0.0413489006459713 2023-01-24 05:11:16.566474: step: 538/466, loss: 0.0034788185730576515 2023-01-24 05:11:17.309133: step: 540/466, loss: 0.018048470839858055 2023-01-24 05:11:18.036711: step: 542/466, loss: 0.029570063576102257 2023-01-24 05:11:18.836095: step: 544/466, loss: 0.0032845090609043837 2023-01-24 05:11:19.580796: step: 546/466, loss: 0.028534725308418274 2023-01-24 05:11:20.328567: step: 548/466, loss: 0.13171236217021942 2023-01-24 05:11:21.065852: step: 550/466, loss: 0.002139911288395524 2023-01-24 05:11:21.774708: step: 552/466, loss: 0.020025255158543587 2023-01-24 05:11:22.530431: step: 554/466, loss: 0.1817851960659027 2023-01-24 05:11:23.283636: step: 556/466, loss: 0.014328244142234325 2023-01-24 05:11:24.097565: step: 558/466, loss: 0.060344427824020386 2023-01-24 05:11:25.058142: step: 560/466, loss: 0.014726397581398487 2023-01-24 05:11:25.863880: step: 562/466, loss: 0.0007497974438592792 2023-01-24 05:11:26.645069: step: 564/466, loss: 0.007038436364382505 2023-01-24 05:11:27.452336: step: 566/466, loss: 0.020402414724230766 2023-01-24 05:11:28.229465: step: 568/466, loss: 0.005311600863933563 2023-01-24 05:11:28.931848: step: 570/466, loss: 0.001791072660125792 2023-01-24 05:11:29.641832: step: 572/466, loss: 0.0005361451185308397 2023-01-24 05:11:30.462467: step: 574/466, loss: 0.03888686001300812 2023-01-24 05:11:31.188393: step: 576/466, loss: 0.07183265686035156 2023-01-24 05:11:31.978782: step: 578/466, loss: 0.009103440679609776 2023-01-24 05:11:32.675085: step: 580/466, loss: 0.007811566349118948 2023-01-24 05:11:33.387381: step: 582/466, loss: 0.02086419053375721 2023-01-24 05:11:34.166025: step: 584/466, loss: 0.029495568946003914 2023-01-24 05:11:34.862434: step: 586/466, loss: 0.0748797282576561 2023-01-24 05:11:35.603969: step: 588/466, loss: 0.015807198360562325 2023-01-24 05:11:36.285294: step: 590/466, loss: 0.004969421774148941 2023-01-24 05:11:37.012195: step: 592/466, loss: 0.02605554275214672 2023-01-24 05:11:37.823117: step: 594/466, loss: 0.07038115710020065 2023-01-24 05:11:38.553422: step: 596/466, loss: 0.025732913985848427 2023-01-24 05:11:39.341515: step: 598/466, loss: 0.05297987535595894 2023-01-24 05:11:40.169282: step: 600/466, loss: 0.0939839631319046 2023-01-24 05:11:40.941762: step: 602/466, loss: 0.06241846829652786 2023-01-24 05:11:41.736612: step: 604/466, loss: 0.010820649564266205 2023-01-24 05:11:42.442572: step: 606/466, loss: 0.028127994388341904 2023-01-24 05:11:43.202345: step: 608/466, loss: 0.019125230610370636 2023-01-24 05:11:44.070206: step: 610/466, loss: 0.0562971793115139 2023-01-24 05:11:44.917889: step: 612/466, loss: 0.026095090433955193 2023-01-24 05:11:45.670187: step: 614/466, loss: 0.03391076251864433 2023-01-24 05:11:46.394033: step: 616/466, loss: 0.8686801791191101 2023-01-24 05:11:47.161398: step: 618/466, loss: 0.011446290649473667 2023-01-24 05:11:47.913722: step: 620/466, loss: 0.12559671700000763 2023-01-24 05:11:48.625753: step: 622/466, loss: 0.037010177969932556 2023-01-24 05:11:49.482659: step: 624/466, loss: 0.03859866037964821 2023-01-24 05:11:50.292044: step: 626/466, loss: 0.013249721378087997 2023-01-24 05:11:51.090083: step: 628/466, loss: 0.013942176476120949 2023-01-24 05:11:51.746143: step: 630/466, loss: 0.019450657069683075 2023-01-24 05:11:52.514624: step: 632/466, loss: 0.01676585152745247 2023-01-24 05:11:53.191828: step: 634/466, loss: 0.002062909072265029 2023-01-24 05:11:54.028485: step: 636/466, loss: 0.03258739411830902 2023-01-24 05:11:54.799265: step: 638/466, loss: 0.04516744613647461 2023-01-24 05:11:55.591190: step: 640/466, loss: 0.03640659898519516 2023-01-24 05:11:56.270343: step: 642/466, loss: 0.03564944118261337 2023-01-24 05:11:57.022493: step: 644/466, loss: 0.008748043328523636 2023-01-24 05:11:57.884845: step: 646/466, loss: 0.032235562801361084 2023-01-24 05:11:58.698149: step: 648/466, loss: 0.07947038114070892 2023-01-24 05:11:59.473217: step: 650/466, loss: 0.03167068213224411 2023-01-24 05:12:00.272477: step: 652/466, loss: 0.011534439399838448 2023-01-24 05:12:01.025311: step: 654/466, loss: 0.19016428291797638 2023-01-24 05:12:01.770046: step: 656/466, loss: 0.16548942029476166 2023-01-24 05:12:02.597638: step: 658/466, loss: 0.02085087075829506 2023-01-24 05:12:03.279670: step: 660/466, loss: 0.01081312820315361 2023-01-24 05:12:04.022493: step: 662/466, loss: 0.016461463645100594 2023-01-24 05:12:04.681574: step: 664/466, loss: 0.04506843909621239 2023-01-24 05:12:05.413513: step: 666/466, loss: 0.018984554335474968 2023-01-24 05:12:06.163360: step: 668/466, loss: 0.010795004665851593 2023-01-24 05:12:06.938406: step: 670/466, loss: 0.00987847801297903 2023-01-24 05:12:07.710699: step: 672/466, loss: 0.08290861546993256 2023-01-24 05:12:08.456180: step: 674/466, loss: 0.0027949621435254812 2023-01-24 05:12:09.209642: step: 676/466, loss: 0.04666345939040184 2023-01-24 05:12:09.997968: step: 678/466, loss: 0.015100638382136822 2023-01-24 05:12:10.761283: step: 680/466, loss: 0.03496446833014488 2023-01-24 05:12:11.543006: step: 682/466, loss: 0.03858804330229759 2023-01-24 05:12:12.315353: step: 684/466, loss: 0.02645958960056305 2023-01-24 05:12:13.138360: step: 686/466, loss: 0.06158650666475296 2023-01-24 05:12:13.937527: step: 688/466, loss: 0.024011608213186264 2023-01-24 05:12:14.652013: step: 690/466, loss: 0.010011572390794754 2023-01-24 05:12:15.380872: step: 692/466, loss: 0.0005916806985624135 2023-01-24 05:12:16.136396: step: 694/466, loss: 0.02358367294073105 2023-01-24 05:12:16.958000: step: 696/466, loss: 0.11142602562904358 2023-01-24 05:12:17.709391: step: 698/466, loss: 0.037874944508075714 2023-01-24 05:12:18.575603: step: 700/466, loss: 0.012585917487740517 2023-01-24 05:12:19.261661: step: 702/466, loss: 0.011423270218074322 2023-01-24 05:12:20.011132: step: 704/466, loss: 0.020701391622424126 2023-01-24 05:12:20.824582: step: 706/466, loss: 0.04733499139547348 2023-01-24 05:12:21.564754: step: 708/466, loss: 0.29787299036979675 2023-01-24 05:12:22.301822: step: 710/466, loss: 0.03941582143306732 2023-01-24 05:12:23.006910: step: 712/466, loss: 0.011034637689590454 2023-01-24 05:12:23.777271: step: 714/466, loss: 0.0179904717952013 2023-01-24 05:12:24.555376: step: 716/466, loss: 0.06562364846467972 2023-01-24 05:12:25.345224: step: 718/466, loss: 0.018577704206109047 2023-01-24 05:12:26.000151: step: 720/466, loss: 0.007526410277932882 2023-01-24 05:12:26.733178: step: 722/466, loss: 0.041528038680553436 2023-01-24 05:12:27.450486: step: 724/466, loss: 0.001094332430511713 2023-01-24 05:12:28.309306: step: 726/466, loss: 0.013120784424245358 2023-01-24 05:12:29.108522: step: 728/466, loss: 0.017572740092873573 2023-01-24 05:12:29.807989: step: 730/466, loss: 0.001205058186315 2023-01-24 05:12:30.531334: step: 732/466, loss: 0.06151802837848663 2023-01-24 05:12:31.281350: step: 734/466, loss: 0.014945479109883308 2023-01-24 05:12:31.945568: step: 736/466, loss: 0.03552056849002838 2023-01-24 05:12:32.678913: step: 738/466, loss: 0.034059032797813416 2023-01-24 05:12:33.419562: step: 740/466, loss: 0.049566540867090225 2023-01-24 05:12:34.139966: step: 742/466, loss: 0.008085060864686966 2023-01-24 05:12:34.941693: step: 744/466, loss: 0.009791059419512749 2023-01-24 05:12:35.805603: step: 746/466, loss: 0.014964860863983631 2023-01-24 05:12:36.556680: step: 748/466, loss: 0.06384597718715668 2023-01-24 05:12:37.325100: step: 750/466, loss: 0.0009342418634332716 2023-01-24 05:12:38.144273: step: 752/466, loss: 0.035021040588617325 2023-01-24 05:12:38.903625: step: 754/466, loss: 0.0690048411488533 2023-01-24 05:12:39.675150: step: 756/466, loss: 0.00836377963423729 2023-01-24 05:12:40.492325: step: 758/466, loss: 0.0028185443952679634 2023-01-24 05:12:41.311224: step: 760/466, loss: 0.007473757956176996 2023-01-24 05:12:42.029487: step: 762/466, loss: 0.01888253726065159 2023-01-24 05:12:42.750338: step: 764/466, loss: 0.07119555026292801 2023-01-24 05:12:43.463525: step: 766/466, loss: 0.05385569855570793 2023-01-24 05:12:44.206003: step: 768/466, loss: 0.01937274821102619 2023-01-24 05:12:44.922397: step: 770/466, loss: 0.010562130250036716 2023-01-24 05:12:45.729179: step: 772/466, loss: 0.02259805239737034 2023-01-24 05:12:46.618932: step: 774/466, loss: 0.044527675956487656 2023-01-24 05:12:47.485845: step: 776/466, loss: 0.06347750872373581 2023-01-24 05:12:48.378390: step: 778/466, loss: 0.06932666897773743 2023-01-24 05:12:49.144050: step: 780/466, loss: 0.04155116528272629 2023-01-24 05:12:49.883451: step: 782/466, loss: 0.023821156471967697 2023-01-24 05:12:50.647392: step: 784/466, loss: 0.04671480134129524 2023-01-24 05:12:51.427853: step: 786/466, loss: 0.004834890365600586 2023-01-24 05:12:52.124246: step: 788/466, loss: 0.0016203763661906123 2023-01-24 05:12:52.830721: step: 790/466, loss: 0.002012968761846423 2023-01-24 05:12:53.532232: step: 792/466, loss: 0.030111519619822502 2023-01-24 05:12:54.300188: step: 794/466, loss: 0.013528553768992424 2023-01-24 05:12:55.125249: step: 796/466, loss: 0.013709068298339844 2023-01-24 05:12:55.880160: step: 798/466, loss: 0.004014394711703062 2023-01-24 05:12:56.681919: step: 800/466, loss: 0.011794732883572578 2023-01-24 05:12:57.438958: step: 802/466, loss: 0.01672566682100296 2023-01-24 05:12:58.171653: step: 804/466, loss: 0.0033746538683772087 2023-01-24 05:12:58.888882: step: 806/466, loss: 0.01093566045165062 2023-01-24 05:12:59.740074: step: 808/466, loss: 0.0042954096570611 2023-01-24 05:13:00.510798: step: 810/466, loss: 0.01798640564084053 2023-01-24 05:13:01.202447: step: 812/466, loss: 0.12018779665231705 2023-01-24 05:13:02.030970: step: 814/466, loss: 0.06234927847981453 2023-01-24 05:13:02.859394: step: 816/466, loss: 0.0871921107172966 2023-01-24 05:13:03.599392: step: 818/466, loss: 0.03763734549283981 2023-01-24 05:13:04.407243: step: 820/466, loss: 0.03183059021830559 2023-01-24 05:13:05.143405: step: 822/466, loss: 0.03925763815641403 2023-01-24 05:13:05.805229: step: 824/466, loss: 0.010070499032735825 2023-01-24 05:13:06.586124: step: 826/466, loss: 0.08802307397127151 2023-01-24 05:13:07.327933: step: 828/466, loss: 0.05833996832370758 2023-01-24 05:13:08.054863: step: 830/466, loss: 0.0007282199221663177 2023-01-24 05:13:08.783564: step: 832/466, loss: 0.016765417531132698 2023-01-24 05:13:09.561057: step: 834/466, loss: 0.000603331602178514 2023-01-24 05:13:10.371971: step: 836/466, loss: 0.015151295810937881 2023-01-24 05:13:11.075159: step: 838/466, loss: 0.04270085692405701 2023-01-24 05:13:11.910918: step: 840/466, loss: 0.06138002872467041 2023-01-24 05:13:12.647440: step: 842/466, loss: 0.03053216077387333 2023-01-24 05:13:13.343115: step: 844/466, loss: 0.09704993665218353 2023-01-24 05:13:14.108635: step: 846/466, loss: 0.034290920943021774 2023-01-24 05:13:14.906707: step: 848/466, loss: 0.037506867200136185 2023-01-24 05:13:15.632330: step: 850/466, loss: 0.000346437533153221 2023-01-24 05:13:16.301080: step: 852/466, loss: 0.03536347299814224 2023-01-24 05:13:17.041964: step: 854/466, loss: 0.01068208273500204 2023-01-24 05:13:17.823685: step: 856/466, loss: 0.1030912697315216 2023-01-24 05:13:18.585137: step: 858/466, loss: 0.023195527493953705 2023-01-24 05:13:19.264395: step: 860/466, loss: 0.04605022817850113 2023-01-24 05:13:19.901315: step: 862/466, loss: 0.029108798131346703 2023-01-24 05:13:20.698803: step: 864/466, loss: 0.017726074904203415 2023-01-24 05:13:21.441202: step: 866/466, loss: 0.009071829728782177 2023-01-24 05:13:22.178262: step: 868/466, loss: 0.05669238418340683 2023-01-24 05:13:22.973185: step: 870/466, loss: 0.048009276390075684 2023-01-24 05:13:23.758557: step: 872/466, loss: 0.04615802317857742 2023-01-24 05:13:24.422765: step: 874/466, loss: 0.0086721396073699 2023-01-24 05:13:25.179014: step: 876/466, loss: 0.016087956726551056 2023-01-24 05:13:25.927700: step: 878/466, loss: 0.09400956332683563 2023-01-24 05:13:26.689275: step: 880/466, loss: 0.04891110584139824 2023-01-24 05:13:27.435562: step: 882/466, loss: 0.050575532019138336 2023-01-24 05:13:28.149560: step: 884/466, loss: 0.06017523258924484 2023-01-24 05:13:28.833003: step: 886/466, loss: 0.010187262669205666 2023-01-24 05:13:29.616743: step: 888/466, loss: 1.000565528869629 2023-01-24 05:13:30.348083: step: 890/466, loss: 0.03032972663640976 2023-01-24 05:13:31.135510: step: 892/466, loss: 0.002549918135628104 2023-01-24 05:13:31.865383: step: 894/466, loss: 0.33435216546058655 2023-01-24 05:13:32.521509: step: 896/466, loss: 0.005525761283934116 2023-01-24 05:13:33.184379: step: 898/466, loss: 0.030115347355604172 2023-01-24 05:13:33.887731: step: 900/466, loss: 0.029519736766815186 2023-01-24 05:13:34.596025: step: 902/466, loss: 0.04865584522485733 2023-01-24 05:13:35.327100: step: 904/466, loss: 0.04875032231211662 2023-01-24 05:13:36.098032: step: 906/466, loss: 0.03611094132065773 2023-01-24 05:13:36.777001: step: 908/466, loss: 0.16952019929885864 2023-01-24 05:13:37.575424: step: 910/466, loss: 0.04466065391898155 2023-01-24 05:13:38.440186: step: 912/466, loss: 0.010506573133170605 2023-01-24 05:13:39.135361: step: 914/466, loss: 0.0016740434803068638 2023-01-24 05:13:39.872445: step: 916/466, loss: 0.019911987707018852 2023-01-24 05:13:40.631855: step: 918/466, loss: 0.018170898780226707 2023-01-24 05:13:41.414345: step: 920/466, loss: 0.18965911865234375 2023-01-24 05:13:42.148934: step: 922/466, loss: 0.027456559240818024 2023-01-24 05:13:42.830840: step: 924/466, loss: 0.03433636948466301 2023-01-24 05:13:43.652776: step: 926/466, loss: 0.0002444031124468893 2023-01-24 05:13:44.437369: step: 928/466, loss: 0.02833491750061512 2023-01-24 05:13:45.212860: step: 930/466, loss: 0.015947267413139343 2023-01-24 05:13:45.917047: step: 932/466, loss: 0.029022732749581337 ================================================== Loss: 0.053 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3227874677002584, 'r': 0.31604996837444654, 'f1': 0.31938318951741773}, 'combined': 0.2353349817496762, 'epoch': 25} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3665796062210238, 'r': 0.2875507560486992, 'f1': 0.32229124040392776}, 'combined': 0.1980912014189995, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3104082761493752, 'r': 0.3357357066511269, 'f1': 0.3225756014679013}, 'combined': 0.2376872852921378, 'epoch': 25} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3483191280428257, 'r': 0.2943372025712536, 'f1': 0.3190609751007019}, 'combined': 0.19610577006189484, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3364869760220804, 'r': 0.3364869760220804, 'f1': 0.3364869760220804}, 'combined': 0.24793777180574342, 'epoch': 25} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.366413363214267, 'r': 0.28830444869892063, 'f1': 0.3226996448094612}, 'combined': 0.19931448649996136, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32196969696969696, 'r': 0.30357142857142855, 'f1': 0.31249999999999994}, 'combined': 0.2083333333333333, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.42391304347826086, 'f1': 0.31451612903225806}, 'combined': 0.15725806451612903, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3514281426856457, 'r': 0.3267548195748888, 'f1': 0.33864265470199884}, 'combined': 0.24952616662252544, 'epoch': 23} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3841715799217045, 'r': 0.28937599526569946, 'f1': 0.3301029871919831}, 'combined': 0.20289256773751158, 'epoch': 23} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3642241379310345, 'r': 0.30178571428571427, 'f1': 0.33007812499999994}, 'combined': 0.2200520833333333, 'epoch': 23} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34582504970178923, 'r': 0.33007590132827325, 'f1': 0.3377669902912621}, 'combined': 0.24888094021461418, 'epoch': 19} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36038315043942815, 'r': 0.2939803727200525, 'f1': 0.3238125877697768}, 'combined': 0.20000189244603864, 'epoch': 19} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 19} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:16:31.189043: step: 2/466, loss: 0.00031524227233603597 2023-01-24 05:16:31.946252: step: 4/466, loss: 0.015762172639369965 2023-01-24 05:16:32.746226: step: 6/466, loss: 0.0012802548008039594 2023-01-24 05:16:33.510415: step: 8/466, loss: 0.009078371338546276 2023-01-24 05:16:34.219868: step: 10/466, loss: 0.0006099325837567449 2023-01-24 05:16:34.974896: step: 12/466, loss: 0.45666512846946716 2023-01-24 05:16:35.734153: step: 14/466, loss: 0.010864143259823322 2023-01-24 05:16:36.537949: step: 16/466, loss: 0.012954924255609512 2023-01-24 05:16:37.376151: step: 18/466, loss: 0.028923632577061653 2023-01-24 05:16:38.128621: step: 20/466, loss: 0.0014648281503468752 2023-01-24 05:16:38.970510: step: 22/466, loss: 0.07409375160932541 2023-01-24 05:16:39.738116: step: 24/466, loss: 0.019660940393805504 2023-01-24 05:16:40.510091: step: 26/466, loss: 0.04426463320851326 2023-01-24 05:16:41.235502: step: 28/466, loss: 0.06421208381652832 2023-01-24 05:16:42.010298: step: 30/466, loss: 0.06367946416139603 2023-01-24 05:16:42.796919: step: 32/466, loss: 0.04807402938604355 2023-01-24 05:16:43.622124: step: 34/466, loss: 0.0189279243350029 2023-01-24 05:16:44.466463: step: 36/466, loss: 2.7010581493377686 2023-01-24 05:16:45.218287: step: 38/466, loss: 0.020814199000597 2023-01-24 05:16:46.008192: step: 40/466, loss: 0.03045761026442051 2023-01-24 05:16:46.867101: step: 42/466, loss: 0.019807307049632072 2023-01-24 05:16:47.641489: step: 44/466, loss: 0.12432827800512314 2023-01-24 05:16:48.343902: step: 46/466, loss: 0.004209555219858885 2023-01-24 05:16:49.137558: step: 48/466, loss: 0.09685204923152924 2023-01-24 05:16:49.890819: step: 50/466, loss: 0.008693347685039043 2023-01-24 05:16:50.797632: step: 52/466, loss: 0.02438419498503208 2023-01-24 05:16:51.551157: step: 54/466, loss: 0.004380271770060062 2023-01-24 05:16:52.313705: step: 56/466, loss: 0.029782719910144806 2023-01-24 05:16:53.108828: step: 58/466, loss: 1.0801372528076172 2023-01-24 05:16:53.841516: step: 60/466, loss: 0.013642487116158009 2023-01-24 05:16:54.549456: step: 62/466, loss: 0.19044984877109528 2023-01-24 05:16:55.326316: step: 64/466, loss: 0.020978759974241257 2023-01-24 05:16:56.047114: step: 66/466, loss: 0.3767179846763611 2023-01-24 05:16:56.800616: step: 68/466, loss: 0.012379830703139305 2023-01-24 05:16:57.615479: step: 70/466, loss: 0.0017019481165334582 2023-01-24 05:16:58.408934: step: 72/466, loss: 0.006129485089331865 2023-01-24 05:16:59.174068: step: 74/466, loss: 0.050105977803468704 2023-01-24 05:16:59.985844: step: 76/466, loss: 0.00046785204904153943 2023-01-24 05:17:00.660736: step: 78/466, loss: 0.07924487441778183 2023-01-24 05:17:01.358947: step: 80/466, loss: 0.061756521463394165 2023-01-24 05:17:02.112706: step: 82/466, loss: 0.003477144753560424 2023-01-24 05:17:02.928191: step: 84/466, loss: 0.05212853476405144 2023-01-24 05:17:03.622386: step: 86/466, loss: 0.07153777778148651 2023-01-24 05:17:04.324961: step: 88/466, loss: 0.0008990956703200936 2023-01-24 05:17:05.013917: step: 90/466, loss: 0.004675067961215973 2023-01-24 05:17:05.819402: step: 92/466, loss: 0.037258706986904144 2023-01-24 05:17:06.580436: step: 94/466, loss: 0.012425399385392666 2023-01-24 05:17:07.349809: step: 96/466, loss: 0.0023102019913494587 2023-01-24 05:17:08.093332: step: 98/466, loss: 0.060890063643455505 2023-01-24 05:17:08.855681: step: 100/466, loss: 0.06003446504473686 2023-01-24 05:17:09.626548: step: 102/466, loss: 0.15408965945243835 2023-01-24 05:17:10.377948: step: 104/466, loss: 0.0013166368007659912 2023-01-24 05:17:11.152952: step: 106/466, loss: 0.016299203038215637 2023-01-24 05:17:11.898057: step: 108/466, loss: 0.0035430581774562597 2023-01-24 05:17:12.576980: step: 110/466, loss: 0.12147834151983261 2023-01-24 05:17:13.267087: step: 112/466, loss: 0.019816666841506958 2023-01-24 05:17:13.987510: step: 114/466, loss: 0.017354421317577362 2023-01-24 05:17:14.759981: step: 116/466, loss: 0.0008715191506780684 2023-01-24 05:17:15.533615: step: 118/466, loss: 0.001017579110339284 2023-01-24 05:17:16.287871: step: 120/466, loss: 0.0178241990506649 2023-01-24 05:17:17.044607: step: 122/466, loss: 0.02976835146546364 2023-01-24 05:17:17.808008: step: 124/466, loss: 0.014377070590853691 2023-01-24 05:17:18.559819: step: 126/466, loss: 0.041578684002161026 2023-01-24 05:17:19.331313: step: 128/466, loss: 0.002998858457431197 2023-01-24 05:17:20.035909: step: 130/466, loss: 0.0054906210862100124 2023-01-24 05:17:20.807880: step: 132/466, loss: 0.0224401094019413 2023-01-24 05:17:21.599441: step: 134/466, loss: 0.019206833094358444 2023-01-24 05:17:22.380342: step: 136/466, loss: 0.0006435486720874906 2023-01-24 05:17:23.143485: step: 138/466, loss: 0.22176651656627655 2023-01-24 05:17:23.844081: step: 140/466, loss: 0.023877454921603203 2023-01-24 05:17:24.580834: step: 142/466, loss: 0.00489756790921092 2023-01-24 05:17:25.345822: step: 144/466, loss: 0.008302816189825535 2023-01-24 05:17:26.065020: step: 146/466, loss: 0.0002597762504592538 2023-01-24 05:17:26.907851: step: 148/466, loss: 0.023236991837620735 2023-01-24 05:17:27.692822: step: 150/466, loss: 0.0020385209936648607 2023-01-24 05:17:28.370845: step: 152/466, loss: 0.03643738478422165 2023-01-24 05:17:29.090562: step: 154/466, loss: 0.0633644387125969 2023-01-24 05:17:29.869614: step: 156/466, loss: 0.044321831315755844 2023-01-24 05:17:30.615367: step: 158/466, loss: 0.00029187617474235594 2023-01-24 05:17:31.357806: step: 160/466, loss: 0.02037622407078743 2023-01-24 05:17:32.117732: step: 162/466, loss: 0.010865447111427784 2023-01-24 05:17:32.908808: step: 164/466, loss: 0.03551540896296501 2023-01-24 05:17:33.680073: step: 166/466, loss: 0.044380009174346924 2023-01-24 05:17:34.553564: step: 168/466, loss: 0.09230950474739075 2023-01-24 05:17:35.295621: step: 170/466, loss: 0.01594698242843151 2023-01-24 05:17:36.152151: step: 172/466, loss: 0.04631970077753067 2023-01-24 05:17:36.905101: step: 174/466, loss: 0.0324825793504715 2023-01-24 05:17:37.679312: step: 176/466, loss: 0.021742789074778557 2023-01-24 05:17:38.396348: step: 178/466, loss: 0.014791291207075119 2023-01-24 05:17:39.204047: step: 180/466, loss: 0.02440674975514412 2023-01-24 05:17:39.983146: step: 182/466, loss: 0.03153248503804207 2023-01-24 05:17:40.777098: step: 184/466, loss: 0.002631034003570676 2023-01-24 05:17:41.599476: step: 186/466, loss: 0.06895671784877777 2023-01-24 05:17:42.359748: step: 188/466, loss: 0.03477528318762779 2023-01-24 05:17:43.054620: step: 190/466, loss: 0.001194530283100903 2023-01-24 05:17:43.824552: step: 192/466, loss: 0.005461663007736206 2023-01-24 05:17:44.657781: step: 194/466, loss: 0.012678248807787895 2023-01-24 05:17:45.494398: step: 196/466, loss: 0.012411870993673801 2023-01-24 05:17:46.296051: step: 198/466, loss: 0.06810742616653442 2023-01-24 05:17:47.117478: step: 200/466, loss: 0.020279204472899437 2023-01-24 05:17:47.963305: step: 202/466, loss: 0.00167837121989578 2023-01-24 05:17:48.659093: step: 204/466, loss: 0.014692934229969978 2023-01-24 05:17:49.462185: step: 206/466, loss: 0.07064501196146011 2023-01-24 05:17:50.222918: step: 208/466, loss: 0.39062148332595825 2023-01-24 05:17:50.993227: step: 210/466, loss: 0.0009400771232321858 2023-01-24 05:17:51.775910: step: 212/466, loss: 0.031239798292517662 2023-01-24 05:17:52.532035: step: 214/466, loss: 0.045270588248968124 2023-01-24 05:17:53.307188: step: 216/466, loss: 0.03298955038189888 2023-01-24 05:17:54.072889: step: 218/466, loss: 0.0049577741883695126 2023-01-24 05:17:54.842216: step: 220/466, loss: 0.008334346115589142 2023-01-24 05:17:55.575782: step: 222/466, loss: 0.03591045364737511 2023-01-24 05:17:56.257739: step: 224/466, loss: 0.012412887066602707 2023-01-24 05:17:56.950442: step: 226/466, loss: 0.03839350864291191 2023-01-24 05:17:57.729921: step: 228/466, loss: 0.0374600924551487 2023-01-24 05:17:58.465903: step: 230/466, loss: 0.003158966079354286 2023-01-24 05:17:59.194380: step: 232/466, loss: 0.00404371228069067 2023-01-24 05:17:59.953000: step: 234/466, loss: 0.01028430461883545 2023-01-24 05:18:00.724937: step: 236/466, loss: 0.06801458448171616 2023-01-24 05:18:01.459986: step: 238/466, loss: 0.03367387130856514 2023-01-24 05:18:02.240018: step: 240/466, loss: 0.004517871420830488 2023-01-24 05:18:02.975793: step: 242/466, loss: 0.10528568923473358 2023-01-24 05:18:03.706543: step: 244/466, loss: 0.03185072913765907 2023-01-24 05:18:04.429525: step: 246/466, loss: 0.025077687576413155 2023-01-24 05:18:05.291149: step: 248/466, loss: 0.020623821765184402 2023-01-24 05:18:06.096604: step: 250/466, loss: 0.0257880799472332 2023-01-24 05:18:06.821832: step: 252/466, loss: 0.027103710919618607 2023-01-24 05:18:07.607200: step: 254/466, loss: 0.02695123478770256 2023-01-24 05:18:08.432303: step: 256/466, loss: 0.04977225139737129 2023-01-24 05:18:09.246109: step: 258/466, loss: 0.014846911653876305 2023-01-24 05:18:10.030480: step: 260/466, loss: 0.010261930525302887 2023-01-24 05:18:10.807371: step: 262/466, loss: 0.007246529217809439 2023-01-24 05:18:11.738600: step: 264/466, loss: 0.026592286303639412 2023-01-24 05:18:12.448243: step: 266/466, loss: 0.02041914314031601 2023-01-24 05:18:13.234408: step: 268/466, loss: 0.037839245051145554 2023-01-24 05:18:14.059510: step: 270/466, loss: 0.006573710590600967 2023-01-24 05:18:14.827810: step: 272/466, loss: 0.012767232023179531 2023-01-24 05:18:15.514290: step: 274/466, loss: 0.03212396055459976 2023-01-24 05:18:16.253355: step: 276/466, loss: 0.014931570738554 2023-01-24 05:18:16.979629: step: 278/466, loss: 0.05406482145190239 2023-01-24 05:18:17.783518: step: 280/466, loss: 0.009635468013584614 2023-01-24 05:18:18.555277: step: 282/466, loss: 0.007484063971787691 2023-01-24 05:18:19.293149: step: 284/466, loss: 0.006852707825601101 2023-01-24 05:18:19.976375: step: 286/466, loss: 0.0074304440058767796 2023-01-24 05:18:20.705039: step: 288/466, loss: 0.040117863565683365 2023-01-24 05:18:21.461024: step: 290/466, loss: 0.06904040277004242 2023-01-24 05:18:22.214952: step: 292/466, loss: 0.005339875817298889 2023-01-24 05:18:22.967502: step: 294/466, loss: 0.000554997066501528 2023-01-24 05:18:23.774709: step: 296/466, loss: 0.002527383156120777 2023-01-24 05:18:24.518516: step: 298/466, loss: 0.013496562838554382 2023-01-24 05:18:25.190869: step: 300/466, loss: 0.0026911115273833275 2023-01-24 05:18:26.066002: step: 302/466, loss: 0.9188671112060547 2023-01-24 05:18:26.871149: step: 304/466, loss: 0.05199922248721123 2023-01-24 05:18:27.579463: step: 306/466, loss: 0.012454289011657238 2023-01-24 05:18:28.346835: step: 308/466, loss: 0.012339092791080475 2023-01-24 05:18:29.036919: step: 310/466, loss: 0.002921469509601593 2023-01-24 05:18:29.777573: step: 312/466, loss: 0.0036970670334994793 2023-01-24 05:18:30.563008: step: 314/466, loss: 0.03886334225535393 2023-01-24 05:18:31.310889: step: 316/466, loss: 0.02411399781703949 2023-01-24 05:18:32.112330: step: 318/466, loss: 0.062258653342723846 2023-01-24 05:18:32.792845: step: 320/466, loss: 0.04542381316423416 2023-01-24 05:18:33.503036: step: 322/466, loss: 0.096859410405159 2023-01-24 05:18:34.212480: step: 324/466, loss: 0.009544518776237965 2023-01-24 05:18:34.982128: step: 326/466, loss: 0.16951487958431244 2023-01-24 05:18:35.743180: step: 328/466, loss: 0.1383233368396759 2023-01-24 05:18:36.536482: step: 330/466, loss: 0.014902369119226933 2023-01-24 05:18:37.451706: step: 332/466, loss: 0.07071257382631302 2023-01-24 05:18:38.230507: step: 334/466, loss: 0.0656275525689125 2023-01-24 05:18:39.014085: step: 336/466, loss: 0.04864136502146721 2023-01-24 05:18:39.799268: step: 338/466, loss: 0.003996263723820448 2023-01-24 05:18:40.507496: step: 340/466, loss: 0.009085068479180336 2023-01-24 05:18:41.218505: step: 342/466, loss: 0.001105593633837998 2023-01-24 05:18:41.957770: step: 344/466, loss: 0.05922761932015419 2023-01-24 05:18:42.731006: step: 346/466, loss: 0.04883921891450882 2023-01-24 05:18:43.443718: step: 348/466, loss: 0.025686321780085564 2023-01-24 05:18:44.222137: step: 350/466, loss: 0.058305688202381134 2023-01-24 05:18:45.026291: step: 352/466, loss: 0.28009021282196045 2023-01-24 05:18:45.844334: step: 354/466, loss: 0.31456565856933594 2023-01-24 05:18:46.603276: step: 356/466, loss: 0.020045241340994835 2023-01-24 05:18:47.248475: step: 358/466, loss: 0.0017619299469515681 2023-01-24 05:18:48.000550: step: 360/466, loss: 0.012018238194286823 2023-01-24 05:18:48.678015: step: 362/466, loss: 0.032567549496889114 2023-01-24 05:18:49.392240: step: 364/466, loss: 0.02509705349802971 2023-01-24 05:18:50.057823: step: 366/466, loss: 0.0745500698685646 2023-01-24 05:18:50.709367: step: 368/466, loss: 0.017987968400120735 2023-01-24 05:18:51.422244: step: 370/466, loss: 0.013996962457895279 2023-01-24 05:18:52.178777: step: 372/466, loss: 0.0067430599592626095 2023-01-24 05:18:53.036856: step: 374/466, loss: 0.012736006639897823 2023-01-24 05:18:53.730011: step: 376/466, loss: 0.013728760182857513 2023-01-24 05:18:54.488379: step: 378/466, loss: 0.016943395137786865 2023-01-24 05:18:55.296478: step: 380/466, loss: 0.007950839586555958 2023-01-24 05:18:56.054371: step: 382/466, loss: 0.011489784345030785 2023-01-24 05:18:56.867707: step: 384/466, loss: 0.05696876347064972 2023-01-24 05:18:57.664641: step: 386/466, loss: 0.0263433326035738 2023-01-24 05:18:58.481871: step: 388/466, loss: 0.05339128524065018 2023-01-24 05:18:59.199216: step: 390/466, loss: 0.012195846997201443 2023-01-24 05:18:59.949030: step: 392/466, loss: 0.01823616772890091 2023-01-24 05:19:00.715709: step: 394/466, loss: 0.012851156294345856 2023-01-24 05:19:01.403900: step: 396/466, loss: 0.003447320545092225 2023-01-24 05:19:02.194157: step: 398/466, loss: 0.019181225448846817 2023-01-24 05:19:02.961057: step: 400/466, loss: 0.0008523253491148353 2023-01-24 05:19:03.874838: step: 402/466, loss: 0.01056719571352005 2023-01-24 05:19:04.655579: step: 404/466, loss: 0.005507184658199549 2023-01-24 05:19:05.297752: step: 406/466, loss: 0.0025642230175435543 2023-01-24 05:19:06.051507: step: 408/466, loss: 0.00011972729407716542 2023-01-24 05:19:06.739243: step: 410/466, loss: 0.0066248211078345776 2023-01-24 05:19:07.474137: step: 412/466, loss: 0.0039841653779149055 2023-01-24 05:19:08.270849: step: 414/466, loss: 0.05562155693769455 2023-01-24 05:19:09.053269: step: 416/466, loss: 0.005581381265074015 2023-01-24 05:19:09.764696: step: 418/466, loss: 0.13449573516845703 2023-01-24 05:19:10.525711: step: 420/466, loss: 0.037772390991449356 2023-01-24 05:19:11.230663: step: 422/466, loss: 0.11866430193185806 2023-01-24 05:19:12.042063: step: 424/466, loss: 0.0028409764636307955 2023-01-24 05:19:12.789837: step: 426/466, loss: 0.019561611115932465 2023-01-24 05:19:13.489165: step: 428/466, loss: 0.001310934778302908 2023-01-24 05:19:14.253842: step: 430/466, loss: 0.017845844849944115 2023-01-24 05:19:15.025980: step: 432/466, loss: 7.294760143849999e-05 2023-01-24 05:19:15.809784: step: 434/466, loss: 0.016323518007993698 2023-01-24 05:19:16.611090: step: 436/466, loss: 0.0048583317548036575 2023-01-24 05:19:17.402767: step: 438/466, loss: 0.005380266811698675 2023-01-24 05:19:18.184671: step: 440/466, loss: 0.019052177667617798 2023-01-24 05:19:18.979754: step: 442/466, loss: 0.021041641011834145 2023-01-24 05:19:19.695181: step: 444/466, loss: 1.4660183191299438 2023-01-24 05:19:20.474975: step: 446/466, loss: 0.3665899634361267 2023-01-24 05:19:21.243454: step: 448/466, loss: 0.30819717049598694 2023-01-24 05:19:21.995951: step: 450/466, loss: 0.009679583832621574 2023-01-24 05:19:22.762285: step: 452/466, loss: 0.8086559772491455 2023-01-24 05:19:23.532454: step: 454/466, loss: 0.04148883745074272 2023-01-24 05:19:24.288898: step: 456/466, loss: 0.04705173894762993 2023-01-24 05:19:25.000630: step: 458/466, loss: 0.00868227705359459 2023-01-24 05:19:25.869723: step: 460/466, loss: 0.003283077385276556 2023-01-24 05:19:26.680814: step: 462/466, loss: 0.0010576589265838265 2023-01-24 05:19:27.466026: step: 464/466, loss: 0.004205800127238035 2023-01-24 05:19:28.149038: step: 466/466, loss: 0.0158492773771286 2023-01-24 05:19:28.864329: step: 468/466, loss: 0.0006152232526801527 2023-01-24 05:19:29.734368: step: 470/466, loss: 0.010952494107186794 2023-01-24 05:19:30.489737: step: 472/466, loss: 0.02950318530201912 2023-01-24 05:19:31.254329: step: 474/466, loss: 0.136695995926857 2023-01-24 05:19:31.997382: step: 476/466, loss: 0.054866403341293335 2023-01-24 05:19:32.730390: step: 478/466, loss: 0.16068577766418457 2023-01-24 05:19:33.494891: step: 480/466, loss: 0.002518736757338047 2023-01-24 05:19:34.247149: step: 482/466, loss: 0.010239574126899242 2023-01-24 05:19:35.076551: step: 484/466, loss: 0.01268444862216711 2023-01-24 05:19:35.816381: step: 486/466, loss: 0.000964211649261415 2023-01-24 05:19:36.493339: step: 488/466, loss: 0.021816113963723183 2023-01-24 05:19:37.287094: step: 490/466, loss: 0.0027528852224349976 2023-01-24 05:19:38.091592: step: 492/466, loss: 0.08536282181739807 2023-01-24 05:19:38.814165: step: 494/466, loss: 0.006941162049770355 2023-01-24 05:19:39.553589: step: 496/466, loss: 0.1977323293685913 2023-01-24 05:19:40.295183: step: 498/466, loss: 0.004771554376929998 2023-01-24 05:19:41.173772: step: 500/466, loss: 0.12913726270198822 2023-01-24 05:19:41.942018: step: 502/466, loss: 0.008971030823886395 2023-01-24 05:19:42.817532: step: 504/466, loss: 0.029957806691527367 2023-01-24 05:19:43.540175: step: 506/466, loss: 1.8468188047409058 2023-01-24 05:19:44.357918: step: 508/466, loss: 0.03492060303688049 2023-01-24 05:19:45.208579: step: 510/466, loss: 0.042045388370752335 2023-01-24 05:19:45.901912: step: 512/466, loss: 0.052512023597955704 2023-01-24 05:19:46.694297: step: 514/466, loss: 0.03430125117301941 2023-01-24 05:19:47.482068: step: 516/466, loss: 0.008694916032254696 2023-01-24 05:19:48.247559: step: 518/466, loss: 0.01283710915595293 2023-01-24 05:19:49.002989: step: 520/466, loss: 0.026418892666697502 2023-01-24 05:19:49.694305: step: 522/466, loss: 0.0028335973620414734 2023-01-24 05:19:50.450231: step: 524/466, loss: 0.0052917106077075005 2023-01-24 05:19:51.180852: step: 526/466, loss: 0.011922935955226421 2023-01-24 05:19:51.857340: step: 528/466, loss: 0.015131733380258083 2023-01-24 05:19:52.576347: step: 530/466, loss: 0.0294162817299366 2023-01-24 05:19:53.283539: step: 532/466, loss: 0.005694697145372629 2023-01-24 05:19:54.079543: step: 534/466, loss: 0.16320542991161346 2023-01-24 05:19:54.820780: step: 536/466, loss: 0.79264235496521 2023-01-24 05:19:55.559521: step: 538/466, loss: 0.019455431029200554 2023-01-24 05:19:56.317506: step: 540/466, loss: 0.0004865480586886406 2023-01-24 05:19:57.077801: step: 542/466, loss: 3.612785577774048 2023-01-24 05:19:57.838899: step: 544/466, loss: 0.16626045107841492 2023-01-24 05:19:58.587934: step: 546/466, loss: 0.027813207358121872 2023-01-24 05:19:59.321910: step: 548/466, loss: 0.006704711355268955 2023-01-24 05:20:00.067553: step: 550/466, loss: 0.011759432032704353 2023-01-24 05:20:00.873624: step: 552/466, loss: 0.01733456179499626 2023-01-24 05:20:01.669864: step: 554/466, loss: 0.02009851485490799 2023-01-24 05:20:02.405233: step: 556/466, loss: 0.015334980562329292 2023-01-24 05:20:03.290398: step: 558/466, loss: 0.11342606693506241 2023-01-24 05:20:04.075016: step: 560/466, loss: 0.005834941752254963 2023-01-24 05:20:04.835485: step: 562/466, loss: 0.12517115473747253 2023-01-24 05:20:05.589164: step: 564/466, loss: 0.002540087793022394 2023-01-24 05:20:06.309099: step: 566/466, loss: 0.025609837844967842 2023-01-24 05:20:07.085689: step: 568/466, loss: 0.013363751582801342 2023-01-24 05:20:07.799815: step: 570/466, loss: 0.06508602946996689 2023-01-24 05:20:08.591013: step: 572/466, loss: 0.08497834205627441 2023-01-24 05:20:09.294883: step: 574/466, loss: 0.3588958978652954 2023-01-24 05:20:10.021749: step: 576/466, loss: 1.6701912879943848 2023-01-24 05:20:10.802871: step: 578/466, loss: 0.10761582106351852 2023-01-24 05:20:11.589114: step: 580/466, loss: 0.07897034287452698 2023-01-24 05:20:12.282673: step: 582/466, loss: 0.20795206725597382 2023-01-24 05:20:12.998072: step: 584/466, loss: 1.709664225578308 2023-01-24 05:20:13.750814: step: 586/466, loss: 0.029606152325868607 2023-01-24 05:20:14.499203: step: 588/466, loss: 0.02808300219476223 2023-01-24 05:20:15.298071: step: 590/466, loss: 0.12041884660720825 2023-01-24 05:20:16.098404: step: 592/466, loss: 0.04004635289311409 2023-01-24 05:20:16.862419: step: 594/466, loss: 0.18785744905471802 2023-01-24 05:20:17.634957: step: 596/466, loss: 0.07838691025972366 2023-01-24 05:20:18.386718: step: 598/466, loss: 0.010151730850338936 2023-01-24 05:20:19.153576: step: 600/466, loss: 0.05482480302453041 2023-01-24 05:20:19.929003: step: 602/466, loss: 0.2743576169013977 2023-01-24 05:20:20.644623: step: 604/466, loss: 0.05277765542268753 2023-01-24 05:20:21.420494: step: 606/466, loss: 0.03785436227917671 2023-01-24 05:20:22.152514: step: 608/466, loss: 0.013006249442696571 2023-01-24 05:20:22.882094: step: 610/466, loss: 0.03416730836033821 2023-01-24 05:20:23.641299: step: 612/466, loss: 0.00351434713229537 2023-01-24 05:20:24.411112: step: 614/466, loss: 0.47471874952316284 2023-01-24 05:20:25.130950: step: 616/466, loss: 0.08355723321437836 2023-01-24 05:20:25.877070: step: 618/466, loss: 0.06152806803584099 2023-01-24 05:20:26.635764: step: 620/466, loss: 0.012297751381993294 2023-01-24 05:20:27.427767: step: 622/466, loss: 0.018981346860527992 2023-01-24 05:20:28.128345: step: 624/466, loss: 0.005464603658765554 2023-01-24 05:20:28.891480: step: 626/466, loss: 0.031064271926879883 2023-01-24 05:20:29.685225: step: 628/466, loss: 0.1384933441877365 2023-01-24 05:20:30.394423: step: 630/466, loss: 0.0034443712793290615 2023-01-24 05:20:31.174607: step: 632/466, loss: 0.04686906933784485 2023-01-24 05:20:31.980860: step: 634/466, loss: 0.03641004487872124 2023-01-24 05:20:32.681516: step: 636/466, loss: 0.01964748091995716 2023-01-24 05:20:33.379977: step: 638/466, loss: 0.0178031288087368 2023-01-24 05:20:34.125570: step: 640/466, loss: 0.022527659311890602 2023-01-24 05:20:34.901989: step: 642/466, loss: 0.38067346811294556 2023-01-24 05:20:35.617999: step: 644/466, loss: 0.010407094843685627 2023-01-24 05:20:36.348852: step: 646/466, loss: 0.006377980578690767 2023-01-24 05:20:37.141752: step: 648/466, loss: 0.06780447065830231 2023-01-24 05:20:37.975363: step: 650/466, loss: 0.18436500430107117 2023-01-24 05:20:38.754536: step: 652/466, loss: 0.02482200786471367 2023-01-24 05:20:39.498875: step: 654/466, loss: 0.019808098673820496 2023-01-24 05:20:40.366914: step: 656/466, loss: 0.07673881947994232 2023-01-24 05:20:41.123032: step: 658/466, loss: 0.09009893983602524 2023-01-24 05:20:41.851364: step: 660/466, loss: 0.0017024546395987272 2023-01-24 05:20:42.596647: step: 662/466, loss: 0.002340559847652912 2023-01-24 05:20:43.384825: step: 664/466, loss: 0.011884375475347042 2023-01-24 05:20:44.152922: step: 666/466, loss: 3.983797550201416 2023-01-24 05:20:44.898616: step: 668/466, loss: 0.04353933781385422 2023-01-24 05:20:45.607732: step: 670/466, loss: 0.006013147532939911 2023-01-24 05:20:46.378814: step: 672/466, loss: 0.05005199462175369 2023-01-24 05:20:47.143089: step: 674/466, loss: 0.0044931103475391865 2023-01-24 05:20:47.961440: step: 676/466, loss: 0.05605557933449745 2023-01-24 05:20:48.757149: step: 678/466, loss: 0.008381667546927929 2023-01-24 05:20:49.452811: step: 680/466, loss: 0.4631694257259369 2023-01-24 05:20:50.177323: step: 682/466, loss: 0.0026287208311259747 2023-01-24 05:20:50.876956: step: 684/466, loss: 0.11688584834337234 2023-01-24 05:20:51.579254: step: 686/466, loss: 0.0012536462163552642 2023-01-24 05:20:52.280564: step: 688/466, loss: 0.0023633234668523073 2023-01-24 05:20:53.048122: step: 690/466, loss: 0.03221137076616287 2023-01-24 05:20:53.783318: step: 692/466, loss: 0.01213639322668314 2023-01-24 05:20:54.567429: step: 694/466, loss: 0.004290018230676651 2023-01-24 05:20:55.478591: step: 696/466, loss: 0.03194379061460495 2023-01-24 05:20:56.303339: step: 698/466, loss: 0.033809881657361984 2023-01-24 05:20:57.053931: step: 700/466, loss: 0.045655541121959686 2023-01-24 05:20:57.778809: step: 702/466, loss: 0.0050416202284395695 2023-01-24 05:20:58.569443: step: 704/466, loss: 0.041732631623744965 2023-01-24 05:20:59.384724: step: 706/466, loss: 0.015561552718281746 2023-01-24 05:21:00.139760: step: 708/466, loss: 0.005210685543715954 2023-01-24 05:21:00.933920: step: 710/466, loss: 2.3857340812683105 2023-01-24 05:21:01.626660: step: 712/466, loss: 0.006181302480399609 2023-01-24 05:21:02.468176: step: 714/466, loss: 0.01496371254324913 2023-01-24 05:21:03.261487: step: 716/466, loss: 0.2443210780620575 2023-01-24 05:21:04.110227: step: 718/466, loss: 0.033997077494859695 2023-01-24 05:21:04.858142: step: 720/466, loss: 0.006322094239294529 2023-01-24 05:21:05.609669: step: 722/466, loss: 0.0022107360418885946 2023-01-24 05:21:06.397184: step: 724/466, loss: 0.01752101257443428 2023-01-24 05:21:07.219444: step: 726/466, loss: 0.11376980692148209 2023-01-24 05:21:07.946688: step: 728/466, loss: 0.011744904331862926 2023-01-24 05:21:08.765149: step: 730/466, loss: 0.005395747721195221 2023-01-24 05:21:09.546771: step: 732/466, loss: 0.04061632230877876 2023-01-24 05:21:10.263172: step: 734/466, loss: 0.026483291760087013 2023-01-24 05:21:10.941629: step: 736/466, loss: 0.026913270354270935 2023-01-24 05:21:11.736908: step: 738/466, loss: 0.018517345190048218 2023-01-24 05:21:12.496488: step: 740/466, loss: 0.32893139123916626 2023-01-24 05:21:13.215425: step: 742/466, loss: 0.00489374715834856 2023-01-24 05:21:14.005036: step: 744/466, loss: 0.017249496653676033 2023-01-24 05:21:14.693394: step: 746/466, loss: 0.01171032153069973 2023-01-24 05:21:15.410426: step: 748/466, loss: 0.016885356977581978 2023-01-24 05:21:16.224936: step: 750/466, loss: 2.5229568481445312 2023-01-24 05:21:16.915705: step: 752/466, loss: 0.016198089346289635 2023-01-24 05:21:17.589320: step: 754/466, loss: 0.0012358203530311584 2023-01-24 05:21:18.310524: step: 756/466, loss: 0.004425358027219772 2023-01-24 05:21:19.058108: step: 758/466, loss: 0.01746828854084015 2023-01-24 05:21:19.930149: step: 760/466, loss: 0.019856218248605728 2023-01-24 05:21:20.791831: step: 762/466, loss: 0.0028859437443315983 2023-01-24 05:21:21.522457: step: 764/466, loss: 0.014554270543158054 2023-01-24 05:21:22.267263: step: 766/466, loss: 0.028981972485780716 2023-01-24 05:21:22.952899: step: 768/466, loss: 0.0030971853993833065 2023-01-24 05:21:23.731518: step: 770/466, loss: 0.013405009172856808 2023-01-24 05:21:24.404385: step: 772/466, loss: 0.06289155781269073 2023-01-24 05:21:25.201809: step: 774/466, loss: 0.05634074658155441 2023-01-24 05:21:25.983488: step: 776/466, loss: 0.014514373615384102 2023-01-24 05:21:26.702953: step: 778/466, loss: 0.0527653768658638 2023-01-24 05:21:27.425797: step: 780/466, loss: 0.012727830559015274 2023-01-24 05:21:28.209569: step: 782/466, loss: 0.006672831252217293 2023-01-24 05:21:28.956768: step: 784/466, loss: 0.20137040317058563 2023-01-24 05:21:29.703626: step: 786/466, loss: 0.02964707463979721 2023-01-24 05:21:30.396806: step: 788/466, loss: 0.013826957903802395 2023-01-24 05:21:31.175456: step: 790/466, loss: 0.0012463852763175964 2023-01-24 05:21:31.977805: step: 792/466, loss: 0.022246506065130234 2023-01-24 05:21:32.746369: step: 794/466, loss: 0.14593353867530823 2023-01-24 05:21:33.473303: step: 796/466, loss: 0.0034577662590891123 2023-01-24 05:21:34.227468: step: 798/466, loss: 0.013602708466351032 2023-01-24 05:21:34.961367: step: 800/466, loss: 0.13226613402366638 2023-01-24 05:21:35.722644: step: 802/466, loss: 0.004429709631949663 2023-01-24 05:21:36.506949: step: 804/466, loss: 0.007877406664192677 2023-01-24 05:21:37.277501: step: 806/466, loss: 0.011734271422028542 2023-01-24 05:21:37.974512: step: 808/466, loss: 0.011828706599771976 2023-01-24 05:21:38.777516: step: 810/466, loss: 0.3028920590877533 2023-01-24 05:21:39.532154: step: 812/466, loss: 0.01951698027551174 2023-01-24 05:21:40.299470: step: 814/466, loss: 0.005089250858873129 2023-01-24 05:21:41.040924: step: 816/466, loss: 0.018252501264214516 2023-01-24 05:21:41.839887: step: 818/466, loss: 0.027791481465101242 2023-01-24 05:21:42.624415: step: 820/466, loss: 0.015957748517394066 2023-01-24 05:21:43.415438: step: 822/466, loss: 0.28690820932388306 2023-01-24 05:21:44.255080: step: 824/466, loss: 0.07727184146642685 2023-01-24 05:21:45.020500: step: 826/466, loss: 0.012947415001690388 2023-01-24 05:21:45.759391: step: 828/466, loss: 0.16258849203586578 2023-01-24 05:21:46.646486: step: 830/466, loss: 0.02158299833536148 2023-01-24 05:21:47.369965: step: 832/466, loss: 0.09814544767141342 2023-01-24 05:21:48.083018: step: 834/466, loss: 0.0013304640306159854 2023-01-24 05:21:48.788216: step: 836/466, loss: 0.037100616842508316 2023-01-24 05:21:49.641432: step: 838/466, loss: 0.008366475813090801 2023-01-24 05:21:50.397538: step: 840/466, loss: 0.08925455808639526 2023-01-24 05:21:51.203308: step: 842/466, loss: 0.011081385426223278 2023-01-24 05:21:51.946841: step: 844/466, loss: 0.03384479507803917 2023-01-24 05:21:52.736128: step: 846/466, loss: 0.02601746656000614 2023-01-24 05:21:53.485060: step: 848/466, loss: 0.030930351465940475 2023-01-24 05:21:54.219916: step: 850/466, loss: 0.015173490159213543 2023-01-24 05:21:54.972794: step: 852/466, loss: 0.02299688011407852 2023-01-24 05:21:55.800020: step: 854/466, loss: 0.0044409967958927155 2023-01-24 05:21:56.580377: step: 856/466, loss: 0.00046178355114534497 2023-01-24 05:21:57.565798: step: 858/466, loss: 0.013453601859509945 2023-01-24 05:21:58.344779: step: 860/466, loss: 0.15057678520679474 2023-01-24 05:21:59.074767: step: 862/466, loss: 0.13931144773960114 2023-01-24 05:21:59.844144: step: 864/466, loss: 0.015180341899394989 2023-01-24 05:22:00.618290: step: 866/466, loss: 0.03371549770236015 2023-01-24 05:22:01.378736: step: 868/466, loss: 0.01678081788122654 2023-01-24 05:22:02.181799: step: 870/466, loss: 0.06515835970640182 2023-01-24 05:22:02.905087: step: 872/466, loss: 0.04113662987947464 2023-01-24 05:22:03.698160: step: 874/466, loss: 0.0009553478448651731 2023-01-24 05:22:04.423584: step: 876/466, loss: 0.4426819980144501 2023-01-24 05:22:05.142997: step: 878/466, loss: 0.03187278285622597 2023-01-24 05:22:05.907704: step: 880/466, loss: 0.01739988848567009 2023-01-24 05:22:06.630972: step: 882/466, loss: 0.017624996602535248 2023-01-24 05:22:07.395718: step: 884/466, loss: 0.0038380082696676254 2023-01-24 05:22:08.196211: step: 886/466, loss: 0.03622297942638397 2023-01-24 05:22:08.969886: step: 888/466, loss: 0.006211594678461552 2023-01-24 05:22:09.755335: step: 890/466, loss: 0.04902772605419159 2023-01-24 05:22:10.468387: step: 892/466, loss: 0.030732234939932823 2023-01-24 05:22:11.145487: step: 894/466, loss: 0.06138194352388382 2023-01-24 05:22:11.877046: step: 896/466, loss: 0.03699515387415886 2023-01-24 05:22:12.647384: step: 898/466, loss: 0.022731278091669083 2023-01-24 05:22:13.391592: step: 900/466, loss: 0.02300518937408924 2023-01-24 05:22:14.093173: step: 902/466, loss: 0.012142459861934185 2023-01-24 05:22:14.988263: step: 904/466, loss: 0.006614815443754196 2023-01-24 05:22:15.806096: step: 906/466, loss: 0.06262435764074326 2023-01-24 05:22:16.545831: step: 908/466, loss: 0.012923638336360455 2023-01-24 05:22:17.264157: step: 910/466, loss: 0.06749510020017624 2023-01-24 05:22:17.979379: step: 912/466, loss: 0.019686004146933556 2023-01-24 05:22:18.722611: step: 914/466, loss: 0.01593194529414177 2023-01-24 05:22:19.511436: step: 916/466, loss: 0.01698552817106247 2023-01-24 05:22:20.197572: step: 918/466, loss: 0.012973749078810215 2023-01-24 05:22:20.959332: step: 920/466, loss: 0.06905797868967056 2023-01-24 05:22:21.830651: step: 922/466, loss: 0.055974330753088 2023-01-24 05:22:22.573849: step: 924/466, loss: 0.009109921753406525 2023-01-24 05:22:23.283627: step: 926/466, loss: 0.035047173500061035 2023-01-24 05:22:23.941878: step: 928/466, loss: 0.018603753298521042 2023-01-24 05:22:24.746947: step: 930/466, loss: 0.10363016277551651 2023-01-24 05:22:25.524505: step: 932/466, loss: 0.09053977578878403 ================================================== Loss: 0.099 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3497852025425554, 'r': 0.29336823439053034, 'f1': 0.31910229003882246}, 'combined': 0.23512800318650073, 'epoch': 26} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37595411443753574, 'r': 0.2672366475785427, 'f1': 0.31240721452754744}, 'combined': 0.19201614161205355, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3231546011554558, 'r': 0.30598509672973895, 'f1': 0.3143355672057942}, 'combined': 0.23161568109900624, 'epoch': 26} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3517728642156163, 'r': 0.2664945941027396, 'f1': 0.30325246915139337}, 'combined': 0.18638932250280763, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35840324384787475, 'r': 0.30399667931688806, 'f1': 0.3289656057494867}, 'combined': 0.24239570949962175, 'epoch': 26} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36993476588028734, 'r': 0.2640105379335961, 'f1': 0.3081233537114963}, 'combined': 0.19031148317474775, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.2571428571428571, 'f1': 0.30508474576271183}, 'combined': 0.20338983050847453, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27586206896551724, 'r': 0.34782608695652173, 'f1': 0.3076923076923077}, 'combined': 0.15384615384615385, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3514281426856457, 'r': 0.3267548195748888, 'f1': 0.33864265470199884}, 'combined': 0.24952616662252544, 'epoch': 23} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3841715799217045, 'r': 0.28937599526569946, 'f1': 0.3301029871919831}, 'combined': 0.20289256773751158, 'epoch': 23} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3642241379310345, 'r': 0.30178571428571427, 'f1': 0.33007812499999994}, 'combined': 0.2200520833333333, 'epoch': 23} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34582504970178923, 'r': 0.33007590132827325, 'f1': 0.3377669902912621}, 'combined': 0.24888094021461418, 'epoch': 19} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36038315043942815, 'r': 0.2939803727200525, 'f1': 0.3238125877697768}, 'combined': 0.20000189244603864, 'epoch': 19} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 19} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:25:09.562608: step: 2/466, loss: 0.0002814007457345724 2023-01-24 05:25:10.326497: step: 4/466, loss: 0.005627429112792015 2023-01-24 05:25:11.153492: step: 6/466, loss: 0.0013453942956402898 2023-01-24 05:25:11.914070: step: 8/466, loss: 0.026067892089486122 2023-01-24 05:25:12.670101: step: 10/466, loss: 6.268831202760339e-05 2023-01-24 05:25:13.381968: step: 12/466, loss: 0.029802482575178146 2023-01-24 05:25:14.240551: step: 14/466, loss: 0.0027200065087527037 2023-01-24 05:25:15.037976: step: 16/466, loss: 0.005693104583770037 2023-01-24 05:25:15.744996: step: 18/466, loss: 0.0035440269857645035 2023-01-24 05:25:16.661312: step: 20/466, loss: 0.11614356935024261 2023-01-24 05:25:17.485482: step: 22/466, loss: 0.021779097616672516 2023-01-24 05:25:18.228842: step: 24/466, loss: 1.1219671964645386 2023-01-24 05:25:18.976923: step: 26/466, loss: 0.006852391641587019 2023-01-24 05:25:19.719525: step: 28/466, loss: 0.031410735100507736 2023-01-24 05:25:20.517428: step: 30/466, loss: 0.013507161289453506 2023-01-24 05:25:21.173792: step: 32/466, loss: 0.0035800025798380375 2023-01-24 05:25:21.934246: step: 34/466, loss: 0.0002280927001265809 2023-01-24 05:25:22.673025: step: 36/466, loss: 0.002048708265647292 2023-01-24 05:25:23.491594: step: 38/466, loss: 0.03521179407835007 2023-01-24 05:25:24.244361: step: 40/466, loss: 0.00017881934763863683 2023-01-24 05:25:24.913290: step: 42/466, loss: 0.01024035457521677 2023-01-24 05:25:25.711076: step: 44/466, loss: 0.02134045958518982 2023-01-24 05:25:26.535447: step: 46/466, loss: 0.004514336585998535 2023-01-24 05:25:27.339711: step: 48/466, loss: 0.004240673966705799 2023-01-24 05:25:28.100527: step: 50/466, loss: 0.058541689068078995 2023-01-24 05:25:28.898198: step: 52/466, loss: 0.043757617473602295 2023-01-24 05:25:29.650015: step: 54/466, loss: 0.02244669944047928 2023-01-24 05:25:30.367472: step: 56/466, loss: 0.022564508020877838 2023-01-24 05:25:31.109181: step: 58/466, loss: 0.0769951343536377 2023-01-24 05:25:31.768239: step: 60/466, loss: 0.000647237931843847 2023-01-24 05:25:32.518112: step: 62/466, loss: 0.006991761736571789 2023-01-24 05:25:33.353304: step: 64/466, loss: 3.979682683944702 2023-01-24 05:25:34.089563: step: 66/466, loss: 0.006263590883463621 2023-01-24 05:25:34.836189: step: 68/466, loss: 0.22763699293136597 2023-01-24 05:25:35.609546: step: 70/466, loss: 0.023374492302536964 2023-01-24 05:25:36.314089: step: 72/466, loss: 0.005913762375712395 2023-01-24 05:25:37.095652: step: 74/466, loss: 0.026825105771422386 2023-01-24 05:25:37.785892: step: 76/466, loss: 0.029739806428551674 2023-01-24 05:25:38.602403: step: 78/466, loss: 0.0008574838866479695 2023-01-24 05:25:39.315433: step: 80/466, loss: 0.006026826333254576 2023-01-24 05:25:40.005888: step: 82/466, loss: 0.04565891996026039 2023-01-24 05:25:40.747299: step: 84/466, loss: 0.026821792125701904 2023-01-24 05:25:41.432443: step: 86/466, loss: 0.0022763977758586407 2023-01-24 05:25:42.153486: step: 88/466, loss: 0.03604477643966675 2023-01-24 05:25:42.864648: step: 90/466, loss: 0.05254192650318146 2023-01-24 05:25:43.578721: step: 92/466, loss: 0.048561904579401016 2023-01-24 05:25:44.367170: step: 94/466, loss: 0.07324948161840439 2023-01-24 05:25:45.146062: step: 96/466, loss: 0.0026386980898678303 2023-01-24 05:25:45.968149: step: 98/466, loss: 0.024073511362075806 2023-01-24 05:25:46.685334: step: 100/466, loss: 0.0024970003869384527 2023-01-24 05:25:47.423180: step: 102/466, loss: 0.031353145837783813 2023-01-24 05:25:48.192078: step: 104/466, loss: 0.038495924323797226 2023-01-24 05:25:48.895496: step: 106/466, loss: 0.0358666330575943 2023-01-24 05:25:49.605091: step: 108/466, loss: 0.008217512629926205 2023-01-24 05:25:50.429898: step: 110/466, loss: 0.020881768316030502 2023-01-24 05:25:51.184748: step: 112/466, loss: 0.016404718160629272 2023-01-24 05:25:51.925651: step: 114/466, loss: 0.06059930473566055 2023-01-24 05:25:52.752040: step: 116/466, loss: 0.060039568692445755 2023-01-24 05:25:53.521660: step: 118/466, loss: 0.02221786417067051 2023-01-24 05:25:54.217966: step: 120/466, loss: 0.007926841266453266 2023-01-24 05:25:54.963855: step: 122/466, loss: 0.06578806042671204 2023-01-24 05:25:55.727598: step: 124/466, loss: 0.025813933461904526 2023-01-24 05:25:56.516341: step: 126/466, loss: 0.003890152322128415 2023-01-24 05:25:57.176767: step: 128/466, loss: 0.08698451519012451 2023-01-24 05:25:57.908376: step: 130/466, loss: 0.028302686288952827 2023-01-24 05:25:58.720632: step: 132/466, loss: 0.026748361065983772 2023-01-24 05:25:59.419808: step: 134/466, loss: 0.0011858418583869934 2023-01-24 05:26:00.245805: step: 136/466, loss: 0.0025568141136318445 2023-01-24 05:26:00.970230: step: 138/466, loss: 0.09356352686882019 2023-01-24 05:26:01.691390: step: 140/466, loss: 0.005866044666618109 2023-01-24 05:26:02.470194: step: 142/466, loss: 0.01024040300399065 2023-01-24 05:26:03.234544: step: 144/466, loss: 0.00738569488748908 2023-01-24 05:26:03.948280: step: 146/466, loss: 0.04120223969221115 2023-01-24 05:26:04.683668: step: 148/466, loss: 0.0715554878115654 2023-01-24 05:26:05.477789: step: 150/466, loss: 0.027549341320991516 2023-01-24 05:26:06.319203: step: 152/466, loss: 0.014845267869532108 2023-01-24 05:26:07.150071: step: 154/466, loss: 0.03739255666732788 2023-01-24 05:26:07.790568: step: 156/466, loss: 0.012479184195399284 2023-01-24 05:26:08.618770: step: 158/466, loss: 0.0043110898695886135 2023-01-24 05:26:09.315311: step: 160/466, loss: 0.020059850066900253 2023-01-24 05:26:10.096580: step: 162/466, loss: 0.01724727638065815 2023-01-24 05:26:10.917973: step: 164/466, loss: 0.010996063239872456 2023-01-24 05:26:11.669765: step: 166/466, loss: 0.04114590212702751 2023-01-24 05:26:12.416077: step: 168/466, loss: 0.03185999393463135 2023-01-24 05:26:13.195563: step: 170/466, loss: 0.039986953139305115 2023-01-24 05:26:13.926740: step: 172/466, loss: 0.003128435928374529 2023-01-24 05:26:14.666435: step: 174/466, loss: 0.0022137167397886515 2023-01-24 05:26:15.442425: step: 176/466, loss: 0.008617050014436245 2023-01-24 05:26:16.208415: step: 178/466, loss: 0.0037066603545099497 2023-01-24 05:26:17.008556: step: 180/466, loss: 0.0012156914453953505 2023-01-24 05:26:17.815538: step: 182/466, loss: 0.013374599628150463 2023-01-24 05:26:18.656165: step: 184/466, loss: 0.02156377211213112 2023-01-24 05:26:19.357558: step: 186/466, loss: 0.002165052341297269 2023-01-24 05:26:20.185578: step: 188/466, loss: 0.03450706973671913 2023-01-24 05:26:20.928374: step: 190/466, loss: 0.00039736999315209687 2023-01-24 05:26:21.691703: step: 192/466, loss: 0.008332728408277035 2023-01-24 05:26:22.496315: step: 194/466, loss: 0.019774070009589195 2023-01-24 05:26:23.206157: step: 196/466, loss: 0.0986318439245224 2023-01-24 05:26:24.028892: step: 198/466, loss: 0.0016368558863177896 2023-01-24 05:26:24.756260: step: 200/466, loss: 0.044423457235097885 2023-01-24 05:26:25.422363: step: 202/466, loss: 0.029822947457432747 2023-01-24 05:26:26.229770: step: 204/466, loss: 0.07450821995735168 2023-01-24 05:26:26.952066: step: 206/466, loss: 0.03527345880866051 2023-01-24 05:26:27.736592: step: 208/466, loss: 0.04506620019674301 2023-01-24 05:26:28.411086: step: 210/466, loss: 0.014406089670956135 2023-01-24 05:26:29.234760: step: 212/466, loss: 0.05044175311923027 2023-01-24 05:26:30.028413: step: 214/466, loss: 0.031075172126293182 2023-01-24 05:26:30.730396: step: 216/466, loss: 0.0378032922744751 2023-01-24 05:26:31.434413: step: 218/466, loss: 0.0018708063289523125 2023-01-24 05:26:32.358878: step: 220/466, loss: 0.06911762803792953 2023-01-24 05:26:33.089056: step: 222/466, loss: 0.036743901669979095 2023-01-24 05:26:33.826683: step: 224/466, loss: 0.02297184430062771 2023-01-24 05:26:34.583277: step: 226/466, loss: 0.006796710193157196 2023-01-24 05:26:35.261559: step: 228/466, loss: 0.01702827587723732 2023-01-24 05:26:35.930922: step: 230/466, loss: 0.025485971942543983 2023-01-24 05:26:36.690410: step: 232/466, loss: 0.021808648481965065 2023-01-24 05:26:37.440697: step: 234/466, loss: 0.0580766499042511 2023-01-24 05:26:38.282549: step: 236/466, loss: 0.01121596060693264 2023-01-24 05:26:38.991993: step: 238/466, loss: 0.00727180577814579 2023-01-24 05:26:39.834782: step: 240/466, loss: 0.016058299690485 2023-01-24 05:26:40.694384: step: 242/466, loss: 0.010801510885357857 2023-01-24 05:26:41.501565: step: 244/466, loss: 0.014843948185443878 2023-01-24 05:26:42.261973: step: 246/466, loss: 0.0010358254658058286 2023-01-24 05:26:43.007490: step: 248/466, loss: 0.021506547927856445 2023-01-24 05:26:43.815430: step: 250/466, loss: 0.014431829564273357 2023-01-24 05:26:44.512259: step: 252/466, loss: 0.0032666679471731186 2023-01-24 05:26:45.243663: step: 254/466, loss: 0.0034550423733890057 2023-01-24 05:26:45.973651: step: 256/466, loss: 0.03942679986357689 2023-01-24 05:26:46.691171: step: 258/466, loss: 0.01921839639544487 2023-01-24 05:26:47.572195: step: 260/466, loss: 0.006756064482033253 2023-01-24 05:26:48.397666: step: 262/466, loss: 0.013162982650101185 2023-01-24 05:26:49.121152: step: 264/466, loss: 0.01315717026591301 2023-01-24 05:26:49.867831: step: 266/466, loss: 0.004986981861293316 2023-01-24 05:26:50.596082: step: 268/466, loss: 0.0030614964198321104 2023-01-24 05:26:51.403826: step: 270/466, loss: 0.043415263295173645 2023-01-24 05:26:52.130545: step: 272/466, loss: 0.006690033245831728 2023-01-24 05:26:52.860386: step: 274/466, loss: 0.0012875624233856797 2023-01-24 05:26:53.644817: step: 276/466, loss: 0.03934786096215248 2023-01-24 05:26:54.337003: step: 278/466, loss: 0.00044055673060938716 2023-01-24 05:26:55.191416: step: 280/466, loss: 0.016125798225402832 2023-01-24 05:26:55.871972: step: 282/466, loss: 0.0022109998390078545 2023-01-24 05:26:56.706923: step: 284/466, loss: 0.016009317710995674 2023-01-24 05:26:57.501008: step: 286/466, loss: 0.051596127450466156 2023-01-24 05:26:58.214352: step: 288/466, loss: 0.01629328913986683 2023-01-24 05:26:58.973621: step: 290/466, loss: 0.03265248239040375 2023-01-24 05:26:59.663387: step: 292/466, loss: 0.005238786339759827 2023-01-24 05:27:00.419782: step: 294/466, loss: 0.011197719722986221 2023-01-24 05:27:01.200590: step: 296/466, loss: 0.06267337501049042 2023-01-24 05:27:01.959902: step: 298/466, loss: 0.006046592723578215 2023-01-24 05:27:02.631799: step: 300/466, loss: 0.02558089606463909 2023-01-24 05:27:03.293356: step: 302/466, loss: 0.0013408252270892262 2023-01-24 05:27:04.068379: step: 304/466, loss: 0.001228883396834135 2023-01-24 05:27:04.971500: step: 306/466, loss: 0.018673928454518318 2023-01-24 05:27:05.857474: step: 308/466, loss: 0.008346027694642544 2023-01-24 05:27:06.638396: step: 310/466, loss: 0.004616781137883663 2023-01-24 05:27:07.420293: step: 312/466, loss: 0.042829494923353195 2023-01-24 05:27:08.153613: step: 314/466, loss: 0.003077883506193757 2023-01-24 05:27:08.907432: step: 316/466, loss: 0.02967197820544243 2023-01-24 05:27:09.623618: step: 318/466, loss: 0.02122427523136139 2023-01-24 05:27:10.465172: step: 320/466, loss: 0.01630318909883499 2023-01-24 05:27:11.256849: step: 322/466, loss: 0.0977238118648529 2023-01-24 05:27:12.003308: step: 324/466, loss: 0.02882983162999153 2023-01-24 05:27:12.756953: step: 326/466, loss: 0.1824617236852646 2023-01-24 05:27:13.474693: step: 328/466, loss: 0.0032043601386249065 2023-01-24 05:27:14.145395: step: 330/466, loss: 0.002083337400108576 2023-01-24 05:27:14.920346: step: 332/466, loss: 0.0006186572136357427 2023-01-24 05:27:15.670784: step: 334/466, loss: 0.08247829973697662 2023-01-24 05:27:16.531151: step: 336/466, loss: 0.010414715856313705 2023-01-24 05:27:17.384564: step: 338/466, loss: 0.05850432068109512 2023-01-24 05:27:18.089185: step: 340/466, loss: 0.011604820378124714 2023-01-24 05:27:18.895358: step: 342/466, loss: 0.00664097722619772 2023-01-24 05:27:19.748759: step: 344/466, loss: 0.3574390709400177 2023-01-24 05:27:20.691569: step: 346/466, loss: 0.2645539939403534 2023-01-24 05:27:21.533502: step: 348/466, loss: 0.00910822581499815 2023-01-24 05:27:22.297411: step: 350/466, loss: 0.012043699622154236 2023-01-24 05:27:23.071880: step: 352/466, loss: 0.0147309685125947 2023-01-24 05:27:23.750037: step: 354/466, loss: 0.0217413492500782 2023-01-24 05:27:24.498170: step: 356/466, loss: 0.006431126035749912 2023-01-24 05:27:25.179069: step: 358/466, loss: 0.02148101106286049 2023-01-24 05:27:25.922158: step: 360/466, loss: 0.062402743846178055 2023-01-24 05:27:26.631362: step: 362/466, loss: 0.01829441823065281 2023-01-24 05:27:27.386211: step: 364/466, loss: 0.06198782101273537 2023-01-24 05:27:28.121552: step: 366/466, loss: 0.02825883962213993 2023-01-24 05:27:28.927156: step: 368/466, loss: 0.025904733687639236 2023-01-24 05:27:29.717100: step: 370/466, loss: 0.08099622279405594 2023-01-24 05:27:30.492270: step: 372/466, loss: 0.023321600630879402 2023-01-24 05:27:31.236738: step: 374/466, loss: 0.025583907961845398 2023-01-24 05:27:31.980844: step: 376/466, loss: 0.0027648108080029488 2023-01-24 05:27:32.776230: step: 378/466, loss: 0.004878263454884291 2023-01-24 05:27:33.589229: step: 380/466, loss: 0.053702231496572495 2023-01-24 05:27:34.312917: step: 382/466, loss: 0.0037088815588504076 2023-01-24 05:27:35.016050: step: 384/466, loss: 0.0027306238189339638 2023-01-24 05:27:35.702151: step: 386/466, loss: 0.029041174799203873 2023-01-24 05:27:36.434044: step: 388/466, loss: 0.024189729243516922 2023-01-24 05:27:37.262523: step: 390/466, loss: 0.06975167244672775 2023-01-24 05:27:37.937146: step: 392/466, loss: 0.09673363715410233 2023-01-24 05:27:38.702164: step: 394/466, loss: 0.021510764956474304 2023-01-24 05:27:39.405656: step: 396/466, loss: 0.0034308144822716713 2023-01-24 05:27:40.146071: step: 398/466, loss: 0.353545606136322 2023-01-24 05:27:40.884588: step: 400/466, loss: 0.1800854206085205 2023-01-24 05:27:41.685010: step: 402/466, loss: 0.0428028479218483 2023-01-24 05:27:42.478287: step: 404/466, loss: 0.05135725438594818 2023-01-24 05:27:43.265494: step: 406/466, loss: 0.018109343945980072 2023-01-24 05:27:44.029854: step: 408/466, loss: 0.012806784361600876 2023-01-24 05:27:44.790665: step: 410/466, loss: 0.13045519590377808 2023-01-24 05:27:45.556460: step: 412/466, loss: 0.0860825628042221 2023-01-24 05:27:46.331777: step: 414/466, loss: 0.005686786957085133 2023-01-24 05:27:47.021909: step: 416/466, loss: 0.013734704814851284 2023-01-24 05:27:47.755978: step: 418/466, loss: 0.011565088294446468 2023-01-24 05:27:48.530546: step: 420/466, loss: 0.0019097490003332496 2023-01-24 05:27:49.133488: step: 422/466, loss: 0.004762308672070503 2023-01-24 05:27:49.922771: step: 424/466, loss: 0.009719901718199253 2023-01-24 05:27:50.659675: step: 426/466, loss: 0.01680520363152027 2023-01-24 05:27:51.336734: step: 428/466, loss: 0.04889770224690437 2023-01-24 05:27:52.097421: step: 430/466, loss: 0.04680616408586502 2023-01-24 05:27:52.875500: step: 432/466, loss: 0.0015351184410974383 2023-01-24 05:27:53.620947: step: 434/466, loss: 0.017736423760652542 2023-01-24 05:27:54.436415: step: 436/466, loss: 0.015004309825599194 2023-01-24 05:27:55.209113: step: 438/466, loss: 0.0015911321388557553 2023-01-24 05:27:55.883239: step: 440/466, loss: 0.1260666847229004 2023-01-24 05:27:56.655224: step: 442/466, loss: 0.05454300716519356 2023-01-24 05:27:57.352474: step: 444/466, loss: 0.029246529564261436 2023-01-24 05:27:58.041691: step: 446/466, loss: 0.009855160489678383 2023-01-24 05:27:58.740515: step: 448/466, loss: 0.0028746260795742273 2023-01-24 05:27:59.427320: step: 450/466, loss: 0.005132277961820364 2023-01-24 05:28:00.290560: step: 452/466, loss: 0.008061932399868965 2023-01-24 05:28:00.953233: step: 454/466, loss: 0.044358186423778534 2023-01-24 05:28:01.702613: step: 456/466, loss: 0.029368244111537933 2023-01-24 05:28:02.398517: step: 458/466, loss: 0.005659927614033222 2023-01-24 05:28:03.149896: step: 460/466, loss: 0.06808044016361237 2023-01-24 05:28:03.912441: step: 462/466, loss: 0.0015544743509963155 2023-01-24 05:28:04.701502: step: 464/466, loss: 0.0021502196323126554 2023-01-24 05:28:05.386644: step: 466/466, loss: 0.021343419328331947 2023-01-24 05:28:06.159541: step: 468/466, loss: 0.002755869412794709 2023-01-24 05:28:06.991704: step: 470/466, loss: 0.007736708037555218 2023-01-24 05:28:07.716562: step: 472/466, loss: 0.005857081618160009 2023-01-24 05:28:08.461012: step: 474/466, loss: 0.046281494200229645 2023-01-24 05:28:09.187940: step: 476/466, loss: 0.041273847222328186 2023-01-24 05:28:09.948589: step: 478/466, loss: 0.0016959874192252755 2023-01-24 05:28:10.747982: step: 480/466, loss: 0.5545917749404907 2023-01-24 05:28:11.588216: step: 482/466, loss: 0.3655919134616852 2023-01-24 05:28:12.370711: step: 484/466, loss: 0.0027146392967551947 2023-01-24 05:28:13.105408: step: 486/466, loss: 0.09501084685325623 2023-01-24 05:28:13.859951: step: 488/466, loss: 0.007031205575913191 2023-01-24 05:28:14.571186: step: 490/466, loss: 0.05125099793076515 2023-01-24 05:28:15.255315: step: 492/466, loss: 0.04403087496757507 2023-01-24 05:28:16.007024: step: 494/466, loss: 0.04046096280217171 2023-01-24 05:28:16.671807: step: 496/466, loss: 0.006504176650196314 2023-01-24 05:28:17.391544: step: 498/466, loss: 0.01574026048183441 2023-01-24 05:28:18.097577: step: 500/466, loss: 0.00379569036886096 2023-01-24 05:28:18.824008: step: 502/466, loss: 0.02817787230014801 2023-01-24 05:28:19.597707: step: 504/466, loss: 0.0014384161913767457 2023-01-24 05:28:20.318090: step: 506/466, loss: 0.0026149300392717123 2023-01-24 05:28:21.022034: step: 508/466, loss: 0.06416348367929459 2023-01-24 05:28:21.728915: step: 510/466, loss: 0.0029873487073928118 2023-01-24 05:28:22.587122: step: 512/466, loss: 0.004441775381565094 2023-01-24 05:28:23.306512: step: 514/466, loss: 0.037997715175151825 2023-01-24 05:28:24.138760: step: 516/466, loss: 0.004481486044824123 2023-01-24 05:28:24.823548: step: 518/466, loss: 0.0038423393853008747 2023-01-24 05:28:25.591146: step: 520/466, loss: 0.012804090976715088 2023-01-24 05:28:26.279240: step: 522/466, loss: 1.297904372215271 2023-01-24 05:28:26.992362: step: 524/466, loss: 0.016443302854895592 2023-01-24 05:28:27.769951: step: 526/466, loss: 0.009351842105388641 2023-01-24 05:28:28.516059: step: 528/466, loss: 0.012461633421480656 2023-01-24 05:28:29.290702: step: 530/466, loss: 0.05990233272314072 2023-01-24 05:28:30.132516: step: 532/466, loss: 0.00506456708535552 2023-01-24 05:28:30.842419: step: 534/466, loss: 0.00388672505505383 2023-01-24 05:28:31.586818: step: 536/466, loss: 0.00025387172354385257 2023-01-24 05:28:32.241361: step: 538/466, loss: 0.03435612469911575 2023-01-24 05:28:32.983348: step: 540/466, loss: 0.07269947230815887 2023-01-24 05:28:33.779656: step: 542/466, loss: 0.007448124699294567 2023-01-24 05:28:34.440770: step: 544/466, loss: 0.03401390090584755 2023-01-24 05:28:35.266888: step: 546/466, loss: 0.060475341975688934 2023-01-24 05:28:36.004496: step: 548/466, loss: 0.010061251930892467 2023-01-24 05:28:36.869085: step: 550/466, loss: 0.06197541579604149 2023-01-24 05:28:37.584609: step: 552/466, loss: 0.010373301804065704 2023-01-24 05:28:38.402242: step: 554/466, loss: 0.20099429786205292 2023-01-24 05:28:39.228732: step: 556/466, loss: 0.010779143311083317 2023-01-24 05:28:39.991856: step: 558/466, loss: 0.022128930315375328 2023-01-24 05:28:40.715036: step: 560/466, loss: 0.025541655719280243 2023-01-24 05:28:41.451711: step: 562/466, loss: 0.012778117321431637 2023-01-24 05:28:42.121069: step: 564/466, loss: 0.048351503908634186 2023-01-24 05:28:42.887187: step: 566/466, loss: 0.02952878549695015 2023-01-24 05:28:43.678613: step: 568/466, loss: 0.012676065787672997 2023-01-24 05:28:44.432545: step: 570/466, loss: 0.20164689421653748 2023-01-24 05:28:45.234996: step: 572/466, loss: 0.04054632782936096 2023-01-24 05:28:46.063899: step: 574/466, loss: 0.12637276947498322 2023-01-24 05:28:46.839814: step: 576/466, loss: 0.07379309087991714 2023-01-24 05:28:47.573339: step: 578/466, loss: 0.009364991448819637 2023-01-24 05:28:48.308502: step: 580/466, loss: 0.24854327738285065 2023-01-24 05:28:49.015465: step: 582/466, loss: 0.0017289548413828015 2023-01-24 05:28:49.784266: step: 584/466, loss: 0.029230041429400444 2023-01-24 05:28:50.526386: step: 586/466, loss: 0.01573510281741619 2023-01-24 05:28:51.310814: step: 588/466, loss: 0.030351882800459862 2023-01-24 05:28:51.988455: step: 590/466, loss: 0.08203182369470596 2023-01-24 05:28:52.787077: step: 592/466, loss: 0.05282087251543999 2023-01-24 05:28:53.526816: step: 594/466, loss: 0.00015412727952934802 2023-01-24 05:28:54.300153: step: 596/466, loss: 0.006411698181182146 2023-01-24 05:28:55.050703: step: 598/466, loss: 0.052745647728443146 2023-01-24 05:28:55.749567: step: 600/466, loss: 0.017853165045380592 2023-01-24 05:28:56.565617: step: 602/466, loss: 0.07674769312143326 2023-01-24 05:28:57.282653: step: 604/466, loss: 0.04152761772274971 2023-01-24 05:28:58.016127: step: 606/466, loss: 0.05247914791107178 2023-01-24 05:28:58.723328: step: 608/466, loss: 0.03986712917685509 2023-01-24 05:28:59.508425: step: 610/466, loss: 0.0596633218228817 2023-01-24 05:29:00.243761: step: 612/466, loss: 0.0019677767995744944 2023-01-24 05:29:01.041219: step: 614/466, loss: 0.09488515555858612 2023-01-24 05:29:01.759075: step: 616/466, loss: 0.028042137622833252 2023-01-24 05:29:02.578526: step: 618/466, loss: 0.06155802309513092 2023-01-24 05:29:03.398014: step: 620/466, loss: 0.017027219757437706 2023-01-24 05:29:04.287360: step: 622/466, loss: 0.0045541031286120415 2023-01-24 05:29:05.076837: step: 624/466, loss: 0.06806767731904984 2023-01-24 05:29:05.870968: step: 626/466, loss: 0.012947620823979378 2023-01-24 05:29:06.660009: step: 628/466, loss: 0.008482400327920914 2023-01-24 05:29:07.524247: step: 630/466, loss: 0.008181189186871052 2023-01-24 05:29:08.200416: step: 632/466, loss: 0.003240967635065317 2023-01-24 05:29:08.922290: step: 634/466, loss: 0.002436768962070346 2023-01-24 05:29:09.666216: step: 636/466, loss: 0.004692391492426395 2023-01-24 05:29:10.538133: step: 638/466, loss: 0.006729860324412584 2023-01-24 05:29:11.260708: step: 640/466, loss: 0.014092906378209591 2023-01-24 05:29:12.074292: step: 642/466, loss: 0.04447159916162491 2023-01-24 05:29:12.907180: step: 644/466, loss: 0.016485348343849182 2023-01-24 05:29:13.651584: step: 646/466, loss: 0.04199531674385071 2023-01-24 05:29:14.381428: step: 648/466, loss: 0.018296226859092712 2023-01-24 05:29:15.194169: step: 650/466, loss: 0.027963055297732353 2023-01-24 05:29:15.943366: step: 652/466, loss: 0.00020877993665635586 2023-01-24 05:29:16.672216: step: 654/466, loss: 0.006149666849523783 2023-01-24 05:29:17.312670: step: 656/466, loss: 0.0022929850965738297 2023-01-24 05:29:17.988898: step: 658/466, loss: 0.013425644487142563 2023-01-24 05:29:18.801162: step: 660/466, loss: 0.002585696056485176 2023-01-24 05:29:19.550283: step: 662/466, loss: 0.018005739897489548 2023-01-24 05:29:20.367999: step: 664/466, loss: 0.01200743205845356 2023-01-24 05:29:21.096649: step: 666/466, loss: 0.051574330776929855 2023-01-24 05:29:21.834638: step: 668/466, loss: 0.02668682672083378 2023-01-24 05:29:22.546232: step: 670/466, loss: 0.02358873188495636 2023-01-24 05:29:23.364155: step: 672/466, loss: 0.025741780176758766 2023-01-24 05:29:24.084181: step: 674/466, loss: 0.0008982737781479955 2023-01-24 05:29:24.871636: step: 676/466, loss: 0.013151212595403194 2023-01-24 05:29:25.643884: step: 678/466, loss: 0.05363275855779648 2023-01-24 05:29:26.463060: step: 680/466, loss: 0.198809415102005 2023-01-24 05:29:27.180241: step: 682/466, loss: 0.14452099800109863 2023-01-24 05:29:27.911465: step: 684/466, loss: 0.7772390246391296 2023-01-24 05:29:28.719415: step: 686/466, loss: 0.014220787212252617 2023-01-24 05:29:29.497999: step: 688/466, loss: 0.006663024891167879 2023-01-24 05:29:30.278063: step: 690/466, loss: 1.1887304782867432 2023-01-24 05:29:31.087355: step: 692/466, loss: 0.056968383491039276 2023-01-24 05:29:31.817336: step: 694/466, loss: 0.010404759086668491 2023-01-24 05:29:32.605613: step: 696/466, loss: 0.15037499368190765 2023-01-24 05:29:33.383676: step: 698/466, loss: 0.006027761846780777 2023-01-24 05:29:34.129749: step: 700/466, loss: 0.013600733131170273 2023-01-24 05:29:34.973985: step: 702/466, loss: 0.03774077072739601 2023-01-24 05:29:35.673024: step: 704/466, loss: 0.006597063969820738 2023-01-24 05:29:36.475977: step: 706/466, loss: 0.005939188413321972 2023-01-24 05:29:37.235600: step: 708/466, loss: 0.016573762521147728 2023-01-24 05:29:37.976911: step: 710/466, loss: 0.08055862039327621 2023-01-24 05:29:38.940662: step: 712/466, loss: 0.013921476900577545 2023-01-24 05:29:39.742390: step: 714/466, loss: 0.010552269406616688 2023-01-24 05:29:40.608946: step: 716/466, loss: 0.03871457651257515 2023-01-24 05:29:41.395451: step: 718/466, loss: 0.017797337844967842 2023-01-24 05:29:42.108001: step: 720/466, loss: 0.0022084051743149757 2023-01-24 05:29:42.901400: step: 722/466, loss: 0.33818894624710083 2023-01-24 05:29:43.632358: step: 724/466, loss: 0.016701536253094673 2023-01-24 05:29:44.374544: step: 726/466, loss: 0.005369607359170914 2023-01-24 05:29:45.103958: step: 728/466, loss: 0.01709195412695408 2023-01-24 05:29:45.826766: step: 730/466, loss: 0.03921685367822647 2023-01-24 05:29:46.536739: step: 732/466, loss: 0.002594085643067956 2023-01-24 05:29:47.289700: step: 734/466, loss: 0.05583988502621651 2023-01-24 05:29:47.976212: step: 736/466, loss: 0.007267627865076065 2023-01-24 05:29:48.751197: step: 738/466, loss: 0.017063172534108162 2023-01-24 05:29:49.493871: step: 740/466, loss: 0.003912733867764473 2023-01-24 05:29:50.258557: step: 742/466, loss: 0.0022849079687148333 2023-01-24 05:29:50.927320: step: 744/466, loss: 0.0050977920182049274 2023-01-24 05:29:51.754173: step: 746/466, loss: 0.008796028792858124 2023-01-24 05:29:52.528826: step: 748/466, loss: 0.033342909067869186 2023-01-24 05:29:53.285217: step: 750/466, loss: 0.00955971609801054 2023-01-24 05:29:54.078104: step: 752/466, loss: 0.015114396810531616 2023-01-24 05:29:54.792669: step: 754/466, loss: 1.1177012920379639 2023-01-24 05:29:55.517916: step: 756/466, loss: 0.013771473430097103 2023-01-24 05:29:56.376599: step: 758/466, loss: 0.00066518341191113 2023-01-24 05:29:57.094713: step: 760/466, loss: 0.009900188073515892 2023-01-24 05:29:57.834517: step: 762/466, loss: 0.011259110644459724 2023-01-24 05:29:58.569032: step: 764/466, loss: 0.011412428691983223 2023-01-24 05:29:59.298578: step: 766/466, loss: 0.23429237306118011 2023-01-24 05:30:00.108332: step: 768/466, loss: 0.008612806908786297 2023-01-24 05:30:00.932748: step: 770/466, loss: 0.0014791837893426418 2023-01-24 05:30:01.710937: step: 772/466, loss: 0.005687447264790535 2023-01-24 05:30:02.514952: step: 774/466, loss: 0.051985085010528564 2023-01-24 05:30:03.210442: step: 776/466, loss: 0.0002448662417009473 2023-01-24 05:30:03.931330: step: 778/466, loss: 0.02175869606435299 2023-01-24 05:30:04.576711: step: 780/466, loss: 0.025300780311226845 2023-01-24 05:30:05.372745: step: 782/466, loss: 0.04623227193951607 2023-01-24 05:30:06.091996: step: 784/466, loss: 0.004041609354317188 2023-01-24 05:30:06.769095: step: 786/466, loss: 0.02221454679965973 2023-01-24 05:30:07.562541: step: 788/466, loss: 0.00526766199618578 2023-01-24 05:30:08.258936: step: 790/466, loss: 0.03262951225042343 2023-01-24 05:30:09.044400: step: 792/466, loss: 0.0682738646864891 2023-01-24 05:30:09.797589: step: 794/466, loss: 0.0276656411588192 2023-01-24 05:30:10.540485: step: 796/466, loss: 0.041441094130277634 2023-01-24 05:30:11.292454: step: 798/466, loss: 0.1558869183063507 2023-01-24 05:30:12.099533: step: 800/466, loss: 0.013775553554296494 2023-01-24 05:30:12.837616: step: 802/466, loss: 0.03571357578039169 2023-01-24 05:30:13.601151: step: 804/466, loss: 0.047355767339468 2023-01-24 05:30:14.477865: step: 806/466, loss: 0.01611384190618992 2023-01-24 05:30:15.453571: step: 808/466, loss: 0.03671904653310776 2023-01-24 05:30:16.246577: step: 810/466, loss: 0.14984849095344543 2023-01-24 05:30:17.047190: step: 812/466, loss: 0.04282063618302345 2023-01-24 05:30:17.804832: step: 814/466, loss: 1.5589016675949097 2023-01-24 05:30:18.545211: step: 816/466, loss: 0.007101284805685282 2023-01-24 05:30:19.300348: step: 818/466, loss: 0.7906081080436707 2023-01-24 05:30:20.070882: step: 820/466, loss: 0.008964190259575844 2023-01-24 05:30:20.822463: step: 822/466, loss: 0.0995369628071785 2023-01-24 05:30:21.717696: step: 824/466, loss: 0.16250324249267578 2023-01-24 05:30:22.440922: step: 826/466, loss: 0.01729477196931839 2023-01-24 05:30:23.209098: step: 828/466, loss: 0.0016664626309648156 2023-01-24 05:30:23.883971: step: 830/466, loss: 0.02467237040400505 2023-01-24 05:30:24.619162: step: 832/466, loss: 0.8608911037445068 2023-01-24 05:30:25.326636: step: 834/466, loss: 0.015265265479683876 2023-01-24 05:30:26.085864: step: 836/466, loss: 0.04130096361041069 2023-01-24 05:30:26.872024: step: 838/466, loss: 0.02975635416805744 2023-01-24 05:30:27.565950: step: 840/466, loss: 0.06842043995857239 2023-01-24 05:30:28.375197: step: 842/466, loss: 0.005540946964174509 2023-01-24 05:30:29.105564: step: 844/466, loss: 0.06130402162671089 2023-01-24 05:30:29.824866: step: 846/466, loss: 0.030608119443058968 2023-01-24 05:30:30.596634: step: 848/466, loss: 0.0076465848833322525 2023-01-24 05:30:31.352682: step: 850/466, loss: 0.033506572246551514 2023-01-24 05:30:32.113570: step: 852/466, loss: 0.0032035030890256166 2023-01-24 05:30:32.792483: step: 854/466, loss: 0.02916513755917549 2023-01-24 05:30:33.524080: step: 856/466, loss: 0.004772978834807873 2023-01-24 05:30:34.290200: step: 858/466, loss: 4.942668601870537e-05 2023-01-24 05:30:35.145502: step: 860/466, loss: 0.009504769928753376 2023-01-24 05:30:35.909241: step: 862/466, loss: 0.1233750507235527 2023-01-24 05:30:36.547849: step: 864/466, loss: 0.0032240746077150106 2023-01-24 05:30:37.344361: step: 866/466, loss: 0.038527362048625946 2023-01-24 05:30:38.075384: step: 868/466, loss: 0.4711555242538452 2023-01-24 05:30:38.840967: step: 870/466, loss: 0.012075904756784439 2023-01-24 05:30:39.560833: step: 872/466, loss: 0.32846665382385254 2023-01-24 05:30:40.391354: step: 874/466, loss: 0.1630748063325882 2023-01-24 05:30:41.211822: step: 876/466, loss: 0.02237537130713463 2023-01-24 05:30:42.060311: step: 878/466, loss: 0.03680684044957161 2023-01-24 05:30:42.834647: step: 880/466, loss: 0.05501696467399597 2023-01-24 05:30:43.627706: step: 882/466, loss: 0.04206734150648117 2023-01-24 05:30:44.332709: step: 884/466, loss: 0.1176481768488884 2023-01-24 05:30:45.201874: step: 886/466, loss: 0.005531415343284607 2023-01-24 05:30:45.944329: step: 888/466, loss: 0.06910202652215958 2023-01-24 05:30:46.595289: step: 890/466, loss: 0.0008402147796005011 2023-01-24 05:30:47.327014: step: 892/466, loss: 0.0012181682977825403 2023-01-24 05:30:48.155947: step: 894/466, loss: 0.07599858939647675 2023-01-24 05:30:48.932611: step: 896/466, loss: 0.0009424823219887912 2023-01-24 05:30:49.776890: step: 898/466, loss: 0.04718204215168953 2023-01-24 05:30:50.537210: step: 900/466, loss: 0.032861463725566864 2023-01-24 05:30:51.213117: step: 902/466, loss: 0.017354751005768776 2023-01-24 05:30:51.920285: step: 904/466, loss: 0.032805535942316055 2023-01-24 05:30:52.675178: step: 906/466, loss: 0.04162294790148735 2023-01-24 05:30:53.428170: step: 908/466, loss: 0.0005440631066448987 2023-01-24 05:30:54.184374: step: 910/466, loss: 0.2701677680015564 2023-01-24 05:30:54.963093: step: 912/466, loss: 0.3160749673843384 2023-01-24 05:30:55.682179: step: 914/466, loss: 0.004833092913031578 2023-01-24 05:30:56.433370: step: 916/466, loss: 0.11880436539649963 2023-01-24 05:30:57.206058: step: 918/466, loss: 0.00848419964313507 2023-01-24 05:30:57.855128: step: 920/466, loss: 0.029202762991189957 2023-01-24 05:30:58.691957: step: 922/466, loss: 0.0001330649247393012 2023-01-24 05:30:59.435278: step: 924/466, loss: 0.011204649694263935 2023-01-24 05:31:00.287426: step: 926/466, loss: 0.0641421526670456 2023-01-24 05:31:01.048134: step: 928/466, loss: 0.016704823821783066 2023-01-24 05:31:01.723991: step: 930/466, loss: 0.004538416862487793 2023-01-24 05:31:02.558250: step: 932/466, loss: 0.002167261205613613 ================================================== Loss: 0.064 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32441632674967796, 'r': 0.33734373255943745, 'f1': 0.33075376196990425}, 'combined': 0.24371329829361366, 'epoch': 27} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3464548395999559, 'r': 0.29601773990082886, 'f1': 0.3192565157435107}, 'combined': 0.1962259560179627, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29819955065359477, 'r': 0.3395061297763887, 'f1': 0.31751504949806014}, 'combined': 0.23395845752488642, 'epoch': 27} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3273005416732329, 'r': 0.2958184271795337, 'f1': 0.3107641920484132}, 'combined': 0.19100628389317104, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3422116136600626, 'r': 0.35517417478354985, 'f1': 0.34857242432288904}, 'combined': 0.25684283897476035, 'epoch': 27} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3405583370631552, 'r': 0.28891387520349127, 'f1': 0.31261754522721885}, 'combined': 0.19308730734622345, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30514705882352944, 'r': 0.29642857142857143, 'f1': 0.3007246376811594}, 'combined': 0.20048309178743962, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2222222222222222, 'r': 0.34782608695652173, 'f1': 0.2711864406779661}, 'combined': 0.13559322033898305, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 27} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3514281426856457, 'r': 0.3267548195748888, 'f1': 0.33864265470199884}, 'combined': 0.24952616662252544, 'epoch': 23} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3841715799217045, 'r': 0.28937599526569946, 'f1': 0.3301029871919831}, 'combined': 0.20289256773751158, 'epoch': 23} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3642241379310345, 'r': 0.30178571428571427, 'f1': 0.33007812499999994}, 'combined': 0.2200520833333333, 'epoch': 23} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3422116136600626, 'r': 0.35517417478354985, 'f1': 0.34857242432288904}, 'combined': 0.25684283897476035, 'epoch': 27} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3405583370631552, 'r': 0.28891387520349127, 'f1': 0.31261754522721885}, 'combined': 0.19308730734622345, 'epoch': 27} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 27} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:33:57.623134: step: 2/466, loss: 0.06499077379703522 2023-01-24 05:33:58.339399: step: 4/466, loss: 0.00030605480424128473 2023-01-24 05:33:59.097819: step: 6/466, loss: 0.048321682959795 2023-01-24 05:33:59.856147: step: 8/466, loss: 0.03479750081896782 2023-01-24 05:34:00.585864: step: 10/466, loss: 0.03079729899764061 2023-01-24 05:34:01.479252: step: 12/466, loss: 0.02482619881629944 2023-01-24 05:34:02.201121: step: 14/466, loss: 0.004145639017224312 2023-01-24 05:34:02.946321: step: 16/466, loss: 0.05340668186545372 2023-01-24 05:34:03.703632: step: 18/466, loss: 0.0020916545763611794 2023-01-24 05:34:04.431996: step: 20/466, loss: 0.00204270938411355 2023-01-24 05:34:05.260056: step: 22/466, loss: 0.028095096349716187 2023-01-24 05:34:06.001476: step: 24/466, loss: 0.0031982657965272665 2023-01-24 05:34:06.814731: step: 26/466, loss: 0.00482933921739459 2023-01-24 05:34:07.555419: step: 28/466, loss: 0.01134143304079771 2023-01-24 05:34:08.336325: step: 30/466, loss: 0.004522103816270828 2023-01-24 05:34:09.172036: step: 32/466, loss: 0.005846688058227301 2023-01-24 05:34:09.966725: step: 34/466, loss: 0.0363161526620388 2023-01-24 05:34:10.685061: step: 36/466, loss: 0.010997610166668892 2023-01-24 05:34:11.478038: step: 38/466, loss: 0.0009249352151528001 2023-01-24 05:34:12.217360: step: 40/466, loss: 0.0014230635715648532 2023-01-24 05:34:13.060645: step: 42/466, loss: 0.08632513135671616 2023-01-24 05:34:13.773059: step: 44/466, loss: 0.028771214187145233 2023-01-24 05:34:14.480905: step: 46/466, loss: 0.01563280262053013 2023-01-24 05:34:15.256448: step: 48/466, loss: 0.059959497302770615 2023-01-24 05:34:15.969736: step: 50/466, loss: 0.0029923501424491405 2023-01-24 05:34:16.648469: step: 52/466, loss: 0.015609527938067913 2023-01-24 05:34:17.427299: step: 54/466, loss: 0.005849896464496851 2023-01-24 05:34:18.201059: step: 56/466, loss: 0.02084076590836048 2023-01-24 05:34:18.960806: step: 58/466, loss: 0.028277015313506126 2023-01-24 05:34:19.716925: step: 60/466, loss: 0.022914856672286987 2023-01-24 05:34:20.529598: step: 62/466, loss: 0.002642903244122863 2023-01-24 05:34:21.252155: step: 64/466, loss: 0.0164827611297369 2023-01-24 05:34:21.968602: step: 66/466, loss: 0.0020859059877693653 2023-01-24 05:34:22.758831: step: 68/466, loss: 0.0057837022468447685 2023-01-24 05:34:23.397890: step: 70/466, loss: 0.031920988112688065 2023-01-24 05:34:24.136141: step: 72/466, loss: 0.048194173723459244 2023-01-24 05:34:24.857337: step: 74/466, loss: 0.037404634058475494 2023-01-24 05:34:25.710768: step: 76/466, loss: 0.0703200027346611 2023-01-24 05:34:26.420945: step: 78/466, loss: 0.0007235261728055775 2023-01-24 05:34:27.196961: step: 80/466, loss: 0.005455330945551395 2023-01-24 05:34:27.900436: step: 82/466, loss: 0.030146855860948563 2023-01-24 05:34:28.639721: step: 84/466, loss: 0.019173510372638702 2023-01-24 05:34:29.413642: step: 86/466, loss: 0.31309449672698975 2023-01-24 05:34:30.320381: step: 88/466, loss: 0.0006127296946942806 2023-01-24 05:34:31.047927: step: 90/466, loss: 0.010310042649507523 2023-01-24 05:34:31.775804: step: 92/466, loss: 0.006365275010466576 2023-01-24 05:34:32.550263: step: 94/466, loss: 0.019108332693576813 2023-01-24 05:34:33.255778: step: 96/466, loss: 0.005697840824723244 2023-01-24 05:34:34.087680: step: 98/466, loss: 0.02297283336520195 2023-01-24 05:34:34.871753: step: 100/466, loss: 0.00931552518159151 2023-01-24 05:34:35.574351: step: 102/466, loss: 0.0034823233727365732 2023-01-24 05:34:36.299530: step: 104/466, loss: 0.0037601529620587826 2023-01-24 05:34:37.053423: step: 106/466, loss: 0.10511481761932373 2023-01-24 05:34:37.776404: step: 108/466, loss: 0.03389532491564751 2023-01-24 05:34:38.577499: step: 110/466, loss: 0.036362238228321075 2023-01-24 05:34:39.268687: step: 112/466, loss: 0.032383453100919724 2023-01-24 05:34:40.010315: step: 114/466, loss: 0.04542417451739311 2023-01-24 05:34:40.780253: step: 116/466, loss: 0.017092755064368248 2023-01-24 05:34:41.474257: step: 118/466, loss: 0.002053190255537629 2023-01-24 05:34:42.264069: step: 120/466, loss: 0.024400081485509872 2023-01-24 05:34:43.036096: step: 122/466, loss: 0.08548931777477264 2023-01-24 05:34:43.745677: step: 124/466, loss: 0.004342307336628437 2023-01-24 05:34:44.453247: step: 126/466, loss: 0.0299422238022089 2023-01-24 05:34:45.181267: step: 128/466, loss: 0.0469515398144722 2023-01-24 05:34:46.002247: step: 130/466, loss: 0.0016981277149170637 2023-01-24 05:34:46.833077: step: 132/466, loss: 0.02915450558066368 2023-01-24 05:34:47.663206: step: 134/466, loss: 0.1387241780757904 2023-01-24 05:34:48.440974: step: 136/466, loss: 0.03041171096265316 2023-01-24 05:34:49.159509: step: 138/466, loss: 0.000548867043107748 2023-01-24 05:34:49.860651: step: 140/466, loss: 0.001786780427210033 2023-01-24 05:34:50.685453: step: 142/466, loss: 0.01875191740691662 2023-01-24 05:34:51.344861: step: 144/466, loss: 2.2222907543182373 2023-01-24 05:34:52.082589: step: 146/466, loss: 0.1311049610376358 2023-01-24 05:34:52.875709: step: 148/466, loss: 0.14866508543491364 2023-01-24 05:34:53.577371: step: 150/466, loss: 0.00995594821870327 2023-01-24 05:34:54.276134: step: 152/466, loss: 0.0277316402643919 2023-01-24 05:34:55.094884: step: 154/466, loss: 0.0845852717757225 2023-01-24 05:34:55.847201: step: 156/466, loss: 0.014286580495536327 2023-01-24 05:34:56.639565: step: 158/466, loss: 0.02495592273771763 2023-01-24 05:34:57.473647: step: 160/466, loss: 0.028022564947605133 2023-01-24 05:34:58.194018: step: 162/466, loss: 0.02017914690077305 2023-01-24 05:34:58.964841: step: 164/466, loss: 0.0022570958826690912 2023-01-24 05:34:59.749842: step: 166/466, loss: 0.013435564003884792 2023-01-24 05:35:00.544641: step: 168/466, loss: 0.013166406191885471 2023-01-24 05:35:01.255382: step: 170/466, loss: 0.0075103770941495895 2023-01-24 05:35:02.024556: step: 172/466, loss: 0.041951779276132584 2023-01-24 05:35:02.784677: step: 174/466, loss: 0.029832616448402405 2023-01-24 05:35:03.597003: step: 176/466, loss: 0.05610651150345802 2023-01-24 05:35:04.307229: step: 178/466, loss: 0.033889614045619965 2023-01-24 05:35:05.003045: step: 180/466, loss: 0.12283995002508163 2023-01-24 05:35:05.753626: step: 182/466, loss: 0.00535232201218605 2023-01-24 05:35:06.461299: step: 184/466, loss: 0.05301175266504288 2023-01-24 05:35:07.259677: step: 186/466, loss: 0.004555729683488607 2023-01-24 05:35:07.976048: step: 188/466, loss: 0.016074948012828827 2023-01-24 05:35:08.731157: step: 190/466, loss: 0.004225687589496374 2023-01-24 05:35:09.457734: step: 192/466, loss: 0.0014638496795669198 2023-01-24 05:35:10.238826: step: 194/466, loss: 0.014197608456015587 2023-01-24 05:35:10.995171: step: 196/466, loss: 0.03307262435555458 2023-01-24 05:35:11.692170: step: 198/466, loss: 0.008677857927978039 2023-01-24 05:35:12.423226: step: 200/466, loss: 0.005501512438058853 2023-01-24 05:35:13.287605: step: 202/466, loss: 0.02331840991973877 2023-01-24 05:35:14.044820: step: 204/466, loss: 0.0021949547808617353 2023-01-24 05:35:14.761260: step: 206/466, loss: 0.09439667314291 2023-01-24 05:35:15.560399: step: 208/466, loss: 0.051236849278211594 2023-01-24 05:35:16.303101: step: 210/466, loss: 0.03168812766671181 2023-01-24 05:35:17.090075: step: 212/466, loss: 0.030869079753756523 2023-01-24 05:35:17.862785: step: 214/466, loss: 0.12366204708814621 2023-01-24 05:35:18.577528: step: 216/466, loss: 0.021326301619410515 2023-01-24 05:35:19.276022: step: 218/466, loss: 0.07159413397312164 2023-01-24 05:35:20.010188: step: 220/466, loss: 0.013327101245522499 2023-01-24 05:35:20.774713: step: 222/466, loss: 0.007618334610015154 2023-01-24 05:35:21.507078: step: 224/466, loss: 0.009082970209419727 2023-01-24 05:35:22.180329: step: 226/466, loss: 0.0043992577120661736 2023-01-24 05:35:23.010719: step: 228/466, loss: 0.8100372552871704 2023-01-24 05:35:23.772535: step: 230/466, loss: 0.009599895216524601 2023-01-24 05:35:24.513800: step: 232/466, loss: 0.0001887738617369905 2023-01-24 05:35:25.256003: step: 234/466, loss: 0.03161048889160156 2023-01-24 05:35:26.044340: step: 236/466, loss: 0.013339421711862087 2023-01-24 05:35:26.811883: step: 238/466, loss: 0.5421699285507202 2023-01-24 05:35:27.659973: step: 240/466, loss: 0.0341641902923584 2023-01-24 05:35:28.447340: step: 242/466, loss: 0.0005802357918582857 2023-01-24 05:35:29.168753: step: 244/466, loss: 0.013874661177396774 2023-01-24 05:35:29.900256: step: 246/466, loss: 0.038976095616817474 2023-01-24 05:35:30.628071: step: 248/466, loss: 0.015630293637514114 2023-01-24 05:35:31.483212: step: 250/466, loss: 0.07874112576246262 2023-01-24 05:35:32.340737: step: 252/466, loss: 0.011092585511505604 2023-01-24 05:35:33.124332: step: 254/466, loss: 0.06348495930433273 2023-01-24 05:35:33.858385: step: 256/466, loss: 0.03054478019475937 2023-01-24 05:35:34.558304: step: 258/466, loss: 0.012458411045372486 2023-01-24 05:35:35.294520: step: 260/466, loss: 0.03350565582513809 2023-01-24 05:35:36.000875: step: 262/466, loss: 0.02486339397728443 2023-01-24 05:35:36.744005: step: 264/466, loss: 0.007128008641302586 2023-01-24 05:35:37.562835: step: 266/466, loss: 0.03835118189454079 2023-01-24 05:35:38.322407: step: 268/466, loss: 0.382914662361145 2023-01-24 05:35:39.069390: step: 270/466, loss: 0.3014521896839142 2023-01-24 05:35:39.850124: step: 272/466, loss: 0.08240548521280289 2023-01-24 05:35:40.620452: step: 274/466, loss: 0.008726700209081173 2023-01-24 05:35:41.343706: step: 276/466, loss: 0.01880439557135105 2023-01-24 05:35:42.061623: step: 278/466, loss: 0.011851079761981964 2023-01-24 05:35:42.879118: step: 280/466, loss: 0.0475764237344265 2023-01-24 05:35:43.647507: step: 282/466, loss: 0.023751405999064445 2023-01-24 05:35:44.406294: step: 284/466, loss: 0.019384315237402916 2023-01-24 05:35:45.189230: step: 286/466, loss: 0.006504491437226534 2023-01-24 05:35:45.879218: step: 288/466, loss: 0.00377333490177989 2023-01-24 05:35:46.595346: step: 290/466, loss: 0.005131383426487446 2023-01-24 05:35:47.343419: step: 292/466, loss: 0.004204194992780685 2023-01-24 05:35:48.114674: step: 294/466, loss: 0.008302164264023304 2023-01-24 05:35:48.962000: step: 296/466, loss: 0.02942466177046299 2023-01-24 05:35:49.824763: step: 298/466, loss: 0.002165537793189287 2023-01-24 05:35:50.634303: step: 300/466, loss: 0.1387287974357605 2023-01-24 05:35:51.440129: step: 302/466, loss: 0.027791647240519524 2023-01-24 05:35:52.204687: step: 304/466, loss: 0.02116568200290203 2023-01-24 05:35:52.994450: step: 306/466, loss: 0.029098939150571823 2023-01-24 05:35:53.752808: step: 308/466, loss: 0.0016982073429971933 2023-01-24 05:35:54.485000: step: 310/466, loss: 0.0003676303313113749 2023-01-24 05:35:55.252602: step: 312/466, loss: 0.06481810659170151 2023-01-24 05:35:55.970843: step: 314/466, loss: 0.0015671990113332868 2023-01-24 05:35:56.803000: step: 316/466, loss: 0.036887165158987045 2023-01-24 05:35:57.477937: step: 318/466, loss: 0.00625829491764307 2023-01-24 05:35:58.279675: step: 320/466, loss: 1.9453067779541016 2023-01-24 05:35:58.984410: step: 322/466, loss: 0.00021286096307449043 2023-01-24 05:35:59.695076: step: 324/466, loss: 2.494503974914551 2023-01-24 05:36:00.419364: step: 326/466, loss: 0.04123297706246376 2023-01-24 05:36:01.125144: step: 328/466, loss: 0.030818086117506027 2023-01-24 05:36:01.924946: step: 330/466, loss: 0.009971718303859234 2023-01-24 05:36:02.729626: step: 332/466, loss: 0.03440983593463898 2023-01-24 05:36:03.428990: step: 334/466, loss: 0.0012510116212069988 2023-01-24 05:36:04.177307: step: 336/466, loss: 0.3301555812358856 2023-01-24 05:36:04.959902: step: 338/466, loss: 0.01839314214885235 2023-01-24 05:36:05.709298: step: 340/466, loss: 0.0001671733771217987 2023-01-24 05:36:06.416493: step: 342/466, loss: 0.03994448855519295 2023-01-24 05:36:07.181714: step: 344/466, loss: 0.004159613512456417 2023-01-24 05:36:07.904514: step: 346/466, loss: 0.022638363763689995 2023-01-24 05:36:08.607715: step: 348/466, loss: 0.0002808289136737585 2023-01-24 05:36:09.421426: step: 350/466, loss: 0.010448471643030643 2023-01-24 05:36:10.150142: step: 352/466, loss: 0.03703666105866432 2023-01-24 05:36:10.877218: step: 354/466, loss: 0.019218124449253082 2023-01-24 05:36:11.607507: step: 356/466, loss: 0.16720406711101532 2023-01-24 05:36:12.269222: step: 358/466, loss: 0.04546864703297615 2023-01-24 05:36:13.030243: step: 360/466, loss: 0.0613517202436924 2023-01-24 05:36:13.852205: step: 362/466, loss: 0.06387585401535034 2023-01-24 05:36:14.578721: step: 364/466, loss: 0.00040805654134601355 2023-01-24 05:36:15.286376: step: 366/466, loss: 0.03251107037067413 2023-01-24 05:36:16.084763: step: 368/466, loss: 0.031106477603316307 2023-01-24 05:36:16.894457: step: 370/466, loss: 0.017203882336616516 2023-01-24 05:36:17.765687: step: 372/466, loss: 0.2872418761253357 2023-01-24 05:36:18.556937: step: 374/466, loss: 0.016309423372149467 2023-01-24 05:36:19.407753: step: 376/466, loss: 0.016587570309638977 2023-01-24 05:36:20.176878: step: 378/466, loss: 0.010483672842383385 2023-01-24 05:36:20.939584: step: 380/466, loss: 0.004580613691359758 2023-01-24 05:36:21.736045: step: 382/466, loss: 0.05155673250555992 2023-01-24 05:36:22.479978: step: 384/466, loss: 0.019467420876026154 2023-01-24 05:36:23.255140: step: 386/466, loss: 0.029801692813634872 2023-01-24 05:36:24.059147: step: 388/466, loss: 0.008031142875552177 2023-01-24 05:36:24.858316: step: 390/466, loss: 0.02196209877729416 2023-01-24 05:36:25.528440: step: 392/466, loss: 0.0014395922189578414 2023-01-24 05:36:26.305036: step: 394/466, loss: 0.021542318165302277 2023-01-24 05:36:27.056704: step: 396/466, loss: 0.009997592307627201 2023-01-24 05:36:27.879230: step: 398/466, loss: 0.05296003073453903 2023-01-24 05:36:28.675334: step: 400/466, loss: 0.007914634421467781 2023-01-24 05:36:29.456634: step: 402/466, loss: 0.670566201210022 2023-01-24 05:36:30.309680: step: 404/466, loss: 0.038577012717723846 2023-01-24 05:36:31.074918: step: 406/466, loss: 0.026723712682724 2023-01-24 05:36:31.820277: step: 408/466, loss: 0.0045420206151902676 2023-01-24 05:36:32.630929: step: 410/466, loss: 0.006481709890067577 2023-01-24 05:36:33.381366: step: 412/466, loss: 0.014898900873959064 2023-01-24 05:36:34.114412: step: 414/466, loss: 0.015765273943543434 2023-01-24 05:36:34.847002: step: 416/466, loss: 0.06360304355621338 2023-01-24 05:36:35.576206: step: 418/466, loss: 0.007630039472132921 2023-01-24 05:36:36.292402: step: 420/466, loss: 0.014952539466321468 2023-01-24 05:36:37.136807: step: 422/466, loss: 0.4710962474346161 2023-01-24 05:36:38.010773: step: 424/466, loss: 0.05748264491558075 2023-01-24 05:36:38.710175: step: 426/466, loss: 0.000547609175555408 2023-01-24 05:36:39.494211: step: 428/466, loss: 0.08756930381059647 2023-01-24 05:36:40.279519: step: 430/466, loss: 0.023667046800255775 2023-01-24 05:36:41.077785: step: 432/466, loss: 0.015044069848954678 2023-01-24 05:36:41.825230: step: 434/466, loss: 3.511402610456571e-05 2023-01-24 05:36:42.651783: step: 436/466, loss: 0.06586025655269623 2023-01-24 05:36:43.382281: step: 438/466, loss: 0.008547937497496605 2023-01-24 05:36:44.116468: step: 440/466, loss: 0.017574824392795563 2023-01-24 05:36:44.888550: step: 442/466, loss: 0.17807810008525848 2023-01-24 05:36:45.643419: step: 444/466, loss: 0.01362221036106348 2023-01-24 05:36:46.539673: step: 446/466, loss: 0.00949972402304411 2023-01-24 05:36:47.414904: step: 448/466, loss: 0.4360661804676056 2023-01-24 05:36:48.151993: step: 450/466, loss: 0.10850492119789124 2023-01-24 05:36:48.911636: step: 452/466, loss: 0.005768525879830122 2023-01-24 05:36:49.654248: step: 454/466, loss: 0.012159998528659344 2023-01-24 05:36:50.373180: step: 456/466, loss: 0.0253811776638031 2023-01-24 05:36:51.246189: step: 458/466, loss: 0.019882716238498688 2023-01-24 05:36:52.033405: step: 460/466, loss: 0.02412649616599083 2023-01-24 05:36:52.760344: step: 462/466, loss: 0.004687316715717316 2023-01-24 05:36:53.582003: step: 464/466, loss: 0.004203404299914837 2023-01-24 05:36:54.271962: step: 466/466, loss: 0.002235273364931345 2023-01-24 05:36:54.992251: step: 468/466, loss: 0.015087980777025223 2023-01-24 05:36:55.728114: step: 470/466, loss: 0.11413915455341339 2023-01-24 05:36:56.556135: step: 472/466, loss: 0.14809414744377136 2023-01-24 05:36:57.237821: step: 474/466, loss: 0.004887235816568136 2023-01-24 05:36:58.011049: step: 476/466, loss: 0.016122309491038322 2023-01-24 05:36:58.726276: step: 478/466, loss: 0.01863691955804825 2023-01-24 05:36:59.536482: step: 480/466, loss: 0.08607181906700134 2023-01-24 05:37:00.337860: step: 482/466, loss: 0.4806332290172577 2023-01-24 05:37:01.118856: step: 484/466, loss: 0.033233266323804855 2023-01-24 05:37:01.993185: step: 486/466, loss: 0.024094315245747566 2023-01-24 05:37:02.770281: step: 488/466, loss: 0.3318593502044678 2023-01-24 05:37:03.448529: step: 490/466, loss: 0.01980016566812992 2023-01-24 05:37:04.128333: step: 492/466, loss: 0.009421803057193756 2023-01-24 05:37:04.955584: step: 494/466, loss: 0.07890952378511429 2023-01-24 05:37:05.701303: step: 496/466, loss: 0.03698199242353439 2023-01-24 05:37:06.579466: step: 498/466, loss: 0.015461564064025879 2023-01-24 05:37:07.316651: step: 500/466, loss: 0.030055489391088486 2023-01-24 05:37:08.051556: step: 502/466, loss: 0.014831021428108215 2023-01-24 05:37:08.744165: step: 504/466, loss: 0.011799895204603672 2023-01-24 05:37:09.411307: step: 506/466, loss: 0.00011390951840439811 2023-01-24 05:37:10.178175: step: 508/466, loss: 0.02746775932610035 2023-01-24 05:37:10.918742: step: 510/466, loss: 0.09469226002693176 2023-01-24 05:37:11.726478: step: 512/466, loss: 0.04830900952219963 2023-01-24 05:37:12.533139: step: 514/466, loss: 0.004041132051497698 2023-01-24 05:37:13.253005: step: 516/466, loss: 0.019389452412724495 2023-01-24 05:37:14.024221: step: 518/466, loss: 0.05219132825732231 2023-01-24 05:37:14.763325: step: 520/466, loss: 0.0018481820588931441 2023-01-24 05:37:15.536957: step: 522/466, loss: 0.023905931040644646 2023-01-24 05:37:16.261939: step: 524/466, loss: 0.009195341728627682 2023-01-24 05:37:17.015612: step: 526/466, loss: 0.0813000500202179 2023-01-24 05:37:17.742745: step: 528/466, loss: 0.0018000929849222302 2023-01-24 05:37:18.507256: step: 530/466, loss: 0.007307850290089846 2023-01-24 05:37:19.264877: step: 532/466, loss: 0.06790435314178467 2023-01-24 05:37:20.241638: step: 534/466, loss: 0.05543149635195732 2023-01-24 05:37:21.015864: step: 536/466, loss: 0.0454183965921402 2023-01-24 05:37:21.810870: step: 538/466, loss: 0.04881078749895096 2023-01-24 05:37:22.528304: step: 540/466, loss: 0.010402346029877663 2023-01-24 05:37:23.237713: step: 542/466, loss: 0.008386926725506783 2023-01-24 05:37:23.926604: step: 544/466, loss: 0.002387000946328044 2023-01-24 05:37:24.682427: step: 546/466, loss: 0.0064791422337293625 2023-01-24 05:37:25.403028: step: 548/466, loss: 0.014200640842318535 2023-01-24 05:37:26.109389: step: 550/466, loss: 0.0030625786166638136 2023-01-24 05:37:26.842861: step: 552/466, loss: 0.007705213502049446 2023-01-24 05:37:27.537211: step: 554/466, loss: 0.37956467270851135 2023-01-24 05:37:28.281847: step: 556/466, loss: 0.054494984447956085 2023-01-24 05:37:29.062353: step: 558/466, loss: 0.043674640357494354 2023-01-24 05:37:29.828580: step: 560/466, loss: 0.008610519580543041 2023-01-24 05:37:30.530785: step: 562/466, loss: 0.037072937935590744 2023-01-24 05:37:31.292632: step: 564/466, loss: 0.033782653510570526 2023-01-24 05:37:31.997470: step: 566/466, loss: 0.005472981370985508 2023-01-24 05:37:32.695790: step: 568/466, loss: 0.010912074707448483 2023-01-24 05:37:33.418700: step: 570/466, loss: 0.0232261773198843 2023-01-24 05:37:34.072763: step: 572/466, loss: 0.010280226357281208 2023-01-24 05:37:34.814025: step: 574/466, loss: 0.01418902724981308 2023-01-24 05:37:35.593933: step: 576/466, loss: 0.011683930642902851 2023-01-24 05:37:36.346262: step: 578/466, loss: 0.004620610736310482 2023-01-24 05:37:37.146689: step: 580/466, loss: 0.12468191981315613 2023-01-24 05:37:37.998791: step: 582/466, loss: 0.03425385430455208 2023-01-24 05:37:38.807062: step: 584/466, loss: 0.011958101764321327 2023-01-24 05:37:39.577506: step: 586/466, loss: 0.012401238083839417 2023-01-24 05:37:40.444041: step: 588/466, loss: 0.03542652353644371 2023-01-24 05:37:41.231414: step: 590/466, loss: 0.003349336562678218 2023-01-24 05:37:42.035669: step: 592/466, loss: 0.0013430201215669513 2023-01-24 05:37:42.756896: step: 594/466, loss: 0.008202009834349155 2023-01-24 05:37:43.538519: step: 596/466, loss: 0.011623851023614407 2023-01-24 05:37:44.248137: step: 598/466, loss: 0.016427496448159218 2023-01-24 05:37:45.028336: step: 600/466, loss: 0.0034484562929719687 2023-01-24 05:37:45.810653: step: 602/466, loss: 0.007288047112524509 2023-01-24 05:37:46.582862: step: 604/466, loss: 0.0009411592618562281 2023-01-24 05:37:47.257898: step: 606/466, loss: 0.0017976914532482624 2023-01-24 05:37:47.972737: step: 608/466, loss: 0.0015250653959810734 2023-01-24 05:37:48.682768: step: 610/466, loss: 0.0024311088491231203 2023-01-24 05:37:49.455733: step: 612/466, loss: 0.0039992425590753555 2023-01-24 05:37:50.311705: step: 614/466, loss: 0.05181852728128433 2023-01-24 05:37:51.084271: step: 616/466, loss: 0.02542915567755699 2023-01-24 05:37:51.825363: step: 618/466, loss: 0.10837765038013458 2023-01-24 05:37:52.481483: step: 620/466, loss: 0.02626294456422329 2023-01-24 05:37:53.250345: step: 622/466, loss: 0.01962939277291298 2023-01-24 05:37:54.106994: step: 624/466, loss: 0.0007828868110664189 2023-01-24 05:37:54.777937: step: 626/466, loss: 0.022047756239771843 2023-01-24 05:37:55.541287: step: 628/466, loss: 0.026129741221666336 2023-01-24 05:37:56.499862: step: 630/466, loss: 0.07522108405828476 2023-01-24 05:37:57.283742: step: 632/466, loss: 0.05754267796874046 2023-01-24 05:37:58.048533: step: 634/466, loss: 0.030316900461912155 2023-01-24 05:37:58.797740: step: 636/466, loss: 0.009969084523618221 2023-01-24 05:37:59.509916: step: 638/466, loss: 0.03778718039393425 2023-01-24 05:38:00.206780: step: 640/466, loss: 0.0012821757700294256 2023-01-24 05:38:00.948838: step: 642/466, loss: 0.006132754497230053 2023-01-24 05:38:01.705413: step: 644/466, loss: 0.018835240975022316 2023-01-24 05:38:02.474361: step: 646/466, loss: 0.006913262885063887 2023-01-24 05:38:03.289028: step: 648/466, loss: 0.001851757988333702 2023-01-24 05:38:04.145418: step: 650/466, loss: 0.039734967052936554 2023-01-24 05:38:04.988044: step: 652/466, loss: 0.01049152109771967 2023-01-24 05:38:05.733253: step: 654/466, loss: 0.028401654213666916 2023-01-24 05:38:06.455229: step: 656/466, loss: 0.013274271972477436 2023-01-24 05:38:07.245705: step: 658/466, loss: 0.012449268251657486 2023-01-24 05:38:07.947151: step: 660/466, loss: 0.05933975428342819 2023-01-24 05:38:08.723263: step: 662/466, loss: 0.00489422120153904 2023-01-24 05:38:09.467147: step: 664/466, loss: 0.022365255281329155 2023-01-24 05:38:10.253086: step: 666/466, loss: 0.010959116742014885 2023-01-24 05:38:11.068688: step: 668/466, loss: 0.0012225598329678178 2023-01-24 05:38:11.877302: step: 670/466, loss: 0.025942856445908546 2023-01-24 05:38:12.569125: step: 672/466, loss: 0.027114970609545708 2023-01-24 05:38:13.327025: step: 674/466, loss: 0.0978192389011383 2023-01-24 05:38:14.173494: step: 676/466, loss: 0.0022755172103643417 2023-01-24 05:38:15.024215: step: 678/466, loss: 0.0016027453821152449 2023-01-24 05:38:15.840312: step: 680/466, loss: 0.8143453598022461 2023-01-24 05:38:16.720782: step: 682/466, loss: 0.2201130986213684 2023-01-24 05:38:17.465254: step: 684/466, loss: 0.0033785824198275805 2023-01-24 05:38:18.216604: step: 686/466, loss: 0.008999837562441826 2023-01-24 05:38:19.014603: step: 688/466, loss: 0.00800447165966034 2023-01-24 05:38:19.786402: step: 690/466, loss: 0.024964090436697006 2023-01-24 05:38:20.576046: step: 692/466, loss: 0.0023913022596389055 2023-01-24 05:38:21.315604: step: 694/466, loss: 0.002989932894706726 2023-01-24 05:38:22.075265: step: 696/466, loss: 0.019625093787908554 2023-01-24 05:38:22.824153: step: 698/466, loss: 0.027111845090985298 2023-01-24 05:38:23.667428: step: 700/466, loss: 0.06732188165187836 2023-01-24 05:38:24.428012: step: 702/466, loss: 0.025172458961606026 2023-01-24 05:38:25.148873: step: 704/466, loss: 0.013280685059726238 2023-01-24 05:38:25.918954: step: 706/466, loss: 0.007359291426837444 2023-01-24 05:38:26.681763: step: 708/466, loss: 0.24157093465328217 2023-01-24 05:38:27.411093: step: 710/466, loss: 0.3335108757019043 2023-01-24 05:38:28.279757: step: 712/466, loss: 0.009455726481974125 2023-01-24 05:38:29.059941: step: 714/466, loss: 0.028666729107499123 2023-01-24 05:38:29.836156: step: 716/466, loss: 0.01238565519452095 2023-01-24 05:38:30.595105: step: 718/466, loss: 0.05475013330578804 2023-01-24 05:38:31.355137: step: 720/466, loss: 0.003509529633447528 2023-01-24 05:38:32.088516: step: 722/466, loss: 0.02143089286983013 2023-01-24 05:38:32.826674: step: 724/466, loss: 0.049197278916835785 2023-01-24 05:38:33.593452: step: 726/466, loss: 0.029327843338251114 2023-01-24 05:38:34.362828: step: 728/466, loss: 0.012504545971751213 2023-01-24 05:38:35.093864: step: 730/466, loss: 0.01041465625166893 2023-01-24 05:38:35.816955: step: 732/466, loss: 0.0908796489238739 2023-01-24 05:38:36.579284: step: 734/466, loss: 0.011886252090334892 2023-01-24 05:38:37.281567: step: 736/466, loss: 0.001345496391877532 2023-01-24 05:38:37.987939: step: 738/466, loss: 0.0027429629117250443 2023-01-24 05:38:38.682586: step: 740/466, loss: 0.013406159356236458 2023-01-24 05:38:39.491903: step: 742/466, loss: 0.004068335052579641 2023-01-24 05:38:40.312434: step: 744/466, loss: 0.020365413278341293 2023-01-24 05:38:41.131820: step: 746/466, loss: 0.051735859364271164 2023-01-24 05:38:41.848666: step: 748/466, loss: 0.0009359294781461358 2023-01-24 05:38:42.578898: step: 750/466, loss: 0.01213749311864376 2023-01-24 05:38:43.339291: step: 752/466, loss: 0.01617128774523735 2023-01-24 05:38:44.077977: step: 754/466, loss: 0.0006245630793273449 2023-01-24 05:38:44.858488: step: 756/466, loss: 0.09827623516321182 2023-01-24 05:38:45.655618: step: 758/466, loss: 0.020308727398514748 2023-01-24 05:38:46.394037: step: 760/466, loss: 0.016715632751584053 2023-01-24 05:38:47.102645: step: 762/466, loss: 0.02906610816717148 2023-01-24 05:38:47.864953: step: 764/466, loss: 0.032195430248975754 2023-01-24 05:38:48.603093: step: 766/466, loss: 0.0008314056321978569 2023-01-24 05:38:49.366443: step: 768/466, loss: 0.023581545799970627 2023-01-24 05:38:50.146176: step: 770/466, loss: 0.013966499827802181 2023-01-24 05:38:50.894424: step: 772/466, loss: 0.035196367651224136 2023-01-24 05:38:51.634992: step: 774/466, loss: 0.026951663196086884 2023-01-24 05:38:52.457251: step: 776/466, loss: 0.08619563281536102 2023-01-24 05:38:53.257516: step: 778/466, loss: 0.053176235407590866 2023-01-24 05:38:54.082578: step: 780/466, loss: 0.006441871169954538 2023-01-24 05:38:54.827919: step: 782/466, loss: 0.0042631844989955425 2023-01-24 05:38:55.617694: step: 784/466, loss: 0.06781040132045746 2023-01-24 05:38:56.365042: step: 786/466, loss: 0.046221598982810974 2023-01-24 05:38:57.075776: step: 788/466, loss: 0.02901509776711464 2023-01-24 05:38:57.791303: step: 790/466, loss: 0.0003212362644262612 2023-01-24 05:38:58.551634: step: 792/466, loss: 0.012461671605706215 2023-01-24 05:38:59.390485: step: 794/466, loss: 0.08590605109930038 2023-01-24 05:39:00.131419: step: 796/466, loss: 0.010314743034541607 2023-01-24 05:39:00.862968: step: 798/466, loss: 0.0017901716055348516 2023-01-24 05:39:01.610810: step: 800/466, loss: 0.015061999671161175 2023-01-24 05:39:02.382921: step: 802/466, loss: 0.005251292604953051 2023-01-24 05:39:03.062179: step: 804/466, loss: 0.06770678609609604 2023-01-24 05:39:03.862369: step: 806/466, loss: 0.1236957386136055 2023-01-24 05:39:04.618221: step: 808/466, loss: 0.010861529037356377 2023-01-24 05:39:05.328431: step: 810/466, loss: 0.01007386390119791 2023-01-24 05:39:06.071919: step: 812/466, loss: 0.001310806954279542 2023-01-24 05:39:06.801408: step: 814/466, loss: 0.018976766616106033 2023-01-24 05:39:07.647080: step: 816/466, loss: 0.06041739508509636 2023-01-24 05:39:08.439481: step: 818/466, loss: 0.04153867065906525 2023-01-24 05:39:09.209825: step: 820/466, loss: 0.017491161823272705 2023-01-24 05:39:09.958028: step: 822/466, loss: 0.017895622178912163 2023-01-24 05:39:10.837842: step: 824/466, loss: 0.06135671213269234 2023-01-24 05:39:11.570073: step: 826/466, loss: 0.021287666633725166 2023-01-24 05:39:12.334866: step: 828/466, loss: 0.0024881153367459774 2023-01-24 05:39:13.142880: step: 830/466, loss: 0.0017700603930279613 2023-01-24 05:39:13.878437: step: 832/466, loss: 0.04236576706171036 2023-01-24 05:39:14.550211: step: 834/466, loss: 0.014054981991648674 2023-01-24 05:39:15.267178: step: 836/466, loss: 0.013621608726680279 2023-01-24 05:39:16.017423: step: 838/466, loss: 0.04358559846878052 2023-01-24 05:39:16.786507: step: 840/466, loss: 0.08273734897375107 2023-01-24 05:39:17.533364: step: 842/466, loss: 0.004501787014305592 2023-01-24 05:39:18.258089: step: 844/466, loss: 0.010186144150793552 2023-01-24 05:39:19.036555: step: 846/466, loss: 0.014502090401947498 2023-01-24 05:39:19.777562: step: 848/466, loss: 0.0019662058912217617 2023-01-24 05:39:20.548114: step: 850/466, loss: 0.02114732936024666 2023-01-24 05:39:21.336885: step: 852/466, loss: 0.008611384779214859 2023-01-24 05:39:22.129022: step: 854/466, loss: 0.08735426515340805 2023-01-24 05:39:22.827632: step: 856/466, loss: 0.0077768550254404545 2023-01-24 05:39:23.635592: step: 858/466, loss: 0.0015171892009675503 2023-01-24 05:39:24.390442: step: 860/466, loss: 0.03444715589284897 2023-01-24 05:39:25.216666: step: 862/466, loss: 0.00725650554522872 2023-01-24 05:39:25.918406: step: 864/466, loss: 0.00915346760302782 2023-01-24 05:39:26.678941: step: 866/466, loss: 0.04942712560296059 2023-01-24 05:39:27.394598: step: 868/466, loss: 0.023312222212553024 2023-01-24 05:39:28.178865: step: 870/466, loss: 0.0256651621311903 2023-01-24 05:39:28.961969: step: 872/466, loss: 0.00711500458419323 2023-01-24 05:39:29.688841: step: 874/466, loss: 0.0023541359696537256 2023-01-24 05:39:30.423018: step: 876/466, loss: 0.11871694773435593 2023-01-24 05:39:31.190524: step: 878/466, loss: 0.012433771975338459 2023-01-24 05:39:31.946446: step: 880/466, loss: 0.005337044131010771 2023-01-24 05:39:32.656397: step: 882/466, loss: 0.12921734154224396 2023-01-24 05:39:33.485530: step: 884/466, loss: 0.03527417778968811 2023-01-24 05:39:34.274325: step: 886/466, loss: 0.005586323793977499 2023-01-24 05:39:35.062052: step: 888/466, loss: 0.010646643117070198 2023-01-24 05:39:35.943539: step: 890/466, loss: 0.02303536795079708 2023-01-24 05:39:36.694324: step: 892/466, loss: 0.012876001186668873 2023-01-24 05:39:37.534061: step: 894/466, loss: 0.01227349042892456 2023-01-24 05:39:38.296018: step: 896/466, loss: 0.009249640628695488 2023-01-24 05:39:39.024262: step: 898/466, loss: 0.011361805722117424 2023-01-24 05:39:39.775292: step: 900/466, loss: 0.0027859059628099203 2023-01-24 05:39:40.562407: step: 902/466, loss: 0.0026826439425349236 2023-01-24 05:39:41.306791: step: 904/466, loss: 0.015420181676745415 2023-01-24 05:39:42.068199: step: 906/466, loss: 0.01276139635592699 2023-01-24 05:39:42.846589: step: 908/466, loss: 0.08549144864082336 2023-01-24 05:39:43.774966: step: 910/466, loss: 0.05327073484659195 2023-01-24 05:39:44.518659: step: 912/466, loss: 0.09231462329626083 2023-01-24 05:39:45.285690: step: 914/466, loss: 0.03827878087759018 2023-01-24 05:39:46.018903: step: 916/466, loss: 0.00647725211456418 2023-01-24 05:39:46.810615: step: 918/466, loss: 0.011623353697359562 2023-01-24 05:39:47.556605: step: 920/466, loss: 0.15060193836688995 2023-01-24 05:39:48.333374: step: 922/466, loss: 0.05920673534274101 2023-01-24 05:39:49.117362: step: 924/466, loss: 0.027103755623102188 2023-01-24 05:39:49.904511: step: 926/466, loss: 0.035496946424245834 2023-01-24 05:39:50.648931: step: 928/466, loss: 0.001616903580725193 2023-01-24 05:39:51.461317: step: 930/466, loss: 0.014035423286259174 2023-01-24 05:39:52.211613: step: 932/466, loss: 0.002209370955824852 ================================================== Loss: 0.056 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33101438492063495, 'r': 0.31656783681214423, 'f1': 0.32362997090203693}, 'combined': 0.2384641890857114, 'epoch': 28} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3717816658562622, 'r': 0.2857628575515638, 'f1': 0.3231458477359183}, 'combined': 0.19861647226695467, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30551118562326185, 'r': 0.3229027142166164, 'f1': 0.3139662922364517}, 'combined': 0.23134358375317493, 'epoch': 28} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35034257020263326, 'r': 0.2874994921853498, 'f1': 0.315825239392569}, 'combined': 0.19411697640713999, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3462574110671937, 'r': 0.33245967741935484, 'f1': 0.3392182962245886}, 'combined': 0.24995032353390736, 'epoch': 28} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3725296188645876, 'r': 0.2886701375617367, 'f1': 0.3252819598866399}, 'combined': 0.20090944581233644, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31896551724137934, 'r': 0.2642857142857143, 'f1': 0.28906249999999994}, 'combined': 0.1927083333333333, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30303030303030304, 'r': 0.43478260869565216, 'f1': 0.35714285714285715}, 'combined': 0.17857142857142858, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3514281426856457, 'r': 0.3267548195748888, 'f1': 0.33864265470199884}, 'combined': 0.24952616662252544, 'epoch': 23} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3841715799217045, 'r': 0.28937599526569946, 'f1': 0.3301029871919831}, 'combined': 0.20289256773751158, 'epoch': 23} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3642241379310345, 'r': 0.30178571428571427, 'f1': 0.33007812499999994}, 'combined': 0.2200520833333333, 'epoch': 23} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3422116136600626, 'r': 0.35517417478354985, 'f1': 0.34857242432288904}, 'combined': 0.25684283897476035, 'epoch': 27} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3405583370631552, 'r': 0.28891387520349127, 'f1': 0.31261754522721885}, 'combined': 0.19308730734622345, 'epoch': 27} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 27} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:42:35.689965: step: 2/466, loss: 0.014562848955392838 2023-01-24 05:42:36.519833: step: 4/466, loss: 0.004749378655105829 2023-01-24 05:42:37.349282: step: 6/466, loss: 0.024124283343553543 2023-01-24 05:42:38.083203: step: 8/466, loss: 0.04171415790915489 2023-01-24 05:42:38.815897: step: 10/466, loss: 0.006787622347474098 2023-01-24 05:42:39.564179: step: 12/466, loss: 0.0034408585634082556 2023-01-24 05:42:40.521302: step: 14/466, loss: 0.017816148698329926 2023-01-24 05:42:41.352540: step: 16/466, loss: 0.037109583616256714 2023-01-24 05:42:42.172782: step: 18/466, loss: 0.004230950027704239 2023-01-24 05:42:42.858390: step: 20/466, loss: 0.047705430537462234 2023-01-24 05:42:43.651053: step: 22/466, loss: 0.07174509763717651 2023-01-24 05:42:44.495248: step: 24/466, loss: 0.024512307718396187 2023-01-24 05:42:45.210906: step: 26/466, loss: 0.00012372205674182624 2023-01-24 05:42:45.995496: step: 28/466, loss: 0.004223550204187632 2023-01-24 05:42:46.726014: step: 30/466, loss: 0.030073709785938263 2023-01-24 05:42:47.398510: step: 32/466, loss: 0.005309813655912876 2023-01-24 05:42:48.199755: step: 34/466, loss: 0.004884167108684778 2023-01-24 05:42:48.989835: step: 36/466, loss: 0.07684051245450974 2023-01-24 05:42:49.707966: step: 38/466, loss: 0.0611296109855175 2023-01-24 05:42:50.475343: step: 40/466, loss: 0.043774817138910294 2023-01-24 05:42:51.201774: step: 42/466, loss: 0.002685240935534239 2023-01-24 05:42:51.965938: step: 44/466, loss: 0.005610991735011339 2023-01-24 05:42:52.707323: step: 46/466, loss: 0.13258616626262665 2023-01-24 05:42:53.527271: step: 48/466, loss: 0.019392507150769234 2023-01-24 05:42:54.334709: step: 50/466, loss: 0.017014844343066216 2023-01-24 05:42:55.085224: step: 52/466, loss: 0.030051592737436295 2023-01-24 05:42:55.870336: step: 54/466, loss: 0.003113041166216135 2023-01-24 05:42:56.620268: step: 56/466, loss: 0.007894654758274555 2023-01-24 05:42:57.494441: step: 58/466, loss: 0.020002620294690132 2023-01-24 05:42:58.266535: step: 60/466, loss: 2.020765542984009 2023-01-24 05:42:59.055364: step: 62/466, loss: 0.016793828457593918 2023-01-24 05:42:59.757055: step: 64/466, loss: 0.004198416136205196 2023-01-24 05:43:00.538600: step: 66/466, loss: 0.01203058660030365 2023-01-24 05:43:01.389376: step: 68/466, loss: 0.5350288152694702 2023-01-24 05:43:02.132053: step: 70/466, loss: 0.0024632588028907776 2023-01-24 05:43:02.942436: step: 72/466, loss: 0.023018931970000267 2023-01-24 05:43:03.668176: step: 74/466, loss: 0.005367817357182503 2023-01-24 05:43:04.612452: step: 76/466, loss: 0.02340589463710785 2023-01-24 05:43:05.420016: step: 78/466, loss: 0.002251754282042384 2023-01-24 05:43:06.191186: step: 80/466, loss: 0.0617501474916935 2023-01-24 05:43:06.988776: step: 82/466, loss: 0.004363054409623146 2023-01-24 05:43:07.803653: step: 84/466, loss: 0.008134029805660248 2023-01-24 05:43:08.460665: step: 86/466, loss: 0.0098897535353899 2023-01-24 05:43:09.253758: step: 88/466, loss: 0.00210469588637352 2023-01-24 05:43:10.030977: step: 90/466, loss: 0.019904451444745064 2023-01-24 05:43:10.768601: step: 92/466, loss: 0.013698196038603783 2023-01-24 05:43:11.546863: step: 94/466, loss: 0.0030992329120635986 2023-01-24 05:43:12.298479: step: 96/466, loss: 0.0009557530866004527 2023-01-24 05:43:13.165581: step: 98/466, loss: 0.06982994079589844 2023-01-24 05:43:13.936606: step: 100/466, loss: 0.015310260467231274 2023-01-24 05:43:14.772341: step: 102/466, loss: 0.03443342447280884 2023-01-24 05:43:15.567251: step: 104/466, loss: 0.02068159729242325 2023-01-24 05:43:16.379123: step: 106/466, loss: 0.011591408401727676 2023-01-24 05:43:17.078933: step: 108/466, loss: 0.012503300793468952 2023-01-24 05:43:17.836843: step: 110/466, loss: 0.04376915469765663 2023-01-24 05:43:18.564461: step: 112/466, loss: 0.012198442593216896 2023-01-24 05:43:19.281594: step: 114/466, loss: 0.005721665918827057 2023-01-24 05:43:20.009243: step: 116/466, loss: 0.0016128283459693193 2023-01-24 05:43:20.750177: step: 118/466, loss: 0.004805414471775293 2023-01-24 05:43:21.539229: step: 120/466, loss: 0.002220995258539915 2023-01-24 05:43:22.194803: step: 122/466, loss: 0.0013943741796538234 2023-01-24 05:43:22.945154: step: 124/466, loss: 0.010333836078643799 2023-01-24 05:43:23.726032: step: 126/466, loss: 0.01896827109158039 2023-01-24 05:43:24.421721: step: 128/466, loss: 0.0013134570326656103 2023-01-24 05:43:25.164278: step: 130/466, loss: 0.08043935149908066 2023-01-24 05:43:25.873588: step: 132/466, loss: 0.020539134740829468 2023-01-24 05:43:26.549294: step: 134/466, loss: 0.025884512811899185 2023-01-24 05:43:27.274658: step: 136/466, loss: 0.0005941173294559121 2023-01-24 05:43:28.021761: step: 138/466, loss: 0.024872949346899986 2023-01-24 05:43:28.777496: step: 140/466, loss: 0.010048205964267254 2023-01-24 05:43:29.678687: step: 142/466, loss: 0.006408365443348885 2023-01-24 05:43:30.420694: step: 144/466, loss: 0.005972626153379679 2023-01-24 05:43:31.141514: step: 146/466, loss: 0.07775118947029114 2023-01-24 05:43:31.920726: step: 148/466, loss: 0.014798705466091633 2023-01-24 05:43:32.725930: step: 150/466, loss: 0.009567567147314548 2023-01-24 05:43:33.607366: step: 152/466, loss: 0.009304573759436607 2023-01-24 05:43:34.465390: step: 154/466, loss: 0.01903250254690647 2023-01-24 05:43:35.374500: step: 156/466, loss: 0.08058947324752808 2023-01-24 05:43:36.159854: step: 158/466, loss: 0.06970684230327606 2023-01-24 05:43:36.919608: step: 160/466, loss: 0.058795832097530365 2023-01-24 05:43:37.656015: step: 162/466, loss: 0.003970024641603231 2023-01-24 05:43:38.351739: step: 164/466, loss: 0.0026512411423027515 2023-01-24 05:43:39.054181: step: 166/466, loss: 0.00022080437338445336 2023-01-24 05:43:39.840359: step: 168/466, loss: 0.0018055520486086607 2023-01-24 05:43:40.623287: step: 170/466, loss: 0.15070666372776031 2023-01-24 05:43:41.406408: step: 172/466, loss: 7.411004543304443 2023-01-24 05:43:42.128316: step: 174/466, loss: 0.08071133494377136 2023-01-24 05:43:42.881968: step: 176/466, loss: 0.01623368076980114 2023-01-24 05:43:43.622611: step: 178/466, loss: 0.0038287367206066847 2023-01-24 05:43:44.333187: step: 180/466, loss: 0.011720797047019005 2023-01-24 05:43:45.083366: step: 182/466, loss: 0.02469259686768055 2023-01-24 05:43:45.898817: step: 184/466, loss: 0.006625003181397915 2023-01-24 05:43:46.741533: step: 186/466, loss: 0.02409050427377224 2023-01-24 05:43:47.657651: step: 188/466, loss: 0.022759372368454933 2023-01-24 05:43:48.427424: step: 190/466, loss: 0.00044331394019536674 2023-01-24 05:43:49.166112: step: 192/466, loss: 0.08735527843236923 2023-01-24 05:43:49.927781: step: 194/466, loss: 0.007105493452399969 2023-01-24 05:43:50.715580: step: 196/466, loss: 0.009793892502784729 2023-01-24 05:43:51.588687: step: 198/466, loss: 0.037961721420288086 2023-01-24 05:43:52.393150: step: 200/466, loss: 0.01566535048186779 2023-01-24 05:43:53.199830: step: 202/466, loss: 0.05330275371670723 2023-01-24 05:43:53.912470: step: 204/466, loss: 0.12019774317741394 2023-01-24 05:43:54.693090: step: 206/466, loss: 0.028496457263827324 2023-01-24 05:43:55.421543: step: 208/466, loss: 0.015642846003174782 2023-01-24 05:43:56.199567: step: 210/466, loss: 0.01874103955924511 2023-01-24 05:43:56.955938: step: 212/466, loss: 0.008261686190962791 2023-01-24 05:43:57.753237: step: 214/466, loss: 0.013057042844593525 2023-01-24 05:43:58.561059: step: 216/466, loss: 0.02398427575826645 2023-01-24 05:43:59.299563: step: 218/466, loss: 0.002774237422272563 2023-01-24 05:44:00.044699: step: 220/466, loss: 0.027512196451425552 2023-01-24 05:44:00.754466: step: 222/466, loss: 0.004296524450182915 2023-01-24 05:44:01.500240: step: 224/466, loss: 0.10447569191455841 2023-01-24 05:44:02.220844: step: 226/466, loss: 0.013976804912090302 2023-01-24 05:44:03.055899: step: 228/466, loss: 0.04611645266413689 2023-01-24 05:44:03.796181: step: 230/466, loss: 0.12256401777267456 2023-01-24 05:44:04.521051: step: 232/466, loss: 0.0076015181839466095 2023-01-24 05:44:05.372336: step: 234/466, loss: 0.008073708973824978 2023-01-24 05:44:06.088351: step: 236/466, loss: 0.030057324096560478 2023-01-24 05:44:06.841624: step: 238/466, loss: 0.003706106450408697 2023-01-24 05:44:07.654824: step: 240/466, loss: 0.11645306646823883 2023-01-24 05:44:08.535825: step: 242/466, loss: 0.01141782570630312 2023-01-24 05:44:09.273463: step: 244/466, loss: 0.009234164841473103 2023-01-24 05:44:10.044884: step: 246/466, loss: 0.00295763136819005 2023-01-24 05:44:10.740633: step: 248/466, loss: 0.008472193032503128 2023-01-24 05:44:11.406370: step: 250/466, loss: 0.0012959379237145185 2023-01-24 05:44:12.136697: step: 252/466, loss: 0.008279495872557163 2023-01-24 05:44:12.924084: step: 254/466, loss: 0.055974967777729034 2023-01-24 05:44:13.674992: step: 256/466, loss: 0.0235903263092041 2023-01-24 05:44:14.469684: step: 258/466, loss: 0.008927579037845135 2023-01-24 05:44:15.246877: step: 260/466, loss: 0.0002345545799471438 2023-01-24 05:44:16.090109: step: 262/466, loss: 0.020156797021627426 2023-01-24 05:44:16.976550: step: 264/466, loss: 0.4345041811466217 2023-01-24 05:44:17.677087: step: 266/466, loss: 0.008493071421980858 2023-01-24 05:44:18.465759: step: 268/466, loss: 0.01874028518795967 2023-01-24 05:44:19.249009: step: 270/466, loss: 0.008288813754916191 2023-01-24 05:44:19.984215: step: 272/466, loss: 0.00955624133348465 2023-01-24 05:44:20.762578: step: 274/466, loss: 0.013928272761404514 2023-01-24 05:44:21.501806: step: 276/466, loss: 0.00287935184314847 2023-01-24 05:44:22.159098: step: 278/466, loss: 0.04072069376707077 2023-01-24 05:44:23.039742: step: 280/466, loss: 0.01191603485494852 2023-01-24 05:44:23.817577: step: 282/466, loss: 0.029306577518582344 2023-01-24 05:44:24.583235: step: 284/466, loss: 4.033674240112305 2023-01-24 05:44:25.404534: step: 286/466, loss: 0.0014825006946921349 2023-01-24 05:44:26.111143: step: 288/466, loss: 0.0002236310683656484 2023-01-24 05:44:26.830569: step: 290/466, loss: 0.03675243258476257 2023-01-24 05:44:27.573840: step: 292/466, loss: 0.013011719100177288 2023-01-24 05:44:28.334481: step: 294/466, loss: 0.0008605459006503224 2023-01-24 05:44:29.097185: step: 296/466, loss: 0.005612206179648638 2023-01-24 05:44:29.894378: step: 298/466, loss: 0.09135919064283371 2023-01-24 05:44:30.706345: step: 300/466, loss: 0.029406633228063583 2023-01-24 05:44:31.457951: step: 302/466, loss: 0.011398477479815483 2023-01-24 05:44:32.289924: step: 304/466, loss: 0.007227979600429535 2023-01-24 05:44:33.072520: step: 306/466, loss: 0.00731433741748333 2023-01-24 05:44:33.791356: step: 308/466, loss: 0.000489395868498832 2023-01-24 05:44:34.685963: step: 310/466, loss: 0.013301452621817589 2023-01-24 05:44:35.473846: step: 312/466, loss: 0.008809504099190235 2023-01-24 05:44:36.189792: step: 314/466, loss: 0.022511150687932968 2023-01-24 05:44:36.878286: step: 316/466, loss: 0.0037717344239354134 2023-01-24 05:44:37.667264: step: 318/466, loss: 0.005426387302577496 2023-01-24 05:44:38.386331: step: 320/466, loss: 0.05997195839881897 2023-01-24 05:44:39.129970: step: 322/466, loss: 0.020946403965353966 2023-01-24 05:44:39.809722: step: 324/466, loss: 0.009468648582696915 2023-01-24 05:44:40.640344: step: 326/466, loss: 0.005700098816305399 2023-01-24 05:44:41.326651: step: 328/466, loss: 0.02063606120646 2023-01-24 05:44:42.140475: step: 330/466, loss: 0.023080935701727867 2023-01-24 05:44:42.919250: step: 332/466, loss: 0.0172546599060297 2023-01-24 05:44:43.660465: step: 334/466, loss: 0.01309296116232872 2023-01-24 05:44:44.389563: step: 336/466, loss: 0.021634528413414955 2023-01-24 05:44:45.108336: step: 338/466, loss: 0.03849001228809357 2023-01-24 05:44:45.884064: step: 340/466, loss: 0.010629228316247463 2023-01-24 05:44:46.592098: step: 342/466, loss: 0.035972755402326584 2023-01-24 05:44:47.271295: step: 344/466, loss: 0.00018546557112131268 2023-01-24 05:44:48.080388: step: 346/466, loss: 0.0004207846650388092 2023-01-24 05:44:48.855990: step: 348/466, loss: 0.004989704582840204 2023-01-24 05:44:49.618278: step: 350/466, loss: 0.03478853777050972 2023-01-24 05:44:50.394575: step: 352/466, loss: 0.04219071939587593 2023-01-24 05:44:51.152439: step: 354/466, loss: 0.01165375579148531 2023-01-24 05:44:51.803491: step: 356/466, loss: 0.005104635842144489 2023-01-24 05:44:52.562571: step: 358/466, loss: 0.09205269813537598 2023-01-24 05:44:53.325170: step: 360/466, loss: 0.002441998338326812 2023-01-24 05:44:53.996148: step: 362/466, loss: 0.08703027665615082 2023-01-24 05:44:54.815153: step: 364/466, loss: 0.03320421278476715 2023-01-24 05:44:55.580892: step: 366/466, loss: 0.012047701515257359 2023-01-24 05:44:56.335883: step: 368/466, loss: 0.0029933189507573843 2023-01-24 05:44:57.052642: step: 370/466, loss: 0.0032152493949979544 2023-01-24 05:44:57.844618: step: 372/466, loss: 0.18299029767513275 2023-01-24 05:44:58.618089: step: 374/466, loss: 0.0006103235646151006 2023-01-24 05:44:59.363129: step: 376/466, loss: 0.00045644465717487037 2023-01-24 05:45:00.060387: step: 378/466, loss: 0.008309612050652504 2023-01-24 05:45:00.737680: step: 380/466, loss: 0.030501268804073334 2023-01-24 05:45:01.496732: step: 382/466, loss: 0.014829293824732304 2023-01-24 05:45:02.286791: step: 384/466, loss: 0.39675837755203247 2023-01-24 05:45:03.031646: step: 386/466, loss: 0.017327308654785156 2023-01-24 05:45:03.825725: step: 388/466, loss: 0.03276915103197098 2023-01-24 05:45:04.587355: step: 390/466, loss: 0.0019592309836298227 2023-01-24 05:45:05.441896: step: 392/466, loss: 0.0044213952496647835 2023-01-24 05:45:06.227577: step: 394/466, loss: 0.03716374561190605 2023-01-24 05:45:06.995724: step: 396/466, loss: 0.044696152210235596 2023-01-24 05:45:07.752510: step: 398/466, loss: 0.023156609386205673 2023-01-24 05:45:08.482692: step: 400/466, loss: 0.0072260950691998005 2023-01-24 05:45:09.245909: step: 402/466, loss: 6.983886123634875e-05 2023-01-24 05:45:09.958358: step: 404/466, loss: 0.001076485961675644 2023-01-24 05:45:10.712460: step: 406/466, loss: 0.056770969182252884 2023-01-24 05:45:11.482990: step: 408/466, loss: 0.0008376673213206232 2023-01-24 05:45:12.107405: step: 410/466, loss: 0.00022090923448558897 2023-01-24 05:45:12.916604: step: 412/466, loss: 0.010586812160909176 2023-01-24 05:45:13.705092: step: 414/466, loss: 0.005161886103451252 2023-01-24 05:45:14.430991: step: 416/466, loss: 0.004693008493632078 2023-01-24 05:45:15.154070: step: 418/466, loss: 0.007503495551645756 2023-01-24 05:45:15.878514: step: 420/466, loss: 0.0016249733744189143 2023-01-24 05:45:16.661840: step: 422/466, loss: 0.019222719594836235 2023-01-24 05:45:17.414520: step: 424/466, loss: 0.019780205562710762 2023-01-24 05:45:18.142328: step: 426/466, loss: 0.03432883322238922 2023-01-24 05:45:18.818189: step: 428/466, loss: 0.016445733606815338 2023-01-24 05:45:19.643102: step: 430/466, loss: 0.001471205847337842 2023-01-24 05:45:20.385942: step: 432/466, loss: 0.707700252532959 2023-01-24 05:45:21.108413: step: 434/466, loss: 0.01202785037457943 2023-01-24 05:45:21.859113: step: 436/466, loss: 0.020741842687129974 2023-01-24 05:45:22.545982: step: 438/466, loss: 0.02251707948744297 2023-01-24 05:45:23.246456: step: 440/466, loss: 0.00407326640561223 2023-01-24 05:45:24.106404: step: 442/466, loss: 0.03363305330276489 2023-01-24 05:45:24.912486: step: 444/466, loss: 0.009023510850965977 2023-01-24 05:45:25.676378: step: 446/466, loss: 0.030631855130195618 2023-01-24 05:45:26.417958: step: 448/466, loss: 0.0005966068711131811 2023-01-24 05:45:27.169757: step: 450/466, loss: 0.019086359068751335 2023-01-24 05:45:27.872370: step: 452/466, loss: 0.0014762079808861017 2023-01-24 05:45:28.651481: step: 454/466, loss: 0.0017003518296405673 2023-01-24 05:45:29.534557: step: 456/466, loss: 0.036129433661699295 2023-01-24 05:45:30.359242: step: 458/466, loss: 0.05053102597594261 2023-01-24 05:45:31.093779: step: 460/466, loss: 0.013784201815724373 2023-01-24 05:45:31.862242: step: 462/466, loss: 0.003369506448507309 2023-01-24 05:45:32.654828: step: 464/466, loss: 0.021329237148165703 2023-01-24 05:45:33.463538: step: 466/466, loss: 0.03785283491015434 2023-01-24 05:45:34.187053: step: 468/466, loss: 0.08001746237277985 2023-01-24 05:45:35.069305: step: 470/466, loss: 0.0024658029433339834 2023-01-24 05:45:35.769812: step: 472/466, loss: 0.03431737795472145 2023-01-24 05:45:36.437299: step: 474/466, loss: 0.021164868026971817 2023-01-24 05:45:37.258910: step: 476/466, loss: 0.032166868448257446 2023-01-24 05:45:38.024059: step: 478/466, loss: 0.09220929443836212 2023-01-24 05:45:38.760396: step: 480/466, loss: 0.0032411126885563135 2023-01-24 05:45:39.470030: step: 482/466, loss: 0.005250800866633654 2023-01-24 05:45:40.285465: step: 484/466, loss: 0.0031083542853593826 2023-01-24 05:45:41.013422: step: 486/466, loss: 0.008143751882016659 2023-01-24 05:45:41.861194: step: 488/466, loss: 0.01995263621211052 2023-01-24 05:45:42.706756: step: 490/466, loss: 0.07909484952688217 2023-01-24 05:45:43.514947: step: 492/466, loss: 0.027795715257525444 2023-01-24 05:45:44.263774: step: 494/466, loss: 0.01582321524620056 2023-01-24 05:45:45.018526: step: 496/466, loss: 0.009483584202826023 2023-01-24 05:45:45.797405: step: 498/466, loss: 0.004073833581060171 2023-01-24 05:45:46.562870: step: 500/466, loss: 0.03123282827436924 2023-01-24 05:45:47.264182: step: 502/466, loss: 0.14656507968902588 2023-01-24 05:45:48.012303: step: 504/466, loss: 0.043541837483644485 2023-01-24 05:45:48.815011: step: 506/466, loss: 0.0035433454904705286 2023-01-24 05:45:49.603057: step: 508/466, loss: 0.023579321801662445 2023-01-24 05:45:50.347104: step: 510/466, loss: 0.007745890412479639 2023-01-24 05:45:51.139258: step: 512/466, loss: 0.044587232172489166 2023-01-24 05:45:51.862278: step: 514/466, loss: 0.05123988911509514 2023-01-24 05:45:52.561401: step: 516/466, loss: 0.037435825914144516 2023-01-24 05:45:53.323905: step: 518/466, loss: 0.06502517312765121 2023-01-24 05:45:54.128804: step: 520/466, loss: 0.021820692345499992 2023-01-24 05:45:54.836597: step: 522/466, loss: 0.033464353531599045 2023-01-24 05:45:55.592634: step: 524/466, loss: 0.05899073928594589 2023-01-24 05:45:56.333854: step: 526/466, loss: 0.025905214250087738 2023-01-24 05:45:57.115681: step: 528/466, loss: 0.028305260464549065 2023-01-24 05:45:57.909534: step: 530/466, loss: 0.3684265911579132 2023-01-24 05:45:58.604223: step: 532/466, loss: 0.05670701712369919 2023-01-24 05:45:59.292952: step: 534/466, loss: 0.00019752232765313238 2023-01-24 05:45:59.981430: step: 536/466, loss: 0.011019325815141201 2023-01-24 05:46:00.785740: step: 538/466, loss: 0.010654658079147339 2023-01-24 05:46:01.474701: step: 540/466, loss: 0.005419179797172546 2023-01-24 05:46:02.266205: step: 542/466, loss: 0.1257794350385666 2023-01-24 05:46:02.974321: step: 544/466, loss: 0.0339568629860878 2023-01-24 05:46:03.684684: step: 546/466, loss: 0.01898978278040886 2023-01-24 05:46:04.410924: step: 548/466, loss: 0.003557452466338873 2023-01-24 05:46:05.150309: step: 550/466, loss: 0.013799430802464485 2023-01-24 05:46:05.915303: step: 552/466, loss: 0.0009106646757572889 2023-01-24 05:46:06.703646: step: 554/466, loss: 0.010609936900436878 2023-01-24 05:46:07.526562: step: 556/466, loss: 0.02367284893989563 2023-01-24 05:46:08.266733: step: 558/466, loss: 0.019093787297606468 2023-01-24 05:46:09.012476: step: 560/466, loss: 0.012392389588057995 2023-01-24 05:46:09.892477: step: 562/466, loss: 0.0020131270866841078 2023-01-24 05:46:10.682205: step: 564/466, loss: 0.04604710638523102 2023-01-24 05:46:11.485540: step: 566/466, loss: 0.0003468830545898527 2023-01-24 05:46:12.308493: step: 568/466, loss: 0.054404839873313904 2023-01-24 05:46:13.013024: step: 570/466, loss: 0.0021197283640503883 2023-01-24 05:46:13.715134: step: 572/466, loss: 0.06230594962835312 2023-01-24 05:46:14.464660: step: 574/466, loss: 0.028980540111660957 2023-01-24 05:46:15.196523: step: 576/466, loss: 0.002518613124266267 2023-01-24 05:46:15.925562: step: 578/466, loss: 0.03597286343574524 2023-01-24 05:46:16.653541: step: 580/466, loss: 0.014612109400331974 2023-01-24 05:46:17.424191: step: 582/466, loss: 0.02597770281136036 2023-01-24 05:46:18.168464: step: 584/466, loss: 0.006040181033313274 2023-01-24 05:46:18.888653: step: 586/466, loss: 0.0018081383313983679 2023-01-24 05:46:19.621279: step: 588/466, loss: 0.004814724437892437 2023-01-24 05:46:20.364793: step: 590/466, loss: 0.026073535904288292 2023-01-24 05:46:21.105192: step: 592/466, loss: 0.011254251934587955 2023-01-24 05:46:21.862026: step: 594/466, loss: 0.051905952394008636 2023-01-24 05:46:22.555159: step: 596/466, loss: 0.278527170419693 2023-01-24 05:46:23.332690: step: 598/466, loss: 0.004648893140256405 2023-01-24 05:46:24.057097: step: 600/466, loss: 0.02231455221772194 2023-01-24 05:46:24.775882: step: 602/466, loss: 0.00720774894580245 2023-01-24 05:46:25.456355: step: 604/466, loss: 0.025287862867116928 2023-01-24 05:46:26.241084: step: 606/466, loss: 0.0005568001070059836 2023-01-24 05:46:27.000476: step: 608/466, loss: 0.0004496572364587337 2023-01-24 05:46:27.835651: step: 610/466, loss: 0.008366197347640991 2023-01-24 05:46:28.630760: step: 612/466, loss: 0.09989674389362335 2023-01-24 05:46:29.422874: step: 614/466, loss: 0.036410532891750336 2023-01-24 05:46:30.187301: step: 616/466, loss: 0.014120755717158318 2023-01-24 05:46:30.941123: step: 618/466, loss: 0.03373485058546066 2023-01-24 05:46:31.798608: step: 620/466, loss: 0.056192394345998764 2023-01-24 05:46:32.541406: step: 622/466, loss: 0.005703099071979523 2023-01-24 05:46:33.362820: step: 624/466, loss: 0.039040133357048035 2023-01-24 05:46:34.130275: step: 626/466, loss: 0.004824712872505188 2023-01-24 05:46:34.830569: step: 628/466, loss: 0.02520856261253357 2023-01-24 05:46:35.645356: step: 630/466, loss: 0.008502018637955189 2023-01-24 05:46:36.408407: step: 632/466, loss: 0.01860482059419155 2023-01-24 05:46:37.087134: step: 634/466, loss: 0.0026551811024546623 2023-01-24 05:46:37.789431: step: 636/466, loss: 0.004953205585479736 2023-01-24 05:46:38.571753: step: 638/466, loss: 0.0004915996687486768 2023-01-24 05:46:39.407257: step: 640/466, loss: 0.03200971707701683 2023-01-24 05:46:40.185474: step: 642/466, loss: 0.02307523973286152 2023-01-24 05:46:40.897668: step: 644/466, loss: 0.0009260879596695304 2023-01-24 05:46:41.659402: step: 646/466, loss: 0.036600060760974884 2023-01-24 05:46:42.510437: step: 648/466, loss: 0.009183496236801147 2023-01-24 05:46:43.288824: step: 650/466, loss: 0.022826118394732475 2023-01-24 05:46:44.063528: step: 652/466, loss: 0.05116073787212372 2023-01-24 05:46:44.706105: step: 654/466, loss: 0.04589260369539261 2023-01-24 05:46:45.446461: step: 656/466, loss: 0.0015795762883499265 2023-01-24 05:46:46.212161: step: 658/466, loss: 0.00048648411757312715 2023-01-24 05:46:47.067639: step: 660/466, loss: 0.08884550631046295 2023-01-24 05:46:47.759120: step: 662/466, loss: 0.0012818826362490654 2023-01-24 05:46:48.512985: step: 664/466, loss: 0.39736467599868774 2023-01-24 05:46:49.299838: step: 666/466, loss: 0.01191942859441042 2023-01-24 05:46:50.017384: step: 668/466, loss: 0.03434213995933533 2023-01-24 05:46:50.665162: step: 670/466, loss: 0.0016377634601667523 2023-01-24 05:46:51.374220: step: 672/466, loss: 0.023413589224219322 2023-01-24 05:46:52.096660: step: 674/466, loss: 0.004105696454644203 2023-01-24 05:46:52.860743: step: 676/466, loss: 0.014797130599617958 2023-01-24 05:46:53.640485: step: 678/466, loss: 0.0023186160251498222 2023-01-24 05:46:54.360731: step: 680/466, loss: 0.023976871743798256 2023-01-24 05:46:55.097282: step: 682/466, loss: 0.0022996345069259405 2023-01-24 05:46:55.858362: step: 684/466, loss: 0.019209645688533783 2023-01-24 05:46:56.622643: step: 686/466, loss: 0.0162705909460783 2023-01-24 05:46:57.440450: step: 688/466, loss: 0.4258826971054077 2023-01-24 05:46:58.145198: step: 690/466, loss: 0.012139077298343182 2023-01-24 05:46:58.820472: step: 692/466, loss: 0.003205450950190425 2023-01-24 05:46:59.575056: step: 694/466, loss: 0.008439544588327408 2023-01-24 05:47:00.246615: step: 696/466, loss: 0.004512060433626175 2023-01-24 05:47:01.049348: step: 698/466, loss: 0.0006865372997708619 2023-01-24 05:47:01.879386: step: 700/466, loss: 0.006161834113299847 2023-01-24 05:47:02.652984: step: 702/466, loss: 0.011986282654106617 2023-01-24 05:47:03.421894: step: 704/466, loss: 0.17363914847373962 2023-01-24 05:47:04.160358: step: 706/466, loss: 0.03010350465774536 2023-01-24 05:47:04.944095: step: 708/466, loss: 0.02479240484535694 2023-01-24 05:47:05.730722: step: 710/466, loss: 0.009199201129376888 2023-01-24 05:47:06.517447: step: 712/466, loss: 0.513110339641571 2023-01-24 05:47:07.329069: step: 714/466, loss: 0.006795932538807392 2023-01-24 05:47:08.160141: step: 716/466, loss: 0.017703594639897346 2023-01-24 05:47:08.896957: step: 718/466, loss: 0.005541081074625254 2023-01-24 05:47:09.647176: step: 720/466, loss: 0.010504513047635555 2023-01-24 05:47:10.380493: step: 722/466, loss: 0.011415023356676102 2023-01-24 05:47:11.107148: step: 724/466, loss: 0.05735626071691513 2023-01-24 05:47:11.892710: step: 726/466, loss: 0.07205421477556229 2023-01-24 05:47:12.568117: step: 728/466, loss: 0.04259423911571503 2023-01-24 05:47:13.304548: step: 730/466, loss: 0.1016535609960556 2023-01-24 05:47:14.055034: step: 732/466, loss: 0.003966829739511013 2023-01-24 05:47:14.721209: step: 734/466, loss: 0.017321214079856873 2023-01-24 05:47:15.460013: step: 736/466, loss: 0.05669906735420227 2023-01-24 05:47:16.267039: step: 738/466, loss: 0.0017302916385233402 2023-01-24 05:47:16.975820: step: 740/466, loss: 0.003923018462955952 2023-01-24 05:47:17.693696: step: 742/466, loss: 0.016879770904779434 2023-01-24 05:47:18.467824: step: 744/466, loss: 2.7057571060140617e-05 2023-01-24 05:47:19.205040: step: 746/466, loss: 0.0012021064758300781 2023-01-24 05:47:19.999636: step: 748/466, loss: 0.010458866134285927 2023-01-24 05:47:20.737904: step: 750/466, loss: 0.05704216659069061 2023-01-24 05:47:21.462976: step: 752/466, loss: 0.018793189898133278 2023-01-24 05:47:22.251220: step: 754/466, loss: 0.0011075339280068874 2023-01-24 05:47:23.050144: step: 756/466, loss: 0.05403747782111168 2023-01-24 05:47:23.876203: step: 758/466, loss: 0.05381093919277191 2023-01-24 05:47:24.554985: step: 760/466, loss: 0.01267674658447504 2023-01-24 05:47:25.342956: step: 762/466, loss: 0.009959987364709377 2023-01-24 05:47:26.088736: step: 764/466, loss: 0.01536529790610075 2023-01-24 05:47:26.757583: step: 766/466, loss: 0.0005679702153429389 2023-01-24 05:47:27.450378: step: 768/466, loss: 0.014717141166329384 2023-01-24 05:47:28.213745: step: 770/466, loss: 0.024072684347629547 2023-01-24 05:47:28.989262: step: 772/466, loss: 0.04764863848686218 2023-01-24 05:47:29.731648: step: 774/466, loss: 2.5167637431877665e-06 2023-01-24 05:47:30.560344: step: 776/466, loss: 0.018080471083521843 2023-01-24 05:47:31.379916: step: 778/466, loss: 0.08379022777080536 2023-01-24 05:47:32.087849: step: 780/466, loss: 0.012476377189159393 2023-01-24 05:47:32.838888: step: 782/466, loss: 0.40508633852005005 2023-01-24 05:47:33.609895: step: 784/466, loss: 0.17186686396598816 2023-01-24 05:47:34.428964: step: 786/466, loss: 0.011937204748392105 2023-01-24 05:47:35.222030: step: 788/466, loss: 0.001307436847127974 2023-01-24 05:47:35.973410: step: 790/466, loss: 0.020467674359679222 2023-01-24 05:47:36.766545: step: 792/466, loss: 0.06974517554044724 2023-01-24 05:47:37.462902: step: 794/466, loss: 0.006880198139697313 2023-01-24 05:47:38.210651: step: 796/466, loss: 0.0707889124751091 2023-01-24 05:47:38.934428: step: 798/466, loss: 0.0010480673518031836 2023-01-24 05:47:39.735762: step: 800/466, loss: 0.03564343601465225 2023-01-24 05:47:40.454393: step: 802/466, loss: 0.026431893929839134 2023-01-24 05:47:41.272880: step: 804/466, loss: 0.030676953494548798 2023-01-24 05:47:41.970625: step: 806/466, loss: 0.05701667442917824 2023-01-24 05:47:42.784699: step: 808/466, loss: 0.2994881272315979 2023-01-24 05:47:43.567867: step: 810/466, loss: 0.022319236770272255 2023-01-24 05:47:44.251599: step: 812/466, loss: 0.004756301175802946 2023-01-24 05:47:45.052275: step: 814/466, loss: 0.052547529339790344 2023-01-24 05:47:45.855525: step: 816/466, loss: 0.00465787248685956 2023-01-24 05:47:46.529398: step: 818/466, loss: 0.014711483381688595 2023-01-24 05:47:47.297307: step: 820/466, loss: 0.005789772141724825 2023-01-24 05:47:48.037598: step: 822/466, loss: 0.007069493178278208 2023-01-24 05:47:48.783259: step: 824/466, loss: 0.6527952551841736 2023-01-24 05:47:49.521011: step: 826/466, loss: 0.0037681246176362038 2023-01-24 05:47:50.264114: step: 828/466, loss: 0.010844358243048191 2023-01-24 05:47:50.964049: step: 830/466, loss: 0.009292328730225563 2023-01-24 05:47:51.751495: step: 832/466, loss: 0.00010707331966841593 2023-01-24 05:47:52.522223: step: 834/466, loss: 0.0015324027044698596 2023-01-24 05:47:53.320024: step: 836/466, loss: 0.005519147031009197 2023-01-24 05:47:54.205959: step: 838/466, loss: 0.036789048463106155 2023-01-24 05:47:54.957544: step: 840/466, loss: 0.017365090548992157 2023-01-24 05:47:55.755267: step: 842/466, loss: 0.03410469368100166 2023-01-24 05:47:56.453935: step: 844/466, loss: 0.030230188742280006 2023-01-24 05:47:57.251620: step: 846/466, loss: 0.0429796427488327 2023-01-24 05:47:57.896830: step: 848/466, loss: 0.0044829887337982655 2023-01-24 05:47:58.720206: step: 850/466, loss: 0.49169623851776123 2023-01-24 05:47:59.496619: step: 852/466, loss: 0.002044479828327894 2023-01-24 05:48:00.281739: step: 854/466, loss: 0.0002728290855884552 2023-01-24 05:48:01.119678: step: 856/466, loss: 0.0035052099265158176 2023-01-24 05:48:01.936484: step: 858/466, loss: 0.0392683781683445 2023-01-24 05:48:02.704567: step: 860/466, loss: 0.05269757658243179 2023-01-24 05:48:03.310634: step: 862/466, loss: 0.000325383385643363 2023-01-24 05:48:04.103784: step: 864/466, loss: 0.0006225865217857063 2023-01-24 05:48:04.842409: step: 866/466, loss: 0.021601015701889992 2023-01-24 05:48:05.614271: step: 868/466, loss: 0.39107298851013184 2023-01-24 05:48:06.330165: step: 870/466, loss: 0.0002772028965409845 2023-01-24 05:48:07.089717: step: 872/466, loss: 0.021326979622244835 2023-01-24 05:48:07.922802: step: 874/466, loss: 0.346127450466156 2023-01-24 05:48:08.697084: step: 876/466, loss: 0.008033477701246738 2023-01-24 05:48:09.411775: step: 878/466, loss: 0.07257543504238129 2023-01-24 05:48:10.094084: step: 880/466, loss: 0.008361267857253551 2023-01-24 05:48:10.964845: step: 882/466, loss: 0.5972766876220703 2023-01-24 05:48:11.632257: step: 884/466, loss: 0.008155311457812786 2023-01-24 05:48:12.408425: step: 886/466, loss: 0.04740725830197334 2023-01-24 05:48:13.140529: step: 888/466, loss: 0.7012631297111511 2023-01-24 05:48:13.872632: step: 890/466, loss: 0.01653783954679966 2023-01-24 05:48:14.651799: step: 892/466, loss: 0.016953716054558754 2023-01-24 05:48:15.395770: step: 894/466, loss: 0.9254959225654602 2023-01-24 05:48:16.232114: step: 896/466, loss: 0.047596968710422516 2023-01-24 05:48:17.028511: step: 898/466, loss: 0.01078968495130539 2023-01-24 05:48:17.863777: step: 900/466, loss: 0.020899973809719086 2023-01-24 05:48:18.556686: step: 902/466, loss: 0.009160463698208332 2023-01-24 05:48:19.321497: step: 904/466, loss: 0.030137361958622932 2023-01-24 05:48:20.127490: step: 906/466, loss: 0.028629913926124573 2023-01-24 05:48:20.927360: step: 908/466, loss: 0.04084698110818863 2023-01-24 05:48:21.708435: step: 910/466, loss: 0.07203912734985352 2023-01-24 05:48:22.397529: step: 912/466, loss: 0.002872600918635726 2023-01-24 05:48:23.094311: step: 914/466, loss: 0.022516105324029922 2023-01-24 05:48:23.895812: step: 916/466, loss: 0.022481245920062065 2023-01-24 05:48:24.688982: step: 918/466, loss: 0.016639167442917824 2023-01-24 05:48:25.480135: step: 920/466, loss: 0.013560550287365913 2023-01-24 05:48:26.336277: step: 922/466, loss: 0.06358363479375839 2023-01-24 05:48:27.124664: step: 924/466, loss: 0.004070811904966831 2023-01-24 05:48:27.836752: step: 926/466, loss: 0.0028404947370290756 2023-01-24 05:48:28.570595: step: 928/466, loss: 0.01054287701845169 2023-01-24 05:48:29.298273: step: 930/466, loss: 0.017033016309142113 2023-01-24 05:48:30.004136: step: 932/466, loss: 0.014236865565180779 ================================================== Loss: 0.070 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3433991683991684, 'r': 0.3134250474383302, 'f1': 0.32772817460317466}, 'combined': 0.241483918128655, 'epoch': 29} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3773858763846644, 'r': 0.28426468610792904, 'f1': 0.32427230859719314}, 'combined': 0.1993088335768114, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31501318854883165, 'r': 0.31740418049227626, 'f1': 0.31620416468701246}, 'combined': 0.23299254240095654, 'epoch': 29} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35813459947309784, 'r': 0.2926524500027134, 'f1': 0.32209911998391155}, 'combined': 0.19797311764864808, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3600931677018634, 'r': 0.3300284629981025, 'f1': 0.34440594059405943}, 'combined': 0.25377279833246486, 'epoch': 29} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3732928220606822, 'r': 0.2841675462053636, 'f1': 0.3226892764375249}, 'combined': 0.19930808250553012, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.325, 'r': 0.2785714285714286, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.358695652173913, 'f1': 0.2946428571428571}, 'combined': 0.14732142857142855, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 29} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3514281426856457, 'r': 0.3267548195748888, 'f1': 0.33864265470199884}, 'combined': 0.24952616662252544, 'epoch': 23} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3841715799217045, 'r': 0.28937599526569946, 'f1': 0.3301029871919831}, 'combined': 0.20289256773751158, 'epoch': 23} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3642241379310345, 'r': 0.30178571428571427, 'f1': 0.33007812499999994}, 'combined': 0.2200520833333333, 'epoch': 23} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3600931677018634, 'r': 0.3300284629981025, 'f1': 0.34440594059405943}, 'combined': 0.25377279833246486, 'epoch': 29} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3732928220606822, 'r': 0.2841675462053636, 'f1': 0.3226892764375249}, 'combined': 0.19930808250553012, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 29} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:51:22.591538: step: 2/466, loss: 0.0021244355011731386 2023-01-24 05:51:23.352407: step: 4/466, loss: 0.01118533406406641 2023-01-24 05:51:24.061048: step: 6/466, loss: 0.004588813055306673 2023-01-24 05:51:24.764902: step: 8/466, loss: 0.008610702119767666 2023-01-24 05:51:25.493454: step: 10/466, loss: 0.016871176660060883 2023-01-24 05:51:26.230305: step: 12/466, loss: 0.011558053083717823 2023-01-24 05:51:27.138248: step: 14/466, loss: 0.02135724015533924 2023-01-24 05:51:27.907621: step: 16/466, loss: 0.005487372167408466 2023-01-24 05:51:28.708669: step: 18/466, loss: 0.010226622223854065 2023-01-24 05:51:29.460586: step: 20/466, loss: 0.004341086372733116 2023-01-24 05:51:30.222086: step: 22/466, loss: 0.0025654153432697058 2023-01-24 05:51:30.988956: step: 24/466, loss: 0.08291388303041458 2023-01-24 05:51:31.749976: step: 26/466, loss: 0.02562730386853218 2023-01-24 05:51:32.479415: step: 28/466, loss: 0.011620445176959038 2023-01-24 05:51:33.228883: step: 30/466, loss: 0.029996804893016815 2023-01-24 05:51:34.008244: step: 32/466, loss: 0.0023577671963721514 2023-01-24 05:51:34.806000: step: 34/466, loss: 0.35937052965164185 2023-01-24 05:51:35.541090: step: 36/466, loss: 0.02752179652452469 2023-01-24 05:51:36.325531: step: 38/466, loss: 0.00525928009301424 2023-01-24 05:51:37.088680: step: 40/466, loss: 0.04858270287513733 2023-01-24 05:51:37.827220: step: 42/466, loss: 0.09995172917842865 2023-01-24 05:51:38.575037: step: 44/466, loss: 0.0004004990041721612 2023-01-24 05:51:39.382281: step: 46/466, loss: 0.05577151104807854 2023-01-24 05:51:40.212957: step: 48/466, loss: 0.09152302145957947 2023-01-24 05:51:40.934269: step: 50/466, loss: 0.0006231117877177894 2023-01-24 05:51:41.733772: step: 52/466, loss: 0.00014527323946822435 2023-01-24 05:51:42.559750: step: 54/466, loss: 0.003497096709907055 2023-01-24 05:51:43.311523: step: 56/466, loss: 5.375292676035315e-05 2023-01-24 05:51:44.105305: step: 58/466, loss: 0.02126486599445343 2023-01-24 05:51:44.792167: step: 60/466, loss: 0.04060050845146179 2023-01-24 05:51:45.493609: step: 62/466, loss: 0.035850729793310165 2023-01-24 05:51:46.277511: step: 64/466, loss: 0.00572303868830204 2023-01-24 05:51:46.980225: step: 66/466, loss: 0.009731757454574108 2023-01-24 05:51:47.776018: step: 68/466, loss: 0.1124640703201294 2023-01-24 05:51:48.437148: step: 70/466, loss: 0.01516781747341156 2023-01-24 05:51:49.185644: step: 72/466, loss: 0.00239353789947927 2023-01-24 05:51:50.006894: step: 74/466, loss: 0.1437223106622696 2023-01-24 05:51:50.708412: step: 76/466, loss: 0.006624910980463028 2023-01-24 05:51:51.484096: step: 78/466, loss: 0.008828969672322273 2023-01-24 05:51:52.255799: step: 80/466, loss: 0.12912790477275848 2023-01-24 05:51:53.082593: step: 82/466, loss: 0.0045758625492453575 2023-01-24 05:51:53.768846: step: 84/466, loss: 0.009333760477602482 2023-01-24 05:51:54.496887: step: 86/466, loss: 0.01652279868721962 2023-01-24 05:51:55.223897: step: 88/466, loss: 0.022450599819421768 2023-01-24 05:51:55.922083: step: 90/466, loss: 0.027127673849463463 2023-01-24 05:51:56.594270: step: 92/466, loss: 0.0020243090111762285 2023-01-24 05:51:57.363137: step: 94/466, loss: 0.003077466506510973 2023-01-24 05:51:58.066386: step: 96/466, loss: 0.0033880032133311033 2023-01-24 05:51:58.990012: step: 98/466, loss: 0.06507313996553421 2023-01-24 05:51:59.733733: step: 100/466, loss: 0.024417519569396973 2023-01-24 05:52:00.633047: step: 102/466, loss: 0.04545615240931511 2023-01-24 05:52:01.398459: step: 104/466, loss: 0.009091264568269253 2023-01-24 05:52:02.150919: step: 106/466, loss: 0.02733277529478073 2023-01-24 05:52:02.879119: step: 108/466, loss: 0.008861579932272434 2023-01-24 05:52:03.642487: step: 110/466, loss: 0.019939109683036804 2023-01-24 05:52:04.441578: step: 112/466, loss: 0.13370677828788757 2023-01-24 05:52:05.161409: step: 114/466, loss: 0.002368063433095813 2023-01-24 05:52:05.927634: step: 116/466, loss: 0.001968635944649577 2023-01-24 05:52:06.643541: step: 118/466, loss: 0.002208284568041563 2023-01-24 05:52:07.406375: step: 120/466, loss: 0.0009069097577594221 2023-01-24 05:52:08.128677: step: 122/466, loss: 0.01672951504588127 2023-01-24 05:52:08.862702: step: 124/466, loss: 0.013292327523231506 2023-01-24 05:52:09.652770: step: 126/466, loss: 0.02614814229309559 2023-01-24 05:52:10.406347: step: 128/466, loss: 0.02433999814093113 2023-01-24 05:52:11.122160: step: 130/466, loss: 0.0002162880846299231 2023-01-24 05:52:11.817319: step: 132/466, loss: 0.016634326428174973 2023-01-24 05:52:12.561519: step: 134/466, loss: 0.0033332400489598513 2023-01-24 05:52:13.316427: step: 136/466, loss: 0.0007578277145512402 2023-01-24 05:52:14.028948: step: 138/466, loss: 0.001614563399925828 2023-01-24 05:52:14.801347: step: 140/466, loss: 0.014187711291015148 2023-01-24 05:52:15.636227: step: 142/466, loss: 0.017399389296770096 2023-01-24 05:52:16.389806: step: 144/466, loss: 0.0022756143007427454 2023-01-24 05:52:17.189025: step: 146/466, loss: 0.030932974070310593 2023-01-24 05:52:17.915288: step: 148/466, loss: 0.010401797480881214 2023-01-24 05:52:18.808810: step: 150/466, loss: 0.038547106087207794 2023-01-24 05:52:19.538725: step: 152/466, loss: 0.004876116290688515 2023-01-24 05:52:20.236447: step: 154/466, loss: 0.04435432702302933 2023-01-24 05:52:20.867850: step: 156/466, loss: 0.013212048448622227 2023-01-24 05:52:21.643395: step: 158/466, loss: 0.0058258832432329655 2023-01-24 05:52:22.364986: step: 160/466, loss: 0.006186299957334995 2023-01-24 05:52:23.126577: step: 162/466, loss: 0.023623663932085037 2023-01-24 05:52:23.840143: step: 164/466, loss: 0.1430150866508484 2023-01-24 05:52:24.507260: step: 166/466, loss: 0.02687109261751175 2023-01-24 05:52:25.203484: step: 168/466, loss: 0.019926466047763824 2023-01-24 05:52:25.933767: step: 170/466, loss: 0.005367781035602093 2023-01-24 05:52:26.663305: step: 172/466, loss: 0.02796401083469391 2023-01-24 05:52:27.597130: step: 174/466, loss: 0.0037129439879208803 2023-01-24 05:52:28.269758: step: 176/466, loss: 0.0015536812134087086 2023-01-24 05:52:28.967816: step: 178/466, loss: 0.016774123534560204 2023-01-24 05:52:29.671363: step: 180/466, loss: 0.0027977072168141603 2023-01-24 05:52:30.372413: step: 182/466, loss: 0.00121384731028229 2023-01-24 05:52:31.149835: step: 184/466, loss: 0.05058206245303154 2023-01-24 05:52:31.891788: step: 186/466, loss: 0.006122369784861803 2023-01-24 05:52:32.623218: step: 188/466, loss: 0.006557526532560587 2023-01-24 05:52:33.424748: step: 190/466, loss: 0.0757964700460434 2023-01-24 05:52:34.322444: step: 192/466, loss: 0.05410829558968544 2023-01-24 05:52:35.147699: step: 194/466, loss: 0.001608781749382615 2023-01-24 05:52:35.957915: step: 196/466, loss: 0.03472140058875084 2023-01-24 05:52:36.692335: step: 198/466, loss: 0.011966650374233723 2023-01-24 05:52:37.363428: step: 200/466, loss: 0.003614018438383937 2023-01-24 05:52:38.157345: step: 202/466, loss: 0.0036884110886603594 2023-01-24 05:52:38.941189: step: 204/466, loss: 0.003991882316768169 2023-01-24 05:52:39.696410: step: 206/466, loss: 0.001457979902625084 2023-01-24 05:52:40.582915: step: 208/466, loss: 0.060240548104047775 2023-01-24 05:52:41.307405: step: 210/466, loss: 0.012254497967660427 2023-01-24 05:52:42.055248: step: 212/466, loss: 0.0019502416253089905 2023-01-24 05:52:42.787943: step: 214/466, loss: 0.043386176228523254 2023-01-24 05:52:43.556481: step: 216/466, loss: 0.007872240617871284 2023-01-24 05:52:44.288160: step: 218/466, loss: 0.0024368164595216513 2023-01-24 05:52:45.016950: step: 220/466, loss: 0.021933574229478836 2023-01-24 05:52:45.870175: step: 222/466, loss: 0.06005479395389557 2023-01-24 05:52:46.684539: step: 224/466, loss: 1.3206063508987427 2023-01-24 05:52:47.433666: step: 226/466, loss: 0.003284501377493143 2023-01-24 05:52:48.228938: step: 228/466, loss: 0.0009186320821754634 2023-01-24 05:52:48.951118: step: 230/466, loss: 0.001569428015500307 2023-01-24 05:52:49.696616: step: 232/466, loss: 0.0002581567969173193 2023-01-24 05:52:50.396282: step: 234/466, loss: 0.004090083763003349 2023-01-24 05:52:51.141660: step: 236/466, loss: 0.0028223921544849873 2023-01-24 05:52:51.893096: step: 238/466, loss: 0.00975649245083332 2023-01-24 05:52:52.622249: step: 240/466, loss: 0.02854604460299015 2023-01-24 05:52:53.446311: step: 242/466, loss: 0.014190112240612507 2023-01-24 05:52:54.196351: step: 244/466, loss: 0.47467219829559326 2023-01-24 05:52:54.935195: step: 246/466, loss: 0.17608670890331268 2023-01-24 05:52:55.641752: step: 248/466, loss: 0.040568236261606216 2023-01-24 05:52:56.404844: step: 250/466, loss: 0.009722213260829449 2023-01-24 05:52:57.095285: step: 252/466, loss: 0.006252758204936981 2023-01-24 05:52:57.883697: step: 254/466, loss: 0.014669787138700485 2023-01-24 05:52:58.556709: step: 256/466, loss: 0.001633264822885394 2023-01-24 05:52:59.311665: step: 258/466, loss: 0.025014188140630722 2023-01-24 05:53:00.070201: step: 260/466, loss: 0.006355243269354105 2023-01-24 05:53:00.806131: step: 262/466, loss: 0.06679047644138336 2023-01-24 05:53:01.482213: step: 264/466, loss: 0.015492793172597885 2023-01-24 05:53:02.236016: step: 266/466, loss: 0.023767642676830292 2023-01-24 05:53:03.009856: step: 268/466, loss: 0.004239395260810852 2023-01-24 05:53:03.751591: step: 270/466, loss: 0.009462187997996807 2023-01-24 05:53:04.510340: step: 272/466, loss: 0.03155773505568504 2023-01-24 05:53:05.253950: step: 274/466, loss: 0.021629920229315758 2023-01-24 05:53:06.014089: step: 276/466, loss: 0.0036083217710256577 2023-01-24 05:53:06.799701: step: 278/466, loss: 0.010857186280190945 2023-01-24 05:53:07.540036: step: 280/466, loss: 0.0001240857964148745 2023-01-24 05:53:08.392189: step: 282/466, loss: 0.02709275670349598 2023-01-24 05:53:09.101527: step: 284/466, loss: 0.000314620032440871 2023-01-24 05:53:09.800443: step: 286/466, loss: 0.01560733187943697 2023-01-24 05:53:10.679840: step: 288/466, loss: 0.1660701185464859 2023-01-24 05:53:11.517430: step: 290/466, loss: 0.0015013131778687239 2023-01-24 05:53:12.185358: step: 292/466, loss: 0.030416211113333702 2023-01-24 05:53:12.992809: step: 294/466, loss: 0.027990423142910004 2023-01-24 05:53:13.777922: step: 296/466, loss: 0.04260283708572388 2023-01-24 05:53:14.547487: step: 298/466, loss: 0.010933523997664452 2023-01-24 05:53:15.358607: step: 300/466, loss: 0.07986847311258316 2023-01-24 05:53:16.221131: step: 302/466, loss: 0.01713685505092144 2023-01-24 05:53:17.028377: step: 304/466, loss: 0.04338371008634567 2023-01-24 05:53:17.805547: step: 306/466, loss: 0.130144402384758 2023-01-24 05:53:18.532376: step: 308/466, loss: 0.030209003016352654 2023-01-24 05:53:19.202384: step: 310/466, loss: 3.997722524218261e-05 2023-01-24 05:53:19.958426: step: 312/466, loss: 0.012881987728178501 2023-01-24 05:53:20.790716: step: 314/466, loss: 0.015983590856194496 2023-01-24 05:53:21.545655: step: 316/466, loss: 0.00582438800483942 2023-01-24 05:53:22.362475: step: 318/466, loss: 0.08379048109054565 2023-01-24 05:53:23.165384: step: 320/466, loss: 0.005372151732444763 2023-01-24 05:53:23.876349: step: 322/466, loss: 0.004701228812336922 2023-01-24 05:53:24.726119: step: 324/466, loss: 0.012270765379071236 2023-01-24 05:53:25.421478: step: 326/466, loss: 1.4472018847300205e-05 2023-01-24 05:53:26.132699: step: 328/466, loss: 0.002292596735060215 2023-01-24 05:53:26.911755: step: 330/466, loss: 0.019992461428046227 2023-01-24 05:53:27.665370: step: 332/466, loss: 0.0058171385899186134 2023-01-24 05:53:28.392959: step: 334/466, loss: 0.0016815406270325184 2023-01-24 05:53:29.123335: step: 336/466, loss: 0.015655461698770523 2023-01-24 05:53:29.826835: step: 338/466, loss: 0.12064294517040253 2023-01-24 05:53:30.590679: step: 340/466, loss: 0.001417037914507091 2023-01-24 05:53:31.475975: step: 342/466, loss: 0.03021203726530075 2023-01-24 05:53:32.210832: step: 344/466, loss: 0.019122948870062828 2023-01-24 05:53:32.994665: step: 346/466, loss: 0.0028658395167440176 2023-01-24 05:53:33.700211: step: 348/466, loss: 0.03777375444769859 2023-01-24 05:53:34.442385: step: 350/466, loss: 0.0848744586110115 2023-01-24 05:53:35.202697: step: 352/466, loss: 0.007534432224929333 2023-01-24 05:53:36.018157: step: 354/466, loss: 0.030503325164318085 2023-01-24 05:53:36.732738: step: 356/466, loss: 0.061688248068094254 2023-01-24 05:53:37.412095: step: 358/466, loss: 0.002680381527170539 2023-01-24 05:53:38.207200: step: 360/466, loss: 0.020026415586471558 2023-01-24 05:53:38.959624: step: 362/466, loss: 0.01093121524900198 2023-01-24 05:53:39.695941: step: 364/466, loss: 0.009857839904725552 2023-01-24 05:53:40.516667: step: 366/466, loss: 0.0017632795497775078 2023-01-24 05:53:41.374743: step: 368/466, loss: 0.005583813413977623 2023-01-24 05:53:42.128579: step: 370/466, loss: 0.0028505113441497087 2023-01-24 05:53:42.994538: step: 372/466, loss: 0.007300149649381638 2023-01-24 05:53:43.786144: step: 374/466, loss: 0.3070095181465149 2023-01-24 05:53:44.577214: step: 376/466, loss: 0.037516094744205475 2023-01-24 05:53:45.323207: step: 378/466, loss: 0.0004592242185026407 2023-01-24 05:53:46.082133: step: 380/466, loss: 0.034510307013988495 2023-01-24 05:53:46.745996: step: 382/466, loss: 0.0066200257278978825 2023-01-24 05:53:47.472041: step: 384/466, loss: 0.002755881519988179 2023-01-24 05:53:48.184384: step: 386/466, loss: 0.003863600315526128 2023-01-24 05:53:48.930104: step: 388/466, loss: 0.006329555530101061 2023-01-24 05:53:49.688213: step: 390/466, loss: 0.0013895792653784156 2023-01-24 05:53:50.418147: step: 392/466, loss: 0.008196533657610416 2023-01-24 05:53:51.217474: step: 394/466, loss: 0.024616515263915062 2023-01-24 05:53:52.014559: step: 396/466, loss: 0.008718425408005714 2023-01-24 05:53:52.807524: step: 398/466, loss: 0.011273697949945927 2023-01-24 05:53:53.522664: step: 400/466, loss: 0.010760881938040257 2023-01-24 05:53:54.231742: step: 402/466, loss: 0.035104431211948395 2023-01-24 05:53:55.002347: step: 404/466, loss: 0.0038427747786045074 2023-01-24 05:53:55.757269: step: 406/466, loss: 0.11372831463813782 2023-01-24 05:53:56.528375: step: 408/466, loss: 0.02684140019118786 2023-01-24 05:53:57.305108: step: 410/466, loss: 0.075205959379673 2023-01-24 05:53:58.038822: step: 412/466, loss: 0.1784360706806183 2023-01-24 05:53:58.879125: step: 414/466, loss: 0.02089475654065609 2023-01-24 05:53:59.726552: step: 416/466, loss: 0.0006547744851559401 2023-01-24 05:54:00.426565: step: 418/466, loss: 0.018957240507006645 2023-01-24 05:54:01.270143: step: 420/466, loss: 0.025710005313158035 2023-01-24 05:54:02.005314: step: 422/466, loss: 0.036043502390384674 2023-01-24 05:54:02.737346: step: 424/466, loss: 0.0004383635532576591 2023-01-24 05:54:03.587826: step: 426/466, loss: 0.09480899572372437 2023-01-24 05:54:04.302803: step: 428/466, loss: 0.17611780762672424 2023-01-24 05:54:05.027521: step: 430/466, loss: 0.017583386972546577 2023-01-24 05:54:05.764126: step: 432/466, loss: 0.009646718390285969 2023-01-24 05:54:06.421205: step: 434/466, loss: 0.0010779794538393617 2023-01-24 05:54:07.188887: step: 436/466, loss: 0.0019341098377481103 2023-01-24 05:54:07.917339: step: 438/466, loss: 0.05516645312309265 2023-01-24 05:54:08.718302: step: 440/466, loss: 0.004943589214235544 2023-01-24 05:54:09.420932: step: 442/466, loss: 0.03822134807705879 2023-01-24 05:54:10.209450: step: 444/466, loss: 0.005139884538948536 2023-01-24 05:54:10.923718: step: 446/466, loss: 0.03897429630160332 2023-01-24 05:54:11.707592: step: 448/466, loss: 0.004841428250074387 2023-01-24 05:54:12.404477: step: 450/466, loss: 0.001536195632070303 2023-01-24 05:54:13.156032: step: 452/466, loss: 0.01192461047321558 2023-01-24 05:54:13.898293: step: 454/466, loss: 0.016185369342565536 2023-01-24 05:54:14.708018: step: 456/466, loss: 0.007985131815075874 2023-01-24 05:54:15.438220: step: 458/466, loss: 0.005550692789256573 2023-01-24 05:54:16.165854: step: 460/466, loss: 0.0020444553811103106 2023-01-24 05:54:16.992899: step: 462/466, loss: 0.005862903781235218 2023-01-24 05:54:17.668340: step: 464/466, loss: 0.024352507665753365 2023-01-24 05:54:18.403356: step: 466/466, loss: 0.002665027743205428 2023-01-24 05:54:19.118040: step: 468/466, loss: 0.0034131731372326612 2023-01-24 05:54:19.903125: step: 470/466, loss: 0.011681389063596725 2023-01-24 05:54:20.604530: step: 472/466, loss: 0.004937691614031792 2023-01-24 05:54:21.490285: step: 474/466, loss: 0.00297374720685184 2023-01-24 05:54:22.287454: step: 476/466, loss: 0.030566386878490448 2023-01-24 05:54:23.072398: step: 478/466, loss: 0.01928372122347355 2023-01-24 05:54:23.859080: step: 480/466, loss: 0.07454699277877808 2023-01-24 05:54:24.556456: step: 482/466, loss: 0.02079848386347294 2023-01-24 05:54:25.245416: step: 484/466, loss: 0.01979495771229267 2023-01-24 05:54:25.983076: step: 486/466, loss: 0.004218693822622299 2023-01-24 05:54:26.730586: step: 488/466, loss: 0.022521065548062325 2023-01-24 05:54:27.615260: step: 490/466, loss: 0.07107479870319366 2023-01-24 05:54:28.291824: step: 492/466, loss: 0.0097987474873662 2023-01-24 05:54:29.031813: step: 494/466, loss: 0.00504559138789773 2023-01-24 05:54:29.739718: step: 496/466, loss: 0.004600842017680407 2023-01-24 05:54:30.459508: step: 498/466, loss: 0.08354654908180237 2023-01-24 05:54:31.193616: step: 500/466, loss: 0.0246112197637558 2023-01-24 05:54:31.926876: step: 502/466, loss: 0.0013995743356645107 2023-01-24 05:54:32.595597: step: 504/466, loss: 0.00629635201767087 2023-01-24 05:54:33.380172: step: 506/466, loss: 0.00231743766926229 2023-01-24 05:54:34.023570: step: 508/466, loss: 0.04809323325753212 2023-01-24 05:54:34.827082: step: 510/466, loss: 0.01773303560912609 2023-01-24 05:54:35.590337: step: 512/466, loss: 0.004355045035481453 2023-01-24 05:54:36.387956: step: 514/466, loss: 8.419121877523139e-05 2023-01-24 05:54:37.235682: step: 516/466, loss: 0.0020344394724816084 2023-01-24 05:54:37.944121: step: 518/466, loss: 0.007168296258896589 2023-01-24 05:54:38.704321: step: 520/466, loss: 0.014897344633936882 2023-01-24 05:54:39.482851: step: 522/466, loss: 0.00015005006571300328 2023-01-24 05:54:40.185209: step: 524/466, loss: 0.0010431658010929823 2023-01-24 05:54:40.908785: step: 526/466, loss: 0.0006886800401844084 2023-01-24 05:54:41.682532: step: 528/466, loss: 0.4004390835762024 2023-01-24 05:54:42.596101: step: 530/466, loss: 0.02424515038728714 2023-01-24 05:54:43.345960: step: 532/466, loss: 0.04411615431308746 2023-01-24 05:54:44.189926: step: 534/466, loss: 0.014269612729549408 2023-01-24 05:54:45.024689: step: 536/466, loss: 0.00011343916412442923 2023-01-24 05:54:45.759583: step: 538/466, loss: 0.023898938670754433 2023-01-24 05:54:46.538686: step: 540/466, loss: 0.028368115425109863 2023-01-24 05:54:47.254918: step: 542/466, loss: 0.0327945202589035 2023-01-24 05:54:47.928145: step: 544/466, loss: 0.0063098277896642685 2023-01-24 05:54:48.615431: step: 546/466, loss: 0.01982375793159008 2023-01-24 05:54:49.429033: step: 548/466, loss: 0.02865714766085148 2023-01-24 05:54:50.265471: step: 550/466, loss: 0.35284262895584106 2023-01-24 05:54:51.083653: step: 552/466, loss: 0.0012877887347713113 2023-01-24 05:54:51.845855: step: 554/466, loss: 0.002769460901618004 2023-01-24 05:54:52.576275: step: 556/466, loss: 0.026639414951205254 2023-01-24 05:54:53.333850: step: 558/466, loss: 0.2336762398481369 2023-01-24 05:54:54.063920: step: 560/466, loss: 0.010595796629786491 2023-01-24 05:54:54.749859: step: 562/466, loss: 0.023905830457806587 2023-01-24 05:54:55.559455: step: 564/466, loss: 0.0017976739909499884 2023-01-24 05:54:56.304129: step: 566/466, loss: 0.009105820208787918 2023-01-24 05:54:57.057693: step: 568/466, loss: 0.023006802424788475 2023-01-24 05:54:57.834172: step: 570/466, loss: 0.032034676522016525 2023-01-24 05:54:58.571159: step: 572/466, loss: 0.01416066288948059 2023-01-24 05:54:59.390562: step: 574/466, loss: 0.006038175895810127 2023-01-24 05:55:00.145157: step: 576/466, loss: 0.00783812440931797 2023-01-24 05:55:00.856453: step: 578/466, loss: 0.00037845782935619354 2023-01-24 05:55:01.780429: step: 580/466, loss: 0.17190302908420563 2023-01-24 05:55:02.521876: step: 582/466, loss: 0.0016727076144888997 2023-01-24 05:55:03.207186: step: 584/466, loss: 0.01053509209305048 2023-01-24 05:55:04.024478: step: 586/466, loss: 0.001931222970597446 2023-01-24 05:55:04.747720: step: 588/466, loss: 0.01740656979382038 2023-01-24 05:55:05.485885: step: 590/466, loss: 0.38087451457977295 2023-01-24 05:55:06.253880: step: 592/466, loss: 0.0052076056599617004 2023-01-24 05:55:07.003266: step: 594/466, loss: 0.04615316540002823 2023-01-24 05:55:07.762127: step: 596/466, loss: 0.04258754849433899 2023-01-24 05:55:08.442983: step: 598/466, loss: 0.03115232288837433 2023-01-24 05:55:09.190609: step: 600/466, loss: 0.0031860233284533024 2023-01-24 05:55:09.954986: step: 602/466, loss: 0.025794459506869316 2023-01-24 05:55:10.691268: step: 604/466, loss: 0.009873680770397186 2023-01-24 05:55:11.404176: step: 606/466, loss: 0.0011760890483856201 2023-01-24 05:55:12.166624: step: 608/466, loss: 0.022366557270288467 2023-01-24 05:55:12.914843: step: 610/466, loss: 0.09690196812152863 2023-01-24 05:55:13.680228: step: 612/466, loss: 0.02277245558798313 2023-01-24 05:55:14.392493: step: 614/466, loss: 0.1421499401330948 2023-01-24 05:55:15.133355: step: 616/466, loss: 0.030725853517651558 2023-01-24 05:55:15.865226: step: 618/466, loss: 0.0011555668897926807 2023-01-24 05:55:16.619378: step: 620/466, loss: 0.008171006105840206 2023-01-24 05:55:17.345899: step: 622/466, loss: 0.3574966788291931 2023-01-24 05:55:18.116817: step: 624/466, loss: 0.0005149018252268434 2023-01-24 05:55:18.800008: step: 626/466, loss: 0.08907132595777512 2023-01-24 05:55:19.529162: step: 628/466, loss: 0.003892571199685335 2023-01-24 05:55:20.328025: step: 630/466, loss: 0.047221384942531586 2023-01-24 05:55:21.018147: step: 632/466, loss: 0.0013371066888794303 2023-01-24 05:55:21.761478: step: 634/466, loss: 0.09941345453262329 2023-01-24 05:55:22.471944: step: 636/466, loss: 0.03925804793834686 2023-01-24 05:55:23.248888: step: 638/466, loss: 0.02559492364525795 2023-01-24 05:55:23.984620: step: 640/466, loss: 0.06727918982505798 2023-01-24 05:55:25.456630: step: 642/466, loss: 0.13944895565509796 2023-01-24 05:55:26.152211: step: 644/466, loss: 0.024638397619128227 2023-01-24 05:55:26.884233: step: 646/466, loss: 0.001768801361322403 2023-01-24 05:55:27.615803: step: 648/466, loss: 0.004804654978215694 2023-01-24 05:55:28.367968: step: 650/466, loss: 0.040076322853565216 2023-01-24 05:55:29.170977: step: 652/466, loss: 0.00041040178621187806 2023-01-24 05:55:29.940165: step: 654/466, loss: 0.049572572112083435 2023-01-24 05:55:30.671195: step: 656/466, loss: 0.0026933804620057344 2023-01-24 05:55:31.478887: step: 658/466, loss: 0.02136015146970749 2023-01-24 05:55:32.240495: step: 660/466, loss: 0.0073014600202441216 2023-01-24 05:55:33.033269: step: 662/466, loss: 0.012390895746648312 2023-01-24 05:55:33.768480: step: 664/466, loss: 0.027630962431430817 2023-01-24 05:55:34.589675: step: 666/466, loss: 0.021774159744381905 2023-01-24 05:55:35.328211: step: 668/466, loss: 0.011346152052283287 2023-01-24 05:55:36.115845: step: 670/466, loss: 0.1491927206516266 2023-01-24 05:55:36.871399: step: 672/466, loss: 0.021823476999998093 2023-01-24 05:55:37.629522: step: 674/466, loss: 0.015337795950472355 2023-01-24 05:55:38.334764: step: 676/466, loss: 0.0159169789403677 2023-01-24 05:55:39.071697: step: 678/466, loss: 0.0011591583024710417 2023-01-24 05:55:39.783571: step: 680/466, loss: 0.0018728708382695913 2023-01-24 05:55:40.644503: step: 682/466, loss: 0.03095311112701893 2023-01-24 05:55:41.506586: step: 684/466, loss: 0.4649326205253601 2023-01-24 05:55:42.279733: step: 686/466, loss: 0.027271777391433716 2023-01-24 05:55:43.049702: step: 688/466, loss: 0.010774504393339157 2023-01-24 05:55:43.768620: step: 690/466, loss: 0.0026382540818303823 2023-01-24 05:55:44.535606: step: 692/466, loss: 0.0033890206832438707 2023-01-24 05:55:45.278583: step: 694/466, loss: 0.012756789103150368 2023-01-24 05:55:45.937338: step: 696/466, loss: 0.0001581639371579513 2023-01-24 05:55:46.752177: step: 698/466, loss: 0.017527710646390915 2023-01-24 05:55:47.501311: step: 700/466, loss: 0.022267047315835953 2023-01-24 05:55:48.204594: step: 702/466, loss: 0.004587067756801844 2023-01-24 05:55:48.955279: step: 704/466, loss: 0.07056285440921783 2023-01-24 05:55:49.668485: step: 706/466, loss: 0.029599877074360847 2023-01-24 05:55:50.322886: step: 708/466, loss: 0.02155863121151924 2023-01-24 05:55:51.089525: step: 710/466, loss: 0.008911887183785439 2023-01-24 05:55:51.902330: step: 712/466, loss: 0.14788082242012024 2023-01-24 05:55:52.642681: step: 714/466, loss: 0.00010579630179563537 2023-01-24 05:55:53.497197: step: 716/466, loss: 0.0151499779894948 2023-01-24 05:55:54.217328: step: 718/466, loss: 0.01636345311999321 2023-01-24 05:55:54.960112: step: 720/466, loss: 0.008408303372561932 2023-01-24 05:55:55.718323: step: 722/466, loss: 0.01703471876680851 2023-01-24 05:55:56.443997: step: 724/466, loss: 0.027688410133123398 2023-01-24 05:55:57.326795: step: 726/466, loss: 0.1199537143111229 2023-01-24 05:55:58.092241: step: 728/466, loss: 0.012684978544712067 2023-01-24 05:55:58.817104: step: 730/466, loss: 0.014809907414019108 2023-01-24 05:55:59.620157: step: 732/466, loss: 0.02575354091823101 2023-01-24 05:56:00.451859: step: 734/466, loss: 0.4311109781265259 2023-01-24 05:56:01.195584: step: 736/466, loss: 0.011944697238504887 2023-01-24 05:56:01.947147: step: 738/466, loss: 0.0004669880145229399 2023-01-24 05:56:02.701770: step: 740/466, loss: 0.021837793290615082 2023-01-24 05:56:03.405756: step: 742/466, loss: 0.001047839061357081 2023-01-24 05:56:04.218687: step: 744/466, loss: 0.001859480980783701 2023-01-24 05:56:04.917336: step: 746/466, loss: 0.0020473224576562643 2023-01-24 05:56:05.730474: step: 748/466, loss: 0.010712344199419022 2023-01-24 05:56:06.411427: step: 750/466, loss: 0.003235904034227133 2023-01-24 05:56:07.205657: step: 752/466, loss: 0.015598940663039684 2023-01-24 05:56:07.959847: step: 754/466, loss: 0.01041698083281517 2023-01-24 05:56:08.780048: step: 756/466, loss: 0.02159099653363228 2023-01-24 05:56:09.548686: step: 758/466, loss: 0.005649959202855825 2023-01-24 05:56:10.429679: step: 760/466, loss: 0.2386442869901657 2023-01-24 05:56:11.158760: step: 762/466, loss: 0.001370615209452808 2023-01-24 05:56:11.911463: step: 764/466, loss: 0.002000574953854084 2023-01-24 05:56:12.671937: step: 766/466, loss: 0.011318007484078407 2023-01-24 05:56:13.573142: step: 768/466, loss: 0.0028875821735709906 2023-01-24 05:56:14.342657: step: 770/466, loss: 0.017588937655091286 2023-01-24 05:56:14.997291: step: 772/466, loss: 0.0038840211927890778 2023-01-24 05:56:15.749105: step: 774/466, loss: 0.00015768868615850806 2023-01-24 05:56:16.544669: step: 776/466, loss: 0.012369371019303799 2023-01-24 05:56:17.326292: step: 778/466, loss: 0.006792670115828514 2023-01-24 05:56:18.095513: step: 780/466, loss: 0.2840106189250946 2023-01-24 05:56:18.894779: step: 782/466, loss: 0.01502148900181055 2023-01-24 05:56:19.695559: step: 784/466, loss: 0.024477217346429825 2023-01-24 05:56:20.509766: step: 786/466, loss: 0.05349123477935791 2023-01-24 05:56:21.243605: step: 788/466, loss: 0.00770318740978837 2023-01-24 05:56:22.101546: step: 790/466, loss: 0.07015214115381241 2023-01-24 05:56:22.932961: step: 792/466, loss: 0.0007451863493770361 2023-01-24 05:56:23.722514: step: 794/466, loss: 0.017964649945497513 2023-01-24 05:56:24.416092: step: 796/466, loss: 0.04023731127381325 2023-01-24 05:56:25.117821: step: 798/466, loss: 0.001383331953547895 2023-01-24 05:56:25.838319: step: 800/466, loss: 0.10122286528348923 2023-01-24 05:56:26.636234: step: 802/466, loss: 0.014714941382408142 2023-01-24 05:56:27.352581: step: 804/466, loss: 0.008850020356476307 2023-01-24 05:56:28.140500: step: 806/466, loss: 0.07820001244544983 2023-01-24 05:56:28.913601: step: 808/466, loss: 0.014597302302718163 2023-01-24 05:56:29.739796: step: 810/466, loss: 0.003665260039269924 2023-01-24 05:56:30.505920: step: 812/466, loss: 0.09740063548088074 2023-01-24 05:56:31.165876: step: 814/466, loss: 0.021296529099345207 2023-01-24 05:56:32.007432: step: 816/466, loss: 0.07871195673942566 2023-01-24 05:56:32.742279: step: 818/466, loss: 0.034476276487112045 2023-01-24 05:56:33.483845: step: 820/466, loss: 0.01530569139868021 2023-01-24 05:56:34.254685: step: 822/466, loss: 0.0636683851480484 2023-01-24 05:56:34.953435: step: 824/466, loss: 0.002796899527311325 2023-01-24 05:56:35.721714: step: 826/466, loss: 0.014578304253518581 2023-01-24 05:56:36.407623: step: 828/466, loss: 0.06623385101556778 2023-01-24 05:56:37.182964: step: 830/466, loss: 0.012619067914783955 2023-01-24 05:56:37.935593: step: 832/466, loss: 0.003821233520284295 2023-01-24 05:56:38.643870: step: 834/466, loss: 0.014842454344034195 2023-01-24 05:56:39.417571: step: 836/466, loss: 0.027856387197971344 2023-01-24 05:56:40.199459: step: 838/466, loss: 0.015989501029253006 2023-01-24 05:56:40.959108: step: 840/466, loss: 0.0018745105480775237 2023-01-24 05:56:41.700496: step: 842/466, loss: 0.011679274961352348 2023-01-24 05:56:42.504119: step: 844/466, loss: 0.05877411365509033 2023-01-24 05:56:43.215953: step: 846/466, loss: 0.056277453899383545 2023-01-24 05:56:43.893746: step: 848/466, loss: 0.020335165783762932 2023-01-24 05:56:44.645031: step: 850/466, loss: 0.004025626461952925 2023-01-24 05:56:45.407924: step: 852/466, loss: 0.006656455807387829 2023-01-24 05:56:46.268944: step: 854/466, loss: 0.043784309178590775 2023-01-24 05:56:46.937916: step: 856/466, loss: 0.005373646505177021 2023-01-24 05:56:47.710188: step: 858/466, loss: 0.030034927651286125 2023-01-24 05:56:48.490767: step: 860/466, loss: 0.0360288992524147 2023-01-24 05:56:49.274430: step: 862/466, loss: 0.003689620876684785 2023-01-24 05:56:50.021974: step: 864/466, loss: 0.0027504567988216877 2023-01-24 05:56:50.830890: step: 866/466, loss: 0.010373730212450027 2023-01-24 05:56:51.599749: step: 868/466, loss: 0.013135841116309166 2023-01-24 05:56:52.363907: step: 870/466, loss: 9.005170431919396e-05 2023-01-24 05:56:53.072579: step: 872/466, loss: 0.03494952619075775 2023-01-24 05:56:53.967536: step: 874/466, loss: 0.007895917631685734 2023-01-24 05:56:54.717764: step: 876/466, loss: 0.0025822457391768694 2023-01-24 05:56:55.442239: step: 878/466, loss: 0.022746095433831215 2023-01-24 05:56:56.199284: step: 880/466, loss: 0.044358815997838974 2023-01-24 05:56:56.971228: step: 882/466, loss: 0.019776368513703346 2023-01-24 05:56:57.782556: step: 884/466, loss: 0.0091138556599617 2023-01-24 05:56:58.499317: step: 886/466, loss: 4.838859604205936e-05 2023-01-24 05:56:59.244412: step: 888/466, loss: 0.0009778942912817001 2023-01-24 05:56:59.946218: step: 890/466, loss: 2.5119843485299498e-05 2023-01-24 05:57:00.701157: step: 892/466, loss: 0.0066669778898358345 2023-01-24 05:57:01.462762: step: 894/466, loss: 0.050770752131938934 2023-01-24 05:57:02.207271: step: 896/466, loss: 0.0034824397880584 2023-01-24 05:57:02.903428: step: 898/466, loss: 0.007224982138723135 2023-01-24 05:57:03.680063: step: 900/466, loss: 0.0012097193393856287 2023-01-24 05:57:04.486890: step: 902/466, loss: 0.008332643657922745 2023-01-24 05:57:05.249226: step: 904/466, loss: 0.02265145443379879 2023-01-24 05:57:05.933935: step: 906/466, loss: 0.1696164309978485 2023-01-24 05:57:06.781193: step: 908/466, loss: 0.0021269218996167183 2023-01-24 05:57:07.571791: step: 910/466, loss: 0.6533875465393066 2023-01-24 05:57:08.378847: step: 912/466, loss: 0.03232376649975777 2023-01-24 05:57:09.095243: step: 914/466, loss: 0.0012437463738024235 2023-01-24 05:57:09.788557: step: 916/466, loss: 0.06054536998271942 2023-01-24 05:57:10.506853: step: 918/466, loss: 0.001823619706556201 2023-01-24 05:57:11.211223: step: 920/466, loss: 0.008600911125540733 2023-01-24 05:57:12.089928: step: 922/466, loss: 0.027335256338119507 2023-01-24 05:57:12.887293: step: 924/466, loss: 0.10726413130760193 2023-01-24 05:57:13.636969: step: 926/466, loss: 0.004401802085340023 2023-01-24 05:57:14.376529: step: 928/466, loss: 0.01408754289150238 2023-01-24 05:57:15.270097: step: 930/466, loss: 0.023488802835345268 2023-01-24 05:57:16.035355: step: 932/466, loss: 0.08783465623855591 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34145184824902725, 'r': 0.33302893738140416, 'f1': 0.33718780019212297}, 'combined': 0.2484541685626169, 'epoch': 30} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3745543418502639, 'r': 0.29769773663943055, 'f1': 0.3317326443015362}, 'combined': 0.20389421064387103, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3194826443045653, 'r': 0.3491878617066975, 'f1': 0.33367543629996305}, 'combined': 0.2458661109578675, 'epoch': 30} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3536964876307047, 'r': 0.2966882149276622, 'f1': 0.3226938737290501}, 'combined': 0.1983386736090747, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3502059108527132, 'r': 0.342896110056926, 'f1': 0.34651246404602104}, 'combined': 0.2553249735075944, 'epoch': 30} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3740327981736859, 'r': 0.2963116972544784, 'f1': 0.330666676646302}, 'combined': 0.20423530028153952, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.3535714285714286, 'f1': 0.3639705882352941}, 'combined': 0.24264705882352938, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.391304347826087, 'f1': 0.3050847457627119}, 'combined': 0.15254237288135594, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 30} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34145184824902725, 'r': 0.33302893738140416, 'f1': 0.33718780019212297}, 'combined': 0.2484541685626169, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3745543418502639, 'r': 0.29769773663943055, 'f1': 0.3317326443015362}, 'combined': 0.20389421064387103, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.3535714285714286, 'f1': 0.3639705882352941}, 'combined': 0.24264705882352938, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3600931677018634, 'r': 0.3300284629981025, 'f1': 0.34440594059405943}, 'combined': 0.25377279833246486, 'epoch': 29} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3732928220606822, 'r': 0.2841675462053636, 'f1': 0.3226892764375249}, 'combined': 0.19930808250553012, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 29} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:00:06.640442: step: 2/466, loss: 0.0021119576413184404 2023-01-24 06:00:07.398256: step: 4/466, loss: 0.003949753474444151 2023-01-24 06:00:08.128357: step: 6/466, loss: 0.023282263427972794 2023-01-24 06:00:08.917478: step: 8/466, loss: 0.013862239196896553 2023-01-24 06:00:09.625383: step: 10/466, loss: 0.10889230668544769 2023-01-24 06:00:10.355987: step: 12/466, loss: 0.012851127423346043 2023-01-24 06:00:11.110827: step: 14/466, loss: 0.17665880918502808 2023-01-24 06:00:11.862065: step: 16/466, loss: 0.03574291244149208 2023-01-24 06:00:12.678322: step: 18/466, loss: 0.0005902175325900316 2023-01-24 06:00:13.556717: step: 20/466, loss: 0.0087283318862319 2023-01-24 06:00:14.265570: step: 22/466, loss: 0.006415482610464096 2023-01-24 06:00:15.090567: step: 24/466, loss: 0.04369017109274864 2023-01-24 06:00:15.783652: step: 26/466, loss: 0.005777179729193449 2023-01-24 06:00:16.567232: step: 28/466, loss: 0.013707002624869347 2023-01-24 06:00:17.289315: step: 30/466, loss: 0.02515527606010437 2023-01-24 06:00:18.023640: step: 32/466, loss: 0.005246358923614025 2023-01-24 06:00:18.754170: step: 34/466, loss: 0.005221569444984198 2023-01-24 06:00:19.469769: step: 36/466, loss: 0.006976307835429907 2023-01-24 06:00:20.276615: step: 38/466, loss: 0.0067510176450014114 2023-01-24 06:00:20.993518: step: 40/466, loss: 0.018617145717144012 2023-01-24 06:00:21.680134: step: 42/466, loss: 0.013348113745450974 2023-01-24 06:00:22.325277: step: 44/466, loss: 0.001935301930643618 2023-01-24 06:00:23.017418: step: 46/466, loss: 0.0013969270512461662 2023-01-24 06:00:23.677032: step: 48/466, loss: 0.00413518724963069 2023-01-24 06:00:24.374135: step: 50/466, loss: 0.0023705631028860807 2023-01-24 06:00:25.083887: step: 52/466, loss: 0.004117606673389673 2023-01-24 06:00:25.831561: step: 54/466, loss: 0.06089463829994202 2023-01-24 06:00:26.538654: step: 56/466, loss: 0.038913093507289886 2023-01-24 06:00:27.265738: step: 58/466, loss: 0.004359858576208353 2023-01-24 06:00:28.061981: step: 60/466, loss: 0.019135290756821632 2023-01-24 06:00:28.957537: step: 62/466, loss: 0.029449697583913803 2023-01-24 06:00:29.682579: step: 64/466, loss: 0.0006406322354450822 2023-01-24 06:00:30.412404: step: 66/466, loss: 0.013644445687532425 2023-01-24 06:00:31.227589: step: 68/466, loss: 0.028983967378735542 2023-01-24 06:00:31.986362: step: 70/466, loss: 0.0011641232995316386 2023-01-24 06:00:32.629194: step: 72/466, loss: 0.008698353543877602 2023-01-24 06:00:33.447982: step: 74/466, loss: 0.005477309226989746 2023-01-24 06:00:34.195496: step: 76/466, loss: 0.008166252635419369 2023-01-24 06:00:34.961309: step: 78/466, loss: 0.008764025755226612 2023-01-24 06:00:35.696858: step: 80/466, loss: 0.03627845272421837 2023-01-24 06:00:36.500231: step: 82/466, loss: 0.0008104249718599021 2023-01-24 06:00:37.182391: step: 84/466, loss: 0.027701199054718018 2023-01-24 06:00:37.949530: step: 86/466, loss: 0.0011427691206336021 2023-01-24 06:00:38.694503: step: 88/466, loss: 0.004959442652761936 2023-01-24 06:00:39.411115: step: 90/466, loss: 0.0018794368952512741 2023-01-24 06:00:40.070662: step: 92/466, loss: 0.0008936856174841523 2023-01-24 06:00:40.825881: step: 94/466, loss: 0.0008424253901466727 2023-01-24 06:00:41.554769: step: 96/466, loss: 0.0719582661986351 2023-01-24 06:00:42.285771: step: 98/466, loss: 0.0009956827852874994 2023-01-24 06:00:43.034310: step: 100/466, loss: 0.021351397037506104 2023-01-24 06:00:43.767994: step: 102/466, loss: 0.018605533987283707 2023-01-24 06:00:44.503498: step: 104/466, loss: 0.005402641836553812 2023-01-24 06:00:45.270654: step: 106/466, loss: 0.05505645275115967 2023-01-24 06:00:46.041187: step: 108/466, loss: 0.009099229238927364 2023-01-24 06:00:46.757531: step: 110/466, loss: 0.019879935309290886 2023-01-24 06:00:47.525500: step: 112/466, loss: 0.016863131895661354 2023-01-24 06:00:48.208374: step: 114/466, loss: 0.015752049162983894 2023-01-24 06:00:48.995238: step: 116/466, loss: 0.0007166631985455751 2023-01-24 06:00:49.712541: step: 118/466, loss: 0.0003036932903341949 2023-01-24 06:00:50.478849: step: 120/466, loss: 0.047832999378442764 2023-01-24 06:00:51.253039: step: 122/466, loss: 0.11420870572328568 2023-01-24 06:00:52.083775: step: 124/466, loss: 0.0011902085971087217 2023-01-24 06:00:52.817565: step: 126/466, loss: 0.002473770873621106 2023-01-24 06:00:53.542835: step: 128/466, loss: 0.0057588787749409676 2023-01-24 06:00:54.276314: step: 130/466, loss: 0.005072615575045347 2023-01-24 06:00:55.113864: step: 132/466, loss: 0.032617054879665375 2023-01-24 06:00:55.817639: step: 134/466, loss: 0.00042786714038811624 2023-01-24 06:00:56.559496: step: 136/466, loss: 0.004805801901966333 2023-01-24 06:00:57.366912: step: 138/466, loss: 0.00014069517783354968 2023-01-24 06:00:58.193348: step: 140/466, loss: 0.00585377961397171 2023-01-24 06:00:58.907468: step: 142/466, loss: 0.027600059285759926 2023-01-24 06:00:59.605284: step: 144/466, loss: 0.00015916845586616546 2023-01-24 06:01:00.349761: step: 146/466, loss: 0.012031443417072296 2023-01-24 06:01:01.064011: step: 148/466, loss: 0.0018937510903924704 2023-01-24 06:01:01.803194: step: 150/466, loss: 0.00020203908206894994 2023-01-24 06:01:02.552178: step: 152/466, loss: 0.01154989656060934 2023-01-24 06:01:03.301554: step: 154/466, loss: 0.016319116577506065 2023-01-24 06:01:04.040400: step: 156/466, loss: 1.0363105535507202 2023-01-24 06:01:04.706878: step: 158/466, loss: 0.0228382907807827 2023-01-24 06:01:05.460294: step: 160/466, loss: 0.015598494559526443 2023-01-24 06:01:06.211030: step: 162/466, loss: 0.0042947870679199696 2023-01-24 06:01:06.920170: step: 164/466, loss: 0.010354123078286648 2023-01-24 06:01:07.784799: step: 166/466, loss: 0.13110190629959106 2023-01-24 06:01:08.478238: step: 168/466, loss: 0.04255157709121704 2023-01-24 06:01:09.162479: step: 170/466, loss: 0.01943907141685486 2023-01-24 06:01:09.884142: step: 172/466, loss: 0.0006557477172464132 2023-01-24 06:01:10.570144: step: 174/466, loss: 0.016860581934452057 2023-01-24 06:01:11.314312: step: 176/466, loss: 0.00529084075242281 2023-01-24 06:01:12.021264: step: 178/466, loss: 0.010471130721271038 2023-01-24 06:01:12.817173: step: 180/466, loss: 0.04970330744981766 2023-01-24 06:01:13.601287: step: 182/466, loss: 0.003287299070507288 2023-01-24 06:01:14.339924: step: 184/466, loss: 0.09002115577459335 2023-01-24 06:01:15.141682: step: 186/466, loss: 0.011329753324389458 2023-01-24 06:01:15.903685: step: 188/466, loss: 0.0005845078267157078 2023-01-24 06:01:16.626980: step: 190/466, loss: 0.02527262084186077 2023-01-24 06:01:17.383620: step: 192/466, loss: 0.0019953681621700525 2023-01-24 06:01:18.130878: step: 194/466, loss: 0.0026451025623828173 2023-01-24 06:01:18.829530: step: 196/466, loss: 0.001436003134585917 2023-01-24 06:01:19.663926: step: 198/466, loss: 0.0031633705366402864 2023-01-24 06:01:20.393442: step: 200/466, loss: 0.0010856760200113058 2023-01-24 06:01:21.062560: step: 202/466, loss: 0.0021711231674999 2023-01-24 06:01:21.796684: step: 204/466, loss: 0.0067644547671079636 2023-01-24 06:01:22.507474: step: 206/466, loss: 0.0005549822235479951 2023-01-24 06:01:23.361325: step: 208/466, loss: 0.06292501091957092 2023-01-24 06:01:24.035603: step: 210/466, loss: 0.002018541330471635 2023-01-24 06:01:24.737368: step: 212/466, loss: 0.019470317289233208 2023-01-24 06:01:25.509522: step: 214/466, loss: 0.008875842206180096 2023-01-24 06:01:26.299297: step: 216/466, loss: 0.024308178573846817 2023-01-24 06:01:27.149270: step: 218/466, loss: 0.27486276626586914 2023-01-24 06:01:28.004745: step: 220/466, loss: 0.004777147900313139 2023-01-24 06:01:28.760204: step: 222/466, loss: 0.0049628280103206635 2023-01-24 06:01:29.504692: step: 224/466, loss: 0.0005460731917992234 2023-01-24 06:01:30.266488: step: 226/466, loss: 0.001001058961264789 2023-01-24 06:01:31.021533: step: 228/466, loss: 0.03983161970973015 2023-01-24 06:01:31.810452: step: 230/466, loss: 0.11256561428308487 2023-01-24 06:01:32.602720: step: 232/466, loss: 0.006923142354935408 2023-01-24 06:01:33.299408: step: 234/466, loss: 0.028024213388562202 2023-01-24 06:01:34.132820: step: 236/466, loss: 0.03840837627649307 2023-01-24 06:01:34.916157: step: 238/466, loss: 0.04603128880262375 2023-01-24 06:01:35.713667: step: 240/466, loss: 0.004212265834212303 2023-01-24 06:01:36.409859: step: 242/466, loss: 0.027081385254859924 2023-01-24 06:01:37.250511: step: 244/466, loss: 0.002239649184048176 2023-01-24 06:01:38.013056: step: 246/466, loss: 0.036767009645700455 2023-01-24 06:01:38.785666: step: 248/466, loss: 0.012738176621496677 2023-01-24 06:01:39.504278: step: 250/466, loss: 0.004856251645833254 2023-01-24 06:01:40.367628: step: 252/466, loss: 0.006001537665724754 2023-01-24 06:01:41.163542: step: 254/466, loss: 0.03852864354848862 2023-01-24 06:01:41.983302: step: 256/466, loss: 0.0019090332789346576 2023-01-24 06:01:42.806584: step: 258/466, loss: 0.0013001691550016403 2023-01-24 06:01:43.665031: step: 260/466, loss: 0.007493459153920412 2023-01-24 06:01:44.453233: step: 262/466, loss: 0.041227787733078 2023-01-24 06:01:45.220273: step: 264/466, loss: 0.09032338857650757 2023-01-24 06:01:45.930272: step: 266/466, loss: 0.028157012537121773 2023-01-24 06:01:46.670493: step: 268/466, loss: 0.040503330528736115 2023-01-24 06:01:47.536463: step: 270/466, loss: 0.0026988424360752106 2023-01-24 06:01:48.300102: step: 272/466, loss: 0.47166407108306885 2023-01-24 06:01:49.150195: step: 274/466, loss: 0.0011497886152938008 2023-01-24 06:01:49.901540: step: 276/466, loss: 0.00470120320096612 2023-01-24 06:01:50.590376: step: 278/466, loss: 0.0031544240191578865 2023-01-24 06:01:51.308551: step: 280/466, loss: 0.002785501768812537 2023-01-24 06:01:52.016085: step: 282/466, loss: 0.003542037680745125 2023-01-24 06:01:52.704996: step: 284/466, loss: 0.005036836955696344 2023-01-24 06:01:53.457867: step: 286/466, loss: 0.010355237871408463 2023-01-24 06:01:54.154547: step: 288/466, loss: 0.23964522778987885 2023-01-24 06:01:54.915389: step: 290/466, loss: 0.015845391899347305 2023-01-24 06:01:55.623110: step: 292/466, loss: 0.01506973896175623 2023-01-24 06:01:56.427015: step: 294/466, loss: 0.009640523232519627 2023-01-24 06:01:57.141934: step: 296/466, loss: 0.0004169405438005924 2023-01-24 06:01:57.937398: step: 298/466, loss: 0.21283280849456787 2023-01-24 06:01:58.727121: step: 300/466, loss: 0.003256069030612707 2023-01-24 06:01:59.459829: step: 302/466, loss: 0.034554723650217056 2023-01-24 06:02:00.319473: step: 304/466, loss: 0.00668883603066206 2023-01-24 06:02:01.029440: step: 306/466, loss: 0.002489378210157156 2023-01-24 06:02:01.725122: step: 308/466, loss: 0.01227173674851656 2023-01-24 06:02:02.525681: step: 310/466, loss: 0.007570523303002119 2023-01-24 06:02:03.289112: step: 312/466, loss: 0.009839157573878765 2023-01-24 06:02:04.196008: step: 314/466, loss: 0.03226277232170105 2023-01-24 06:02:05.008045: step: 316/466, loss: 0.004619147628545761 2023-01-24 06:02:05.822583: step: 318/466, loss: 0.03376193344593048 2023-01-24 06:02:06.555281: step: 320/466, loss: 0.0007725472096353769 2023-01-24 06:02:07.304142: step: 322/466, loss: 0.00034353527007624507 2023-01-24 06:02:08.091649: step: 324/466, loss: 0.015253189019858837 2023-01-24 06:02:08.815199: step: 326/466, loss: 0.0205977950245142 2023-01-24 06:02:09.584363: step: 328/466, loss: 4.4319975131656975e-05 2023-01-24 06:02:10.228964: step: 330/466, loss: 0.00025746741448529065 2023-01-24 06:02:11.010097: step: 332/466, loss: 0.002880845917388797 2023-01-24 06:02:11.790634: step: 334/466, loss: 0.20469032227993011 2023-01-24 06:02:12.529441: step: 336/466, loss: 0.001424286630935967 2023-01-24 06:02:13.319285: step: 338/466, loss: 0.028453297913074493 2023-01-24 06:02:14.055149: step: 340/466, loss: 0.03045223280787468 2023-01-24 06:02:14.835476: step: 342/466, loss: 0.10660101473331451 2023-01-24 06:02:15.609122: step: 344/466, loss: 0.037694379687309265 2023-01-24 06:02:16.382138: step: 346/466, loss: 0.8764700889587402 2023-01-24 06:02:17.175466: step: 348/466, loss: 0.016882333904504776 2023-01-24 06:02:17.847275: step: 350/466, loss: 0.007390057668089867 2023-01-24 06:02:18.709465: step: 352/466, loss: 0.0014865277335047722 2023-01-24 06:02:19.451563: step: 354/466, loss: 0.001074317959137261 2023-01-24 06:02:20.377639: step: 356/466, loss: 0.016747722402215004 2023-01-24 06:02:21.248652: step: 358/466, loss: 0.000478467351058498 2023-01-24 06:02:22.000329: step: 360/466, loss: 0.009283812716603279 2023-01-24 06:02:22.796036: step: 362/466, loss: 0.09626930207014084 2023-01-24 06:02:23.505276: step: 364/466, loss: 0.1485295444726944 2023-01-24 06:02:24.291795: step: 366/466, loss: 0.028950830921530724 2023-01-24 06:02:25.185748: step: 368/466, loss: 0.1542282998561859 2023-01-24 06:02:25.962246: step: 370/466, loss: 0.01782785914838314 2023-01-24 06:02:26.645145: step: 372/466, loss: 0.03975386917591095 2023-01-24 06:02:27.458646: step: 374/466, loss: 0.02707074210047722 2023-01-24 06:02:28.198198: step: 376/466, loss: 0.003940037917345762 2023-01-24 06:02:28.951980: step: 378/466, loss: 0.0002613243996165693 2023-01-24 06:02:29.725664: step: 380/466, loss: 0.003921948838979006 2023-01-24 06:02:30.500628: step: 382/466, loss: 0.0002668288070708513 2023-01-24 06:02:31.330241: step: 384/466, loss: 0.0003136082086712122 2023-01-24 06:02:32.134779: step: 386/466, loss: 0.00648491270840168 2023-01-24 06:02:32.862302: step: 388/466, loss: 0.684620201587677 2023-01-24 06:02:33.504979: step: 390/466, loss: 0.0011094522196799517 2023-01-24 06:02:34.231884: step: 392/466, loss: 0.061549510806798935 2023-01-24 06:02:35.059583: step: 394/466, loss: 0.0019716816022992134 2023-01-24 06:02:35.823668: step: 396/466, loss: 0.37270647287368774 2023-01-24 06:02:36.559169: step: 398/466, loss: 0.002892756834626198 2023-01-24 06:02:37.365108: step: 400/466, loss: 0.007956295274198055 2023-01-24 06:02:38.200305: step: 402/466, loss: 0.0010787455830723047 2023-01-24 06:02:38.971069: step: 404/466, loss: 0.056287217885255814 2023-01-24 06:02:39.700048: step: 406/466, loss: 0.029866410419344902 2023-01-24 06:02:40.519472: step: 408/466, loss: 0.04059431701898575 2023-01-24 06:02:41.291559: step: 410/466, loss: 0.013068633154034615 2023-01-24 06:02:42.057428: step: 412/466, loss: 0.023778825998306274 2023-01-24 06:02:42.786487: step: 414/466, loss: 0.008005055598914623 2023-01-24 06:02:43.674365: step: 416/466, loss: 0.042082663625478745 2023-01-24 06:02:44.429161: step: 418/466, loss: 0.02101258747279644 2023-01-24 06:02:45.116638: step: 420/466, loss: 0.003775278339162469 2023-01-24 06:02:45.842040: step: 422/466, loss: 7.323760655708611e-05 2023-01-24 06:02:46.579099: step: 424/466, loss: 0.0001701459987089038 2023-01-24 06:02:47.329379: step: 426/466, loss: 0.013946725986897945 2023-01-24 06:02:48.080826: step: 428/466, loss: 0.010495316237211227 2023-01-24 06:02:48.805371: step: 430/466, loss: 0.0005667632794938982 2023-01-24 06:02:49.518909: step: 432/466, loss: 0.08862043917179108 2023-01-24 06:02:50.270686: step: 434/466, loss: 0.0020771543495357037 2023-01-24 06:02:51.014185: step: 436/466, loss: 0.0011141763534396887 2023-01-24 06:02:51.697268: step: 438/466, loss: 0.20008327066898346 2023-01-24 06:02:52.542796: step: 440/466, loss: 0.02583439089357853 2023-01-24 06:02:53.286551: step: 442/466, loss: 0.009787281975150108 2023-01-24 06:02:53.949453: step: 444/466, loss: 0.010717857629060745 2023-01-24 06:02:54.702549: step: 446/466, loss: 0.0028945282101631165 2023-01-24 06:02:55.523151: step: 448/466, loss: 0.011020984500646591 2023-01-24 06:02:56.220365: step: 450/466, loss: 0.011754285544157028 2023-01-24 06:02:57.082704: step: 452/466, loss: 0.00658143125474453 2023-01-24 06:02:57.900715: step: 454/466, loss: 0.0025760687422007322 2023-01-24 06:02:58.650991: step: 456/466, loss: 0.012058288790285587 2023-01-24 06:02:59.368393: step: 458/466, loss: 0.0031139289494603872 2023-01-24 06:03:00.090802: step: 460/466, loss: 0.002164160367101431 2023-01-24 06:03:00.818187: step: 462/466, loss: 0.00015576444275211543 2023-01-24 06:03:01.587054: step: 464/466, loss: 2.8912174457218498e-05 2023-01-24 06:03:02.427527: step: 466/466, loss: 0.008501943200826645 2023-01-24 06:03:03.216104: step: 468/466, loss: 0.021276382729411125 2023-01-24 06:03:04.013065: step: 470/466, loss: 0.013494429178535938 2023-01-24 06:03:04.781284: step: 472/466, loss: 0.01890728250145912 2023-01-24 06:03:05.493055: step: 474/466, loss: 0.0011015519266948104 2023-01-24 06:03:06.186744: step: 476/466, loss: 0.019345303997397423 2023-01-24 06:03:06.858838: step: 478/466, loss: 0.0008879891829565167 2023-01-24 06:03:07.651866: step: 480/466, loss: 0.07371818274259567 2023-01-24 06:03:08.391496: step: 482/466, loss: 0.02774639055132866 2023-01-24 06:03:09.133766: step: 484/466, loss: 0.0016840663738548756 2023-01-24 06:03:09.891960: step: 486/466, loss: 0.04393770173192024 2023-01-24 06:03:10.615077: step: 488/466, loss: 0.007707908283919096 2023-01-24 06:03:11.439541: step: 490/466, loss: 0.10483228415250778 2023-01-24 06:03:12.109199: step: 492/466, loss: 0.014694461598992348 2023-01-24 06:03:12.810282: step: 494/466, loss: 0.00035671706427820027 2023-01-24 06:03:13.497722: step: 496/466, loss: 0.0007526023546233773 2023-01-24 06:03:14.226631: step: 498/466, loss: 0.005434651393443346 2023-01-24 06:03:14.944438: step: 500/466, loss: 0.0034361332654953003 2023-01-24 06:03:15.723777: step: 502/466, loss: 0.012881132774055004 2023-01-24 06:03:16.479009: step: 504/466, loss: 0.029119957238435745 2023-01-24 06:03:17.287624: step: 506/466, loss: 0.054783616214990616 2023-01-24 06:03:18.128253: step: 508/466, loss: 0.004122794605791569 2023-01-24 06:03:18.876318: step: 510/466, loss: 0.01581837795674801 2023-01-24 06:03:19.552898: step: 512/466, loss: 0.0014336195308715105 2023-01-24 06:03:20.264892: step: 514/466, loss: 0.00047153281047940254 2023-01-24 06:03:21.023789: step: 516/466, loss: 0.04796868935227394 2023-01-24 06:03:21.726315: step: 518/466, loss: 0.0025941431522369385 2023-01-24 06:03:22.415925: step: 520/466, loss: 0.022482289001345634 2023-01-24 06:03:23.170190: step: 522/466, loss: 0.026926374062895775 2023-01-24 06:03:23.988037: step: 524/466, loss: 0.011951807886362076 2023-01-24 06:03:24.723687: step: 526/466, loss: 0.03389818221330643 2023-01-24 06:03:25.524684: step: 528/466, loss: 0.12745307385921478 2023-01-24 06:03:26.234563: step: 530/466, loss: 19.91595458984375 2023-01-24 06:03:26.941123: step: 532/466, loss: 0.011495614424347878 2023-01-24 06:03:27.705648: step: 534/466, loss: 0.1675240844488144 2023-01-24 06:03:28.408784: step: 536/466, loss: 0.0011641534510999918 2023-01-24 06:03:29.153187: step: 538/466, loss: 0.012672476470470428 2023-01-24 06:03:29.943165: step: 540/466, loss: 0.01503435242921114 2023-01-24 06:03:30.733308: step: 542/466, loss: 0.27556371688842773 2023-01-24 06:03:31.528011: step: 544/466, loss: 0.016031546518206596 2023-01-24 06:03:32.291546: step: 546/466, loss: 0.02003948763012886 2023-01-24 06:03:33.065238: step: 548/466, loss: 0.0018222718499600887 2023-01-24 06:03:33.774190: step: 550/466, loss: 0.01665370538830757 2023-01-24 06:03:34.499078: step: 552/466, loss: 0.00013914398732595146 2023-01-24 06:03:35.337547: step: 554/466, loss: 0.0043900106102228165 2023-01-24 06:03:36.049615: step: 556/466, loss: 0.6020170450210571 2023-01-24 06:03:36.739053: step: 558/466, loss: 0.025239666923880577 2023-01-24 06:03:37.498008: step: 560/466, loss: 0.026258215308189392 2023-01-24 06:03:38.200264: step: 562/466, loss: 0.036790695041418076 2023-01-24 06:03:38.982544: step: 564/466, loss: 0.12813805043697357 2023-01-24 06:03:39.746936: step: 566/466, loss: 0.021396158263087273 2023-01-24 06:03:40.477048: step: 568/466, loss: 0.008455985225737095 2023-01-24 06:03:41.294859: step: 570/466, loss: 0.017628345638513565 2023-01-24 06:03:42.026615: step: 572/466, loss: 1.1817795038223267 2023-01-24 06:03:42.853446: step: 574/466, loss: 0.0001591477048350498 2023-01-24 06:03:43.539301: step: 576/466, loss: 0.017058134078979492 2023-01-24 06:03:44.356389: step: 578/466, loss: 0.016770707443356514 2023-01-24 06:03:45.128405: step: 580/466, loss: 0.011051390320062637 2023-01-24 06:03:45.870441: step: 582/466, loss: 0.3217681050300598 2023-01-24 06:03:46.548104: step: 584/466, loss: 0.012333834543824196 2023-01-24 06:03:47.247569: step: 586/466, loss: 0.11268869787454605 2023-01-24 06:03:48.034414: step: 588/466, loss: 0.013719167560338974 2023-01-24 06:03:48.823871: step: 590/466, loss: 0.018657803535461426 2023-01-24 06:03:49.624957: step: 592/466, loss: 0.019210534170269966 2023-01-24 06:03:50.425549: step: 594/466, loss: 0.8516140580177307 2023-01-24 06:03:51.137154: step: 596/466, loss: 0.00032440427457913756 2023-01-24 06:03:51.932970: step: 598/466, loss: 0.05943857878446579 2023-01-24 06:03:52.646611: step: 600/466, loss: 0.0002008227165788412 2023-01-24 06:03:53.371385: step: 602/466, loss: 0.0069481548853218555 2023-01-24 06:03:54.064980: step: 604/466, loss: 0.04889529198408127 2023-01-24 06:03:54.755543: step: 606/466, loss: 0.0031339051201939583 2023-01-24 06:03:55.530069: step: 608/466, loss: 0.07272807508707047 2023-01-24 06:03:56.258583: step: 610/466, loss: 0.029821842908859253 2023-01-24 06:03:57.003416: step: 612/466, loss: 0.0008868348668329418 2023-01-24 06:03:57.762499: step: 614/466, loss: 0.0166871827095747 2023-01-24 06:03:58.564851: step: 616/466, loss: 0.041613008826971054 2023-01-24 06:03:59.257665: step: 618/466, loss: 0.005279239267110825 2023-01-24 06:04:00.028061: step: 620/466, loss: 0.00036231501144357026 2023-01-24 06:04:00.686673: step: 622/466, loss: 0.0006214659078978002 2023-01-24 06:04:01.432854: step: 624/466, loss: 0.020328793674707413 2023-01-24 06:04:02.176975: step: 626/466, loss: 0.00814978126436472 2023-01-24 06:04:02.977196: step: 628/466, loss: 0.0077381557784974575 2023-01-24 06:04:03.757215: step: 630/466, loss: 0.058892786502838135 2023-01-24 06:04:04.495523: step: 632/466, loss: 0.007260517682880163 2023-01-24 06:04:05.321454: step: 634/466, loss: 0.12914660573005676 2023-01-24 06:04:06.054952: step: 636/466, loss: 0.0056647504679858685 2023-01-24 06:04:06.818051: step: 638/466, loss: 0.011224090121686459 2023-01-24 06:04:07.459101: step: 640/466, loss: 0.02375660464167595 2023-01-24 06:04:08.165467: step: 642/466, loss: 0.017788778990507126 2023-01-24 06:04:08.889588: step: 644/466, loss: 0.0006204199744388461 2023-01-24 06:04:09.625706: step: 646/466, loss: 0.0039198934100568295 2023-01-24 06:04:10.465417: step: 648/466, loss: 0.006853078491985798 2023-01-24 06:04:11.240161: step: 650/466, loss: 0.012796571478247643 2023-01-24 06:04:11.996831: step: 652/466, loss: 0.0021514042746275663 2023-01-24 06:04:12.738753: step: 654/466, loss: 10.409101486206055 2023-01-24 06:04:13.504089: step: 656/466, loss: 0.03252642601728439 2023-01-24 06:04:14.291128: step: 658/466, loss: 0.048868775367736816 2023-01-24 06:04:14.983585: step: 660/466, loss: 0.009728114120662212 2023-01-24 06:04:15.830963: step: 662/466, loss: 0.0049397689290344715 2023-01-24 06:04:16.600928: step: 664/466, loss: 0.0002577665145508945 2023-01-24 06:04:17.368525: step: 666/466, loss: 0.004854061175137758 2023-01-24 06:04:18.108285: step: 668/466, loss: 0.007043915335088968 2023-01-24 06:04:18.809353: step: 670/466, loss: 0.07948621362447739 2023-01-24 06:04:19.629134: step: 672/466, loss: 0.0197658259421587 2023-01-24 06:04:20.403195: step: 674/466, loss: 0.19008009135723114 2023-01-24 06:04:21.174603: step: 676/466, loss: 0.002508687088266015 2023-01-24 06:04:21.917532: step: 678/466, loss: 0.026402266696095467 2023-01-24 06:04:22.727768: step: 680/466, loss: 0.0015470042126253247 2023-01-24 06:04:23.466064: step: 682/466, loss: 0.0009299801313318312 2023-01-24 06:04:24.131730: step: 684/466, loss: 0.0004883540677838027 2023-01-24 06:04:24.852132: step: 686/466, loss: 0.019189296290278435 2023-01-24 06:04:25.610904: step: 688/466, loss: 0.04578102380037308 2023-01-24 06:04:26.407255: step: 690/466, loss: 0.0030191573314368725 2023-01-24 06:04:27.199233: step: 692/466, loss: 0.005563544109463692 2023-01-24 06:04:28.013387: step: 694/466, loss: 0.3574367165565491 2023-01-24 06:04:28.714156: step: 696/466, loss: 0.010373993776738644 2023-01-24 06:04:29.444938: step: 698/466, loss: 0.12190108746290207 2023-01-24 06:04:30.136033: step: 700/466, loss: 0.0032017051707953215 2023-01-24 06:04:30.879894: step: 702/466, loss: 0.017082445323467255 2023-01-24 06:04:31.633043: step: 704/466, loss: 0.006890237331390381 2023-01-24 06:04:32.465307: step: 706/466, loss: 0.0007328407955355942 2023-01-24 06:04:33.202938: step: 708/466, loss: 0.01048083696514368 2023-01-24 06:04:33.893339: step: 710/466, loss: 0.046984102576971054 2023-01-24 06:04:34.621245: step: 712/466, loss: 0.24207662045955658 2023-01-24 06:04:35.339188: step: 714/466, loss: 0.012634899467229843 2023-01-24 06:04:36.138137: step: 716/466, loss: 0.03797135129570961 2023-01-24 06:04:36.926473: step: 718/466, loss: 0.019333388656377792 2023-01-24 06:04:37.751373: step: 720/466, loss: 0.0033987818751484156 2023-01-24 06:04:38.456668: step: 722/466, loss: 0.001986326416954398 2023-01-24 06:04:39.238883: step: 724/466, loss: 0.050077229738235474 2023-01-24 06:04:39.987539: step: 726/466, loss: 0.012111474759876728 2023-01-24 06:04:40.777785: step: 728/466, loss: 0.0119631951674819 2023-01-24 06:04:41.564122: step: 730/466, loss: 0.03998979181051254 2023-01-24 06:04:42.329927: step: 732/466, loss: 0.07630390673875809 2023-01-24 06:04:43.013259: step: 734/466, loss: 0.011790863238275051 2023-01-24 06:04:43.696199: step: 736/466, loss: 0.0075907232239842415 2023-01-24 06:04:44.493256: step: 738/466, loss: 0.0070110103115439415 2023-01-24 06:04:45.219354: step: 740/466, loss: 0.02544263005256653 2023-01-24 06:04:45.962042: step: 742/466, loss: 0.0033943578600883484 2023-01-24 06:04:46.692742: step: 744/466, loss: 0.0024433997459709644 2023-01-24 06:04:47.379420: step: 746/466, loss: 0.08233567327260971 2023-01-24 06:04:48.167221: step: 748/466, loss: 0.0069228848442435265 2023-01-24 06:04:48.986822: step: 750/466, loss: 0.04751036688685417 2023-01-24 06:04:49.781140: step: 752/466, loss: 0.011901628226041794 2023-01-24 06:04:50.491154: step: 754/466, loss: 0.19462859630584717 2023-01-24 06:04:51.228913: step: 756/466, loss: 0.011455833911895752 2023-01-24 06:04:51.969709: step: 758/466, loss: 0.09103307127952576 2023-01-24 06:04:52.736014: step: 760/466, loss: 0.0032544638961553574 2023-01-24 06:04:53.524526: step: 762/466, loss: 0.018020547926425934 2023-01-24 06:04:54.286547: step: 764/466, loss: 0.013624212704598904 2023-01-24 06:04:55.121529: step: 766/466, loss: 0.01824241690337658 2023-01-24 06:04:55.831927: step: 768/466, loss: 0.009417587891221046 2023-01-24 06:04:56.614008: step: 770/466, loss: 0.02756587788462639 2023-01-24 06:04:57.343581: step: 772/466, loss: 0.00769635196775198 2023-01-24 06:04:58.125494: step: 774/466, loss: 0.01789870485663414 2023-01-24 06:04:58.938925: step: 776/466, loss: 0.007817920297384262 2023-01-24 06:04:59.712829: step: 778/466, loss: 0.010252301581203938 2023-01-24 06:05:00.524311: step: 780/466, loss: 0.01401289738714695 2023-01-24 06:05:01.200997: step: 782/466, loss: 0.008630058728158474 2023-01-24 06:05:01.922964: step: 784/466, loss: 0.04713859409093857 2023-01-24 06:05:02.734405: step: 786/466, loss: 0.00617067189887166 2023-01-24 06:05:03.537279: step: 788/466, loss: 0.06235240399837494 2023-01-24 06:05:04.283614: step: 790/466, loss: 0.00010020119952969253 2023-01-24 06:05:05.046943: step: 792/466, loss: 0.012778275646269321 2023-01-24 06:05:05.779183: step: 794/466, loss: 0.0429111085832119 2023-01-24 06:05:06.659319: step: 796/466, loss: 0.004496218170970678 2023-01-24 06:05:07.406584: step: 798/466, loss: 0.016231169924139977 2023-01-24 06:05:08.134810: step: 800/466, loss: 0.008096449077129364 2023-01-24 06:05:08.926856: step: 802/466, loss: 0.0170602947473526 2023-01-24 06:05:09.606908: step: 804/466, loss: 0.016482815146446228 2023-01-24 06:05:10.322812: step: 806/466, loss: 0.20911498367786407 2023-01-24 06:05:11.011749: step: 808/466, loss: 0.013375013135373592 2023-01-24 06:05:11.762350: step: 810/466, loss: 0.008207838982343674 2023-01-24 06:05:12.496419: step: 812/466, loss: 0.021015865728259087 2023-01-24 06:05:13.276179: step: 814/466, loss: 0.0028387221973389387 2023-01-24 06:05:14.051975: step: 816/466, loss: 0.06083836778998375 2023-01-24 06:05:14.820089: step: 818/466, loss: 0.01898682489991188 2023-01-24 06:05:15.578063: step: 820/466, loss: 0.02709573693573475 2023-01-24 06:05:16.330224: step: 822/466, loss: 0.04168681800365448 2023-01-24 06:05:17.077478: step: 824/466, loss: 0.10127529501914978 2023-01-24 06:05:17.823462: step: 826/466, loss: 0.019474346190690994 2023-01-24 06:05:18.591991: step: 828/466, loss: 0.047736093401908875 2023-01-24 06:05:19.381102: step: 830/466, loss: 0.06600034236907959 2023-01-24 06:05:20.176766: step: 832/466, loss: 0.008739227429032326 2023-01-24 06:05:20.904438: step: 834/466, loss: 2.102041721343994 2023-01-24 06:05:21.703790: step: 836/466, loss: 0.041133981198072433 2023-01-24 06:05:22.547821: step: 838/466, loss: 0.027052856981754303 2023-01-24 06:05:23.254173: step: 840/466, loss: 0.07918006181716919 2023-01-24 06:05:23.951803: step: 842/466, loss: 0.0011637471616268158 2023-01-24 06:05:24.737120: step: 844/466, loss: 0.008314933627843857 2023-01-24 06:05:25.473104: step: 846/466, loss: 0.02303183451294899 2023-01-24 06:05:26.255333: step: 848/466, loss: 0.04594963416457176 2023-01-24 06:05:27.013579: step: 850/466, loss: 0.0016797209391370416 2023-01-24 06:05:27.770510: step: 852/466, loss: 0.045600421726703644 2023-01-24 06:05:28.686972: step: 854/466, loss: 0.0024260608479380608 2023-01-24 06:05:29.482968: step: 856/466, loss: 0.014224640093743801 2023-01-24 06:05:30.195430: step: 858/466, loss: 0.0008135157404467463 2023-01-24 06:05:30.956323: step: 860/466, loss: 0.011436758562922478 2023-01-24 06:05:31.684055: step: 862/466, loss: 0.00037625571712851524 2023-01-24 06:05:32.400292: step: 864/466, loss: 0.003735210048034787 2023-01-24 06:05:33.170899: step: 866/466, loss: 0.02508869767189026 2023-01-24 06:05:33.908849: step: 868/466, loss: 0.012735653668642044 2023-01-24 06:05:34.645957: step: 870/466, loss: 0.017817430198192596 2023-01-24 06:05:35.399768: step: 872/466, loss: 0.0025312600191682577 2023-01-24 06:05:36.210914: step: 874/466, loss: 0.00425300607457757 2023-01-24 06:05:36.919047: step: 876/466, loss: 0.12249448150396347 2023-01-24 06:05:37.650066: step: 878/466, loss: 0.006648956798017025 2023-01-24 06:05:38.356834: step: 880/466, loss: 0.04195516183972359 2023-01-24 06:05:39.032164: step: 882/466, loss: 0.0004320128355175257 2023-01-24 06:05:39.830506: step: 884/466, loss: 0.022593876346945763 2023-01-24 06:05:40.611970: step: 886/466, loss: 0.0333063080906868 2023-01-24 06:05:41.327699: step: 888/466, loss: 0.0011965618468821049 2023-01-24 06:05:41.992652: step: 890/466, loss: 0.0011643233010545373 2023-01-24 06:05:42.775767: step: 892/466, loss: 0.0015710833249613643 2023-01-24 06:05:43.613144: step: 894/466, loss: 0.0017799792112782598 2023-01-24 06:05:44.385642: step: 896/466, loss: 0.011207039467990398 2023-01-24 06:05:45.129797: step: 898/466, loss: 0.001885769423097372 2023-01-24 06:05:46.013661: step: 900/466, loss: 0.00987928081303835 2023-01-24 06:05:46.723400: step: 902/466, loss: 0.029370475560426712 2023-01-24 06:05:47.489308: step: 904/466, loss: 0.019185485318303108 2023-01-24 06:05:48.215458: step: 906/466, loss: 0.01172910537570715 2023-01-24 06:05:48.906989: step: 908/466, loss: 0.0012697859201580286 2023-01-24 06:05:49.675775: step: 910/466, loss: 0.00720939738675952 2023-01-24 06:05:50.371946: step: 912/466, loss: 0.0008965595043264329 2023-01-24 06:05:51.146958: step: 914/466, loss: 0.005025980528444052 2023-01-24 06:05:52.054234: step: 916/466, loss: 0.00024496426340192556 2023-01-24 06:05:52.994602: step: 918/466, loss: 0.029787475243210793 2023-01-24 06:05:53.719978: step: 920/466, loss: 0.046670347452163696 2023-01-24 06:05:54.375900: step: 922/466, loss: 0.005342547781765461 2023-01-24 06:05:55.111851: step: 924/466, loss: 0.0036600539460778236 2023-01-24 06:05:55.838026: step: 926/466, loss: 0.003369266865774989 2023-01-24 06:05:56.581300: step: 928/466, loss: 0.02210007980465889 2023-01-24 06:05:57.291662: step: 930/466, loss: 0.06508655846118927 2023-01-24 06:05:58.061103: step: 932/466, loss: 0.002537196036428213 ================================================== Loss: 0.108 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35001556178026766, 'r': 0.30485226348603955, 'f1': 0.3258765575195596}, 'combined': 0.24011956869862283, 'epoch': 31} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3805845852196598, 'r': 0.2862629289173871, 'f1': 0.32675313548038054}, 'combined': 0.20083363449038022, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3247620435120435, 'r': 0.3124371082743948, 'f1': 0.31848037922747785}, 'combined': 0.23466975311498367, 'epoch': 31} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3622890754345028, 'r': 0.284117515830351, 'f1': 0.318476554898713}, 'combined': 0.19574656544994068, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36364628820960704, 'r': 0.31603415559772297, 'f1': 0.3381725888324873}, 'combined': 0.2491798022976222, 'epoch': 31} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37557626222681734, 'r': 0.27793945228916983, 'f1': 0.31946427085826895}, 'combined': 0.19731616729481322, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2823275862068966, 'r': 0.23392857142857143, 'f1': 0.255859375}, 'combined': 0.17057291666666666, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.391304347826087, 'f1': 0.3050847457627119}, 'combined': 0.15254237288135594, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34145184824902725, 'r': 0.33302893738140416, 'f1': 0.33718780019212297}, 'combined': 0.2484541685626169, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3745543418502639, 'r': 0.29769773663943055, 'f1': 0.3317326443015362}, 'combined': 0.20389421064387103, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.3535714285714286, 'f1': 0.3639705882352941}, 'combined': 0.24264705882352938, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3600931677018634, 'r': 0.3300284629981025, 'f1': 0.34440594059405943}, 'combined': 0.25377279833246486, 'epoch': 29} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3732928220606822, 'r': 0.2841675462053636, 'f1': 0.3226892764375249}, 'combined': 0.19930808250553012, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 29} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:08:41.126194: step: 2/466, loss: 5.15372994414065e-05 2023-01-24 06:08:41.878656: step: 4/466, loss: 0.06286688148975372 2023-01-24 06:08:42.686827: step: 6/466, loss: 0.143926739692688 2023-01-24 06:08:43.457882: step: 8/466, loss: 0.04372314363718033 2023-01-24 06:08:44.189607: step: 10/466, loss: 0.05686626583337784 2023-01-24 06:08:44.898486: step: 12/466, loss: 0.0015271787997335196 2023-01-24 06:08:45.721865: step: 14/466, loss: 0.0009403207222931087 2023-01-24 06:08:46.558740: step: 16/466, loss: 0.003312920220196247 2023-01-24 06:08:47.305037: step: 18/466, loss: 0.0007510375580750406 2023-01-24 06:08:48.016876: step: 20/466, loss: 0.01566898077726364 2023-01-24 06:08:48.723331: step: 22/466, loss: 0.009687750600278378 2023-01-24 06:08:49.773122: step: 24/466, loss: 0.021713193506002426 2023-01-24 06:08:50.520580: step: 26/466, loss: 0.05182737484574318 2023-01-24 06:08:51.247799: step: 28/466, loss: 0.02287333831191063 2023-01-24 06:08:52.072740: step: 30/466, loss: 0.03470327705144882 2023-01-24 06:08:52.819566: step: 32/466, loss: 0.027573635801672935 2023-01-24 06:08:53.589574: step: 34/466, loss: 0.0009407330653630197 2023-01-24 06:08:54.300768: step: 36/466, loss: 0.02692416124045849 2023-01-24 06:08:55.072059: step: 38/466, loss: 0.028733985498547554 2023-01-24 06:08:55.761928: step: 40/466, loss: 0.0004459666379261762 2023-01-24 06:08:56.498251: step: 42/466, loss: 0.016065159812569618 2023-01-24 06:08:57.276343: step: 44/466, loss: 0.007120391353964806 2023-01-24 06:08:58.007935: step: 46/466, loss: 0.0037614544853568077 2023-01-24 06:08:58.780956: step: 48/466, loss: 0.006791574414819479 2023-01-24 06:08:59.539478: step: 50/466, loss: 0.0027036392129957676 2023-01-24 06:09:00.347221: step: 52/466, loss: 0.02349858544766903 2023-01-24 06:09:01.144735: step: 54/466, loss: 0.04122764989733696 2023-01-24 06:09:01.874167: step: 56/466, loss: 0.006118678953498602 2023-01-24 06:09:02.666272: step: 58/466, loss: 0.03250905126333237 2023-01-24 06:09:03.391953: step: 60/466, loss: 0.0003115589206572622 2023-01-24 06:09:04.140863: step: 62/466, loss: 0.033898092806339264 2023-01-24 06:09:04.941613: step: 64/466, loss: 0.0010923035442829132 2023-01-24 06:09:05.671784: step: 66/466, loss: 0.016168424859642982 2023-01-24 06:09:06.394210: step: 68/466, loss: 0.0022301855497062206 2023-01-24 06:09:07.132082: step: 70/466, loss: 0.009409364312887192 2023-01-24 06:09:07.906157: step: 72/466, loss: 0.026693210005760193 2023-01-24 06:09:08.628705: step: 74/466, loss: 0.009221694432199001 2023-01-24 06:09:09.332233: step: 76/466, loss: 0.0024394956417381763 2023-01-24 06:09:10.073966: step: 78/466, loss: 0.0045931036584079266 2023-01-24 06:09:10.862098: step: 80/466, loss: 0.00041875254828482866 2023-01-24 06:09:11.707139: step: 82/466, loss: 0.0021951228845864534 2023-01-24 06:09:12.403573: step: 84/466, loss: 0.0005747769610024989 2023-01-24 06:09:13.152060: step: 86/466, loss: 0.0007826169021427631 2023-01-24 06:09:13.865169: step: 88/466, loss: 0.00010447607201058418 2023-01-24 06:09:14.668243: step: 90/466, loss: 0.003849487518891692 2023-01-24 06:09:15.461567: step: 92/466, loss: 0.693045437335968 2023-01-24 06:09:16.227374: step: 94/466, loss: 0.37063682079315186 2023-01-24 06:09:16.911660: step: 96/466, loss: 0.08187183737754822 2023-01-24 06:09:17.682228: step: 98/466, loss: 0.007195903453975916 2023-01-24 06:09:18.523275: step: 100/466, loss: 0.08682779222726822 2023-01-24 06:09:19.312834: step: 102/466, loss: 0.016507381573319435 2023-01-24 06:09:20.168835: step: 104/466, loss: 0.007591415196657181 2023-01-24 06:09:20.992924: step: 106/466, loss: 0.010487313382327557 2023-01-24 06:09:21.793854: step: 108/466, loss: 0.007627226412296295 2023-01-24 06:09:22.590719: step: 110/466, loss: 0.014922836795449257 2023-01-24 06:09:23.354632: step: 112/466, loss: 0.023592745885252953 2023-01-24 06:09:24.092091: step: 114/466, loss: 0.0010895115556195378 2023-01-24 06:09:24.814228: step: 116/466, loss: 0.0007410570979118347 2023-01-24 06:09:25.561808: step: 118/466, loss: 0.004684681538492441 2023-01-24 06:09:26.353998: step: 120/466, loss: 0.001338861184194684 2023-01-24 06:09:27.171247: step: 122/466, loss: 0.02555706538259983 2023-01-24 06:09:28.095013: step: 124/466, loss: 0.017490247264504433 2023-01-24 06:09:28.989708: step: 126/466, loss: 0.031979188323020935 2023-01-24 06:09:29.742330: step: 128/466, loss: 0.02571520209312439 2023-01-24 06:09:30.436552: step: 130/466, loss: 0.01015580352395773 2023-01-24 06:09:31.175032: step: 132/466, loss: 0.0033969765063375235 2023-01-24 06:09:31.991820: step: 134/466, loss: 0.003304438665509224 2023-01-24 06:09:32.705107: step: 136/466, loss: 0.008730168454349041 2023-01-24 06:09:33.480472: step: 138/466, loss: 0.007856298238039017 2023-01-24 06:09:34.233384: step: 140/466, loss: 0.06530187278985977 2023-01-24 06:09:35.048558: step: 142/466, loss: 0.01866009458899498 2023-01-24 06:09:35.855407: step: 144/466, loss: 0.034205153584480286 2023-01-24 06:09:36.568196: step: 146/466, loss: 0.013796073384582996 2023-01-24 06:09:37.261349: step: 148/466, loss: 0.034375231713056564 2023-01-24 06:09:38.048290: step: 150/466, loss: 0.028176940977573395 2023-01-24 06:09:38.795146: step: 152/466, loss: 0.04444631189107895 2023-01-24 06:09:39.540389: step: 154/466, loss: 0.0017726004589349031 2023-01-24 06:09:40.287542: step: 156/466, loss: 0.006879525724798441 2023-01-24 06:09:41.060808: step: 158/466, loss: 0.0015480904839932919 2023-01-24 06:09:41.817908: step: 160/466, loss: 0.021863074973225594 2023-01-24 06:09:42.488314: step: 162/466, loss: 0.003709911135956645 2023-01-24 06:09:43.222733: step: 164/466, loss: 0.0016268891049548984 2023-01-24 06:09:43.987440: step: 166/466, loss: 0.005751646589487791 2023-01-24 06:09:44.861758: step: 168/466, loss: 0.06992904841899872 2023-01-24 06:09:45.629905: step: 170/466, loss: 0.549974262714386 2023-01-24 06:09:46.390911: step: 172/466, loss: 0.016287731006741524 2023-01-24 06:09:47.066754: step: 174/466, loss: 0.0040813288651406765 2023-01-24 06:09:47.776848: step: 176/466, loss: 0.8938317894935608 2023-01-24 06:09:48.597596: step: 178/466, loss: 0.061728768050670624 2023-01-24 06:09:49.300266: step: 180/466, loss: 0.1072620376944542 2023-01-24 06:09:49.996308: step: 182/466, loss: 0.00487162871286273 2023-01-24 06:09:50.797215: step: 184/466, loss: 0.15155097842216492 2023-01-24 06:09:51.545923: step: 186/466, loss: 0.15379630029201508 2023-01-24 06:09:52.270167: step: 188/466, loss: 0.04495794698596001 2023-01-24 06:09:53.069007: step: 190/466, loss: 0.0044115460477769375 2023-01-24 06:09:53.797111: step: 192/466, loss: 0.0011705057695508003 2023-01-24 06:09:54.597619: step: 194/466, loss: 0.0030282491352409124 2023-01-24 06:09:55.398287: step: 196/466, loss: 0.004321829881519079 2023-01-24 06:09:56.172107: step: 198/466, loss: 0.015348169021308422 2023-01-24 06:09:56.935468: step: 200/466, loss: 0.01626484841108322 2023-01-24 06:09:57.742225: step: 202/466, loss: 0.03765762597322464 2023-01-24 06:09:58.446541: step: 204/466, loss: 0.002001148881390691 2023-01-24 06:09:59.224838: step: 206/466, loss: 0.0076205311343073845 2023-01-24 06:10:00.031886: step: 208/466, loss: 0.026217781007289886 2023-01-24 06:10:00.731800: step: 210/466, loss: 0.014518878422677517 2023-01-24 06:10:01.549774: step: 212/466, loss: 3.662377275759354e-05 2023-01-24 06:10:02.389930: step: 214/466, loss: 0.0026546171866357327 2023-01-24 06:10:03.035157: step: 216/466, loss: 0.0005058245151303709 2023-01-24 06:10:03.837320: step: 218/466, loss: 0.057214125990867615 2023-01-24 06:10:04.617491: step: 220/466, loss: 0.0011696420842781663 2023-01-24 06:10:05.395107: step: 222/466, loss: 0.0006456954870373011 2023-01-24 06:10:06.145333: step: 224/466, loss: 0.007920566946268082 2023-01-24 06:10:06.848827: step: 226/466, loss: 0.0024360103998333216 2023-01-24 06:10:07.648816: step: 228/466, loss: 0.0069953678175807 2023-01-24 06:10:08.331105: step: 230/466, loss: 0.002587408060207963 2023-01-24 06:10:09.023255: step: 232/466, loss: 0.06114426627755165 2023-01-24 06:10:09.753603: step: 234/466, loss: 0.021435175091028214 2023-01-24 06:10:10.522525: step: 236/466, loss: 0.013130392879247665 2023-01-24 06:10:11.290221: step: 238/466, loss: 0.04634890332818031 2023-01-24 06:10:12.054704: step: 240/466, loss: 0.0003861374862026423 2023-01-24 06:10:12.824388: step: 242/466, loss: 0.018936574459075928 2023-01-24 06:10:13.592257: step: 244/466, loss: 0.07208773493766785 2023-01-24 06:10:14.318412: step: 246/466, loss: 0.01850968413054943 2023-01-24 06:10:14.990395: step: 248/466, loss: 0.0026213873643428087 2023-01-24 06:10:15.720697: step: 250/466, loss: 0.000575187848880887 2023-01-24 06:10:16.434028: step: 252/466, loss: 0.010430560447275639 2023-01-24 06:10:17.223201: step: 254/466, loss: 0.02402244694530964 2023-01-24 06:10:17.936717: step: 256/466, loss: 0.4019804894924164 2023-01-24 06:10:18.654893: step: 258/466, loss: 0.002005958929657936 2023-01-24 06:10:19.371175: step: 260/466, loss: 0.005219338461756706 2023-01-24 06:10:20.066793: step: 262/466, loss: 0.007266393397003412 2023-01-24 06:10:20.793709: step: 264/466, loss: 0.011721421964466572 2023-01-24 06:10:21.533049: step: 266/466, loss: 0.01827792450785637 2023-01-24 06:10:22.261024: step: 268/466, loss: 0.04693610966205597 2023-01-24 06:10:23.046011: step: 270/466, loss: 0.01974484883248806 2023-01-24 06:10:23.918747: step: 272/466, loss: 0.09845882654190063 2023-01-24 06:10:24.609585: step: 274/466, loss: 0.4635373651981354 2023-01-24 06:10:25.375653: step: 276/466, loss: 0.6648179292678833 2023-01-24 06:10:26.095944: step: 278/466, loss: 0.035920653492212296 2023-01-24 06:10:26.800149: step: 280/466, loss: 0.007692283485084772 2023-01-24 06:10:27.483120: step: 282/466, loss: 0.032454658299684525 2023-01-24 06:10:28.250956: step: 284/466, loss: 0.001869324711151421 2023-01-24 06:10:28.952363: step: 286/466, loss: 0.0011435500346124172 2023-01-24 06:10:29.795067: step: 288/466, loss: 0.01853904314339161 2023-01-24 06:10:30.433821: step: 290/466, loss: 0.05531471222639084 2023-01-24 06:10:31.212874: step: 292/466, loss: 0.13581180572509766 2023-01-24 06:10:31.992309: step: 294/466, loss: 0.043170515447854996 2023-01-24 06:10:32.658763: step: 296/466, loss: 0.000367786327842623 2023-01-24 06:10:33.465186: step: 298/466, loss: 0.0017710586544126272 2023-01-24 06:10:34.212684: step: 300/466, loss: 0.0012787083396688104 2023-01-24 06:10:34.989871: step: 302/466, loss: 0.009321542456746101 2023-01-24 06:10:35.708499: step: 304/466, loss: 0.04730689898133278 2023-01-24 06:10:36.477443: step: 306/466, loss: 0.07945113629102707 2023-01-24 06:10:37.240134: step: 308/466, loss: 0.0039559840224683285 2023-01-24 06:10:38.029844: step: 310/466, loss: 0.00273063569329679 2023-01-24 06:10:38.729303: step: 312/466, loss: 0.24587573111057281 2023-01-24 06:10:39.426910: step: 314/466, loss: 0.010982971638441086 2023-01-24 06:10:40.154722: step: 316/466, loss: 0.0003546890802681446 2023-01-24 06:10:40.977084: step: 318/466, loss: 0.21356633305549622 2023-01-24 06:10:41.720486: step: 320/466, loss: 0.00359840365126729 2023-01-24 06:10:42.489112: step: 322/466, loss: 0.0009715608903206885 2023-01-24 06:10:43.322572: step: 324/466, loss: 0.003452699165791273 2023-01-24 06:10:44.079425: step: 326/466, loss: 0.008932402357459068 2023-01-24 06:10:44.826358: step: 328/466, loss: 0.008256279863417149 2023-01-24 06:10:45.574889: step: 330/466, loss: 0.01303121168166399 2023-01-24 06:10:46.328703: step: 332/466, loss: 0.012796856462955475 2023-01-24 06:10:47.135003: step: 334/466, loss: 0.005467746406793594 2023-01-24 06:10:47.909157: step: 336/466, loss: 0.028389442712068558 2023-01-24 06:10:48.667734: step: 338/466, loss: 0.015709152445197105 2023-01-24 06:10:49.433467: step: 340/466, loss: 0.08886820822954178 2023-01-24 06:10:50.217876: step: 342/466, loss: 0.03574886545538902 2023-01-24 06:10:50.913065: step: 344/466, loss: 0.06135426089167595 2023-01-24 06:10:51.680379: step: 346/466, loss: 0.008707728236913681 2023-01-24 06:10:52.473495: step: 348/466, loss: 0.09459453821182251 2023-01-24 06:10:53.290786: step: 350/466, loss: 0.002856465056538582 2023-01-24 06:10:54.021825: step: 352/466, loss: 0.004025793168693781 2023-01-24 06:10:54.763248: step: 354/466, loss: 0.01567946933209896 2023-01-24 06:10:55.472403: step: 356/466, loss: 0.020442336797714233 2023-01-24 06:10:56.218096: step: 358/466, loss: 0.0505521185696125 2023-01-24 06:10:57.057320: step: 360/466, loss: 0.018883030861616135 2023-01-24 06:10:57.854962: step: 362/466, loss: 0.12958039343357086 2023-01-24 06:10:58.565490: step: 364/466, loss: 0.005207682028412819 2023-01-24 06:10:59.370942: step: 366/466, loss: 0.004336261190474033 2023-01-24 06:11:00.155248: step: 368/466, loss: 0.03535531833767891 2023-01-24 06:11:00.828649: step: 370/466, loss: 0.012042288668453693 2023-01-24 06:11:01.613551: step: 372/466, loss: 0.06027388945221901 2023-01-24 06:11:02.472562: step: 374/466, loss: 0.0804840475320816 2023-01-24 06:11:03.226962: step: 376/466, loss: 0.0014674561098217964 2023-01-24 06:11:03.922319: step: 378/466, loss: 0.05795981362462044 2023-01-24 06:11:04.647495: step: 380/466, loss: 0.00041502335807308555 2023-01-24 06:11:05.406157: step: 382/466, loss: 0.0005715118604712188 2023-01-24 06:11:06.206877: step: 384/466, loss: 0.06926651298999786 2023-01-24 06:11:06.980002: step: 386/466, loss: 0.03368639945983887 2023-01-24 06:11:07.777397: step: 388/466, loss: 0.011914392933249474 2023-01-24 06:11:08.579570: step: 390/466, loss: 0.01934785209596157 2023-01-24 06:11:09.358804: step: 392/466, loss: 0.017015738412737846 2023-01-24 06:11:10.115020: step: 394/466, loss: 0.0006627896218560636 2023-01-24 06:11:10.973979: step: 396/466, loss: 0.000683549209497869 2023-01-24 06:11:11.677141: step: 398/466, loss: 0.02618207037448883 2023-01-24 06:11:12.523157: step: 400/466, loss: 0.008119520731270313 2023-01-24 06:11:13.157708: step: 402/466, loss: 0.007547646760940552 2023-01-24 06:11:13.865710: step: 404/466, loss: 0.000945351435802877 2023-01-24 06:11:14.775411: step: 406/466, loss: 0.021251145750284195 2023-01-24 06:11:15.502711: step: 408/466, loss: 0.008209510706365108 2023-01-24 06:11:16.205749: step: 410/466, loss: 0.021134501323103905 2023-01-24 06:11:16.939198: step: 412/466, loss: 0.1187206283211708 2023-01-24 06:11:17.738018: step: 414/466, loss: 0.011240017600357533 2023-01-24 06:11:18.522413: step: 416/466, loss: 0.01897376775741577 2023-01-24 06:11:19.257883: step: 418/466, loss: 0.017858237028121948 2023-01-24 06:11:19.996180: step: 420/466, loss: 0.01124533824622631 2023-01-24 06:11:20.712702: step: 422/466, loss: 0.007958485744893551 2023-01-24 06:11:21.433471: step: 424/466, loss: 0.019424919039011 2023-01-24 06:11:22.164832: step: 426/466, loss: 0.0003154211735818535 2023-01-24 06:11:22.913280: step: 428/466, loss: 0.0011403568787500262 2023-01-24 06:11:23.631374: step: 430/466, loss: 0.0004980181693099439 2023-01-24 06:11:24.363226: step: 432/466, loss: 0.011258955113589764 2023-01-24 06:11:25.177224: step: 434/466, loss: 0.04757676273584366 2023-01-24 06:11:25.985252: step: 436/466, loss: 0.048159319907426834 2023-01-24 06:11:26.786140: step: 438/466, loss: 0.01208664383739233 2023-01-24 06:11:27.504802: step: 440/466, loss: 0.06914924085140228 2023-01-24 06:11:28.272328: step: 442/466, loss: 0.03010084666311741 2023-01-24 06:11:29.014871: step: 444/466, loss: 0.008922251872718334 2023-01-24 06:11:29.775026: step: 446/466, loss: 0.01286082249134779 2023-01-24 06:11:30.551806: step: 448/466, loss: 0.014049254357814789 2023-01-24 06:11:31.301606: step: 450/466, loss: 0.007900919765233994 2023-01-24 06:11:32.094719: step: 452/466, loss: 0.001978015759959817 2023-01-24 06:11:32.825315: step: 454/466, loss: 0.03158547729253769 2023-01-24 06:11:33.591090: step: 456/466, loss: 0.031130792573094368 2023-01-24 06:11:34.318080: step: 458/466, loss: 0.05741674825549126 2023-01-24 06:11:35.102790: step: 460/466, loss: 0.031495485454797745 2023-01-24 06:11:35.861167: step: 462/466, loss: 0.10114021599292755 2023-01-24 06:11:36.550648: step: 464/466, loss: 0.010003827512264252 2023-01-24 06:11:37.263010: step: 466/466, loss: 0.0012490164954215288 2023-01-24 06:11:37.986549: step: 468/466, loss: 0.0016084901290014386 2023-01-24 06:11:38.751180: step: 470/466, loss: 0.06793329864740372 2023-01-24 06:11:39.511725: step: 472/466, loss: 0.0016267584869638085 2023-01-24 06:11:40.271533: step: 474/466, loss: 0.04030391946434975 2023-01-24 06:11:41.018748: step: 476/466, loss: 0.004462345503270626 2023-01-24 06:11:41.754016: step: 478/466, loss: 0.007912660017609596 2023-01-24 06:11:42.551886: step: 480/466, loss: 0.0009775584330782294 2023-01-24 06:11:43.276916: step: 482/466, loss: 0.007115909829735756 2023-01-24 06:11:43.992763: step: 484/466, loss: 0.002461756346747279 2023-01-24 06:11:44.819029: step: 486/466, loss: 0.00989474169909954 2023-01-24 06:11:45.565578: step: 488/466, loss: 0.014085104689002037 2023-01-24 06:11:46.274875: step: 490/466, loss: 0.009780194610357285 2023-01-24 06:11:47.028453: step: 492/466, loss: 0.006210966035723686 2023-01-24 06:11:47.758278: step: 494/466, loss: 0.014013716019690037 2023-01-24 06:11:48.469861: step: 496/466, loss: 0.0018455483950674534 2023-01-24 06:11:49.114770: step: 498/466, loss: 0.0010371323442086577 2023-01-24 06:11:49.882898: step: 500/466, loss: 0.00023467614664696157 2023-01-24 06:11:50.605240: step: 502/466, loss: 0.00018893061496783048 2023-01-24 06:11:51.350907: step: 504/466, loss: 0.03937339410185814 2023-01-24 06:11:52.080590: step: 506/466, loss: 0.0881049633026123 2023-01-24 06:11:52.865246: step: 508/466, loss: 0.00766101386398077 2023-01-24 06:11:53.577204: step: 510/466, loss: 0.012610274367034435 2023-01-24 06:11:54.342958: step: 512/466, loss: 0.18752272427082062 2023-01-24 06:11:55.123283: step: 514/466, loss: 0.02388453483581543 2023-01-24 06:11:55.924215: step: 516/466, loss: 0.17496129870414734 2023-01-24 06:11:56.747425: step: 518/466, loss: 0.5000375509262085 2023-01-24 06:11:57.462160: step: 520/466, loss: 0.04777266085147858 2023-01-24 06:11:58.308690: step: 522/466, loss: 0.03219824656844139 2023-01-24 06:11:59.048993: step: 524/466, loss: 0.005659267771989107 2023-01-24 06:11:59.734886: step: 526/466, loss: 0.01143594179302454 2023-01-24 06:12:00.623426: step: 528/466, loss: 0.007292766124010086 2023-01-24 06:12:01.335625: step: 530/466, loss: 0.029840456321835518 2023-01-24 06:12:02.102824: step: 532/466, loss: 0.0034368287306278944 2023-01-24 06:12:02.750115: step: 534/466, loss: 0.016929039731621742 2023-01-24 06:12:03.415722: step: 536/466, loss: 0.013368791900575161 2023-01-24 06:12:04.141769: step: 538/466, loss: 0.08718257397413254 2023-01-24 06:12:04.943710: step: 540/466, loss: 0.0047819907777011395 2023-01-24 06:12:05.704675: step: 542/466, loss: 0.008324574679136276 2023-01-24 06:12:06.383474: step: 544/466, loss: 0.0233193039894104 2023-01-24 06:12:07.161057: step: 546/466, loss: 0.017024584114551544 2023-01-24 06:12:07.973538: step: 548/466, loss: 0.5465644001960754 2023-01-24 06:12:08.704461: step: 550/466, loss: 0.07017926126718521 2023-01-24 06:12:09.456729: step: 552/466, loss: 0.01461838185787201 2023-01-24 06:12:10.262448: step: 554/466, loss: 0.0295786764472723 2023-01-24 06:12:11.095074: step: 556/466, loss: 0.0040992312133312225 2023-01-24 06:12:11.822129: step: 558/466, loss: 0.06020784378051758 2023-01-24 06:12:12.530071: step: 560/466, loss: 0.018876129761338234 2023-01-24 06:12:13.207277: step: 562/466, loss: 0.006293662823736668 2023-01-24 06:12:13.901399: step: 564/466, loss: 0.006968436297029257 2023-01-24 06:12:14.547136: step: 566/466, loss: 0.0026204369496554136 2023-01-24 06:12:15.398922: step: 568/466, loss: 0.003658514702692628 2023-01-24 06:12:16.137939: step: 570/466, loss: 0.0021792915649712086 2023-01-24 06:12:16.864350: step: 572/466, loss: 0.22942087054252625 2023-01-24 06:12:17.618314: step: 574/466, loss: 0.006244773976504803 2023-01-24 06:12:18.362261: step: 576/466, loss: 0.0036456582602113485 2023-01-24 06:12:19.119458: step: 578/466, loss: 0.030625438317656517 2023-01-24 06:12:19.863209: step: 580/466, loss: 0.010105275548994541 2023-01-24 06:12:20.627803: step: 582/466, loss: 0.0260683111846447 2023-01-24 06:12:21.375304: step: 584/466, loss: 0.031024497002363205 2023-01-24 06:12:22.122800: step: 586/466, loss: 0.009688056074082851 2023-01-24 06:12:22.897560: step: 588/466, loss: 0.02166881412267685 2023-01-24 06:12:23.691407: step: 590/466, loss: 0.010086444206535816 2023-01-24 06:12:24.435845: step: 592/466, loss: 0.0005045717116445303 2023-01-24 06:12:25.123569: step: 594/466, loss: 0.0016831730026751757 2023-01-24 06:12:25.857964: step: 596/466, loss: 0.0021537907887250185 2023-01-24 06:12:26.647663: step: 598/466, loss: 0.06127836927771568 2023-01-24 06:12:27.396303: step: 600/466, loss: 0.00690503278747201 2023-01-24 06:12:28.138897: step: 602/466, loss: 0.001794489799067378 2023-01-24 06:12:29.006643: step: 604/466, loss: 0.0634308010339737 2023-01-24 06:12:29.883672: step: 606/466, loss: 0.018846353515982628 2023-01-24 06:12:30.777853: step: 608/466, loss: 0.009228182956576347 2023-01-24 06:12:31.459069: step: 610/466, loss: 0.015570910647511482 2023-01-24 06:12:32.292118: step: 612/466, loss: 0.001527966232970357 2023-01-24 06:12:33.074028: step: 614/466, loss: 0.009683456271886826 2023-01-24 06:12:33.870221: step: 616/466, loss: 0.003080724971368909 2023-01-24 06:12:34.622906: step: 618/466, loss: 0.033230457454919815 2023-01-24 06:12:35.357144: step: 620/466, loss: 0.0030834791250526905 2023-01-24 06:12:36.108593: step: 622/466, loss: 0.010715140029788017 2023-01-24 06:12:36.878294: step: 624/466, loss: 0.06441494822502136 2023-01-24 06:12:37.631784: step: 626/466, loss: 0.006688097957521677 2023-01-24 06:12:38.312489: step: 628/466, loss: 0.01309305801987648 2023-01-24 06:12:39.149543: step: 630/466, loss: 0.057056933641433716 2023-01-24 06:12:39.917449: step: 632/466, loss: 0.005508675705641508 2023-01-24 06:12:40.701627: step: 634/466, loss: 0.020695330575108528 2023-01-24 06:12:41.389559: step: 636/466, loss: 0.002080296166241169 2023-01-24 06:12:42.015963: step: 638/466, loss: 0.0001161619002232328 2023-01-24 06:12:42.780848: step: 640/466, loss: 0.022997191175818443 2023-01-24 06:12:43.536615: step: 642/466, loss: 0.004008348099887371 2023-01-24 06:12:44.225796: step: 644/466, loss: 0.018378565087914467 2023-01-24 06:12:45.024603: step: 646/466, loss: 0.05346864089369774 2023-01-24 06:12:45.809902: step: 648/466, loss: 0.028049439191818237 2023-01-24 06:12:46.599910: step: 650/466, loss: 0.01510514598339796 2023-01-24 06:12:47.279649: step: 652/466, loss: 0.012815630063414574 2023-01-24 06:12:48.045035: step: 654/466, loss: 0.3772117495536804 2023-01-24 06:12:48.847504: step: 656/466, loss: 0.0002463465789332986 2023-01-24 06:12:49.650814: step: 658/466, loss: 0.00645784754306078 2023-01-24 06:12:50.401534: step: 660/466, loss: 0.002490751910954714 2023-01-24 06:12:51.128942: step: 662/466, loss: 0.0003407985786907375 2023-01-24 06:12:51.812341: step: 664/466, loss: 0.0006427292246371508 2023-01-24 06:12:52.576421: step: 666/466, loss: 0.3637174665927887 2023-01-24 06:12:53.416461: step: 668/466, loss: 0.04300692677497864 2023-01-24 06:12:54.165404: step: 670/466, loss: 0.07485006749629974 2023-01-24 06:12:54.925253: step: 672/466, loss: 0.011753874830901623 2023-01-24 06:12:55.762274: step: 674/466, loss: 0.011387944221496582 2023-01-24 06:12:56.519078: step: 676/466, loss: 0.0013695204397663474 2023-01-24 06:12:57.283605: step: 678/466, loss: 0.0006198549526743591 2023-01-24 06:12:58.070421: step: 680/466, loss: 0.03149678558111191 2023-01-24 06:12:58.793119: step: 682/466, loss: 0.06519640982151031 2023-01-24 06:12:59.442228: step: 684/466, loss: 0.0003538952150847763 2023-01-24 06:13:00.246115: step: 686/466, loss: 6.4656453132629395 2023-01-24 06:13:01.025440: step: 688/466, loss: 0.04752850532531738 2023-01-24 06:13:01.750881: step: 690/466, loss: 0.014662222005426884 2023-01-24 06:13:02.570972: step: 692/466, loss: 0.00392181845381856 2023-01-24 06:13:03.298838: step: 694/466, loss: 0.005736898630857468 2023-01-24 06:13:04.071908: step: 696/466, loss: 0.005092841573059559 2023-01-24 06:13:04.805283: step: 698/466, loss: 0.006942718289792538 2023-01-24 06:13:05.572416: step: 700/466, loss: 0.014697852544486523 2023-01-24 06:13:06.445128: step: 702/466, loss: 0.005182123742997646 2023-01-24 06:13:07.150949: step: 704/466, loss: 0.0068984078243374825 2023-01-24 06:13:07.929249: step: 706/466, loss: 0.00032237821142189205 2023-01-24 06:13:08.634588: step: 708/466, loss: 0.009095367044210434 2023-01-24 06:13:09.399525: step: 710/466, loss: 0.00819557998329401 2023-01-24 06:13:10.184424: step: 712/466, loss: 0.01657663844525814 2023-01-24 06:13:10.962687: step: 714/466, loss: 0.018086804077029228 2023-01-24 06:13:11.634557: step: 716/466, loss: 0.013770289719104767 2023-01-24 06:13:12.413069: step: 718/466, loss: 0.021133458241820335 2023-01-24 06:13:13.207687: step: 720/466, loss: 0.07392483204603195 2023-01-24 06:13:13.909011: step: 722/466, loss: 0.002734451787546277 2023-01-24 06:13:14.728312: step: 724/466, loss: 0.0009729207376949489 2023-01-24 06:13:15.430983: step: 726/466, loss: 0.03714947775006294 2023-01-24 06:13:16.117597: step: 728/466, loss: 0.02827623300254345 2023-01-24 06:13:16.873968: step: 730/466, loss: 0.0007589849410578609 2023-01-24 06:13:17.725832: step: 732/466, loss: 0.02701164409518242 2023-01-24 06:13:18.458724: step: 734/466, loss: 0.011115744709968567 2023-01-24 06:13:19.190158: step: 736/466, loss: 0.0006149124819785357 2023-01-24 06:13:19.846539: step: 738/466, loss: 0.0014990844065323472 2023-01-24 06:13:20.668070: step: 740/466, loss: 0.007459838874638081 2023-01-24 06:13:21.515028: step: 742/466, loss: 0.006524314172565937 2023-01-24 06:13:22.257273: step: 744/466, loss: 0.006916932761669159 2023-01-24 06:13:22.944118: step: 746/466, loss: 0.0017152292421087623 2023-01-24 06:13:23.703587: step: 748/466, loss: 0.10608824342489243 2023-01-24 06:13:24.435312: step: 750/466, loss: 0.011215800419449806 2023-01-24 06:13:25.191341: step: 752/466, loss: 0.00193583476357162 2023-01-24 06:13:26.004243: step: 754/466, loss: 0.0022075334563851357 2023-01-24 06:13:26.729807: step: 756/466, loss: 0.14459285140037537 2023-01-24 06:13:27.520089: step: 758/466, loss: 0.01011571940034628 2023-01-24 06:13:28.318280: step: 760/466, loss: 0.01083743292838335 2023-01-24 06:13:29.174818: step: 762/466, loss: 0.0022419507149606943 2023-01-24 06:13:29.877506: step: 764/466, loss: 0.0012138750171288848 2023-01-24 06:13:30.519072: step: 766/466, loss: 0.0013211843324825168 2023-01-24 06:13:31.255274: step: 768/466, loss: 0.05762624740600586 2023-01-24 06:13:31.974771: step: 770/466, loss: 0.006621016189455986 2023-01-24 06:13:32.643699: step: 772/466, loss: 0.01991111785173416 2023-01-24 06:13:33.372556: step: 774/466, loss: 0.010002116672694683 2023-01-24 06:13:34.096804: step: 776/466, loss: 0.006916820537298918 2023-01-24 06:13:34.825322: step: 778/466, loss: 0.012478718534111977 2023-01-24 06:13:35.615715: step: 780/466, loss: 0.05239632725715637 2023-01-24 06:13:36.379337: step: 782/466, loss: 0.0016832815017551184 2023-01-24 06:13:37.183013: step: 784/466, loss: 0.03918365761637688 2023-01-24 06:13:37.953671: step: 786/466, loss: 0.006064407993108034 2023-01-24 06:13:38.734926: step: 788/466, loss: 0.007597712334245443 2023-01-24 06:13:39.452819: step: 790/466, loss: 0.0005627631326206028 2023-01-24 06:13:40.290961: step: 792/466, loss: 0.03723245859146118 2023-01-24 06:13:41.062291: step: 794/466, loss: 0.02122754231095314 2023-01-24 06:13:41.767033: step: 796/466, loss: 3.544481296557933e-05 2023-01-24 06:13:42.542224: step: 798/466, loss: 0.08708756417036057 2023-01-24 06:13:43.236464: step: 800/466, loss: 0.03159240633249283 2023-01-24 06:13:43.998312: step: 802/466, loss: 0.014514084905385971 2023-01-24 06:13:44.793443: step: 804/466, loss: 0.05164254456758499 2023-01-24 06:13:45.514070: step: 806/466, loss: 0.0032412000000476837 2023-01-24 06:13:46.383489: step: 808/466, loss: 0.033188801258802414 2023-01-24 06:13:47.157192: step: 810/466, loss: 0.039974067360162735 2023-01-24 06:13:47.876124: step: 812/466, loss: 0.0007288824999704957 2023-01-24 06:13:48.564263: step: 814/466, loss: 0.034891486167907715 2023-01-24 06:13:49.354941: step: 816/466, loss: 0.007756277918815613 2023-01-24 06:13:50.189129: step: 818/466, loss: 0.007071830797940493 2023-01-24 06:13:50.903784: step: 820/466, loss: 0.0004805404460057616 2023-01-24 06:13:51.593656: step: 822/466, loss: 0.001777339493855834 2023-01-24 06:13:52.322435: step: 824/466, loss: 0.015256262384355068 2023-01-24 06:13:53.027368: step: 826/466, loss: 0.0020203653257340193 2023-01-24 06:13:53.711994: step: 828/466, loss: 0.0001835294533520937 2023-01-24 06:13:54.555885: step: 830/466, loss: 0.006643530912697315 2023-01-24 06:13:55.253068: step: 832/466, loss: 0.011377407237887383 2023-01-24 06:13:55.936806: step: 834/466, loss: 3.36966036229569e-06 2023-01-24 06:13:56.728098: step: 836/466, loss: 0.009512822143733501 2023-01-24 06:13:57.618464: step: 838/466, loss: 0.00871200580149889 2023-01-24 06:13:58.359621: step: 840/466, loss: 0.0054242052137851715 2023-01-24 06:13:59.085308: step: 842/466, loss: 0.01555589772760868 2023-01-24 06:13:59.902999: step: 844/466, loss: 0.00017592482618056238 2023-01-24 06:14:00.656196: step: 846/466, loss: 0.0005512124043889344 2023-01-24 06:14:01.371336: step: 848/466, loss: 0.0009457360720261931 2023-01-24 06:14:02.188623: step: 850/466, loss: 0.0033583471085876226 2023-01-24 06:14:02.918613: step: 852/466, loss: 0.007074189838021994 2023-01-24 06:14:03.641955: step: 854/466, loss: 0.024136371910572052 2023-01-24 06:14:04.404907: step: 856/466, loss: 0.06818605214357376 2023-01-24 06:14:05.135404: step: 858/466, loss: 0.0001580503158038482 2023-01-24 06:14:05.838015: step: 860/466, loss: 0.026508208364248276 2023-01-24 06:14:06.647855: step: 862/466, loss: 0.000835965562146157 2023-01-24 06:14:07.461751: step: 864/466, loss: 0.06645052880048752 2023-01-24 06:14:08.215933: step: 866/466, loss: 0.00231425859965384 2023-01-24 06:14:08.945763: step: 868/466, loss: 0.002165533835068345 2023-01-24 06:14:09.727653: step: 870/466, loss: 0.00231713755056262 2023-01-24 06:14:10.565097: step: 872/466, loss: 0.0013481914065778255 2023-01-24 06:14:11.326462: step: 874/466, loss: 0.010383480228483677 2023-01-24 06:14:12.088304: step: 876/466, loss: 0.016029154881834984 2023-01-24 06:14:12.853402: step: 878/466, loss: 0.05106610804796219 2023-01-24 06:14:13.727034: step: 880/466, loss: 0.02417607232928276 2023-01-24 06:14:14.453745: step: 882/466, loss: 0.007658890448510647 2023-01-24 06:14:15.193963: step: 884/466, loss: 0.021339895203709602 2023-01-24 06:14:15.930905: step: 886/466, loss: 0.013452693819999695 2023-01-24 06:14:16.699523: step: 888/466, loss: 0.014736589044332504 2023-01-24 06:14:17.359429: step: 890/466, loss: 0.23204010725021362 2023-01-24 06:14:18.108913: step: 892/466, loss: 0.005269082263112068 2023-01-24 06:14:18.828540: step: 894/466, loss: 0.010547298938035965 2023-01-24 06:14:19.641754: step: 896/466, loss: 0.01578962802886963 2023-01-24 06:14:20.478854: step: 898/466, loss: 0.012825076468288898 2023-01-24 06:14:21.220016: step: 900/466, loss: 5.141352448845282e-05 2023-01-24 06:14:22.019057: step: 902/466, loss: 0.014206547290086746 2023-01-24 06:14:22.873738: step: 904/466, loss: 0.18870453536510468 2023-01-24 06:14:23.621696: step: 906/466, loss: 0.0011588651686906815 2023-01-24 06:14:24.319869: step: 908/466, loss: 0.00014258567534852773 2023-01-24 06:14:25.068121: step: 910/466, loss: 0.025311194360256195 2023-01-24 06:14:25.856328: step: 912/466, loss: 0.011471842415630817 2023-01-24 06:14:26.598667: step: 914/466, loss: 0.01299799419939518 2023-01-24 06:14:27.446880: step: 916/466, loss: 0.03922403231263161 2023-01-24 06:14:28.164623: step: 918/466, loss: 0.0002841950918082148 2023-01-24 06:14:29.036107: step: 920/466, loss: 0.0006640457431785762 2023-01-24 06:14:29.815905: step: 922/466, loss: 0.0005649054073728621 2023-01-24 06:14:30.605803: step: 924/466, loss: 0.0057207574136555195 2023-01-24 06:14:31.253807: step: 926/466, loss: 0.00019332297961227596 2023-01-24 06:14:31.990877: step: 928/466, loss: 0.004037438426166773 2023-01-24 06:14:32.761453: step: 930/466, loss: 1.7265130281448364 2023-01-24 06:14:33.435589: step: 932/466, loss: 0.00019567122217267752 ================================================== Loss: 0.052 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34954766875434934, 'r': 0.31771030993042376, 'f1': 0.33286944996686546}, 'combined': 0.24527222629137455, 'epoch': 32} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3679515937441042, 'r': 0.29429750522167086, 'f1': 0.3270287154798346}, 'combined': 0.20100301536809345, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3191720083856176, 'r': 0.32159456632402833, 'f1': 0.3203787078502135}, 'combined': 0.2360685215738415, 'epoch': 32} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3484966619842382, 'r': 0.2965543518791351, 'f1': 0.32043419669337253}, 'combined': 0.1969497989432436, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3610420979986198, 'r': 0.3308981657179001, 'f1': 0.34531353135313536}, 'combined': 0.2544415494180997, 'epoch': 32} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36538047151543657, 'r': 0.2919244321813107, 'f1': 0.32454797180850914}, 'combined': 0.20045610023466745, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36693548387096775, 'r': 0.325, 'f1': 0.3446969696969697}, 'combined': 0.2297979797979798, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24324324324324326, 'r': 0.391304347826087, 'f1': 0.30000000000000004}, 'combined': 0.15000000000000002, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34145184824902725, 'r': 0.33302893738140416, 'f1': 0.33718780019212297}, 'combined': 0.2484541685626169, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3745543418502639, 'r': 0.29769773663943055, 'f1': 0.3317326443015362}, 'combined': 0.20389421064387103, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.3535714285714286, 'f1': 0.3639705882352941}, 'combined': 0.24264705882352938, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3600931677018634, 'r': 0.3300284629981025, 'f1': 0.34440594059405943}, 'combined': 0.25377279833246486, 'epoch': 29} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3732928220606822, 'r': 0.2841675462053636, 'f1': 0.3226892764375249}, 'combined': 0.19930808250553012, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 29} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:17:17.793949: step: 2/466, loss: 0.019329171627759933 2023-01-24 06:17:18.529040: step: 4/466, loss: 1.0869028568267822 2023-01-24 06:17:19.327022: step: 6/466, loss: 0.14202919602394104 2023-01-24 06:17:19.979918: step: 8/466, loss: 0.0003967805823776871 2023-01-24 06:17:20.757217: step: 10/466, loss: 0.010574987158179283 2023-01-24 06:17:21.532454: step: 12/466, loss: 0.002730625681579113 2023-01-24 06:17:22.235699: step: 14/466, loss: 0.03457174077630043 2023-01-24 06:17:22.984225: step: 16/466, loss: 0.004452232271432877 2023-01-24 06:17:23.748773: step: 18/466, loss: 0.00018031950457952917 2023-01-24 06:17:24.501574: step: 20/466, loss: 0.021097086369991302 2023-01-24 06:17:25.262689: step: 22/466, loss: 0.0024196114391088486 2023-01-24 06:17:25.945201: step: 24/466, loss: 0.0004382265906315297 2023-01-24 06:17:26.874317: step: 26/466, loss: 0.027091480791568756 2023-01-24 06:17:27.685267: step: 28/466, loss: 0.0008335936581715941 2023-01-24 06:17:28.448625: step: 30/466, loss: 0.02781643532216549 2023-01-24 06:17:29.189899: step: 32/466, loss: 0.0023864025715738535 2023-01-24 06:17:29.904807: step: 34/466, loss: 0.0026451097801327705 2023-01-24 06:17:30.672949: step: 36/466, loss: 0.0025420142337679863 2023-01-24 06:17:31.487250: step: 38/466, loss: 0.0019144106190651655 2023-01-24 06:17:32.204621: step: 40/466, loss: 0.03274979442358017 2023-01-24 06:17:32.946659: step: 42/466, loss: 0.005071322433650494 2023-01-24 06:17:33.630033: step: 44/466, loss: 0.3955758213996887 2023-01-24 06:17:34.383798: step: 46/466, loss: 0.0027988648507744074 2023-01-24 06:17:35.160186: step: 48/466, loss: 0.05165601149201393 2023-01-24 06:17:35.954928: step: 50/466, loss: 0.019785480573773384 2023-01-24 06:17:36.636432: step: 52/466, loss: 0.007184821646660566 2023-01-24 06:17:37.416987: step: 54/466, loss: 0.019311172887682915 2023-01-24 06:17:38.235837: step: 56/466, loss: 0.0055588241666555405 2023-01-24 06:17:38.942771: step: 58/466, loss: 0.0013960510259494185 2023-01-24 06:17:39.734458: step: 60/466, loss: 0.028932584449648857 2023-01-24 06:17:40.497482: step: 62/466, loss: 0.0005302618956193328 2023-01-24 06:17:41.271978: step: 64/466, loss: 0.0064671579748392105 2023-01-24 06:17:42.067128: step: 66/466, loss: 0.00612952746450901 2023-01-24 06:17:42.796248: step: 68/466, loss: 0.08574938774108887 2023-01-24 06:17:43.423179: step: 70/466, loss: 0.004624377936124802 2023-01-24 06:17:44.233195: step: 72/466, loss: 0.002651211339980364 2023-01-24 06:17:44.886336: step: 74/466, loss: 0.0033132026437669992 2023-01-24 06:17:45.665858: step: 76/466, loss: 0.013541768305003643 2023-01-24 06:17:46.457806: step: 78/466, loss: 0.005505187902599573 2023-01-24 06:17:47.162443: step: 80/466, loss: 0.28946197032928467 2023-01-24 06:17:47.934552: step: 82/466, loss: 0.0002097875694744289 2023-01-24 06:17:48.593479: step: 84/466, loss: 0.001316684065386653 2023-01-24 06:17:49.281685: step: 86/466, loss: 0.01842031069099903 2023-01-24 06:17:50.123849: step: 88/466, loss: 0.02234254591166973 2023-01-24 06:17:50.931826: step: 90/466, loss: 0.007133010309189558 2023-01-24 06:17:51.697680: step: 92/466, loss: 0.0002999457356054336 2023-01-24 06:17:52.462894: step: 94/466, loss: 0.04126357287168503 2023-01-24 06:17:53.223530: step: 96/466, loss: 0.0036950150970369577 2023-01-24 06:17:53.957310: step: 98/466, loss: 0.002125050174072385 2023-01-24 06:17:54.696113: step: 100/466, loss: 0.010867869481444359 2023-01-24 06:17:55.440862: step: 102/466, loss: 0.023277217522263527 2023-01-24 06:17:56.176868: step: 104/466, loss: 0.0017478482332080603 2023-01-24 06:17:56.935140: step: 106/466, loss: 0.019343003630638123 2023-01-24 06:17:57.752651: step: 108/466, loss: 0.0020293628331273794 2023-01-24 06:17:58.489677: step: 110/466, loss: 0.0034124022349715233 2023-01-24 06:17:59.228479: step: 112/466, loss: 0.004740450065582991 2023-01-24 06:17:59.935954: step: 114/466, loss: 0.012356654740869999 2023-01-24 06:18:00.661055: step: 116/466, loss: 0.007313753943890333 2023-01-24 06:18:01.446679: step: 118/466, loss: 0.04777335375547409 2023-01-24 06:18:02.233151: step: 120/466, loss: 0.05528281256556511 2023-01-24 06:18:02.939256: step: 122/466, loss: 0.006278525106608868 2023-01-24 06:18:03.648266: step: 124/466, loss: 0.0035879218485206366 2023-01-24 06:18:04.344601: step: 126/466, loss: 0.006362420506775379 2023-01-24 06:18:05.122557: step: 128/466, loss: 0.0008267164812423289 2023-01-24 06:18:05.900278: step: 130/466, loss: 0.006699729710817337 2023-01-24 06:18:06.758838: step: 132/466, loss: 0.008580324240028858 2023-01-24 06:18:07.437411: step: 134/466, loss: 0.0008092653006315231 2023-01-24 06:18:08.138581: step: 136/466, loss: 0.00058770488249138 2023-01-24 06:18:08.976055: step: 138/466, loss: 0.06414636969566345 2023-01-24 06:18:09.817397: step: 140/466, loss: 1.0099543333053589 2023-01-24 06:18:10.507969: step: 142/466, loss: 0.0010454514995217323 2023-01-24 06:18:11.230817: step: 144/466, loss: 0.0009470380609855056 2023-01-24 06:18:12.033794: step: 146/466, loss: 0.006444782484322786 2023-01-24 06:18:12.791999: step: 148/466, loss: 0.12940701842308044 2023-01-24 06:18:13.550319: step: 150/466, loss: 0.02667311765253544 2023-01-24 06:18:14.286652: step: 152/466, loss: 0.00028765155002474785 2023-01-24 06:18:15.010468: step: 154/466, loss: 0.0047229896299541 2023-01-24 06:18:15.865076: step: 156/466, loss: 0.002519721630960703 2023-01-24 06:18:16.546458: step: 158/466, loss: 0.008568288758397102 2023-01-24 06:18:17.239386: step: 160/466, loss: 0.0011734687723219395 2023-01-24 06:18:18.055775: step: 162/466, loss: 0.017831817269325256 2023-01-24 06:18:18.766497: step: 164/466, loss: 0.004616545047610998 2023-01-24 06:18:19.523060: step: 166/466, loss: 1.8955062627792358 2023-01-24 06:18:20.296920: step: 168/466, loss: 0.10970849543809891 2023-01-24 06:18:20.981072: step: 170/466, loss: 0.004755795001983643 2023-01-24 06:18:21.794381: step: 172/466, loss: 0.0019417761359363794 2023-01-24 06:18:22.477585: step: 174/466, loss: 0.0001321727322647348 2023-01-24 06:18:23.239658: step: 176/466, loss: 0.263048380613327 2023-01-24 06:18:23.958398: step: 178/466, loss: 0.0063339099287986755 2023-01-24 06:18:24.710163: step: 180/466, loss: 0.041270166635513306 2023-01-24 06:18:25.571638: step: 182/466, loss: 1.3670841455459595 2023-01-24 06:18:26.427580: step: 184/466, loss: 0.0091679897159338 2023-01-24 06:18:27.164617: step: 186/466, loss: 0.0008498663082718849 2023-01-24 06:18:27.940748: step: 188/466, loss: 0.0013691213680431247 2023-01-24 06:18:28.707425: step: 190/466, loss: 0.029640894383192062 2023-01-24 06:18:29.454663: step: 192/466, loss: 0.0028292567003518343 2023-01-24 06:18:30.248088: step: 194/466, loss: 0.016314124688506126 2023-01-24 06:18:30.991831: step: 196/466, loss: 0.000857730396091938 2023-01-24 06:18:31.747339: step: 198/466, loss: 0.010542549192905426 2023-01-24 06:18:32.488118: step: 200/466, loss: 0.019882574677467346 2023-01-24 06:18:33.212361: step: 202/466, loss: 0.003643724601715803 2023-01-24 06:18:33.966125: step: 204/466, loss: 0.0018131457036361098 2023-01-24 06:18:34.766198: step: 206/466, loss: 0.025027979165315628 2023-01-24 06:18:35.533832: step: 208/466, loss: 0.0016643248964101076 2023-01-24 06:18:36.381971: step: 210/466, loss: 0.006344994530081749 2023-01-24 06:18:37.145444: step: 212/466, loss: 0.0005301121855154634 2023-01-24 06:18:37.947319: step: 214/466, loss: 0.04560599476099014 2023-01-24 06:18:38.754741: step: 216/466, loss: 0.0034986361861228943 2023-01-24 06:18:39.500323: step: 218/466, loss: 0.3330421447753906 2023-01-24 06:18:40.230370: step: 220/466, loss: 0.012391243129968643 2023-01-24 06:18:40.972456: step: 222/466, loss: 0.0019246727460995317 2023-01-24 06:18:41.759680: step: 224/466, loss: 0.02810569852590561 2023-01-24 06:18:42.467228: step: 226/466, loss: 4.9588707042858005e-05 2023-01-24 06:18:43.332665: step: 228/466, loss: 0.014155753888189793 2023-01-24 06:18:44.042128: step: 230/466, loss: 0.014453071169555187 2023-01-24 06:18:44.765212: step: 232/466, loss: 0.0010232643689960241 2023-01-24 06:18:45.569564: step: 234/466, loss: 0.06366181373596191 2023-01-24 06:18:46.372261: step: 236/466, loss: 0.0008305907249450684 2023-01-24 06:18:47.083323: step: 238/466, loss: 0.012591608799993992 2023-01-24 06:18:47.787278: step: 240/466, loss: 0.0005710592959076166 2023-01-24 06:18:48.493161: step: 242/466, loss: 0.0009176023304462433 2023-01-24 06:18:49.236076: step: 244/466, loss: 0.0002121399447787553 2023-01-24 06:18:49.981458: step: 246/466, loss: 0.0004354036063887179 2023-01-24 06:18:50.742755: step: 248/466, loss: 0.0007151217432692647 2023-01-24 06:18:51.579318: step: 250/466, loss: 0.0004262593574821949 2023-01-24 06:18:52.397560: step: 252/466, loss: 0.021437974646687508 2023-01-24 06:18:53.346819: step: 254/466, loss: 0.07851515710353851 2023-01-24 06:18:54.069769: step: 256/466, loss: 0.04753747954964638 2023-01-24 06:18:54.780232: step: 258/466, loss: 0.010555329732596874 2023-01-24 06:18:55.443277: step: 260/466, loss: 0.0038088206201791763 2023-01-24 06:18:56.379602: step: 262/466, loss: 0.03028637170791626 2023-01-24 06:18:57.165883: step: 264/466, loss: 0.007273413706570864 2023-01-24 06:18:57.985675: step: 266/466, loss: 0.017216956242918968 2023-01-24 06:18:58.734384: step: 268/466, loss: 0.048719123005867004 2023-01-24 06:18:59.498551: step: 270/466, loss: 0.0027608266100287437 2023-01-24 06:19:00.286147: step: 272/466, loss: 0.04781614616513252 2023-01-24 06:19:01.035292: step: 274/466, loss: 0.013051237910985947 2023-01-24 06:19:01.800953: step: 276/466, loss: 0.2669077515602112 2023-01-24 06:19:02.622367: step: 278/466, loss: 0.0018297962378710508 2023-01-24 06:19:03.406767: step: 280/466, loss: 0.011756602674722672 2023-01-24 06:19:04.149897: step: 282/466, loss: 0.014492223039269447 2023-01-24 06:19:04.951284: step: 284/466, loss: 0.009878966957330704 2023-01-24 06:19:05.705268: step: 286/466, loss: 0.06663220375776291 2023-01-24 06:19:06.454072: step: 288/466, loss: 0.007071053143590689 2023-01-24 06:19:07.172593: step: 290/466, loss: 0.031226148828864098 2023-01-24 06:19:07.907324: step: 292/466, loss: 0.023413024842739105 2023-01-24 06:19:08.647912: step: 294/466, loss: 0.09983746707439423 2023-01-24 06:19:09.321205: step: 296/466, loss: 0.08914503455162048 2023-01-24 06:19:10.051881: step: 298/466, loss: 0.001067915465682745 2023-01-24 06:19:10.819826: step: 300/466, loss: 0.025817418470978737 2023-01-24 06:19:11.543911: step: 302/466, loss: 0.014062085188925266 2023-01-24 06:19:12.330300: step: 304/466, loss: 0.0725163072347641 2023-01-24 06:19:13.117010: step: 306/466, loss: 0.029730848968029022 2023-01-24 06:19:13.852799: step: 308/466, loss: 0.025710735470056534 2023-01-24 06:19:14.574696: step: 310/466, loss: 0.007411581929773092 2023-01-24 06:19:15.296595: step: 312/466, loss: 0.0043141599744558334 2023-01-24 06:19:16.085549: step: 314/466, loss: 2.60351824760437 2023-01-24 06:19:16.826583: step: 316/466, loss: 0.010887504555284977 2023-01-24 06:19:17.502471: step: 318/466, loss: 0.07423939555883408 2023-01-24 06:19:18.256779: step: 320/466, loss: 0.0009458021959289908 2023-01-24 06:19:19.061484: step: 322/466, loss: 0.017295386642217636 2023-01-24 06:19:19.779734: step: 324/466, loss: 0.0011327442480251193 2023-01-24 06:19:20.614427: step: 326/466, loss: 0.01153257954865694 2023-01-24 06:19:21.268197: step: 328/466, loss: 0.0021202610805630684 2023-01-24 06:19:22.012709: step: 330/466, loss: 0.0021633352153003216 2023-01-24 06:19:22.833443: step: 332/466, loss: 0.0060116020031273365 2023-01-24 06:19:23.602676: step: 334/466, loss: 0.0008030192693695426 2023-01-24 06:19:24.339330: step: 336/466, loss: 0.001924928743392229 2023-01-24 06:19:25.049497: step: 338/466, loss: 0.0022883862257003784 2023-01-24 06:19:25.833157: step: 340/466, loss: 0.011111796833574772 2023-01-24 06:19:26.636192: step: 342/466, loss: 0.01955723576247692 2023-01-24 06:19:27.417172: step: 344/466, loss: 0.013217715546488762 2023-01-24 06:19:28.186291: step: 346/466, loss: 0.0002433011686662212 2023-01-24 06:19:28.894266: step: 348/466, loss: 0.011723429895937443 2023-01-24 06:19:29.692854: step: 350/466, loss: 0.194093719124794 2023-01-24 06:19:30.424482: step: 352/466, loss: 0.008440490812063217 2023-01-24 06:19:31.165506: step: 354/466, loss: 0.0025795488618314266 2023-01-24 06:19:31.889178: step: 356/466, loss: 0.0007878990145400167 2023-01-24 06:19:32.648174: step: 358/466, loss: 0.004591137170791626 2023-01-24 06:19:33.407141: step: 360/466, loss: 0.012769699096679688 2023-01-24 06:19:34.100868: step: 362/466, loss: 0.005436086095869541 2023-01-24 06:19:34.949419: step: 364/466, loss: 0.0008671208051964641 2023-01-24 06:19:35.661156: step: 366/466, loss: 0.005928007420152426 2023-01-24 06:19:36.381190: step: 368/466, loss: 0.002474587643519044 2023-01-24 06:19:37.097140: step: 370/466, loss: 0.050172630697488785 2023-01-24 06:19:37.806231: step: 372/466, loss: 0.008266598917543888 2023-01-24 06:19:38.612097: step: 374/466, loss: 0.5412901639938354 2023-01-24 06:19:39.419502: step: 376/466, loss: 0.03616834059357643 2023-01-24 06:19:40.151491: step: 378/466, loss: 0.009188574738800526 2023-01-24 06:19:40.855538: step: 380/466, loss: 0.009282363578677177 2023-01-24 06:19:41.592731: step: 382/466, loss: 0.0029915831983089447 2023-01-24 06:19:42.314287: step: 384/466, loss: 0.002243778435513377 2023-01-24 06:19:43.041263: step: 386/466, loss: 0.00568827148526907 2023-01-24 06:19:43.887928: step: 388/466, loss: 0.013106818310916424 2023-01-24 06:19:44.653519: step: 390/466, loss: 0.0006954700802452862 2023-01-24 06:19:45.376059: step: 392/466, loss: 0.03494095802307129 2023-01-24 06:19:46.130857: step: 394/466, loss: 0.001071300357580185 2023-01-24 06:19:46.820226: step: 396/466, loss: 0.003233947092667222 2023-01-24 06:19:47.656801: step: 398/466, loss: 0.0007663085707463324 2023-01-24 06:19:48.408240: step: 400/466, loss: 0.002137871226295829 2023-01-24 06:19:49.109845: step: 402/466, loss: 0.005546262953430414 2023-01-24 06:19:49.842519: step: 404/466, loss: 0.02801138162612915 2023-01-24 06:19:50.613118: step: 406/466, loss: 0.007742196787148714 2023-01-24 06:19:51.273232: step: 408/466, loss: 0.007167985662817955 2023-01-24 06:19:52.051579: step: 410/466, loss: 0.03604840487241745 2023-01-24 06:19:52.879623: step: 412/466, loss: 0.05173136666417122 2023-01-24 06:19:53.611803: step: 414/466, loss: 0.0077619957737624645 2023-01-24 06:19:54.338558: step: 416/466, loss: 0.10107298940420151 2023-01-24 06:19:55.140920: step: 418/466, loss: 0.02580912411212921 2023-01-24 06:19:55.990124: step: 420/466, loss: 0.002658374607563019 2023-01-24 06:19:56.963777: step: 422/466, loss: 0.0006400442798621953 2023-01-24 06:19:57.753730: step: 424/466, loss: 0.0002528093755245209 2023-01-24 06:19:58.512070: step: 426/466, loss: 0.002691936446353793 2023-01-24 06:19:59.280673: step: 428/466, loss: 0.03798580914735794 2023-01-24 06:20:00.066303: step: 430/466, loss: 0.023139648139476776 2023-01-24 06:20:00.836604: step: 432/466, loss: 0.0007980384398251772 2023-01-24 06:20:01.617393: step: 434/466, loss: 0.03384041413664818 2023-01-24 06:20:02.394991: step: 436/466, loss: 0.00030431634513661265 2023-01-24 06:20:03.202578: step: 438/466, loss: 0.031009627506136894 2023-01-24 06:20:03.939639: step: 440/466, loss: 0.0068207900039851665 2023-01-24 06:20:04.763825: step: 442/466, loss: 0.07781907171010971 2023-01-24 06:20:05.502700: step: 444/466, loss: 0.04145520552992821 2023-01-24 06:20:06.257351: step: 446/466, loss: 0.028595779091119766 2023-01-24 06:20:07.132293: step: 448/466, loss: 0.01590229943394661 2023-01-24 06:20:08.073282: step: 450/466, loss: 0.0007838807068765163 2023-01-24 06:20:08.815888: step: 452/466, loss: 0.002579035935923457 2023-01-24 06:20:09.558238: step: 454/466, loss: 0.1090591549873352 2023-01-24 06:20:10.323041: step: 456/466, loss: 0.04496876895427704 2023-01-24 06:20:11.102855: step: 458/466, loss: 0.008547370322048664 2023-01-24 06:20:11.916772: step: 460/466, loss: 0.058345843106508255 2023-01-24 06:20:12.629327: step: 462/466, loss: 9.482367750024423e-05 2023-01-24 06:20:13.414834: step: 464/466, loss: 0.029485873878002167 2023-01-24 06:20:14.145133: step: 466/466, loss: 0.006282643880695105 2023-01-24 06:20:14.928822: step: 468/466, loss: 0.060245078057050705 2023-01-24 06:20:15.718041: step: 470/466, loss: 0.004400915931910276 2023-01-24 06:20:16.411640: step: 472/466, loss: 0.05323219299316406 2023-01-24 06:20:17.134664: step: 474/466, loss: 0.004026977811008692 2023-01-24 06:20:17.859906: step: 476/466, loss: 0.008997835218906403 2023-01-24 06:20:18.587081: step: 478/466, loss: 0.0028516759630292654 2023-01-24 06:20:19.411714: step: 480/466, loss: 0.059091534465551376 2023-01-24 06:20:20.189538: step: 482/466, loss: 0.3593703508377075 2023-01-24 06:20:20.902383: step: 484/466, loss: 0.00869487039744854 2023-01-24 06:20:21.629105: step: 486/466, loss: 0.05974646285176277 2023-01-24 06:20:22.386563: step: 488/466, loss: 0.00953682791441679 2023-01-24 06:20:23.184594: step: 490/466, loss: 0.05419261381030083 2023-01-24 06:20:23.889670: step: 492/466, loss: 0.023732004687190056 2023-01-24 06:20:24.569290: step: 494/466, loss: 0.003286184510216117 2023-01-24 06:20:25.494217: step: 496/466, loss: 0.14133943617343903 2023-01-24 06:20:26.259358: step: 498/466, loss: 0.026308268308639526 2023-01-24 06:20:27.149700: step: 500/466, loss: 0.0018950769444927573 2023-01-24 06:20:27.906305: step: 502/466, loss: 0.0204459298402071 2023-01-24 06:20:28.682967: step: 504/466, loss: 0.2291094958782196 2023-01-24 06:20:29.523429: step: 506/466, loss: 0.025624655187129974 2023-01-24 06:20:30.272188: step: 508/466, loss: 7.082007505232468e-05 2023-01-24 06:20:31.032180: step: 510/466, loss: 0.003036458045244217 2023-01-24 06:20:31.849135: step: 512/466, loss: 0.015858786180615425 2023-01-24 06:20:32.662743: step: 514/466, loss: 0.009644883684813976 2023-01-24 06:20:33.388844: step: 516/466, loss: 0.071327805519104 2023-01-24 06:20:34.249228: step: 518/466, loss: 0.009094729088246822 2023-01-24 06:20:35.000540: step: 520/466, loss: 0.07092177867889404 2023-01-24 06:20:35.847553: step: 522/466, loss: 0.018077434971928596 2023-01-24 06:20:36.628705: step: 524/466, loss: 0.00019851350225508213 2023-01-24 06:20:37.407853: step: 526/466, loss: 0.020662939175963402 2023-01-24 06:20:38.149991: step: 528/466, loss: 0.0026878654025495052 2023-01-24 06:20:38.930598: step: 530/466, loss: 0.0020404020324349403 2023-01-24 06:20:39.761185: step: 532/466, loss: 0.0069939009845256805 2023-01-24 06:20:40.543276: step: 534/466, loss: 0.0030733118765056133 2023-01-24 06:20:41.403639: step: 536/466, loss: 0.014625020325183868 2023-01-24 06:20:42.148774: step: 538/466, loss: 0.0217538233846426 2023-01-24 06:20:42.842605: step: 540/466, loss: 0.03101446107029915 2023-01-24 06:20:43.583798: step: 542/466, loss: 0.00269150803796947 2023-01-24 06:20:44.366633: step: 544/466, loss: 0.005434195511043072 2023-01-24 06:20:45.098197: step: 546/466, loss: 0.42131295800209045 2023-01-24 06:20:45.819629: step: 548/466, loss: 0.005677481181919575 2023-01-24 06:20:46.527134: step: 550/466, loss: 0.017299525439739227 2023-01-24 06:20:47.286007: step: 552/466, loss: 0.0023705079220235348 2023-01-24 06:20:48.019810: step: 554/466, loss: 0.0041278693825006485 2023-01-24 06:20:48.792521: step: 556/466, loss: 0.005961044691503048 2023-01-24 06:20:49.464018: step: 558/466, loss: 0.01285065058618784 2023-01-24 06:20:50.175577: step: 560/466, loss: 0.003253075759857893 2023-01-24 06:20:50.938429: step: 562/466, loss: 0.03593744710087776 2023-01-24 06:20:51.669640: step: 564/466, loss: 0.000874812831170857 2023-01-24 06:20:52.407515: step: 566/466, loss: 0.00014044287672732025 2023-01-24 06:20:53.125463: step: 568/466, loss: 0.0013199172681197524 2023-01-24 06:20:53.886110: step: 570/466, loss: 0.006412571761757135 2023-01-24 06:20:54.695503: step: 572/466, loss: 0.03231941536068916 2023-01-24 06:20:55.453645: step: 574/466, loss: 0.003953961189836264 2023-01-24 06:20:56.189835: step: 576/466, loss: 0.00022906172671355307 2023-01-24 06:20:56.968141: step: 578/466, loss: 0.010939808562397957 2023-01-24 06:20:57.695439: step: 580/466, loss: 0.009735723957419395 2023-01-24 06:20:58.413926: step: 582/466, loss: 0.003221513470634818 2023-01-24 06:20:59.152289: step: 584/466, loss: 0.019574739038944244 2023-01-24 06:20:59.981135: step: 586/466, loss: 0.01354447565972805 2023-01-24 06:21:00.774638: step: 588/466, loss: 0.07380802184343338 2023-01-24 06:21:01.519492: step: 590/466, loss: 0.00236341031268239 2023-01-24 06:21:02.257366: step: 592/466, loss: 0.004276533145457506 2023-01-24 06:21:02.981302: step: 594/466, loss: 0.0004312160308472812 2023-01-24 06:21:03.638252: step: 596/466, loss: 0.0008968772599473596 2023-01-24 06:21:04.328657: step: 598/466, loss: 0.03833211213350296 2023-01-24 06:21:05.125199: step: 600/466, loss: 0.012998173013329506 2023-01-24 06:21:05.936477: step: 602/466, loss: 0.03347988799214363 2023-01-24 06:21:06.763540: step: 604/466, loss: 0.014307713136076927 2023-01-24 06:21:07.476772: step: 606/466, loss: 0.0022048174869269133 2023-01-24 06:21:08.113634: step: 608/466, loss: 0.00022488315880764276 2023-01-24 06:21:08.848961: step: 610/466, loss: 0.0007502142107114196 2023-01-24 06:21:09.549107: step: 612/466, loss: 0.4031977951526642 2023-01-24 06:21:10.225590: step: 614/466, loss: 0.006559982430189848 2023-01-24 06:21:10.907833: step: 616/466, loss: 0.0009073030669242144 2023-01-24 06:21:11.649124: step: 618/466, loss: 0.03044399805366993 2023-01-24 06:21:12.424278: step: 620/466, loss: 0.04347848892211914 2023-01-24 06:21:13.265691: step: 622/466, loss: 0.015411065891385078 2023-01-24 06:21:13.991588: step: 624/466, loss: 0.00570999551564455 2023-01-24 06:21:14.695103: step: 626/466, loss: 0.00024472447694279253 2023-01-24 06:21:15.432289: step: 628/466, loss: 0.03959134966135025 2023-01-24 06:21:16.243860: step: 630/466, loss: 0.04707716777920723 2023-01-24 06:21:17.028083: step: 632/466, loss: 0.028172489255666733 2023-01-24 06:21:17.725402: step: 634/466, loss: 0.004007617477327585 2023-01-24 06:21:18.460325: step: 636/466, loss: 0.0007090616854839027 2023-01-24 06:21:19.228336: step: 638/466, loss: 0.05749453231692314 2023-01-24 06:21:19.949439: step: 640/466, loss: 0.005273169372230768 2023-01-24 06:21:20.733546: step: 642/466, loss: 0.005346581339836121 2023-01-24 06:21:21.500239: step: 644/466, loss: 0.024699220433831215 2023-01-24 06:21:22.230060: step: 646/466, loss: 0.00016093281737994403 2023-01-24 06:21:22.891617: step: 648/466, loss: 0.06450103223323822 2023-01-24 06:21:23.601777: step: 650/466, loss: 0.030389755964279175 2023-01-24 06:21:24.440639: step: 652/466, loss: 0.023881230503320694 2023-01-24 06:21:25.144287: step: 654/466, loss: 0.004552459344267845 2023-01-24 06:21:25.822938: step: 656/466, loss: 0.0003522764891386032 2023-01-24 06:21:26.611074: step: 658/466, loss: 0.06739848852157593 2023-01-24 06:21:27.341596: step: 660/466, loss: 0.0010958056664094329 2023-01-24 06:21:28.164199: step: 662/466, loss: 0.02298307977616787 2023-01-24 06:21:28.960763: step: 664/466, loss: 0.0022193677723407745 2023-01-24 06:21:29.726147: step: 666/466, loss: 0.024256214499473572 2023-01-24 06:21:30.472434: step: 668/466, loss: 0.01720893569290638 2023-01-24 06:21:31.194048: step: 670/466, loss: 0.0009447914198972285 2023-01-24 06:21:31.942250: step: 672/466, loss: 0.017255809158086777 2023-01-24 06:21:32.661398: step: 674/466, loss: 0.011553768999874592 2023-01-24 06:21:33.414377: step: 676/466, loss: 0.000498365901876241 2023-01-24 06:21:34.265384: step: 678/466, loss: 0.005982627626508474 2023-01-24 06:21:35.121979: step: 680/466, loss: 0.018326004967093468 2023-01-24 06:21:35.839583: step: 682/466, loss: 0.002403522375971079 2023-01-24 06:21:36.677279: step: 684/466, loss: 0.015257167629897594 2023-01-24 06:21:37.444567: step: 686/466, loss: 0.0046143620274960995 2023-01-24 06:21:38.252341: step: 688/466, loss: 0.021672574803233147 2023-01-24 06:21:38.945012: step: 690/466, loss: 0.03176340088248253 2023-01-24 06:21:39.769634: step: 692/466, loss: 0.010413500480353832 2023-01-24 06:21:40.554155: step: 694/466, loss: 0.004266362637281418 2023-01-24 06:21:41.240932: step: 696/466, loss: 0.003301274497061968 2023-01-24 06:21:41.987333: step: 698/466, loss: 0.015240832231938839 2023-01-24 06:21:42.742366: step: 700/466, loss: 0.015804987400770187 2023-01-24 06:21:43.520453: step: 702/466, loss: 0.012961991131305695 2023-01-24 06:21:44.250635: step: 704/466, loss: 0.0013605301501229405 2023-01-24 06:21:45.014591: step: 706/466, loss: 0.13008445501327515 2023-01-24 06:21:45.739208: step: 708/466, loss: 0.005032180342823267 2023-01-24 06:21:46.410808: step: 710/466, loss: 0.0004104235558770597 2023-01-24 06:21:47.149863: step: 712/466, loss: 0.009015659801661968 2023-01-24 06:21:47.868320: step: 714/466, loss: 0.009201515465974808 2023-01-24 06:21:48.655994: step: 716/466, loss: 0.0029313755221664906 2023-01-24 06:21:49.469346: step: 718/466, loss: 0.0174380112439394 2023-01-24 06:21:50.189496: step: 720/466, loss: 0.005240896716713905 2023-01-24 06:21:50.988157: step: 722/466, loss: 0.011967699974775314 2023-01-24 06:21:51.683720: step: 724/466, loss: 0.0059184362180531025 2023-01-24 06:21:52.447313: step: 726/466, loss: 0.024834871292114258 2023-01-24 06:21:53.179949: step: 728/466, loss: 0.031435705721378326 2023-01-24 06:21:53.898887: step: 730/466, loss: 0.019492171704769135 2023-01-24 06:21:54.699140: step: 732/466, loss: 0.006483915727585554 2023-01-24 06:21:55.446833: step: 734/466, loss: 0.04379876330494881 2023-01-24 06:21:56.423617: step: 736/466, loss: 0.06285839527845383 2023-01-24 06:21:57.234965: step: 738/466, loss: 0.0008022096590138972 2023-01-24 06:21:57.963317: step: 740/466, loss: 0.0004810819518752396 2023-01-24 06:21:58.644170: step: 742/466, loss: 0.0028347100596874952 2023-01-24 06:21:59.318717: step: 744/466, loss: 0.01241873949766159 2023-01-24 06:22:00.080011: step: 746/466, loss: 0.02304881066083908 2023-01-24 06:22:00.846395: step: 748/466, loss: 0.013269875198602676 2023-01-24 06:22:01.590481: step: 750/466, loss: 0.06503497809171677 2023-01-24 06:22:02.357513: step: 752/466, loss: 0.007842977531254292 2023-01-24 06:22:03.143895: step: 754/466, loss: 0.004290265962481499 2023-01-24 06:22:03.895968: step: 756/466, loss: 0.003009806852787733 2023-01-24 06:22:04.721262: step: 758/466, loss: 0.030276000499725342 2023-01-24 06:22:05.503622: step: 760/466, loss: 0.012533142231404781 2023-01-24 06:22:06.330309: step: 762/466, loss: 0.0018880884163081646 2023-01-24 06:22:07.118571: step: 764/466, loss: 0.7592372894287109 2023-01-24 06:22:07.971164: step: 766/466, loss: 0.01072653941810131 2023-01-24 06:22:08.728795: step: 768/466, loss: 0.0017617539269849658 2023-01-24 06:22:09.413269: step: 770/466, loss: 0.003082792041823268 2023-01-24 06:22:10.164476: step: 772/466, loss: 0.09356488287448883 2023-01-24 06:22:10.867199: step: 774/466, loss: 0.01210882980376482 2023-01-24 06:22:11.665878: step: 776/466, loss: 0.10668861865997314 2023-01-24 06:22:12.397868: step: 778/466, loss: 0.009636450558900833 2023-01-24 06:22:13.153578: step: 780/466, loss: 0.020347947254776955 2023-01-24 06:22:13.788348: step: 782/466, loss: 0.02077638916671276 2023-01-24 06:22:14.497699: step: 784/466, loss: 0.00040024143527261913 2023-01-24 06:22:15.281948: step: 786/466, loss: 0.006125684827566147 2023-01-24 06:22:16.052276: step: 788/466, loss: 0.017130881547927856 2023-01-24 06:22:16.786976: step: 790/466, loss: 0.005997342057526112 2023-01-24 06:22:17.514481: step: 792/466, loss: 0.030117981135845184 2023-01-24 06:22:18.200017: step: 794/466, loss: 0.0025042355991899967 2023-01-24 06:22:18.953487: step: 796/466, loss: 0.021588584408164024 2023-01-24 06:22:19.684808: step: 798/466, loss: 0.004534664563834667 2023-01-24 06:22:20.454143: step: 800/466, loss: 0.013607624918222427 2023-01-24 06:22:21.212368: step: 802/466, loss: 0.006609110161662102 2023-01-24 06:22:21.950986: step: 804/466, loss: 0.00018314311455469579 2023-01-24 06:22:22.802959: step: 806/466, loss: 0.00016394034901168197 2023-01-24 06:22:23.613262: step: 808/466, loss: 0.016977539286017418 2023-01-24 06:22:24.222913: step: 810/466, loss: 0.0015409706393256783 2023-01-24 06:22:24.957571: step: 812/466, loss: 0.0012982721673324704 2023-01-24 06:22:25.734930: step: 814/466, loss: 0.04246694967150688 2023-01-24 06:22:26.479513: step: 816/466, loss: 0.0236224215477705 2023-01-24 06:22:27.288050: step: 818/466, loss: 0.0008643745095469058 2023-01-24 06:22:27.998161: step: 820/466, loss: 0.0011652348330244422 2023-01-24 06:22:28.759737: step: 822/466, loss: 0.004280023276805878 2023-01-24 06:22:29.551128: step: 824/466, loss: 0.8916021585464478 2023-01-24 06:22:30.305119: step: 826/466, loss: 0.017976932227611542 2023-01-24 06:22:31.113629: step: 828/466, loss: 0.01639091596007347 2023-01-24 06:22:31.994841: step: 830/466, loss: 0.014316645450890064 2023-01-24 06:22:32.764600: step: 832/466, loss: 0.007979301735758781 2023-01-24 06:22:33.512533: step: 834/466, loss: 0.027367806062102318 2023-01-24 06:22:34.282381: step: 836/466, loss: 0.0001122185931308195 2023-01-24 06:22:35.081970: step: 838/466, loss: 2.0951132682967e-05 2023-01-24 06:22:35.861632: step: 840/466, loss: 0.0003048728685826063 2023-01-24 06:22:36.567672: step: 842/466, loss: 0.003254613606259227 2023-01-24 06:22:37.292966: step: 844/466, loss: 0.01293003000319004 2023-01-24 06:22:38.131004: step: 846/466, loss: 0.013491634279489517 2023-01-24 06:22:39.000498: step: 848/466, loss: 0.01315171830356121 2023-01-24 06:22:39.804881: step: 850/466, loss: 0.0018613581778481603 2023-01-24 06:22:40.528904: step: 852/466, loss: 0.0012655846076086164 2023-01-24 06:22:41.271564: step: 854/466, loss: 0.01288510486483574 2023-01-24 06:22:41.931892: step: 856/466, loss: 0.003832991700619459 2023-01-24 06:22:42.666204: step: 858/466, loss: 0.0038616168312728405 2023-01-24 06:22:43.418086: step: 860/466, loss: 0.00018942559836432338 2023-01-24 06:22:44.205265: step: 862/466, loss: 0.008290973491966724 2023-01-24 06:22:44.925466: step: 864/466, loss: 0.01464066468179226 2023-01-24 06:22:45.691853: step: 866/466, loss: 0.007042956072837114 2023-01-24 06:22:46.489779: step: 868/466, loss: 0.02514898031949997 2023-01-24 06:22:47.158708: step: 870/466, loss: 0.0013938520569354296 2023-01-24 06:22:47.886912: step: 872/466, loss: 0.00681871734559536 2023-01-24 06:22:48.612693: step: 874/466, loss: 0.0021813763305544853 2023-01-24 06:22:49.354168: step: 876/466, loss: 0.009251315146684647 2023-01-24 06:22:50.131958: step: 878/466, loss: 0.011302115395665169 2023-01-24 06:22:50.865321: step: 880/466, loss: 0.0006099729798734188 2023-01-24 06:22:51.645042: step: 882/466, loss: 0.0019406548235565424 2023-01-24 06:22:52.395525: step: 884/466, loss: 0.04168505594134331 2023-01-24 06:22:53.094237: step: 886/466, loss: 0.0035822519566863775 2023-01-24 06:22:53.875886: step: 888/466, loss: 0.01661309041082859 2023-01-24 06:22:54.714074: step: 890/466, loss: 0.002855786122381687 2023-01-24 06:22:55.490517: step: 892/466, loss: 0.7210519313812256 2023-01-24 06:22:56.350883: step: 894/466, loss: 0.09248753637075424 2023-01-24 06:22:57.054063: step: 896/466, loss: 0.003342061536386609 2023-01-24 06:22:57.812150: step: 898/466, loss: 0.004297652281820774 2023-01-24 06:22:58.547345: step: 900/466, loss: 0.030501289293169975 2023-01-24 06:22:59.288627: step: 902/466, loss: 0.00955427996814251 2023-01-24 06:23:00.062734: step: 904/466, loss: 0.39566174149513245 2023-01-24 06:23:00.827192: step: 906/466, loss: 0.002507054479792714 2023-01-24 06:23:01.505778: step: 908/466, loss: 5.768853225163184e-05 2023-01-24 06:23:02.367055: step: 910/466, loss: 0.01109325885772705 2023-01-24 06:23:03.120087: step: 912/466, loss: 0.004711176734417677 2023-01-24 06:23:03.854003: step: 914/466, loss: 0.1867443025112152 2023-01-24 06:23:04.615788: step: 916/466, loss: 0.005634450353682041 2023-01-24 06:23:05.304852: step: 918/466, loss: 0.003459567204117775 2023-01-24 06:23:06.002841: step: 920/466, loss: 0.0048894197680056095 2023-01-24 06:23:06.717885: step: 922/466, loss: 0.010429268702864647 2023-01-24 06:23:07.449755: step: 924/466, loss: 0.009192475117743015 2023-01-24 06:23:08.252754: step: 926/466, loss: 0.018873605877161026 2023-01-24 06:23:08.995365: step: 928/466, loss: 0.004493965767323971 2023-01-24 06:23:09.721083: step: 930/466, loss: 0.00835600309073925 2023-01-24 06:23:10.491903: step: 932/466, loss: 0.033090561628341675 ================================================== Loss: 0.047 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3561785714285714, 'r': 0.3210338167525074, 'f1': 0.33769425434844597}, 'combined': 0.24882734530938122, 'epoch': 33} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37857267048883675, 'r': 0.28679747764305813, 'f1': 0.32635575042141096}, 'combined': 0.20058938806389162, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3278678220678728, 'r': 0.3247571216687469, 'f1': 0.32630505837832147}, 'combined': 0.2404353061735, 'epoch': 33} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3643399972548169, 'r': 0.2917874436889226, 'f1': 0.3240524014045247}, 'combined': 0.19917367110717127, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648998973305955, 'r': 0.33720351043643265, 'f1': 0.3505054240631164}, 'combined': 0.2582671545728226, 'epoch': 33} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38345301846271085, 'r': 0.28991171119066544, 'f1': 0.3301851607255264}, 'combined': 0.20393789338929577, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.325, 'f1': 0.3729508196721312}, 'combined': 0.24863387978142076, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2109375, 'r': 0.29347826086956524, 'f1': 0.24545454545454548}, 'combined': 0.12272727272727274, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 33} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3561785714285714, 'r': 0.3210338167525074, 'f1': 0.33769425434844597}, 'combined': 0.24882734530938122, 'epoch': 33} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37857267048883675, 'r': 0.28679747764305813, 'f1': 0.32635575042141096}, 'combined': 0.20058938806389162, 'epoch': 33} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.325, 'f1': 0.3729508196721312}, 'combined': 0.24863387978142076, 'epoch': 33} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3600931677018634, 'r': 0.3300284629981025, 'f1': 0.34440594059405943}, 'combined': 0.25377279833246486, 'epoch': 29} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3732928220606822, 'r': 0.2841675462053636, 'f1': 0.3226892764375249}, 'combined': 0.19930808250553012, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 29} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:26:00.330624: step: 2/466, loss: 0.012847150675952435 2023-01-24 06:26:01.078332: step: 4/466, loss: 0.009129938669502735 2023-01-24 06:26:01.923842: step: 6/466, loss: 0.3339959383010864 2023-01-24 06:26:02.689066: step: 8/466, loss: 0.009937900118529797 2023-01-24 06:26:03.421834: step: 10/466, loss: 0.0019312704680487514 2023-01-24 06:26:04.195034: step: 12/466, loss: 0.011718211695551872 2023-01-24 06:26:05.009980: step: 14/466, loss: 0.001805720617994666 2023-01-24 06:26:05.716584: step: 16/466, loss: 0.008648179471492767 2023-01-24 06:26:06.556475: step: 18/466, loss: 0.0250471830368042 2023-01-24 06:26:07.248704: step: 20/466, loss: 0.0016264470759779215 2023-01-24 06:26:08.032539: step: 22/466, loss: 0.004471521824598312 2023-01-24 06:26:08.838229: step: 24/466, loss: 0.2154838740825653 2023-01-24 06:26:09.599052: step: 26/466, loss: 0.15590013563632965 2023-01-24 06:26:10.353791: step: 28/466, loss: 0.002068422269076109 2023-01-24 06:26:11.079704: step: 30/466, loss: 0.031578317284584045 2023-01-24 06:26:11.864032: step: 32/466, loss: 0.0032016118057072163 2023-01-24 06:26:12.605402: step: 34/466, loss: 0.0034452658146619797 2023-01-24 06:26:13.308725: step: 36/466, loss: 0.002187530044466257 2023-01-24 06:26:13.993868: step: 38/466, loss: 0.048533741384744644 2023-01-24 06:26:14.859925: step: 40/466, loss: 0.08503540605306625 2023-01-24 06:26:15.619672: step: 42/466, loss: 0.0007634704816155136 2023-01-24 06:26:16.395588: step: 44/466, loss: 7.228372123790905e-05 2023-01-24 06:26:17.242261: step: 46/466, loss: 0.07904239743947983 2023-01-24 06:26:18.020035: step: 48/466, loss: 0.011844335123896599 2023-01-24 06:26:18.707978: step: 50/466, loss: 0.02444412000477314 2023-01-24 06:26:19.520599: step: 52/466, loss: 0.0446506068110466 2023-01-24 06:26:20.329021: step: 54/466, loss: 0.0055224960669875145 2023-01-24 06:26:21.108177: step: 56/466, loss: 0.004008932039141655 2023-01-24 06:26:21.869602: step: 58/466, loss: 0.01605677790939808 2023-01-24 06:26:22.717048: step: 60/466, loss: 0.021131541579961777 2023-01-24 06:26:23.472018: step: 62/466, loss: 0.018017586320638657 2023-01-24 06:26:24.206198: step: 64/466, loss: 0.005739844869822264 2023-01-24 06:26:24.958881: step: 66/466, loss: 0.02099965140223503 2023-01-24 06:26:25.764136: step: 68/466, loss: 0.0035145084839314222 2023-01-24 06:26:26.493817: step: 70/466, loss: 0.04764877259731293 2023-01-24 06:26:27.256059: step: 72/466, loss: 0.004909931216388941 2023-01-24 06:26:28.019945: step: 74/466, loss: 2.8016456781188026e-05 2023-01-24 06:26:28.739207: step: 76/466, loss: 0.011395934037864208 2023-01-24 06:26:29.627906: step: 78/466, loss: 0.00350642460398376 2023-01-24 06:26:30.586179: step: 80/466, loss: 0.010868704877793789 2023-01-24 06:26:31.348624: step: 82/466, loss: 0.02996048517525196 2023-01-24 06:26:32.153651: step: 84/466, loss: 0.002716638846322894 2023-01-24 06:26:32.860581: step: 86/466, loss: 0.00235667172819376 2023-01-24 06:26:33.645402: step: 88/466, loss: 0.003214767901226878 2023-01-24 06:26:34.426034: step: 90/466, loss: 0.06307035684585571 2023-01-24 06:26:35.126554: step: 92/466, loss: 0.020926734432578087 2023-01-24 06:26:35.764029: step: 94/466, loss: 0.00556641211733222 2023-01-24 06:26:36.527778: step: 96/466, loss: 0.0022218553349375725 2023-01-24 06:26:37.194400: step: 98/466, loss: 0.0005310930428095162 2023-01-24 06:26:37.856562: step: 100/466, loss: 0.007593709509819746 2023-01-24 06:26:38.561518: step: 102/466, loss: 7.28668092051521e-05 2023-01-24 06:26:39.257388: step: 104/466, loss: 0.012354198843240738 2023-01-24 06:26:40.036109: step: 106/466, loss: 0.0002113124937750399 2023-01-24 06:26:40.784580: step: 108/466, loss: 0.0017441267846152186 2023-01-24 06:26:41.544042: step: 110/466, loss: 0.006075084675103426 2023-01-24 06:26:42.309100: step: 112/466, loss: 0.005706703290343285 2023-01-24 06:26:43.070325: step: 114/466, loss: 0.00020052462059538811 2023-01-24 06:26:43.811294: step: 116/466, loss: 0.009616516530513763 2023-01-24 06:26:44.593757: step: 118/466, loss: 0.002562432549893856 2023-01-24 06:26:45.351420: step: 120/466, loss: 0.0027562868781387806 2023-01-24 06:26:46.044156: step: 122/466, loss: 0.0033706985414028168 2023-01-24 06:26:46.806484: step: 124/466, loss: 0.0015648682601749897 2023-01-24 06:26:47.516415: step: 126/466, loss: 0.046386826783418655 2023-01-24 06:26:48.199994: step: 128/466, loss: 0.00020761314954143018 2023-01-24 06:26:48.959169: step: 130/466, loss: 0.015245960094034672 2023-01-24 06:26:49.751899: step: 132/466, loss: 0.023303933441638947 2023-01-24 06:26:50.485173: step: 134/466, loss: 0.00248493580147624 2023-01-24 06:26:51.300468: step: 136/466, loss: 0.007371986750513315 2023-01-24 06:26:52.061084: step: 138/466, loss: 0.0011075693182647228 2023-01-24 06:26:52.761474: step: 140/466, loss: 0.0002633397525642067 2023-01-24 06:26:53.560951: step: 142/466, loss: 0.015081997029483318 2023-01-24 06:26:54.286762: step: 144/466, loss: 0.0006969335372559726 2023-01-24 06:26:54.999362: step: 146/466, loss: 2.260213477711659e-05 2023-01-24 06:26:55.766248: step: 148/466, loss: 0.004124639555811882 2023-01-24 06:26:56.583952: step: 150/466, loss: 0.000673601112794131 2023-01-24 06:26:57.317535: step: 152/466, loss: 0.02401557005941868 2023-01-24 06:26:58.039264: step: 154/466, loss: 0.0060744090005755424 2023-01-24 06:26:58.817489: step: 156/466, loss: 0.0022823396138846874 2023-01-24 06:26:59.606365: step: 158/466, loss: 0.006407279521226883 2023-01-24 06:27:00.354788: step: 160/466, loss: 0.010667653754353523 2023-01-24 06:27:01.094950: step: 162/466, loss: 0.022322572767734528 2023-01-24 06:27:01.876845: step: 164/466, loss: 0.0059477174654603004 2023-01-24 06:27:02.582067: step: 166/466, loss: 7.60244220145978e-05 2023-01-24 06:27:03.285156: step: 168/466, loss: 0.00989892240613699 2023-01-24 06:27:04.070085: step: 170/466, loss: 0.06553342938423157 2023-01-24 06:27:04.792120: step: 172/466, loss: 0.0013819290325045586 2023-01-24 06:27:05.506181: step: 174/466, loss: 0.0024518663994967937 2023-01-24 06:27:06.330022: step: 176/466, loss: 0.003064458491280675 2023-01-24 06:27:07.091193: step: 178/466, loss: 0.5088443160057068 2023-01-24 06:27:07.832138: step: 180/466, loss: 0.004672045353800058 2023-01-24 06:27:08.571881: step: 182/466, loss: 0.0006907099741511047 2023-01-24 06:27:09.396108: step: 184/466, loss: 0.038431692868471146 2023-01-24 06:27:10.186207: step: 186/466, loss: 0.0007816114812158048 2023-01-24 06:27:10.942893: step: 188/466, loss: 0.012371831573545933 2023-01-24 06:27:11.760322: step: 190/466, loss: 0.019759811460971832 2023-01-24 06:27:12.527243: step: 192/466, loss: 0.010684781707823277 2023-01-24 06:27:13.269304: step: 194/466, loss: 0.0015153209678828716 2023-01-24 06:27:14.061632: step: 196/466, loss: 0.002155774272978306 2023-01-24 06:27:14.813767: step: 198/466, loss: 0.0011441691312938929 2023-01-24 06:27:15.530449: step: 200/466, loss: 0.007144713308662176 2023-01-24 06:27:16.316402: step: 202/466, loss: 0.025695007294416428 2023-01-24 06:27:17.103719: step: 204/466, loss: 0.0004251356585882604 2023-01-24 06:27:17.896934: step: 206/466, loss: 0.014349368400871754 2023-01-24 06:27:18.625544: step: 208/466, loss: 0.061793696135282516 2023-01-24 06:27:19.401267: step: 210/466, loss: 0.0003937285509891808 2023-01-24 06:27:20.214324: step: 212/466, loss: 0.01733911596238613 2023-01-24 06:27:20.916715: step: 214/466, loss: 0.003946369048207998 2023-01-24 06:27:21.674503: step: 216/466, loss: 0.003022226504981518 2023-01-24 06:27:22.489244: step: 218/466, loss: 0.01839815080165863 2023-01-24 06:27:23.318915: step: 220/466, loss: 0.01183647383004427 2023-01-24 06:27:24.125580: step: 222/466, loss: 0.030242666602134705 2023-01-24 06:27:24.897812: step: 224/466, loss: 0.0011466683354228735 2023-01-24 06:27:25.694665: step: 226/466, loss: 0.0030892265494912863 2023-01-24 06:27:26.413987: step: 228/466, loss: 9.504199988441542e-05 2023-01-24 06:27:27.144692: step: 230/466, loss: 0.000864933361299336 2023-01-24 06:27:27.930197: step: 232/466, loss: 0.018242958933115005 2023-01-24 06:27:28.706661: step: 234/466, loss: 0.006578810978680849 2023-01-24 06:27:29.450814: step: 236/466, loss: 0.0023516130167990923 2023-01-24 06:27:30.322852: step: 238/466, loss: 0.02836759015917778 2023-01-24 06:27:31.071506: step: 240/466, loss: 0.0028621014207601547 2023-01-24 06:27:31.821381: step: 242/466, loss: 0.0016216520452871919 2023-01-24 06:27:32.595275: step: 244/466, loss: 0.023854777216911316 2023-01-24 06:27:33.230718: step: 246/466, loss: 0.008852701634168625 2023-01-24 06:27:33.959362: step: 248/466, loss: 9.626195242162794e-05 2023-01-24 06:27:34.609759: step: 250/466, loss: 0.0032315291464328766 2023-01-24 06:27:35.322504: step: 252/466, loss: 0.0030613539274781942 2023-01-24 06:27:35.987402: step: 254/466, loss: 0.0009762575500644743 2023-01-24 06:27:36.777015: step: 256/466, loss: 0.0010766517370939255 2023-01-24 06:27:37.477696: step: 258/466, loss: 0.00029321524198167026 2023-01-24 06:27:38.193163: step: 260/466, loss: 0.0014581572031602263 2023-01-24 06:27:38.913610: step: 262/466, loss: 0.014903835952281952 2023-01-24 06:27:39.710473: step: 264/466, loss: 0.011509068310260773 2023-01-24 06:27:40.377581: step: 266/466, loss: 0.001634338404983282 2023-01-24 06:27:41.098004: step: 268/466, loss: 0.016303768381476402 2023-01-24 06:27:41.769974: step: 270/466, loss: 0.010090984404087067 2023-01-24 06:27:42.548579: step: 272/466, loss: 0.0007559580262750387 2023-01-24 06:27:43.341190: step: 274/466, loss: 0.006533884909003973 2023-01-24 06:27:44.044200: step: 276/466, loss: 0.001971112797036767 2023-01-24 06:27:44.779273: step: 278/466, loss: 0.007579253986477852 2023-01-24 06:27:45.502652: step: 280/466, loss: 0.012597577646374702 2023-01-24 06:27:46.277262: step: 282/466, loss: 0.0018686820985749364 2023-01-24 06:27:47.045743: step: 284/466, loss: 0.024334682151675224 2023-01-24 06:27:47.762973: step: 286/466, loss: 0.0003072105173487216 2023-01-24 06:27:48.508178: step: 288/466, loss: 0.00048379477811977267 2023-01-24 06:27:49.232745: step: 290/466, loss: 0.0029958547092974186 2023-01-24 06:27:49.927759: step: 292/466, loss: 5.4033156629884616e-05 2023-01-24 06:27:50.647089: step: 294/466, loss: 0.002091967035084963 2023-01-24 06:27:51.503621: step: 296/466, loss: 0.009681585244834423 2023-01-24 06:27:52.240099: step: 298/466, loss: 0.0075980499386787415 2023-01-24 06:27:52.948003: step: 300/466, loss: 0.007685400079935789 2023-01-24 06:27:53.662403: step: 302/466, loss: 0.0036172426771372557 2023-01-24 06:27:54.384684: step: 304/466, loss: 0.00325842946767807 2023-01-24 06:27:55.163819: step: 306/466, loss: 0.0005344321252778172 2023-01-24 06:27:55.919839: step: 308/466, loss: 0.06320324540138245 2023-01-24 06:27:56.690776: step: 310/466, loss: 0.00019163635442964733 2023-01-24 06:27:57.505496: step: 312/466, loss: 0.0513727143406868 2023-01-24 06:27:58.230470: step: 314/466, loss: 0.06122292950749397 2023-01-24 06:27:58.974415: step: 316/466, loss: 0.0025486303493380547 2023-01-24 06:27:59.628997: step: 318/466, loss: 0.0069191232323646545 2023-01-24 06:28:00.359033: step: 320/466, loss: 0.038638561964035034 2023-01-24 06:28:01.117947: step: 322/466, loss: 0.03692803159356117 2023-01-24 06:28:01.863576: step: 324/466, loss: 0.15488755702972412 2023-01-24 06:28:02.534718: step: 326/466, loss: 0.001069761230610311 2023-01-24 06:28:03.412951: step: 328/466, loss: 0.021730933338403702 2023-01-24 06:28:04.031760: step: 330/466, loss: 0.01111496239900589 2023-01-24 06:28:04.779073: step: 332/466, loss: 0.00878437515348196 2023-01-24 06:28:05.504865: step: 334/466, loss: 0.015419178642332554 2023-01-24 06:28:06.287423: step: 336/466, loss: 0.0022405844647437334 2023-01-24 06:28:07.055338: step: 338/466, loss: 0.13187530636787415 2023-01-24 06:28:07.792241: step: 340/466, loss: 0.00011246054782532156 2023-01-24 06:28:08.603431: step: 342/466, loss: 0.04561132937669754 2023-01-24 06:28:09.451490: step: 344/466, loss: 0.029233692213892937 2023-01-24 06:28:10.242779: step: 346/466, loss: 0.03551267087459564 2023-01-24 06:28:11.011892: step: 348/466, loss: 0.0002540225104894489 2023-01-24 06:28:11.844417: step: 350/466, loss: 0.06430414319038391 2023-01-24 06:28:12.552321: step: 352/466, loss: 0.00018176485900767148 2023-01-24 06:28:13.334786: step: 354/466, loss: 0.014430318959057331 2023-01-24 06:28:14.105041: step: 356/466, loss: 0.0014982149004936218 2023-01-24 06:28:14.872273: step: 358/466, loss: 0.08484133332967758 2023-01-24 06:28:15.646952: step: 360/466, loss: 0.003684528172016144 2023-01-24 06:28:16.420486: step: 362/466, loss: 0.003811977803707123 2023-01-24 06:28:17.140878: step: 364/466, loss: 0.008543026633560658 2023-01-24 06:28:17.917236: step: 366/466, loss: 0.005676996428519487 2023-01-24 06:28:18.601544: step: 368/466, loss: 0.006688602734357119 2023-01-24 06:28:19.412173: step: 370/466, loss: 0.024656053632497787 2023-01-24 06:28:20.144478: step: 372/466, loss: 0.00386234768666327 2023-01-24 06:28:20.840927: step: 374/466, loss: 0.0007061361102387309 2023-01-24 06:28:21.587367: step: 376/466, loss: 0.005585248116403818 2023-01-24 06:28:22.389340: step: 378/466, loss: 0.000482331175589934 2023-01-24 06:28:23.084292: step: 380/466, loss: 0.0001822631456889212 2023-01-24 06:28:23.762378: step: 382/466, loss: 0.00896023865789175 2023-01-24 06:28:24.477825: step: 384/466, loss: 0.001212327741086483 2023-01-24 06:28:25.264940: step: 386/466, loss: 0.022206583991646767 2023-01-24 06:28:25.999671: step: 388/466, loss: 0.009799190796911716 2023-01-24 06:28:26.641818: step: 390/466, loss: 0.0009703595424070954 2023-01-24 06:28:27.369254: step: 392/466, loss: 0.019333017989993095 2023-01-24 06:28:28.127312: step: 394/466, loss: 0.00568966893479228 2023-01-24 06:28:28.880719: step: 396/466, loss: 0.002850270364433527 2023-01-24 06:28:29.679998: step: 398/466, loss: 0.007840417325496674 2023-01-24 06:28:30.501604: step: 400/466, loss: 0.036355625838041306 2023-01-24 06:28:31.231025: step: 402/466, loss: 0.004576034378260374 2023-01-24 06:28:32.047755: step: 404/466, loss: 0.04875103011727333 2023-01-24 06:28:32.770004: step: 406/466, loss: 0.24837371706962585 2023-01-24 06:28:33.598400: step: 408/466, loss: 0.0003534825809765607 2023-01-24 06:28:34.303766: step: 410/466, loss: 0.022242676466703415 2023-01-24 06:28:35.086334: step: 412/466, loss: 1.1368690729141235 2023-01-24 06:28:35.833418: step: 414/466, loss: 0.000881312764249742 2023-01-24 06:28:36.544805: step: 416/466, loss: 0.02301531471312046 2023-01-24 06:28:37.320058: step: 418/466, loss: 0.00803961418569088 2023-01-24 06:28:37.954897: step: 420/466, loss: 0.01750531978905201 2023-01-24 06:28:38.701304: step: 422/466, loss: 0.03382166847586632 2023-01-24 06:28:39.508195: step: 424/466, loss: 0.06351270526647568 2023-01-24 06:28:40.377160: step: 426/466, loss: 0.014697426930069923 2023-01-24 06:28:41.075595: step: 428/466, loss: 0.0015871673822402954 2023-01-24 06:28:41.852309: step: 430/466, loss: 0.01852606236934662 2023-01-24 06:28:42.559024: step: 432/466, loss: 0.0006288749864324927 2023-01-24 06:28:43.270240: step: 434/466, loss: 0.07311911135911942 2023-01-24 06:28:44.025066: step: 436/466, loss: 0.016755113378167152 2023-01-24 06:28:44.823955: step: 438/466, loss: 0.0033878744579851627 2023-01-24 06:28:45.599020: step: 440/466, loss: 0.003291395725682378 2023-01-24 06:28:46.323316: step: 442/466, loss: 0.7009214162826538 2023-01-24 06:28:47.084717: step: 444/466, loss: 0.05325298756361008 2023-01-24 06:28:47.877129: step: 446/466, loss: 0.002627542708069086 2023-01-24 06:28:48.631127: step: 448/466, loss: 0.006730484776198864 2023-01-24 06:28:49.437764: step: 450/466, loss: 0.018681922927498817 2023-01-24 06:28:50.181978: step: 452/466, loss: 0.006985391955822706 2023-01-24 06:28:50.933170: step: 454/466, loss: 0.005801316816359758 2023-01-24 06:28:51.638540: step: 456/466, loss: 0.000606681453064084 2023-01-24 06:28:52.358090: step: 458/466, loss: 0.029049178585410118 2023-01-24 06:28:53.086994: step: 460/466, loss: 0.021959295496344566 2023-01-24 06:28:53.798927: step: 462/466, loss: 0.0020662713795900345 2023-01-24 06:28:54.585209: step: 464/466, loss: 0.010049436241388321 2023-01-24 06:28:55.349866: step: 466/466, loss: 0.004619895480573177 2023-01-24 06:28:56.107708: step: 468/466, loss: 0.0024331167805939913 2023-01-24 06:28:56.838724: step: 470/466, loss: 0.0027067656628787518 2023-01-24 06:28:57.620719: step: 472/466, loss: 0.00027432592469267547 2023-01-24 06:28:58.389735: step: 474/466, loss: 0.06490861624479294 2023-01-24 06:28:59.247200: step: 476/466, loss: 0.0030483517330139875 2023-01-24 06:28:59.915286: step: 478/466, loss: 0.03932291641831398 2023-01-24 06:29:00.717065: step: 480/466, loss: 0.0018771301256492734 2023-01-24 06:29:01.465497: step: 482/466, loss: 0.005720630753785372 2023-01-24 06:29:02.305798: step: 484/466, loss: 0.03505473956465721 2023-01-24 06:29:03.088078: step: 486/466, loss: 0.04068222641944885 2023-01-24 06:29:03.745914: step: 488/466, loss: 0.0047709825448691845 2023-01-24 06:29:04.488547: step: 490/466, loss: 0.02563410997390747 2023-01-24 06:29:05.263893: step: 492/466, loss: 0.008590064011514187 2023-01-24 06:29:05.935422: step: 494/466, loss: 0.0010861967457458377 2023-01-24 06:29:06.725284: step: 496/466, loss: 0.08789907395839691 2023-01-24 06:29:07.440369: step: 498/466, loss: 0.01686914451420307 2023-01-24 06:29:08.147020: step: 500/466, loss: 0.0014343768125399947 2023-01-24 06:29:09.051855: step: 502/466, loss: 0.04180929437279701 2023-01-24 06:29:09.788266: step: 504/466, loss: 0.022112946957349777 2023-01-24 06:29:10.539110: step: 506/466, loss: 0.03575494885444641 2023-01-24 06:29:11.264991: step: 508/466, loss: 0.0023700930178165436 2023-01-24 06:29:12.067357: step: 510/466, loss: 0.009916874580085278 2023-01-24 06:29:12.841952: step: 512/466, loss: 0.03779168799519539 2023-01-24 06:29:13.594934: step: 514/466, loss: 0.0007968175923451781 2023-01-24 06:29:14.297166: step: 516/466, loss: 0.030672218650579453 2023-01-24 06:29:15.029365: step: 518/466, loss: 0.0027551238890737295 2023-01-24 06:29:15.758860: step: 520/466, loss: 0.009607107378542423 2023-01-24 06:29:16.494609: step: 522/466, loss: 0.0026895857881754637 2023-01-24 06:29:17.235985: step: 524/466, loss: 0.007058565504848957 2023-01-24 06:29:18.011846: step: 526/466, loss: 0.015240203589200974 2023-01-24 06:29:18.745021: step: 528/466, loss: 0.0023862060625106096 2023-01-24 06:29:19.538260: step: 530/466, loss: 0.012481776997447014 2023-01-24 06:29:20.264619: step: 532/466, loss: 0.0006766861770302057 2023-01-24 06:29:21.065175: step: 534/466, loss: 0.06194831430912018 2023-01-24 06:29:21.925648: step: 536/466, loss: 0.019686013460159302 2023-01-24 06:29:22.650240: step: 538/466, loss: 7.519257633248344e-05 2023-01-24 06:29:23.463205: step: 540/466, loss: 0.008502397686243057 2023-01-24 06:29:24.207590: step: 542/466, loss: 0.014276986010372639 2023-01-24 06:29:24.955280: step: 544/466, loss: 0.0015168474055826664 2023-01-24 06:29:25.654528: step: 546/466, loss: 0.01749262772500515 2023-01-24 06:29:26.487105: step: 548/466, loss: 0.0004611381737049669 2023-01-24 06:29:27.214730: step: 550/466, loss: 0.0009669710998423398 2023-01-24 06:29:27.939284: step: 552/466, loss: 0.004210221581161022 2023-01-24 06:29:28.723357: step: 554/466, loss: 0.00013607698201667517 2023-01-24 06:29:29.401110: step: 556/466, loss: 0.0025229689199477434 2023-01-24 06:29:30.138733: step: 558/466, loss: 0.036629319190979004 2023-01-24 06:29:30.917892: step: 560/466, loss: 0.008265677839517593 2023-01-24 06:29:31.915075: step: 562/466, loss: 0.003423975547775626 2023-01-24 06:29:32.674211: step: 564/466, loss: 0.027235476300120354 2023-01-24 06:29:33.365681: step: 566/466, loss: 0.018567977473139763 2023-01-24 06:29:34.138698: step: 568/466, loss: 0.00036482102586887777 2023-01-24 06:29:34.833323: step: 570/466, loss: 0.004040045198053122 2023-01-24 06:29:35.685610: step: 572/466, loss: 0.06701383739709854 2023-01-24 06:29:36.433965: step: 574/466, loss: 0.033170491456985474 2023-01-24 06:29:37.263766: step: 576/466, loss: 0.01985347270965576 2023-01-24 06:29:38.027717: step: 578/466, loss: 7.628784806001931e-05 2023-01-24 06:29:38.742814: step: 580/466, loss: 0.13127250969409943 2023-01-24 06:29:39.464532: step: 582/466, loss: 0.005728569347411394 2023-01-24 06:29:40.191918: step: 584/466, loss: 0.018810346722602844 2023-01-24 06:29:40.921814: step: 586/466, loss: 0.031102297827601433 2023-01-24 06:29:41.652369: step: 588/466, loss: 0.02655886299908161 2023-01-24 06:29:42.368895: step: 590/466, loss: 0.0003828817280009389 2023-01-24 06:29:43.117486: step: 592/466, loss: 0.010496832430362701 2023-01-24 06:29:43.813569: step: 594/466, loss: 0.01843322440981865 2023-01-24 06:29:44.568694: step: 596/466, loss: 0.00011009426816599444 2023-01-24 06:29:45.300189: step: 598/466, loss: 0.0060088844038546085 2023-01-24 06:29:46.119663: step: 600/466, loss: 0.04839334264397621 2023-01-24 06:29:46.827936: step: 602/466, loss: 0.005110643804073334 2023-01-24 06:29:47.503467: step: 604/466, loss: 0.012522554956376553 2023-01-24 06:29:48.266917: step: 606/466, loss: 0.050412654876708984 2023-01-24 06:29:49.059228: step: 608/466, loss: 0.0001513346505817026 2023-01-24 06:29:49.917295: step: 610/466, loss: 0.025511352345347404 2023-01-24 06:29:50.731871: step: 612/466, loss: 0.0007206370355561376 2023-01-24 06:29:51.565654: step: 614/466, loss: 0.012813889421522617 2023-01-24 06:29:52.349772: step: 616/466, loss: 0.11198767274618149 2023-01-24 06:29:53.141030: step: 618/466, loss: 0.02641315758228302 2023-01-24 06:29:54.018855: step: 620/466, loss: 0.0035171855706721544 2023-01-24 06:29:54.859063: step: 622/466, loss: 0.0036150936502963305 2023-01-24 06:29:55.731921: step: 624/466, loss: 0.014979987405240536 2023-01-24 06:29:56.478335: step: 626/466, loss: 0.05457896739244461 2023-01-24 06:29:57.206848: step: 628/466, loss: 0.0005795444594696164 2023-01-24 06:29:57.990295: step: 630/466, loss: 0.013990444131195545 2023-01-24 06:29:58.805525: step: 632/466, loss: 0.034092966467142105 2023-01-24 06:29:59.473645: step: 634/466, loss: 0.01346777006983757 2023-01-24 06:30:00.234341: step: 636/466, loss: 0.011793782003223896 2023-01-24 06:30:00.970206: step: 638/466, loss: 0.010652897879481316 2023-01-24 06:30:01.751870: step: 640/466, loss: 0.02251473255455494 2023-01-24 06:30:02.529641: step: 642/466, loss: 0.03246127441525459 2023-01-24 06:30:03.354035: step: 644/466, loss: 0.000812268815934658 2023-01-24 06:30:04.125520: step: 646/466, loss: 0.2501620948314667 2023-01-24 06:30:04.949433: step: 648/466, loss: 0.0055265543051064014 2023-01-24 06:30:05.825898: step: 650/466, loss: 0.01182998064905405 2023-01-24 06:30:06.541676: step: 652/466, loss: 0.006487314589321613 2023-01-24 06:30:07.286574: step: 654/466, loss: 0.016985846683382988 2023-01-24 06:30:08.054984: step: 656/466, loss: 0.009141712449491024 2023-01-24 06:30:08.911132: step: 658/466, loss: 0.0020782635547220707 2023-01-24 06:30:09.714401: step: 660/466, loss: 0.001583437668159604 2023-01-24 06:30:10.512305: step: 662/466, loss: 0.003407202661037445 2023-01-24 06:30:11.285534: step: 664/466, loss: 0.0012576496228575706 2023-01-24 06:30:12.011394: step: 666/466, loss: 0.0018089022487401962 2023-01-24 06:30:12.726932: step: 668/466, loss: 2.7493411835166626e-05 2023-01-24 06:30:13.463601: step: 670/466, loss: 0.0005983322625979781 2023-01-24 06:30:14.281453: step: 672/466, loss: 0.00021879436098970473 2023-01-24 06:30:14.992922: step: 674/466, loss: 0.01310392189770937 2023-01-24 06:30:15.697593: step: 676/466, loss: 0.0006318899104371667 2023-01-24 06:30:16.394491: step: 678/466, loss: 0.031030451878905296 2023-01-24 06:30:17.141306: step: 680/466, loss: 0.012557669542729855 2023-01-24 06:30:17.929522: step: 682/466, loss: 0.007738407235592604 2023-01-24 06:30:18.640743: step: 684/466, loss: 0.008845807053148746 2023-01-24 06:30:19.388295: step: 686/466, loss: 0.05530845746397972 2023-01-24 06:30:20.138919: step: 688/466, loss: 0.0006768331513740122 2023-01-24 06:30:20.844404: step: 690/466, loss: 0.0006791841005906463 2023-01-24 06:30:21.628678: step: 692/466, loss: 0.0006975700962357223 2023-01-24 06:30:22.396354: step: 694/466, loss: 0.020109187811613083 2023-01-24 06:30:23.196966: step: 696/466, loss: 0.003456049831584096 2023-01-24 06:30:23.887860: step: 698/466, loss: 0.0036004381254315376 2023-01-24 06:30:24.581285: step: 700/466, loss: 0.015351896174252033 2023-01-24 06:30:25.337967: step: 702/466, loss: 0.0023220102302730083 2023-01-24 06:30:26.070219: step: 704/466, loss: 0.0014163806336000562 2023-01-24 06:30:26.820483: step: 706/466, loss: 0.01341936830431223 2023-01-24 06:30:27.509139: step: 708/466, loss: 0.06239281967282295 2023-01-24 06:30:28.127721: step: 710/466, loss: 4.801032543182373 2023-01-24 06:30:28.909164: step: 712/466, loss: 0.5246943831443787 2023-01-24 06:30:29.808002: step: 714/466, loss: 0.013656568713486195 2023-01-24 06:30:30.577011: step: 716/466, loss: 0.00019670475739985704 2023-01-24 06:30:31.286712: step: 718/466, loss: 0.0020928506273776293 2023-01-24 06:30:32.121824: step: 720/466, loss: 0.0014590020291507244 2023-01-24 06:30:32.829204: step: 722/466, loss: 0.007673331536352634 2023-01-24 06:30:33.579787: step: 724/466, loss: 0.04808073118329048 2023-01-24 06:30:34.321088: step: 726/466, loss: 0.0033867366146296263 2023-01-24 06:30:35.089387: step: 728/466, loss: 0.004553688690066338 2023-01-24 06:30:35.838710: step: 730/466, loss: 0.01876821555197239 2023-01-24 06:30:36.541068: step: 732/466, loss: 0.004025513771921396 2023-01-24 06:30:37.322287: step: 734/466, loss: 0.021394729614257812 2023-01-24 06:30:38.062950: step: 736/466, loss: 0.042647164314985275 2023-01-24 06:30:38.858314: step: 738/466, loss: 0.010004247538745403 2023-01-24 06:30:39.564643: step: 740/466, loss: 0.005100119858980179 2023-01-24 06:30:40.373406: step: 742/466, loss: 0.0197369996458292 2023-01-24 06:30:41.146459: step: 744/466, loss: 0.0205406304448843 2023-01-24 06:30:41.890629: step: 746/466, loss: 0.00041430548299103975 2023-01-24 06:30:42.658168: step: 748/466, loss: 0.02803983725607395 2023-01-24 06:30:43.438904: step: 750/466, loss: 0.008702714927494526 2023-01-24 06:30:44.238701: step: 752/466, loss: 0.008735493756830692 2023-01-24 06:30:44.950139: step: 754/466, loss: 0.004952584858983755 2023-01-24 06:30:45.641153: step: 756/466, loss: 1.1424092008383013e-05 2023-01-24 06:30:46.320303: step: 758/466, loss: 0.006848689168691635 2023-01-24 06:30:47.086385: step: 760/466, loss: 0.037673287093639374 2023-01-24 06:30:47.872417: step: 762/466, loss: 0.00044622819405049086 2023-01-24 06:30:48.583953: step: 764/466, loss: 0.0030297415796667337 2023-01-24 06:30:49.434091: step: 766/466, loss: 0.038348063826560974 2023-01-24 06:30:50.264100: step: 768/466, loss: 0.0030770660378038883 2023-01-24 06:30:51.104307: step: 770/466, loss: 5.194837649469264e-05 2023-01-24 06:30:51.849424: step: 772/466, loss: 0.02890346758067608 2023-01-24 06:30:52.630838: step: 774/466, loss: 0.0009718273649923503 2023-01-24 06:30:53.442632: step: 776/466, loss: 0.0007162726833485067 2023-01-24 06:30:54.136116: step: 778/466, loss: 0.0012545500649139285 2023-01-24 06:30:54.875559: step: 780/466, loss: 0.003798744175583124 2023-01-24 06:30:55.656146: step: 782/466, loss: 0.004894550424069166 2023-01-24 06:30:56.477499: step: 784/466, loss: 0.0067248838022351265 2023-01-24 06:30:57.246950: step: 786/466, loss: 0.006345099303871393 2023-01-24 06:30:57.992099: step: 788/466, loss: 0.006225190591067076 2023-01-24 06:30:58.748923: step: 790/466, loss: 0.008393198251724243 2023-01-24 06:30:59.465427: step: 792/466, loss: 0.021510576829314232 2023-01-24 06:31:00.251894: step: 794/466, loss: 9.573287388775498e-05 2023-01-24 06:31:01.010236: step: 796/466, loss: 0.02046097069978714 2023-01-24 06:31:01.722147: step: 798/466, loss: 0.029701031744480133 2023-01-24 06:31:02.520117: step: 800/466, loss: 0.0017636730335652828 2023-01-24 06:31:03.297445: step: 802/466, loss: 0.008908066898584366 2023-01-24 06:31:04.119344: step: 804/466, loss: 0.010303734801709652 2023-01-24 06:31:04.885720: step: 806/466, loss: 0.02440200001001358 2023-01-24 06:31:05.698792: step: 808/466, loss: 0.03921886533498764 2023-01-24 06:31:06.398309: step: 810/466, loss: 0.0026704040355980396 2023-01-24 06:31:07.151945: step: 812/466, loss: 0.004355969838798046 2023-01-24 06:31:07.890773: step: 814/466, loss: 0.020293502137064934 2023-01-24 06:31:08.676157: step: 816/466, loss: 0.010316620580852032 2023-01-24 06:31:09.378064: step: 818/466, loss: 0.002169701736420393 2023-01-24 06:31:10.157393: step: 820/466, loss: 0.07257067412137985 2023-01-24 06:31:10.955783: step: 822/466, loss: 0.2700299918651581 2023-01-24 06:31:11.687211: step: 824/466, loss: 0.009220699779689312 2023-01-24 06:31:12.440087: step: 826/466, loss: 0.0862964317202568 2023-01-24 06:31:13.095584: step: 828/466, loss: 2.2523789084516466e-05 2023-01-24 06:31:13.849951: step: 830/466, loss: 0.002025639172643423 2023-01-24 06:31:14.571079: step: 832/466, loss: 0.017040148377418518 2023-01-24 06:31:15.389560: step: 834/466, loss: 0.016010645776987076 2023-01-24 06:31:16.039891: step: 836/466, loss: 0.011474061757326126 2023-01-24 06:31:16.813481: step: 838/466, loss: 0.013029249384999275 2023-01-24 06:31:17.531140: step: 840/466, loss: 0.030176879838109016 2023-01-24 06:31:18.306377: step: 842/466, loss: 0.0135821383446455 2023-01-24 06:31:19.087025: step: 844/466, loss: 0.0947868824005127 2023-01-24 06:31:19.815125: step: 846/466, loss: 0.0001525956904515624 2023-01-24 06:31:20.507667: step: 848/466, loss: 0.0007349324878305197 2023-01-24 06:31:21.216178: step: 850/466, loss: 2.448061786708422e-05 2023-01-24 06:31:21.870090: step: 852/466, loss: 0.007771041709929705 2023-01-24 06:31:22.599284: step: 854/466, loss: 0.0008471178589388728 2023-01-24 06:31:23.368855: step: 856/466, loss: 0.003764393040910363 2023-01-24 06:31:24.084279: step: 858/466, loss: 0.001283894875086844 2023-01-24 06:31:24.745488: step: 860/466, loss: 0.00010542834206717089 2023-01-24 06:31:25.493409: step: 862/466, loss: 0.011902322061359882 2023-01-24 06:31:26.278670: step: 864/466, loss: 0.003066282719373703 2023-01-24 06:31:27.034065: step: 866/466, loss: 0.0017051482573151588 2023-01-24 06:31:27.702889: step: 868/466, loss: 0.0004545479314401746 2023-01-24 06:31:28.426526: step: 870/466, loss: 0.004616321064531803 2023-01-24 06:31:29.156725: step: 872/466, loss: 0.024221308529376984 2023-01-24 06:31:29.804847: step: 874/466, loss: 3.606598329497501e-05 2023-01-24 06:31:30.571830: step: 876/466, loss: 0.00022110596182756126 2023-01-24 06:31:31.313905: step: 878/466, loss: 0.017143752425909042 2023-01-24 06:31:32.120232: step: 880/466, loss: 0.006943912245333195 2023-01-24 06:31:32.884080: step: 882/466, loss: 0.0264279842376709 2023-01-24 06:31:33.605555: step: 884/466, loss: 0.0035525208804756403 2023-01-24 06:31:34.353778: step: 886/466, loss: 0.003330837469547987 2023-01-24 06:31:35.134710: step: 888/466, loss: 0.07850099354982376 2023-01-24 06:31:35.800651: step: 890/466, loss: 0.001839144853875041 2023-01-24 06:31:36.575143: step: 892/466, loss: 0.0007886227685958147 2023-01-24 06:31:37.316898: step: 894/466, loss: 0.0025404752232134342 2023-01-24 06:31:38.052975: step: 896/466, loss: 0.013902636244893074 2023-01-24 06:31:38.786442: step: 898/466, loss: 0.0199846550822258 2023-01-24 06:31:39.539231: step: 900/466, loss: 0.002748046535998583 2023-01-24 06:31:40.275980: step: 902/466, loss: 0.00542854517698288 2023-01-24 06:31:40.991088: step: 904/466, loss: 0.0008315120358020067 2023-01-24 06:31:41.698630: step: 906/466, loss: 0.004181792959570885 2023-01-24 06:31:42.472007: step: 908/466, loss: 0.0013797288993373513 2023-01-24 06:31:43.233531: step: 910/466, loss: 0.03098655864596367 2023-01-24 06:31:44.154681: step: 912/466, loss: 0.022968340665102005 2023-01-24 06:31:44.931165: step: 914/466, loss: 0.0047401487827301025 2023-01-24 06:31:45.663839: step: 916/466, loss: 0.061698734760284424 2023-01-24 06:31:46.384418: step: 918/466, loss: 0.10695232450962067 2023-01-24 06:31:47.141162: step: 920/466, loss: 0.5546409487724304 2023-01-24 06:31:47.865627: step: 922/466, loss: 0.026372479274868965 2023-01-24 06:31:48.643584: step: 924/466, loss: 0.0012187837855890393 2023-01-24 06:31:49.360463: step: 926/466, loss: 0.060460496693849564 2023-01-24 06:31:50.099184: step: 928/466, loss: 0.013208061456680298 2023-01-24 06:31:50.861897: step: 930/466, loss: 0.038146454840898514 2023-01-24 06:31:51.708792: step: 932/466, loss: 0.019662169739603996 ================================================== Loss: 0.035 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33342619509043925, 'r': 0.3264666350411132, 'f1': 0.32990971556407794}, 'combined': 0.2430913693630048, 'epoch': 34} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3679515102247656, 'r': 0.3003555655387601, 'f1': 0.3307350406791309}, 'combined': 0.2032810493930268, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3071149132730015, 'r': 0.3333391468541876, 'f1': 0.31969013720137734}, 'combined': 0.23556115372733066, 'epoch': 34} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.33951378347461875, 'r': 0.29538287574394906, 'f1': 0.31591458629148955}, 'combined': 0.19417189206208627, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35221080305927344, 'r': 0.3495374762808349, 'f1': 0.35086904761904764}, 'combined': 0.25853508771929823, 'epoch': 34} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3622993568974475, 'r': 0.29479990998847766, 'f1': 0.3250827483293867}, 'combined': 0.20078640337991538, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3017241379310345, 'r': 0.25, 'f1': 0.2734375}, 'combined': 0.18229166666666666, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23076923076923078, 'r': 0.391304347826087, 'f1': 0.2903225806451613}, 'combined': 0.14516129032258066, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3561785714285714, 'r': 0.3210338167525074, 'f1': 0.33769425434844597}, 'combined': 0.24882734530938122, 'epoch': 33} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37857267048883675, 'r': 0.28679747764305813, 'f1': 0.32635575042141096}, 'combined': 0.20058938806389162, 'epoch': 33} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.325, 'f1': 0.3729508196721312}, 'combined': 0.24863387978142076, 'epoch': 33} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3600931677018634, 'r': 0.3300284629981025, 'f1': 0.34440594059405943}, 'combined': 0.25377279833246486, 'epoch': 29} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3732928220606822, 'r': 0.2841675462053636, 'f1': 0.3226892764375249}, 'combined': 0.19930808250553012, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 29} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:34:36.447290: step: 2/466, loss: 0.0016653207130730152 2023-01-24 06:34:37.208650: step: 4/466, loss: 0.009674855507910252 2023-01-24 06:34:37.918415: step: 6/466, loss: 0.003571053734049201 2023-01-24 06:34:38.652174: step: 8/466, loss: 0.0011327445972710848 2023-01-24 06:34:39.473311: step: 10/466, loss: 0.010663919150829315 2023-01-24 06:34:40.217716: step: 12/466, loss: 0.004768662620335817 2023-01-24 06:34:41.017055: step: 14/466, loss: 2.741898775100708 2023-01-24 06:34:41.807926: step: 16/466, loss: 0.03675287589430809 2023-01-24 06:34:42.525301: step: 18/466, loss: 0.022872446104884148 2023-01-24 06:34:43.317553: step: 20/466, loss: 0.011350450105965137 2023-01-24 06:34:44.071031: step: 22/466, loss: 0.11710529774427414 2023-01-24 06:34:44.846355: step: 24/466, loss: 0.01858246885240078 2023-01-24 06:34:45.510258: step: 26/466, loss: 0.0015298037324100733 2023-01-24 06:34:46.207598: step: 28/466, loss: 0.053046926856040955 2023-01-24 06:34:46.937538: step: 30/466, loss: 0.009396509267389774 2023-01-24 06:34:47.699947: step: 32/466, loss: 0.0038175564259290695 2023-01-24 06:34:48.361453: step: 34/466, loss: 0.00033454614458605647 2023-01-24 06:34:49.141254: step: 36/466, loss: 0.03061281517148018 2023-01-24 06:34:49.895876: step: 38/466, loss: 0.00020051149476785213 2023-01-24 06:34:50.580163: step: 40/466, loss: 0.14205829799175262 2023-01-24 06:34:51.398219: step: 42/466, loss: 0.000653352471999824 2023-01-24 06:34:52.123078: step: 44/466, loss: 0.0044058533385396 2023-01-24 06:34:52.851774: step: 46/466, loss: 0.020861299708485603 2023-01-24 06:34:53.591078: step: 48/466, loss: 0.011016382835805416 2023-01-24 06:34:54.399100: step: 50/466, loss: 0.026647163555026054 2023-01-24 06:34:55.133940: step: 52/466, loss: 0.00011628265929175541 2023-01-24 06:34:55.827965: step: 54/466, loss: 0.004194340668618679 2023-01-24 06:34:56.584615: step: 56/466, loss: 0.0391090102493763 2023-01-24 06:34:57.260069: step: 58/466, loss: 0.009294603951275349 2023-01-24 06:34:58.047640: step: 60/466, loss: 0.029620099812746048 2023-01-24 06:34:58.806631: step: 62/466, loss: 0.00043678830843418837 2023-01-24 06:34:59.571507: step: 64/466, loss: 0.016344642266631126 2023-01-24 06:35:00.299134: step: 66/466, loss: 0.01553379651159048 2023-01-24 06:35:01.013455: step: 68/466, loss: 0.010907202027738094 2023-01-24 06:35:01.766994: step: 70/466, loss: 0.005213810596615076 2023-01-24 06:35:02.600387: step: 72/466, loss: 0.05356352776288986 2023-01-24 06:35:03.332223: step: 74/466, loss: 0.000370173278497532 2023-01-24 06:35:04.062195: step: 76/466, loss: 9.902358578983694e-05 2023-01-24 06:35:04.772501: step: 78/466, loss: 0.004199262708425522 2023-01-24 06:35:05.521441: step: 80/466, loss: 0.0001537224161438644 2023-01-24 06:35:06.214610: step: 82/466, loss: 0.0006503834738396108 2023-01-24 06:35:06.994290: step: 84/466, loss: 0.021211711689829826 2023-01-24 06:35:07.797979: step: 86/466, loss: 0.018842682242393494 2023-01-24 06:35:08.614819: step: 88/466, loss: 1.0714588165283203 2023-01-24 06:35:09.431476: step: 90/466, loss: 0.0007923658122308552 2023-01-24 06:35:10.190396: step: 92/466, loss: 0.004402614664286375 2023-01-24 06:35:10.950753: step: 94/466, loss: 0.14788812398910522 2023-01-24 06:35:11.691683: step: 96/466, loss: 0.8132479786872864 2023-01-24 06:35:12.428701: step: 98/466, loss: 0.0061640930362045765 2023-01-24 06:35:13.281899: step: 100/466, loss: 0.005263158120214939 2023-01-24 06:35:14.116294: step: 102/466, loss: 0.004776317160576582 2023-01-24 06:35:14.888362: step: 104/466, loss: 0.013818934559822083 2023-01-24 06:35:15.586598: step: 106/466, loss: 0.006237700115889311 2023-01-24 06:35:16.297504: step: 108/466, loss: 0.0023206511978060007 2023-01-24 06:35:17.094426: step: 110/466, loss: 0.009178460575640202 2023-01-24 06:35:17.807881: step: 112/466, loss: 0.009001265279948711 2023-01-24 06:35:18.572869: step: 114/466, loss: 0.03055627830326557 2023-01-24 06:35:19.342579: step: 116/466, loss: 0.0008209957159124315 2023-01-24 06:35:20.131034: step: 118/466, loss: 0.00911964476108551 2023-01-24 06:35:20.843909: step: 120/466, loss: 5.4802025260869414e-05 2023-01-24 06:35:21.525543: step: 122/466, loss: 0.008433597162365913 2023-01-24 06:35:22.281797: step: 124/466, loss: 0.0014055280480533838 2023-01-24 06:35:23.035561: step: 126/466, loss: 0.00755557045340538 2023-01-24 06:35:23.700776: step: 128/466, loss: 0.003355384338647127 2023-01-24 06:35:24.438085: step: 130/466, loss: 0.003536886302754283 2023-01-24 06:35:25.161322: step: 132/466, loss: 0.06227538734674454 2023-01-24 06:35:25.939303: step: 134/466, loss: 0.009922748431563377 2023-01-24 06:35:26.679265: step: 136/466, loss: 0.00438820431008935 2023-01-24 06:35:27.459218: step: 138/466, loss: 0.01125524751842022 2023-01-24 06:35:28.210482: step: 140/466, loss: 0.008828779682517052 2023-01-24 06:35:28.892956: step: 142/466, loss: 0.007119577843695879 2023-01-24 06:35:29.701507: step: 144/466, loss: 0.015063256956636906 2023-01-24 06:35:30.500702: step: 146/466, loss: 0.08099152892827988 2023-01-24 06:35:31.221387: step: 148/466, loss: 0.004164229147136211 2023-01-24 06:35:32.046123: step: 150/466, loss: 0.004260669928044081 2023-01-24 06:35:32.924801: step: 152/466, loss: 0.017557373270392418 2023-01-24 06:35:33.749637: step: 154/466, loss: 0.021458711475133896 2023-01-24 06:35:34.522133: step: 156/466, loss: 0.01932448521256447 2023-01-24 06:35:35.232688: step: 158/466, loss: 0.012501021847128868 2023-01-24 06:35:36.043751: step: 160/466, loss: 0.00018512348469812423 2023-01-24 06:35:36.864691: step: 162/466, loss: 0.012105180881917477 2023-01-24 06:35:37.663617: step: 164/466, loss: 0.04477028176188469 2023-01-24 06:35:38.527458: step: 166/466, loss: 0.008865798823535442 2023-01-24 06:35:39.293440: step: 168/466, loss: 0.0018770851893350482 2023-01-24 06:35:39.996222: step: 170/466, loss: 0.002944796811789274 2023-01-24 06:35:40.814700: step: 172/466, loss: 0.005974494852125645 2023-01-24 06:35:41.513615: step: 174/466, loss: 1.3817396393278614e-05 2023-01-24 06:35:42.185887: step: 176/466, loss: 0.022418662905693054 2023-01-24 06:35:42.953593: step: 178/466, loss: 0.0034530037082731724 2023-01-24 06:35:43.653439: step: 180/466, loss: 0.0007848286768421531 2023-01-24 06:35:44.427646: step: 182/466, loss: 0.03065107949078083 2023-01-24 06:35:45.118498: step: 184/466, loss: 0.024792812764644623 2023-01-24 06:35:45.862893: step: 186/466, loss: 0.003852969268336892 2023-01-24 06:35:46.619617: step: 188/466, loss: 0.0042743380181491375 2023-01-24 06:35:47.318984: step: 190/466, loss: 0.0022542106453329325 2023-01-24 06:35:48.054488: step: 192/466, loss: 0.029596656560897827 2023-01-24 06:35:48.792803: step: 194/466, loss: 0.0017251368844881654 2023-01-24 06:35:49.472958: step: 196/466, loss: 0.08572795987129211 2023-01-24 06:35:50.276801: step: 198/466, loss: 0.005135236773639917 2023-01-24 06:35:51.043577: step: 200/466, loss: 8.483673445880413e-05 2023-01-24 06:35:51.797912: step: 202/466, loss: 0.002134964568540454 2023-01-24 06:35:52.602754: step: 204/466, loss: 0.022524390369653702 2023-01-24 06:35:53.398203: step: 206/466, loss: 0.014563613571226597 2023-01-24 06:35:54.221743: step: 208/466, loss: 0.06287034600973129 2023-01-24 06:35:55.044831: step: 210/466, loss: 0.015115310437977314 2023-01-24 06:35:55.790971: step: 212/466, loss: 0.0005720432964153588 2023-01-24 06:35:56.494868: step: 214/466, loss: 0.003158562583848834 2023-01-24 06:35:57.267047: step: 216/466, loss: 0.08497099578380585 2023-01-24 06:35:57.970315: step: 218/466, loss: 0.000755978049710393 2023-01-24 06:35:58.722635: step: 220/466, loss: 0.0012348828604444861 2023-01-24 06:35:59.460196: step: 222/466, loss: 0.0018827618332579732 2023-01-24 06:36:00.201874: step: 224/466, loss: 0.001642904942855239 2023-01-24 06:36:00.957683: step: 226/466, loss: 0.0006373273790813982 2023-01-24 06:36:01.759206: step: 228/466, loss: 0.0010114375036209822 2023-01-24 06:36:02.553131: step: 230/466, loss: 0.016211893409490585 2023-01-24 06:36:03.284852: step: 232/466, loss: 0.00019392998365219682 2023-01-24 06:36:04.050241: step: 234/466, loss: 0.00047139558591879904 2023-01-24 06:36:04.739008: step: 236/466, loss: 0.013145842589437962 2023-01-24 06:36:05.405833: step: 238/466, loss: 0.00024978467263281345 2023-01-24 06:36:06.121208: step: 240/466, loss: 0.00021391009795479476 2023-01-24 06:36:06.908016: step: 242/466, loss: 0.0013800224987789989 2023-01-24 06:36:07.649039: step: 244/466, loss: 0.012048288248479366 2023-01-24 06:36:08.407269: step: 246/466, loss: 0.0030543492175638676 2023-01-24 06:36:09.194465: step: 248/466, loss: 0.010977456346154213 2023-01-24 06:36:09.941697: step: 250/466, loss: 0.00030336013878695667 2023-01-24 06:36:10.694016: step: 252/466, loss: 0.018870405852794647 2023-01-24 06:36:11.460008: step: 254/466, loss: 0.0030181799083948135 2023-01-24 06:36:12.226960: step: 256/466, loss: 0.0009396415553055704 2023-01-24 06:36:13.019791: step: 258/466, loss: 0.0036075881216675043 2023-01-24 06:36:13.806451: step: 260/466, loss: 0.01562928967177868 2023-01-24 06:36:14.610474: step: 262/466, loss: 0.007501538842916489 2023-01-24 06:36:15.407824: step: 264/466, loss: 0.018335195258259773 2023-01-24 06:36:16.186306: step: 266/466, loss: 0.0022711025085300207 2023-01-24 06:36:16.965691: step: 268/466, loss: 0.007673630956560373 2023-01-24 06:36:17.766338: step: 270/466, loss: 0.00035795767325907946 2023-01-24 06:36:18.438318: step: 272/466, loss: 0.0024007440079003572 2023-01-24 06:36:19.238138: step: 274/466, loss: 0.009524238295853138 2023-01-24 06:36:19.916906: step: 276/466, loss: 0.0010717433178797364 2023-01-24 06:36:20.654695: step: 278/466, loss: 0.0006339615792967379 2023-01-24 06:36:21.408415: step: 280/466, loss: 0.00019898975733667612 2023-01-24 06:36:22.142225: step: 282/466, loss: 0.00895773060619831 2023-01-24 06:36:22.840543: step: 284/466, loss: 0.0007097829948179424 2023-01-24 06:36:23.700366: step: 286/466, loss: 5.67880088055972e-05 2023-01-24 06:36:24.475818: step: 288/466, loss: 0.00515703996643424 2023-01-24 06:36:25.297771: step: 290/466, loss: 0.021021075546741486 2023-01-24 06:36:26.044027: step: 292/466, loss: 0.01693740487098694 2023-01-24 06:36:26.806724: step: 294/466, loss: 8.952400821726769e-05 2023-01-24 06:36:27.613818: step: 296/466, loss: 0.0016920892521739006 2023-01-24 06:36:28.412748: step: 298/466, loss: 0.005159687716513872 2023-01-24 06:36:29.141408: step: 300/466, loss: 0.01627163589000702 2023-01-24 06:36:29.942723: step: 302/466, loss: 0.00884185079485178 2023-01-24 06:36:30.620235: step: 304/466, loss: 0.0006348793976940215 2023-01-24 06:36:31.394312: step: 306/466, loss: 0.00011097556125605479 2023-01-24 06:36:32.170331: step: 308/466, loss: 0.021979600191116333 2023-01-24 06:36:32.903655: step: 310/466, loss: 0.0020497848745435476 2023-01-24 06:36:33.670267: step: 312/466, loss: 0.000507302291225642 2023-01-24 06:36:34.493553: step: 314/466, loss: 0.024476177990436554 2023-01-24 06:36:35.191511: step: 316/466, loss: 0.0002447162114549428 2023-01-24 06:36:35.958102: step: 318/466, loss: 0.00038294721161946654 2023-01-24 06:36:36.648167: step: 320/466, loss: 0.0008296699961647391 2023-01-24 06:36:37.318498: step: 322/466, loss: 2.2966776214161655e-06 2023-01-24 06:36:38.071233: step: 324/466, loss: 0.023119645193219185 2023-01-24 06:36:38.824368: step: 326/466, loss: 0.1485862284898758 2023-01-24 06:36:39.642678: step: 328/466, loss: 0.002185945864766836 2023-01-24 06:36:40.394892: step: 330/466, loss: 0.008323568850755692 2023-01-24 06:36:41.113053: step: 332/466, loss: 0.006141643971204758 2023-01-24 06:36:41.774639: step: 334/466, loss: 0.021487636491656303 2023-01-24 06:36:42.498210: step: 336/466, loss: 0.0013316937256604433 2023-01-24 06:36:43.236360: step: 338/466, loss: 0.051021821796894073 2023-01-24 06:36:43.993520: step: 340/466, loss: 0.005893372930586338 2023-01-24 06:36:44.728837: step: 342/466, loss: 0.007925250567495823 2023-01-24 06:36:45.430404: step: 344/466, loss: 0.0031972848810255527 2023-01-24 06:36:46.138082: step: 346/466, loss: 0.010418121702969074 2023-01-24 06:36:47.097316: step: 348/466, loss: 0.0009884964674711227 2023-01-24 06:36:47.794718: step: 350/466, loss: 7.996588465175591e-06 2023-01-24 06:36:48.624499: step: 352/466, loss: 0.008195837959647179 2023-01-24 06:36:49.369887: step: 354/466, loss: 0.0049437265843153 2023-01-24 06:36:50.072288: step: 356/466, loss: 8.494260691804811e-05 2023-01-24 06:36:50.760475: step: 358/466, loss: 4.478584014577791e-05 2023-01-24 06:36:51.581905: step: 360/466, loss: 0.47110632061958313 2023-01-24 06:36:52.326335: step: 362/466, loss: 0.007744072936475277 2023-01-24 06:36:53.018161: step: 364/466, loss: 0.01543444860726595 2023-01-24 06:36:53.747870: step: 366/466, loss: 0.0016744795721024275 2023-01-24 06:36:54.500416: step: 368/466, loss: 0.001099259126931429 2023-01-24 06:36:55.273687: step: 370/466, loss: 9.564329957356676e-05 2023-01-24 06:36:56.071405: step: 372/466, loss: 0.0027040676213800907 2023-01-24 06:36:56.783122: step: 374/466, loss: 4.3052299588453025e-05 2023-01-24 06:36:57.564717: step: 376/466, loss: 0.00027351133758202195 2023-01-24 06:36:58.284440: step: 378/466, loss: 0.006958000361919403 2023-01-24 06:36:58.987388: step: 380/466, loss: 0.00037304943543858826 2023-01-24 06:36:59.747019: step: 382/466, loss: 6.726358697051182e-05 2023-01-24 06:37:00.488625: step: 384/466, loss: 0.0019613406620919704 2023-01-24 06:37:01.371593: step: 386/466, loss: 0.042300328612327576 2023-01-24 06:37:02.209013: step: 388/466, loss: 0.023403432220220566 2023-01-24 06:37:02.913319: step: 390/466, loss: 0.0028738633263856173 2023-01-24 06:37:03.705878: step: 392/466, loss: 0.006875161547213793 2023-01-24 06:37:04.404495: step: 394/466, loss: 0.003601239761337638 2023-01-24 06:37:05.109536: step: 396/466, loss: 0.007054249756038189 2023-01-24 06:37:05.870244: step: 398/466, loss: 0.006960573140531778 2023-01-24 06:37:06.599330: step: 400/466, loss: 0.001701996778137982 2023-01-24 06:37:07.413896: step: 402/466, loss: 0.012502540834248066 2023-01-24 06:37:08.126522: step: 404/466, loss: 0.0007834290154278278 2023-01-24 06:37:08.918177: step: 406/466, loss: 1.070505142211914 2023-01-24 06:37:09.727388: step: 408/466, loss: 0.21781472861766815 2023-01-24 06:37:10.443017: step: 410/466, loss: 0.0198195967823267 2023-01-24 06:37:11.172955: step: 412/466, loss: 0.0037474900018423796 2023-01-24 06:37:11.921654: step: 414/466, loss: 0.000687834806740284 2023-01-24 06:37:12.695762: step: 416/466, loss: 0.028054993599653244 2023-01-24 06:37:13.484991: step: 418/466, loss: 0.016235293820500374 2023-01-24 06:37:14.249748: step: 420/466, loss: 0.0015493419487029314 2023-01-24 06:37:14.990784: step: 422/466, loss: 0.02799062617123127 2023-01-24 06:37:15.649287: step: 424/466, loss: 0.002889692084863782 2023-01-24 06:37:16.387290: step: 426/466, loss: 0.026743553578853607 2023-01-24 06:37:17.120714: step: 428/466, loss: 0.005009463522583246 2023-01-24 06:37:17.884204: step: 430/466, loss: 7.741060107946396e-05 2023-01-24 06:37:18.595031: step: 432/466, loss: 0.009120047092437744 2023-01-24 06:37:19.437008: step: 434/466, loss: 0.030081048607826233 2023-01-24 06:37:20.136513: step: 436/466, loss: 0.0034114550799131393 2023-01-24 06:37:20.852954: step: 438/466, loss: 0.0009074569679796696 2023-01-24 06:37:21.602566: step: 440/466, loss: 0.0015783591661602259 2023-01-24 06:37:22.382337: step: 442/466, loss: 0.0004006644303444773 2023-01-24 06:37:23.110975: step: 444/466, loss: 0.010972261428833008 2023-01-24 06:37:23.939197: step: 446/466, loss: 0.019794577732682228 2023-01-24 06:37:24.725112: step: 448/466, loss: 0.0050488668493926525 2023-01-24 06:37:25.465053: step: 450/466, loss: 0.09048167616128922 2023-01-24 06:37:26.288456: step: 452/466, loss: 0.09162473678588867 2023-01-24 06:37:26.977767: step: 454/466, loss: 0.007137281354516745 2023-01-24 06:37:27.699671: step: 456/466, loss: 0.03742596507072449 2023-01-24 06:37:28.483373: step: 458/466, loss: 0.01613294705748558 2023-01-24 06:37:29.241999: step: 460/466, loss: 0.000966592924669385 2023-01-24 06:37:30.000155: step: 462/466, loss: 0.010030020028352737 2023-01-24 06:37:30.787381: step: 464/466, loss: 0.015448026359081268 2023-01-24 06:37:31.467314: step: 466/466, loss: 0.0014781695790588856 2023-01-24 06:37:32.267224: step: 468/466, loss: 0.06412964314222336 2023-01-24 06:37:33.020977: step: 470/466, loss: 0.002992757363244891 2023-01-24 06:37:33.652262: step: 472/466, loss: 0.00024858355754986405 2023-01-24 06:37:34.347506: step: 474/466, loss: 0.002858598018065095 2023-01-24 06:37:35.126518: step: 476/466, loss: 0.010369786061346531 2023-01-24 06:37:35.884242: step: 478/466, loss: 0.0003029539075214416 2023-01-24 06:37:36.560991: step: 480/466, loss: 0.014699235558509827 2023-01-24 06:37:37.404888: step: 482/466, loss: 0.06207871064543724 2023-01-24 06:37:38.159870: step: 484/466, loss: 0.0003268167783971876 2023-01-24 06:37:38.868073: step: 486/466, loss: 0.04465591162443161 2023-01-24 06:37:39.651431: step: 488/466, loss: 0.010574414394795895 2023-01-24 06:37:40.320659: step: 490/466, loss: 8.157742558978498e-05 2023-01-24 06:37:41.120164: step: 492/466, loss: 0.033157918602228165 2023-01-24 06:37:41.927174: step: 494/466, loss: 0.001525003812275827 2023-01-24 06:37:42.771457: step: 496/466, loss: 8.820889343041927e-05 2023-01-24 06:37:43.591295: step: 498/466, loss: 0.05314020812511444 2023-01-24 06:37:44.418664: step: 500/466, loss: 2.079984188079834 2023-01-24 06:37:45.190460: step: 502/466, loss: 2.2129243006929755e-05 2023-01-24 06:37:45.950543: step: 504/466, loss: 0.03799187391996384 2023-01-24 06:37:46.655199: step: 506/466, loss: 0.0069936420768499374 2023-01-24 06:37:47.413015: step: 508/466, loss: 0.08669064193964005 2023-01-24 06:37:48.125030: step: 510/466, loss: 0.0012966989306733012 2023-01-24 06:37:48.833217: step: 512/466, loss: 0.7818902730941772 2023-01-24 06:37:49.555781: step: 514/466, loss: 0.00016538219642825425 2023-01-24 06:37:50.301247: step: 516/466, loss: 0.017313748598098755 2023-01-24 06:37:51.025524: step: 518/466, loss: 0.0070531475357711315 2023-01-24 06:37:51.756876: step: 520/466, loss: 0.00020073176710866392 2023-01-24 06:37:52.502703: step: 522/466, loss: 0.003936620429158211 2023-01-24 06:37:53.266698: step: 524/466, loss: 0.0012498348951339722 2023-01-24 06:37:54.013045: step: 526/466, loss: 0.025787923485040665 2023-01-24 06:37:54.664919: step: 528/466, loss: 0.00017150120402220637 2023-01-24 06:37:55.344326: step: 530/466, loss: 0.008530835621058941 2023-01-24 06:37:56.150074: step: 532/466, loss: 0.0306110680103302 2023-01-24 06:37:56.922535: step: 534/466, loss: 0.013904067687690258 2023-01-24 06:37:57.766878: step: 536/466, loss: 0.2676345109939575 2023-01-24 06:37:58.552848: step: 538/466, loss: 0.029973825439810753 2023-01-24 06:37:59.347808: step: 540/466, loss: 0.0027498030103743076 2023-01-24 06:38:00.160457: step: 542/466, loss: 0.0015113947447389364 2023-01-24 06:38:00.959980: step: 544/466, loss: 0.01900675520300865 2023-01-24 06:38:01.685257: step: 546/466, loss: 0.0037392114754766226 2023-01-24 06:38:02.434686: step: 548/466, loss: 0.0005670114187523723 2023-01-24 06:38:03.209122: step: 550/466, loss: 0.012769817374646664 2023-01-24 06:38:03.985296: step: 552/466, loss: 0.006172207649797201 2023-01-24 06:38:04.720906: step: 554/466, loss: 0.07989545166492462 2023-01-24 06:38:05.492058: step: 556/466, loss: 0.0014416680205613375 2023-01-24 06:38:06.238865: step: 558/466, loss: 0.0365539975464344 2023-01-24 06:38:06.968511: step: 560/466, loss: 0.02450944483280182 2023-01-24 06:38:07.665633: step: 562/466, loss: 0.0024222354404628277 2023-01-24 06:38:08.377137: step: 564/466, loss: 0.023588059470057487 2023-01-24 06:38:09.075657: step: 566/466, loss: 0.014570425264537334 2023-01-24 06:38:09.785012: step: 568/466, loss: 0.004594041034579277 2023-01-24 06:38:10.599009: step: 570/466, loss: 5.841199163114652e-05 2023-01-24 06:38:11.421064: step: 572/466, loss: 0.11277756839990616 2023-01-24 06:38:12.154267: step: 574/466, loss: 0.0013348526554182172 2023-01-24 06:38:12.841325: step: 576/466, loss: 0.0006081808242015541 2023-01-24 06:38:13.546774: step: 578/466, loss: 0.0010401842882856727 2023-01-24 06:38:14.290187: step: 580/466, loss: 0.03048798255622387 2023-01-24 06:38:15.058744: step: 582/466, loss: 0.011533768847584724 2023-01-24 06:38:15.779781: step: 584/466, loss: 0.02726082131266594 2023-01-24 06:38:16.514802: step: 586/466, loss: 0.003118099644780159 2023-01-24 06:38:17.225748: step: 588/466, loss: 0.004390220623463392 2023-01-24 06:38:18.026858: step: 590/466, loss: 0.004966802895069122 2023-01-24 06:38:18.768052: step: 592/466, loss: 0.0012968675000593066 2023-01-24 06:38:19.471067: step: 594/466, loss: 0.001784640597179532 2023-01-24 06:38:20.287194: step: 596/466, loss: 0.00022365168842952698 2023-01-24 06:38:21.049689: step: 598/466, loss: 0.33438318967819214 2023-01-24 06:38:22.029207: step: 600/466, loss: 3.4098749893018976e-05 2023-01-24 06:38:22.814594: step: 602/466, loss: 0.43303313851356506 2023-01-24 06:38:23.511104: step: 604/466, loss: 0.0001014567751553841 2023-01-24 06:38:24.276030: step: 606/466, loss: 0.00971634965389967 2023-01-24 06:38:25.008369: step: 608/466, loss: 0.0004996024654246867 2023-01-24 06:38:25.774135: step: 610/466, loss: 0.0024302061647176743 2023-01-24 06:38:26.522475: step: 612/466, loss: 0.0526101216673851 2023-01-24 06:38:27.324700: step: 614/466, loss: 0.00908119697123766 2023-01-24 06:38:28.071275: step: 616/466, loss: 0.0009205329697579145 2023-01-24 06:38:28.796307: step: 618/466, loss: 0.02464241161942482 2023-01-24 06:38:29.705037: step: 620/466, loss: 0.006661287043243647 2023-01-24 06:38:30.434689: step: 622/466, loss: 0.024558162316679955 2023-01-24 06:38:31.098520: step: 624/466, loss: 0.0010383835760876536 2023-01-24 06:38:31.936809: step: 626/466, loss: 0.0002190878294641152 2023-01-24 06:38:32.652789: step: 628/466, loss: 0.01551087200641632 2023-01-24 06:38:33.387764: step: 630/466, loss: 0.013792922720313072 2023-01-24 06:38:34.083101: step: 632/466, loss: 0.011820226907730103 2023-01-24 06:38:34.807641: step: 634/466, loss: 0.009754060767591 2023-01-24 06:38:35.592787: step: 636/466, loss: 0.0007316062110476196 2023-01-24 06:38:36.353267: step: 638/466, loss: 0.007642973214387894 2023-01-24 06:38:37.045056: step: 640/466, loss: 0.001830677385441959 2023-01-24 06:38:37.879391: step: 642/466, loss: 0.0008155608084052801 2023-01-24 06:38:38.580117: step: 644/466, loss: 0.002872632583603263 2023-01-24 06:38:39.282134: step: 646/466, loss: 0.00423818826675415 2023-01-24 06:38:40.063738: step: 648/466, loss: 0.0069534857757389545 2023-01-24 06:38:40.847090: step: 650/466, loss: 0.0025988135021179914 2023-01-24 06:38:41.613510: step: 652/466, loss: 0.02148084156215191 2023-01-24 06:38:42.393114: step: 654/466, loss: 0.003581245429813862 2023-01-24 06:38:43.093852: step: 656/466, loss: 0.0006101642502471805 2023-01-24 06:38:43.982482: step: 658/466, loss: 0.013008118607103825 2023-01-24 06:38:44.812507: step: 660/466, loss: 0.011089122854173183 2023-01-24 06:38:45.595749: step: 662/466, loss: 7.312051457120106e-05 2023-01-24 06:38:46.428697: step: 664/466, loss: 0.000744556135032326 2023-01-24 06:38:47.156700: step: 666/466, loss: 0.0013610776513814926 2023-01-24 06:38:47.892769: step: 668/466, loss: 1.1327815055847168 2023-01-24 06:38:48.579879: step: 670/466, loss: 0.0048438976518809795 2023-01-24 06:38:49.323109: step: 672/466, loss: 8.07375690783374e-05 2023-01-24 06:38:50.044281: step: 674/466, loss: 0.041891444474458694 2023-01-24 06:38:50.829143: step: 676/466, loss: 0.0002708226384129375 2023-01-24 06:38:51.555936: step: 678/466, loss: 0.0003661640512291342 2023-01-24 06:38:52.273107: step: 680/466, loss: 0.0008033191552385688 2023-01-24 06:38:52.983775: step: 682/466, loss: 0.0003179586201440543 2023-01-24 06:38:53.655916: step: 684/466, loss: 0.04391786456108093 2023-01-24 06:38:54.551925: step: 686/466, loss: 0.0032935445196926594 2023-01-24 06:38:55.322462: step: 688/466, loss: 0.0052387891337275505 2023-01-24 06:38:56.137902: step: 690/466, loss: 0.031956274062395096 2023-01-24 06:38:56.908582: step: 692/466, loss: 0.032561078667640686 2023-01-24 06:38:57.651569: step: 694/466, loss: 0.030473001301288605 2023-01-24 06:38:58.432504: step: 696/466, loss: 0.004874146543443203 2023-01-24 06:38:59.125503: step: 698/466, loss: 0.00477445125579834 2023-01-24 06:38:59.936085: step: 700/466, loss: 0.0238727405667305 2023-01-24 06:39:00.575088: step: 702/466, loss: 0.001801260863430798 2023-01-24 06:39:01.253953: step: 704/466, loss: 0.0008925177971832454 2023-01-24 06:39:02.004282: step: 706/466, loss: 0.00024452884099446237 2023-01-24 06:39:02.748253: step: 708/466, loss: 0.0016748437192291021 2023-01-24 06:39:03.570853: step: 710/466, loss: 0.00023085040447767824 2023-01-24 06:39:04.234319: step: 712/466, loss: 0.008219665847718716 2023-01-24 06:39:05.099220: step: 714/466, loss: 0.07780952006578445 2023-01-24 06:39:06.020992: step: 716/466, loss: 0.025093533098697662 2023-01-24 06:39:06.715774: step: 718/466, loss: 0.002227090997621417 2023-01-24 06:39:07.462149: step: 720/466, loss: 0.042245469987392426 2023-01-24 06:39:08.300640: step: 722/466, loss: 0.03828979283571243 2023-01-24 06:39:09.059208: step: 724/466, loss: 0.0036812573671340942 2023-01-24 06:39:09.827836: step: 726/466, loss: 0.01815827749669552 2023-01-24 06:39:10.634039: step: 728/466, loss: 0.0014923752751201391 2023-01-24 06:39:11.403331: step: 730/466, loss: 0.0003263081598561257 2023-01-24 06:39:12.125369: step: 732/466, loss: 0.012029914185404778 2023-01-24 06:39:12.895096: step: 734/466, loss: 2.6921272365143523e-05 2023-01-24 06:39:13.653942: step: 736/466, loss: 0.21548262238502502 2023-01-24 06:39:14.478149: step: 738/466, loss: 0.0009322597761638463 2023-01-24 06:39:15.134386: step: 740/466, loss: 9.577017044648528e-05 2023-01-24 06:39:15.780229: step: 742/466, loss: 0.012453495524823666 2023-01-24 06:39:16.493128: step: 744/466, loss: 0.0031533828005194664 2023-01-24 06:39:17.321565: step: 746/466, loss: 0.0029514674097299576 2023-01-24 06:39:18.057381: step: 748/466, loss: 0.00029309350065886974 2023-01-24 06:39:18.892230: step: 750/466, loss: 0.033656831830739975 2023-01-24 06:39:19.626943: step: 752/466, loss: 0.009833025746047497 2023-01-24 06:39:20.326561: step: 754/466, loss: 0.020311174914240837 2023-01-24 06:39:21.035787: step: 756/466, loss: 0.007364567369222641 2023-01-24 06:39:21.770614: step: 758/466, loss: 0.03312944993376732 2023-01-24 06:39:22.563931: step: 760/466, loss: 0.002991229295730591 2023-01-24 06:39:23.311033: step: 762/466, loss: 0.0003226816188544035 2023-01-24 06:39:23.998399: step: 764/466, loss: 0.0016692840727046132 2023-01-24 06:39:24.662494: step: 766/466, loss: 0.0013053424190729856 2023-01-24 06:39:25.474862: step: 768/466, loss: 0.0428982712328434 2023-01-24 06:39:26.336188: step: 770/466, loss: 0.7088618278503418 2023-01-24 06:39:27.084073: step: 772/466, loss: 0.014309249818325043 2023-01-24 06:39:27.805145: step: 774/466, loss: 0.037123847752809525 2023-01-24 06:39:28.561225: step: 776/466, loss: 0.19105984270572662 2023-01-24 06:39:29.217028: step: 778/466, loss: 0.0012500167358666658 2023-01-24 06:39:29.953279: step: 780/466, loss: 0.01712871342897415 2023-01-24 06:39:30.627106: step: 782/466, loss: 0.005755425896495581 2023-01-24 06:39:31.417629: step: 784/466, loss: 0.012732706032693386 2023-01-24 06:39:32.225997: step: 786/466, loss: 0.010109102353453636 2023-01-24 06:39:32.931992: step: 788/466, loss: 0.8606966733932495 2023-01-24 06:39:33.671948: step: 790/466, loss: 0.012393337674438953 2023-01-24 06:39:34.523324: step: 792/466, loss: 0.0015637580072507262 2023-01-24 06:39:35.276838: step: 794/466, loss: 0.0003238733916077763 2023-01-24 06:39:35.954002: step: 796/466, loss: 0.007022204343229532 2023-01-24 06:39:36.729858: step: 798/466, loss: 0.017238151282072067 2023-01-24 06:39:37.486595: step: 800/466, loss: 0.003413428319618106 2023-01-24 06:39:38.258512: step: 802/466, loss: 0.00270936731249094 2023-01-24 06:39:39.085359: step: 804/466, loss: 0.0003040628507733345 2023-01-24 06:39:39.834059: step: 806/466, loss: 0.010912488214671612 2023-01-24 06:39:40.602649: step: 808/466, loss: 0.019548660144209862 2023-01-24 06:39:41.314767: step: 810/466, loss: 0.0020851590670645237 2023-01-24 06:39:42.194839: step: 812/466, loss: 0.01595069281756878 2023-01-24 06:39:43.043765: step: 814/466, loss: 0.006865901872515678 2023-01-24 06:39:43.850379: step: 816/466, loss: 0.10417701303958893 2023-01-24 06:39:44.604470: step: 818/466, loss: 0.017476389184594154 2023-01-24 06:39:45.428041: step: 820/466, loss: 0.0005563123850151896 2023-01-24 06:39:46.296398: step: 822/466, loss: 0.003879000199958682 2023-01-24 06:39:47.023787: step: 824/466, loss: 0.023705052211880684 2023-01-24 06:39:47.749592: step: 826/466, loss: 0.028963197022676468 2023-01-24 06:39:48.494945: step: 828/466, loss: 0.02524581365287304 2023-01-24 06:39:49.335153: step: 830/466, loss: 0.1719159483909607 2023-01-24 06:39:50.193745: step: 832/466, loss: 0.0010757212294265628 2023-01-24 06:39:50.914183: step: 834/466, loss: 0.025246674194931984 2023-01-24 06:39:51.660835: step: 836/466, loss: 0.020809084177017212 2023-01-24 06:39:52.448013: step: 838/466, loss: 0.012798292562365532 2023-01-24 06:39:53.199583: step: 840/466, loss: 0.0154691431671381 2023-01-24 06:39:53.935646: step: 842/466, loss: 0.004644252825528383 2023-01-24 06:39:54.652238: step: 844/466, loss: 0.00046820956049486995 2023-01-24 06:39:55.421709: step: 846/466, loss: 2.369272470474243 2023-01-24 06:39:56.221218: step: 848/466, loss: 0.006457947660237551 2023-01-24 06:39:56.939462: step: 850/466, loss: 0.020441725850105286 2023-01-24 06:39:57.730415: step: 852/466, loss: 0.03185777738690376 2023-01-24 06:39:58.534329: step: 854/466, loss: 0.0019530951976776123 2023-01-24 06:39:59.350889: step: 856/466, loss: 0.0021320057567209005 2023-01-24 06:40:00.104837: step: 858/466, loss: 0.6590021252632141 2023-01-24 06:40:00.809979: step: 860/466, loss: 0.028135813772678375 2023-01-24 06:40:01.624866: step: 862/466, loss: 0.00792783871293068 2023-01-24 06:40:02.401570: step: 864/466, loss: 0.030260713770985603 2023-01-24 06:40:03.108393: step: 866/466, loss: 0.010048595257103443 2023-01-24 06:40:03.886209: step: 868/466, loss: 0.020413830876350403 2023-01-24 06:40:04.670065: step: 870/466, loss: 0.006550361402332783 2023-01-24 06:40:05.316513: step: 872/466, loss: 0.003479516599327326 2023-01-24 06:40:06.039612: step: 874/466, loss: 0.1767842024564743 2023-01-24 06:40:06.767923: step: 876/466, loss: 0.00718360161408782 2023-01-24 06:40:07.575023: step: 878/466, loss: 0.0019658098462969065 2023-01-24 06:40:08.322915: step: 880/466, loss: 0.04372788965702057 2023-01-24 06:40:09.046520: step: 882/466, loss: 0.15487949550151825 2023-01-24 06:40:09.734041: step: 884/466, loss: 0.0005945255979895592 2023-01-24 06:40:10.459455: step: 886/466, loss: 0.0019250859040766954 2023-01-24 06:40:11.233370: step: 888/466, loss: 0.011378058232367039 2023-01-24 06:40:11.944483: step: 890/466, loss: 0.0009098451700992882 2023-01-24 06:40:12.701993: step: 892/466, loss: 0.20266282558441162 2023-01-24 06:40:13.508077: step: 894/466, loss: 0.0019502755021676421 2023-01-24 06:40:14.248496: step: 896/466, loss: 0.0023257662542164326 2023-01-24 06:40:14.915841: step: 898/466, loss: 0.002615791978314519 2023-01-24 06:40:15.648508: step: 900/466, loss: 0.6581990122795105 2023-01-24 06:40:16.404719: step: 902/466, loss: 0.023657280951738358 2023-01-24 06:40:17.277502: step: 904/466, loss: 0.02864796854555607 2023-01-24 06:40:18.015413: step: 906/466, loss: 0.0019437010632827878 2023-01-24 06:40:18.780659: step: 908/466, loss: 0.012087050825357437 2023-01-24 06:40:19.528417: step: 910/466, loss: 0.02876484952867031 2023-01-24 06:40:20.311311: step: 912/466, loss: 0.007884223945438862 2023-01-24 06:40:20.916637: step: 914/466, loss: 0.0017012872267514467 2023-01-24 06:40:21.616483: step: 916/466, loss: 0.0029429281130433083 2023-01-24 06:40:22.346761: step: 918/466, loss: 0.08260404318571091 2023-01-24 06:40:23.094303: step: 920/466, loss: 0.022113988175988197 2023-01-24 06:40:23.838909: step: 922/466, loss: 0.0013095721369609237 2023-01-24 06:40:24.503329: step: 924/466, loss: 0.001993898767977953 2023-01-24 06:40:25.361176: step: 926/466, loss: 0.0019014434656128287 2023-01-24 06:40:26.052286: step: 928/466, loss: 0.004830385558307171 2023-01-24 06:40:26.777014: step: 930/466, loss: 0.0004450043197721243 2023-01-24 06:40:27.451697: step: 932/466, loss: 0.0031805401667952538 ================================================== Loss: 0.051 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34063103095568936, 'r': 0.3283502157978941, 'f1': 0.33437790091882164}, 'combined': 0.24638371646650015, 'epoch': 35} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3659093422271125, 'r': 0.2961519286309559, 'f1': 0.3273556759005011}, 'combined': 0.20120397640713725, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3149050525146073, 'r': 0.3268559083975146, 'f1': 0.32076920619271915}, 'combined': 0.23635625719463516, 'epoch': 35} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35551952051882296, 'r': 0.2969851107280289, 'f1': 0.323626834542158}, 'combined': 0.1989121031820093, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35347101972101974, 'r': 0.3320078837986808, 'f1': 0.3424034339763303}, 'combined': 0.25229726714045386, 'epoch': 35} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.36615625662975515, 'r': 0.2890540292805086, 'f1': 0.3230686196510479}, 'combined': 0.19954238272564725, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3560606060606061, 'r': 0.3357142857142857, 'f1': 0.34558823529411764}, 'combined': 0.23039215686274508, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2986111111111111, 'r': 0.4673913043478261, 'f1': 0.3644067796610169}, 'combined': 0.18220338983050846, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3561785714285714, 'r': 0.3210338167525074, 'f1': 0.33769425434844597}, 'combined': 0.24882734530938122, 'epoch': 33} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37857267048883675, 'r': 0.28679747764305813, 'f1': 0.32635575042141096}, 'combined': 0.20058938806389162, 'epoch': 33} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.325, 'f1': 0.3729508196721312}, 'combined': 0.24863387978142076, 'epoch': 33} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30530119988658644, 'r': 0.3319498814706148, 'f1': 0.3180683409727528}, 'combined': 0.2343661459799231, 'epoch': 15} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35260847783740157, 'r': 0.2936366960153751, 'f1': 0.320431912247511}, 'combined': 0.19694839484481164, 'epoch': 15} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 15} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3600931677018634, 'r': 0.3300284629981025, 'f1': 0.34440594059405943}, 'combined': 0.25377279833246486, 'epoch': 29} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3732928220606822, 'r': 0.2841675462053636, 'f1': 0.3226892764375249}, 'combined': 0.19930808250553012, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 29} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:43:11.056546: step: 2/466, loss: 6.959749589441344e-05 2023-01-24 06:43:11.745339: step: 4/466, loss: 0.052603382617235184 2023-01-24 06:43:12.590071: step: 6/466, loss: 0.007829297333955765 2023-01-24 06:43:13.385290: step: 8/466, loss: 0.0017035205382853746 2023-01-24 06:43:14.099092: step: 10/466, loss: 2.6053298824990634e-06 2023-01-24 06:43:14.813007: step: 12/466, loss: 0.0007733256788924336 2023-01-24 06:43:15.521916: step: 14/466, loss: 0.0004052332369610667 2023-01-24 06:43:16.300889: step: 16/466, loss: 1.8812017515301704e-05 2023-01-24 06:43:16.987006: step: 18/466, loss: 0.01579943485558033 2023-01-24 06:43:17.680833: step: 20/466, loss: 0.0032925379928201437 2023-01-24 06:43:18.538042: step: 22/466, loss: 0.09599200636148453 2023-01-24 06:43:19.348851: step: 24/466, loss: 0.03901425004005432 2023-01-24 06:43:20.168535: step: 26/466, loss: 0.00021684799867216498 2023-01-24 06:43:20.926053: step: 28/466, loss: 0.04132521525025368 2023-01-24 06:43:21.654872: step: 30/466, loss: 0.001680803601630032 2023-01-24 06:43:22.323591: step: 32/466, loss: 0.02639612928032875 2023-01-24 06:43:23.084626: step: 34/466, loss: 0.009096384048461914 2023-01-24 06:43:23.897870: step: 36/466, loss: 0.000210379614145495 2023-01-24 06:43:24.669106: step: 38/466, loss: 0.007331050466746092 2023-01-24 06:43:25.463932: step: 40/466, loss: 0.00036271740100346506 2023-01-24 06:43:26.170651: step: 42/466, loss: 0.003784711705520749 2023-01-24 06:43:26.906623: step: 44/466, loss: 0.006800322327762842 2023-01-24 06:43:27.622133: step: 46/466, loss: 0.0009321786346845329 2023-01-24 06:43:28.314701: step: 48/466, loss: 0.005665746051818132 2023-01-24 06:43:28.997143: step: 50/466, loss: 0.093864306807518 2023-01-24 06:43:29.761567: step: 52/466, loss: 0.003616434521973133 2023-01-24 06:43:30.528513: step: 54/466, loss: 0.02172437310218811 2023-01-24 06:43:31.290377: step: 56/466, loss: 0.010583080351352692 2023-01-24 06:43:32.102250: step: 58/466, loss: 0.14625628292560577 2023-01-24 06:43:32.881676: step: 60/466, loss: 0.003461432410404086 2023-01-24 06:43:33.691112: step: 62/466, loss: 0.05907382443547249 2023-01-24 06:43:34.445204: step: 64/466, loss: 0.0032302553299814463 2023-01-24 06:43:35.181945: step: 66/466, loss: 0.02619274891912937 2023-01-24 06:43:36.071239: step: 68/466, loss: 0.30181601643562317 2023-01-24 06:43:36.797644: step: 70/466, loss: 0.0006808125763200223 2023-01-24 06:43:37.576046: step: 72/466, loss: 0.0056705838069319725 2023-01-24 06:43:38.291865: step: 74/466, loss: 0.0020186409819871187 2023-01-24 06:43:39.026118: step: 76/466, loss: 0.001368081197142601 2023-01-24 06:43:39.755151: step: 78/466, loss: 0.0015106059145182371 2023-01-24 06:43:40.465606: step: 80/466, loss: 0.0026107397861778736 2023-01-24 06:43:41.207460: step: 82/466, loss: 0.0015447793994098902 2023-01-24 06:43:41.963308: step: 84/466, loss: 0.002406883519142866 2023-01-24 06:43:42.665787: step: 86/466, loss: 0.0089812520891428 2023-01-24 06:43:43.406211: step: 88/466, loss: 0.005512211471796036 2023-01-24 06:43:44.084221: step: 90/466, loss: 0.002036134712398052 2023-01-24 06:43:44.873986: step: 92/466, loss: 0.010609150864183903 2023-01-24 06:43:45.576143: step: 94/466, loss: 8.391762094106525e-05 2023-01-24 06:43:46.291858: step: 96/466, loss: 0.001140699489042163 2023-01-24 06:43:47.186682: step: 98/466, loss: 0.08036769926548004 2023-01-24 06:43:48.014666: step: 100/466, loss: 0.08868908882141113 2023-01-24 06:43:48.811379: step: 102/466, loss: 0.03348981961607933 2023-01-24 06:43:49.561609: step: 104/466, loss: 0.004189977888017893 2023-01-24 06:43:50.402242: step: 106/466, loss: 0.27573361992836 2023-01-24 06:43:51.218450: step: 108/466, loss: 0.003680627327412367 2023-01-24 06:43:52.049739: step: 110/466, loss: 0.0007431924459524453 2023-01-24 06:43:52.776672: step: 112/466, loss: 0.0027506130281835794 2023-01-24 06:43:53.476760: step: 114/466, loss: 0.0016330421203747392 2023-01-24 06:43:54.174635: step: 116/466, loss: 0.016300100833177567 2023-01-24 06:43:54.928344: step: 118/466, loss: 3.5442230000626296e-05 2023-01-24 06:43:55.732855: step: 120/466, loss: 0.06424372643232346 2023-01-24 06:43:56.455626: step: 122/466, loss: 0.016632402315735817 2023-01-24 06:43:57.210813: step: 124/466, loss: 0.00016443005006294698 2023-01-24 06:43:57.954250: step: 126/466, loss: 0.0009986787335947156 2023-01-24 06:43:58.629509: step: 128/466, loss: 0.0072743832133710384 2023-01-24 06:43:59.393772: step: 130/466, loss: 0.0005441168905235827 2023-01-24 06:44:00.177474: step: 132/466, loss: 0.0016753192758187652 2023-01-24 06:44:00.964932: step: 134/466, loss: 0.013735419139266014 2023-01-24 06:44:01.768815: step: 136/466, loss: 0.013947011902928352 2023-01-24 06:44:02.527228: step: 138/466, loss: 7.987304707057774e-05 2023-01-24 06:44:03.325129: step: 140/466, loss: 6.819606642238796e-05 2023-01-24 06:44:04.127589: step: 142/466, loss: 0.0016738786362111568 2023-01-24 06:44:04.864540: step: 144/466, loss: 0.03004748746752739 2023-01-24 06:44:05.718145: step: 146/466, loss: 0.007570676505565643 2023-01-24 06:44:06.565959: step: 148/466, loss: 0.00446693692356348 2023-01-24 06:44:07.306086: step: 150/466, loss: 0.0004110346781089902 2023-01-24 06:44:08.065372: step: 152/466, loss: 0.0028237239457666874 2023-01-24 06:44:08.796406: step: 154/466, loss: 0.0074403490871191025 2023-01-24 06:44:09.476776: step: 156/466, loss: 0.0017213046085089445 2023-01-24 06:44:10.173692: step: 158/466, loss: 0.004246914759278297 2023-01-24 06:44:11.017776: step: 160/466, loss: 6.689901056233793e-05 2023-01-24 06:44:11.742295: step: 162/466, loss: 0.13011637330055237 2023-01-24 06:44:12.561355: step: 164/466, loss: 0.01242199819535017 2023-01-24 06:44:13.262216: step: 166/466, loss: 0.0013810870004817843 2023-01-24 06:44:13.943656: step: 168/466, loss: 0.0005429077427834272 2023-01-24 06:44:14.665986: step: 170/466, loss: 0.02831660956144333 2023-01-24 06:44:15.371078: step: 172/466, loss: 0.17097602784633636 2023-01-24 06:44:16.136621: step: 174/466, loss: 0.17554019391536713 2023-01-24 06:44:16.843020: step: 176/466, loss: 0.0023959302343428135 2023-01-24 06:44:17.513407: step: 178/466, loss: 0.0022369904909282923 2023-01-24 06:44:18.301242: step: 180/466, loss: 0.0011512894416227937 2023-01-24 06:44:19.005623: step: 182/466, loss: 0.0018296147463843226 2023-01-24 06:44:19.711250: step: 184/466, loss: 0.00896126963198185 2023-01-24 06:44:20.488655: step: 186/466, loss: 0.014478500932455063 2023-01-24 06:44:21.365749: step: 188/466, loss: 0.016855215653777122 2023-01-24 06:44:22.129688: step: 190/466, loss: 0.00267539219930768 2023-01-24 06:44:22.896720: step: 192/466, loss: 0.006435306742787361 2023-01-24 06:44:23.818330: step: 194/466, loss: 0.006483323406428099 2023-01-24 06:44:24.657911: step: 196/466, loss: 0.0008203312754631042 2023-01-24 06:44:25.338765: step: 198/466, loss: 0.001442829379811883 2023-01-24 06:44:26.076276: step: 200/466, loss: 0.010103004053235054 2023-01-24 06:44:26.790746: step: 202/466, loss: 0.09546792507171631 2023-01-24 06:44:27.574314: step: 204/466, loss: 0.0025321985594928265 2023-01-24 06:44:28.317769: step: 206/466, loss: 0.11714517325162888 2023-01-24 06:44:29.118769: step: 208/466, loss: 0.00020245101768523455 2023-01-24 06:44:29.798329: step: 210/466, loss: 0.040933359414339066 2023-01-24 06:44:30.516909: step: 212/466, loss: 2.2363874450093135e-05 2023-01-24 06:44:31.316403: step: 214/466, loss: 0.022035591304302216 2023-01-24 06:44:32.134675: step: 216/466, loss: 0.032153207808732986 2023-01-24 06:44:32.851018: step: 218/466, loss: 0.0018491385271772742 2023-01-24 06:44:33.566260: step: 220/466, loss: 0.0022317946422845125 2023-01-24 06:44:34.276844: step: 222/466, loss: 0.0013667675666511059 2023-01-24 06:44:35.067400: step: 224/466, loss: 0.003354973392561078 2023-01-24 06:44:35.830685: step: 226/466, loss: 0.02265651896595955 2023-01-24 06:44:36.553364: step: 228/466, loss: 0.00097334646852687 2023-01-24 06:44:37.314877: step: 230/466, loss: 0.0008900273824110627 2023-01-24 06:44:38.141530: step: 232/466, loss: 0.0070862616412341595 2023-01-24 06:44:38.825624: step: 234/466, loss: 0.0008075744262896478 2023-01-24 06:44:39.524453: step: 236/466, loss: 0.006123298313468695 2023-01-24 06:44:40.302224: step: 238/466, loss: 0.006236757151782513 2023-01-24 06:44:41.075143: step: 240/466, loss: 0.004826977849006653 2023-01-24 06:44:41.790305: step: 242/466, loss: 0.0019498377805575728 2023-01-24 06:44:42.526832: step: 244/466, loss: 0.0002522287250030786 2023-01-24 06:44:43.314300: step: 246/466, loss: 0.02219093032181263 2023-01-24 06:44:44.136461: step: 248/466, loss: 0.000634671829175204 2023-01-24 06:44:44.876418: step: 250/466, loss: 0.0149555504322052 2023-01-24 06:44:45.651451: step: 252/466, loss: 0.04596257209777832 2023-01-24 06:44:46.378092: step: 254/466, loss: 0.022731617093086243 2023-01-24 06:44:47.117107: step: 256/466, loss: 0.009812143631279469 2023-01-24 06:44:47.849952: step: 258/466, loss: 0.10636556148529053 2023-01-24 06:44:48.695003: step: 260/466, loss: 0.0033983385656028986 2023-01-24 06:44:49.413032: step: 262/466, loss: 0.01362368743866682 2023-01-24 06:44:50.155342: step: 264/466, loss: 0.06555137783288956 2023-01-24 06:44:50.941958: step: 266/466, loss: 0.014873780310153961 2023-01-24 06:44:51.661988: step: 268/466, loss: 0.02528764307498932 2023-01-24 06:44:52.402796: step: 270/466, loss: 0.0005162880406714976 2023-01-24 06:44:53.165449: step: 272/466, loss: 0.006696953438222408 2023-01-24 06:44:53.877750: step: 274/466, loss: 0.01726466603577137 2023-01-24 06:44:54.627155: step: 276/466, loss: 0.012693598866462708 2023-01-24 06:44:55.406376: step: 278/466, loss: 1.0962568521499634 2023-01-24 06:44:56.174386: step: 280/466, loss: 0.0010377082508057356 2023-01-24 06:44:56.939536: step: 282/466, loss: 0.003022135468199849 2023-01-24 06:44:57.652597: step: 284/466, loss: 0.0012633508304134011 2023-01-24 06:44:58.488805: step: 286/466, loss: 0.012754724361002445 2023-01-24 06:44:59.150229: step: 288/466, loss: 0.0007646095473319292 2023-01-24 06:44:59.907153: step: 290/466, loss: 0.0002800831862259656 2023-01-24 06:45:00.628185: step: 292/466, loss: 0.0013062810758128762 2023-01-24 06:45:01.393306: step: 294/466, loss: 0.0002482525887899101 2023-01-24 06:45:02.243817: step: 296/466, loss: 2.8109747290727682e-05 2023-01-24 06:45:02.903380: step: 298/466, loss: 0.006925472058355808 2023-01-24 06:45:03.728511: step: 300/466, loss: 0.28649452328681946 2023-01-24 06:45:04.499042: step: 302/466, loss: 0.0009704851545393467 2023-01-24 06:45:05.217988: step: 304/466, loss: 0.005734934937208891 2023-01-24 06:45:05.932869: step: 306/466, loss: 0.019645430147647858 2023-01-24 06:45:06.720327: step: 308/466, loss: 0.0017498302040621638 2023-01-24 06:45:07.541915: step: 310/466, loss: 0.0007500092615373433 2023-01-24 06:45:08.242152: step: 312/466, loss: 0.010207954794168472 2023-01-24 06:45:08.998685: step: 314/466, loss: 0.08134476840496063 2023-01-24 06:45:09.783657: step: 316/466, loss: 0.010409084148705006 2023-01-24 06:45:10.454931: step: 318/466, loss: 0.0070197382010519505 2023-01-24 06:45:11.174482: step: 320/466, loss: 0.0004017484898213297 2023-01-24 06:45:11.943899: step: 322/466, loss: 0.051938921213150024 2023-01-24 06:45:12.672858: step: 324/466, loss: 0.012206167913973331 2023-01-24 06:45:13.389257: step: 326/466, loss: 0.03690803796052933 2023-01-24 06:45:14.104131: step: 328/466, loss: 0.0020243311300873756 2023-01-24 06:45:14.836415: step: 330/466, loss: 0.031494155526161194 2023-01-24 06:45:15.518711: step: 332/466, loss: 0.013773403130471706 2023-01-24 06:45:16.263477: step: 334/466, loss: 0.006675016600638628 2023-01-24 06:45:17.030145: step: 336/466, loss: 0.08140300959348679 2023-01-24 06:45:17.702396: step: 338/466, loss: 0.00034415972186252475 2023-01-24 06:45:18.424908: step: 340/466, loss: 0.002552608260884881 2023-01-24 06:45:19.113811: step: 342/466, loss: 0.00010834995919140056 2023-01-24 06:45:19.841242: step: 344/466, loss: 0.00013172495528124273 2023-01-24 06:45:20.612218: step: 346/466, loss: 0.008109035901725292 2023-01-24 06:45:21.387879: step: 348/466, loss: 0.31519660353660583 2023-01-24 06:45:22.194860: step: 350/466, loss: 0.002458364935591817 2023-01-24 06:45:22.960870: step: 352/466, loss: 0.010051483288407326 2023-01-24 06:45:23.729527: step: 354/466, loss: 0.015387998893857002 2023-01-24 06:45:24.486764: step: 356/466, loss: 0.015859654173254967 2023-01-24 06:45:25.280538: step: 358/466, loss: 0.00026099529350176454 2023-01-24 06:45:26.039798: step: 360/466, loss: 0.02173546329140663 2023-01-24 06:45:26.773781: step: 362/466, loss: 0.03111352026462555 2023-01-24 06:45:27.466827: step: 364/466, loss: 0.030289320275187492 2023-01-24 06:45:28.247195: step: 366/466, loss: 0.00595248956233263 2023-01-24 06:45:28.968895: step: 368/466, loss: 0.0007932390435598791 2023-01-24 06:45:29.700009: step: 370/466, loss: 0.05523402616381645 2023-01-24 06:45:30.591267: step: 372/466, loss: 0.011178974062204361 2023-01-24 06:45:31.350777: step: 374/466, loss: 0.03265348821878433 2023-01-24 06:45:32.107343: step: 376/466, loss: 2.4180924892425537 2023-01-24 06:45:32.866143: step: 378/466, loss: 0.06094865873456001 2023-01-24 06:45:33.646006: step: 380/466, loss: 0.0009674673201516271 2023-01-24 06:45:34.402125: step: 382/466, loss: 0.0025440987665206194 2023-01-24 06:45:35.126486: step: 384/466, loss: 0.04209550470113754 2023-01-24 06:45:35.863358: step: 386/466, loss: 0.003342913230881095 2023-01-24 06:45:36.516014: step: 388/466, loss: 0.004386276006698608 2023-01-24 06:45:37.288626: step: 390/466, loss: 0.0031542626675218344 2023-01-24 06:45:38.120407: step: 392/466, loss: 0.008323580026626587 2023-01-24 06:45:38.865384: step: 394/466, loss: 0.028583411127328873 2023-01-24 06:45:39.611131: step: 396/466, loss: 0.021640345454216003 2023-01-24 06:45:40.325229: step: 398/466, loss: 0.003628394566476345 2023-01-24 06:45:40.995529: step: 400/466, loss: 0.0007856853189878166 2023-01-24 06:45:41.749852: step: 402/466, loss: 0.006474267691373825 2023-01-24 06:45:42.400723: step: 404/466, loss: 0.0037609722930938005 2023-01-24 06:45:43.219656: step: 406/466, loss: 0.04697392135858536 2023-01-24 06:45:43.981529: step: 408/466, loss: 0.005808677524328232 2023-01-24 06:45:44.786081: step: 410/466, loss: 0.017602592706680298 2023-01-24 06:45:45.487875: step: 412/466, loss: 0.004466408398002386 2023-01-24 06:45:46.294548: step: 414/466, loss: 0.030358077958226204 2023-01-24 06:45:47.091621: step: 416/466, loss: 0.00018989270029123873 2023-01-24 06:45:47.788178: step: 418/466, loss: 0.019630558788776398 2023-01-24 06:45:48.594070: step: 420/466, loss: 0.008626433089375496 2023-01-24 06:45:49.337957: step: 422/466, loss: 0.013188624754548073 2023-01-24 06:45:50.036325: step: 424/466, loss: 0.18110044300556183 2023-01-24 06:45:50.818915: step: 426/466, loss: 0.15420110523700714 2023-01-24 06:45:51.614864: step: 428/466, loss: 0.00033983562025241554 2023-01-24 06:45:52.405338: step: 430/466, loss: 0.004736277740448713 2023-01-24 06:45:53.106946: step: 432/466, loss: 0.0006059578736312687 2023-01-24 06:45:53.846698: step: 434/466, loss: 0.014155655167996883 2023-01-24 06:45:54.625384: step: 436/466, loss: 0.01840216852724552 2023-01-24 06:45:55.377406: step: 438/466, loss: 0.008797436952590942 2023-01-24 06:45:56.008022: step: 440/466, loss: 2.0591582142515108e-05 2023-01-24 06:45:56.761073: step: 442/466, loss: 0.002799784764647484 2023-01-24 06:45:57.564984: step: 444/466, loss: 0.003289586864411831 2023-01-24 06:45:58.354261: step: 446/466, loss: 0.0030177352018654346 2023-01-24 06:45:59.109136: step: 448/466, loss: 0.0005092833307571709 2023-01-24 06:45:59.819745: step: 450/466, loss: 0.0026675830595195293 2023-01-24 06:46:00.539492: step: 452/466, loss: 0.00010813782137120143 2023-01-24 06:46:01.261625: step: 454/466, loss: 0.030953940004110336 2023-01-24 06:46:02.081539: step: 456/466, loss: 0.044374607503414154 2023-01-24 06:46:02.886897: step: 458/466, loss: 0.13990625739097595 2023-01-24 06:46:03.623562: step: 460/466, loss: 0.00537828216329217 2023-01-24 06:46:04.379030: step: 462/466, loss: 0.022325586527585983 2023-01-24 06:46:05.152912: step: 464/466, loss: 0.006508402526378632 2023-01-24 06:46:05.910344: step: 466/466, loss: 0.0010946118272840977 2023-01-24 06:46:06.664854: step: 468/466, loss: 0.0019944056402891874 2023-01-24 06:46:07.441840: step: 470/466, loss: 0.005054292269051075 2023-01-24 06:46:08.225438: step: 472/466, loss: 0.09031184017658234 2023-01-24 06:46:08.941374: step: 474/466, loss: 0.13491177558898926 2023-01-24 06:46:09.653511: step: 476/466, loss: 7.156516949180514e-05 2023-01-24 06:46:10.386255: step: 478/466, loss: 0.024453453719615936 2023-01-24 06:46:11.232307: step: 480/466, loss: 0.006817124783992767 2023-01-24 06:46:12.023410: step: 482/466, loss: 0.021428994834423065 2023-01-24 06:46:12.779483: step: 484/466, loss: 0.004524318967014551 2023-01-24 06:46:13.625463: step: 486/466, loss: 0.0009695948101580143 2023-01-24 06:46:14.406416: step: 488/466, loss: 0.019664600491523743 2023-01-24 06:46:15.080402: step: 490/466, loss: 0.00011819535575341433 2023-01-24 06:46:15.772467: step: 492/466, loss: 0.0010263827862218022 2023-01-24 06:46:16.528600: step: 494/466, loss: 9.183879592455924e-05 2023-01-24 06:46:17.268584: step: 496/466, loss: 0.028333380818367004 2023-01-24 06:46:18.007545: step: 498/466, loss: 0.00033218032331205904 2023-01-24 06:46:18.778972: step: 500/466, loss: 0.00035989010939374566 2023-01-24 06:46:19.515590: step: 502/466, loss: 0.016311464831233025 2023-01-24 06:46:20.249803: step: 504/466, loss: 0.020886670798063278 2023-01-24 06:46:20.956508: step: 506/466, loss: 0.0010080871870741248 2023-01-24 06:46:21.716275: step: 508/466, loss: 0.04013931751251221 2023-01-24 06:46:22.439827: step: 510/466, loss: 0.01586906984448433 2023-01-24 06:46:23.206501: step: 512/466, loss: 4.607872009277344 2023-01-24 06:46:23.984553: step: 514/466, loss: 0.0015745365526527166 2023-01-24 06:46:24.797859: step: 516/466, loss: 0.07102135568857193 2023-01-24 06:46:25.533002: step: 518/466, loss: 0.0023052708711475134 2023-01-24 06:46:26.235729: step: 520/466, loss: 0.044435564428567886 2023-01-24 06:46:26.946822: step: 522/466, loss: 0.01360018365085125 2023-01-24 06:46:27.754568: step: 524/466, loss: 0.00858695525676012 2023-01-24 06:46:28.578530: step: 526/466, loss: 0.0014044283889234066 2023-01-24 06:46:29.311577: step: 528/466, loss: 0.0015609466936439276 2023-01-24 06:46:30.043981: step: 530/466, loss: 0.002609415678307414 2023-01-24 06:46:30.741306: step: 532/466, loss: 0.03356180340051651 2023-01-24 06:46:31.534967: step: 534/466, loss: 0.00122835545334965 2023-01-24 06:46:32.242165: step: 536/466, loss: 0.004466540180146694 2023-01-24 06:46:33.073096: step: 538/466, loss: 0.019091887399554253 2023-01-24 06:46:33.848676: step: 540/466, loss: 0.05677692964673042 2023-01-24 06:46:34.561181: step: 542/466, loss: 0.02603871561586857 2023-01-24 06:46:35.274566: step: 544/466, loss: 4.200865078018978e-05 2023-01-24 06:46:36.001424: step: 546/466, loss: 0.057331353425979614 2023-01-24 06:46:36.776450: step: 548/466, loss: 0.00982821173965931 2023-01-24 06:46:37.576827: step: 550/466, loss: 0.060289330780506134 2023-01-24 06:46:38.327229: step: 552/466, loss: 0.012156656943261623 2023-01-24 06:46:39.018121: step: 554/466, loss: 0.0003722023102454841 2023-01-24 06:46:39.787074: step: 556/466, loss: 0.018028881400823593 2023-01-24 06:46:40.525048: step: 558/466, loss: 0.023712920024991035 2023-01-24 06:46:41.353668: step: 560/466, loss: 0.003337965114042163 2023-01-24 06:46:42.121751: step: 562/466, loss: 0.00031802200828678906 2023-01-24 06:46:42.813612: step: 564/466, loss: 0.001543916412629187 2023-01-24 06:46:43.558800: step: 566/466, loss: 0.0645618662238121 2023-01-24 06:46:44.357783: step: 568/466, loss: 0.009318874217569828 2023-01-24 06:46:45.076061: step: 570/466, loss: 0.016917699947953224 2023-01-24 06:46:45.843788: step: 572/466, loss: 0.09670083969831467 2023-01-24 06:46:46.627733: step: 574/466, loss: 0.0034049071837216616 2023-01-24 06:46:47.308289: step: 576/466, loss: 0.0009502943139523268 2023-01-24 06:46:48.152690: step: 578/466, loss: 0.025513457134366035 2023-01-24 06:46:48.879288: step: 580/466, loss: 0.02725810743868351 2023-01-24 06:46:49.646555: step: 582/466, loss: 0.020639048889279366 2023-01-24 06:46:50.448854: step: 584/466, loss: 0.016725769266486168 2023-01-24 06:46:51.222255: step: 586/466, loss: 0.004354603588581085 2023-01-24 06:46:52.019898: step: 588/466, loss: 0.00044333579717203975 2023-01-24 06:46:52.827799: step: 590/466, loss: 0.02828321047127247 2023-01-24 06:46:53.554176: step: 592/466, loss: 0.053278740495443344 2023-01-24 06:46:54.281105: step: 594/466, loss: 0.002482444979250431 2023-01-24 06:46:55.118015: step: 596/466, loss: 0.006808500736951828 2023-01-24 06:46:55.924550: step: 598/466, loss: 0.007787493988871574 2023-01-24 06:46:56.682408: step: 600/466, loss: 0.002453563967719674 2023-01-24 06:46:57.382735: step: 602/466, loss: 0.0010509646963328123 2023-01-24 06:46:58.222330: step: 604/466, loss: 0.010503526777029037 2023-01-24 06:46:58.955590: step: 606/466, loss: 0.020881492644548416 2023-01-24 06:46:59.619458: step: 608/466, loss: 0.0014220715966075659 2023-01-24 06:47:00.373459: step: 610/466, loss: 0.004759980831295252 2023-01-24 06:47:01.100486: step: 612/466, loss: 0.012297210283577442 2023-01-24 06:47:01.848563: step: 614/466, loss: 0.002239755354821682 2023-01-24 06:47:02.618188: step: 616/466, loss: 0.04295475035905838 2023-01-24 06:47:03.465814: step: 618/466, loss: 0.01774514839053154 2023-01-24 06:47:04.179795: step: 620/466, loss: 3.909325823769905e-05 2023-01-24 06:47:04.905077: step: 622/466, loss: 0.018953876569867134 2023-01-24 06:47:05.624691: step: 624/466, loss: 0.0008622193709015846 2023-01-24 06:47:06.365027: step: 626/466, loss: 0.021152107045054436 2023-01-24 06:47:07.091372: step: 628/466, loss: 0.00045716180466115475 2023-01-24 06:47:07.916180: step: 630/466, loss: 0.05074857920408249 2023-01-24 06:47:08.707768: step: 632/466, loss: 3.691697202157229e-05 2023-01-24 06:47:09.479058: step: 634/466, loss: 0.004377929028123617 2023-01-24 06:47:10.290108: step: 636/466, loss: 0.0053786421194672585 2023-01-24 06:47:11.102306: step: 638/466, loss: 0.003944407217204571 2023-01-24 06:47:11.759277: step: 640/466, loss: 0.018148386850953102 2023-01-24 06:47:12.510254: step: 642/466, loss: 0.02679787389934063 2023-01-24 06:47:13.420468: step: 644/466, loss: 0.0066886176355183125 2023-01-24 06:47:14.184546: step: 646/466, loss: 0.002962264697998762 2023-01-24 06:47:14.874530: step: 648/466, loss: 0.0009095259010791779 2023-01-24 06:47:15.576215: step: 650/466, loss: 0.0002648793160915375 2023-01-24 06:47:16.325612: step: 652/466, loss: 0.0013989545404911041 2023-01-24 06:47:17.039265: step: 654/466, loss: 0.1868881732225418 2023-01-24 06:47:17.746037: step: 656/466, loss: 0.015928208827972412 2023-01-24 06:47:18.423624: step: 658/466, loss: 0.0023805315140634775 2023-01-24 06:47:19.095006: step: 660/466, loss: 0.004664386156946421 2023-01-24 06:47:19.821575: step: 662/466, loss: 1.4249751984607428e-05 2023-01-24 06:47:20.543221: step: 664/466, loss: 0.000596735393628478 2023-01-24 06:47:21.259744: step: 666/466, loss: 0.002551491605117917 2023-01-24 06:47:22.033358: step: 668/466, loss: 0.03295866400003433 2023-01-24 06:47:22.883260: step: 670/466, loss: 0.009886063635349274 2023-01-24 06:47:23.643658: step: 672/466, loss: 0.01368051115423441 2023-01-24 06:47:24.394797: step: 674/466, loss: 0.003591829678043723 2023-01-24 06:47:25.095610: step: 676/466, loss: 0.00019231434271205217 2023-01-24 06:47:25.803406: step: 678/466, loss: 0.00796997919678688 2023-01-24 06:47:26.667034: step: 680/466, loss: 0.009898564778268337 2023-01-24 06:47:27.375289: step: 682/466, loss: 0.002351475181058049 2023-01-24 06:47:28.161603: step: 684/466, loss: 0.035061463713645935 2023-01-24 06:47:28.955364: step: 686/466, loss: 0.004485884215682745 2023-01-24 06:47:29.754891: step: 688/466, loss: 0.01222966331988573 2023-01-24 06:47:30.521851: step: 690/466, loss: 0.01499281357973814 2023-01-24 06:47:31.334025: step: 692/466, loss: 0.009576707147061825 2023-01-24 06:47:32.144574: step: 694/466, loss: 0.0007713002851232886 2023-01-24 06:47:32.891752: step: 696/466, loss: 8.355027966899797e-05 2023-01-24 06:47:33.691735: step: 698/466, loss: 0.00462744478136301 2023-01-24 06:47:34.445541: step: 700/466, loss: 0.6917054653167725 2023-01-24 06:47:35.154375: step: 702/466, loss: 0.0004787829238921404 2023-01-24 06:47:35.926533: step: 704/466, loss: 0.006655456963926554 2023-01-24 06:47:36.633383: step: 706/466, loss: 0.018385127186775208 2023-01-24 06:47:37.408254: step: 708/466, loss: 0.059344708919525146 2023-01-24 06:47:38.183598: step: 710/466, loss: 0.0037663299590349197 2023-01-24 06:47:38.877086: step: 712/466, loss: 0.0011843966785818338 2023-01-24 06:47:39.557718: step: 714/466, loss: 0.00032077066134661436 2023-01-24 06:47:40.314838: step: 716/466, loss: 0.00018971598183270544 2023-01-24 06:47:41.022413: step: 718/466, loss: 0.0027237460017204285 2023-01-24 06:47:41.745734: step: 720/466, loss: 0.0007768472423776984 2023-01-24 06:47:42.418987: step: 722/466, loss: 0.02395699918270111 2023-01-24 06:47:43.205362: step: 724/466, loss: 0.009225727058947086 2023-01-24 06:47:43.897008: step: 726/466, loss: 0.00010193004709435627 2023-01-24 06:47:44.634744: step: 728/466, loss: 0.0001947433629538864 2023-01-24 06:47:45.390993: step: 730/466, loss: 0.0005076914094388485 2023-01-24 06:47:46.078967: step: 732/466, loss: 0.0006210353458300233 2023-01-24 06:47:46.828919: step: 734/466, loss: 0.0017323088832199574 2023-01-24 06:47:47.577772: step: 736/466, loss: 0.0036521030124276876 2023-01-24 06:47:48.331308: step: 738/466, loss: 0.027026157826185226 2023-01-24 06:47:49.097960: step: 740/466, loss: 0.0278612170368433 2023-01-24 06:47:49.859085: step: 742/466, loss: 0.009088082239031792 2023-01-24 06:47:50.688998: step: 744/466, loss: 0.1705555021762848 2023-01-24 06:47:51.449285: step: 746/466, loss: 0.0037146315444260836 2023-01-24 06:47:52.248946: step: 748/466, loss: 5.9328271163394675e-05 2023-01-24 06:47:53.001345: step: 750/466, loss: 0.009150792844593525 2023-01-24 06:47:53.783702: step: 752/466, loss: 0.003557102754712105 2023-01-24 06:47:54.524909: step: 754/466, loss: 0.07643107324838638 2023-01-24 06:47:55.344882: step: 756/466, loss: 0.023097380995750427 2023-01-24 06:47:56.061829: step: 758/466, loss: 0.0860399603843689 2023-01-24 06:47:56.863470: step: 760/466, loss: 0.011937241069972515 2023-01-24 06:47:57.584784: step: 762/466, loss: 0.002413738053292036 2023-01-24 06:47:58.316559: step: 764/466, loss: 0.0005132968653924763 2023-01-24 06:47:59.072369: step: 766/466, loss: 0.01412399671971798 2023-01-24 06:47:59.883784: step: 768/466, loss: 0.007267692591995001 2023-01-24 06:48:00.600846: step: 770/466, loss: 0.002236289670690894 2023-01-24 06:48:01.300438: step: 772/466, loss: 0.37281978130340576 2023-01-24 06:48:02.123257: step: 774/466, loss: 0.01073366403579712 2023-01-24 06:48:02.888984: step: 776/466, loss: 0.025290386751294136 2023-01-24 06:48:03.763031: step: 778/466, loss: 0.018467970192432404 2023-01-24 06:48:04.464517: step: 780/466, loss: 0.0012066556373611093 2023-01-24 06:48:05.210474: step: 782/466, loss: 0.0006555514992214739 2023-01-24 06:48:05.971314: step: 784/466, loss: 0.02548740617930889 2023-01-24 06:48:06.708536: step: 786/466, loss: 0.4201527237892151 2023-01-24 06:48:07.474204: step: 788/466, loss: 0.014074753038585186 2023-01-24 06:48:08.255000: step: 790/466, loss: 0.017719129100441933 2023-01-24 06:48:08.929632: step: 792/466, loss: 0.002532778074964881 2023-01-24 06:48:09.772225: step: 794/466, loss: 0.0023974813520908356 2023-01-24 06:48:10.530475: step: 796/466, loss: 0.0025688474997878075 2023-01-24 06:48:11.257035: step: 798/466, loss: 0.0006184555240906775 2023-01-24 06:48:11.985418: step: 800/466, loss: 0.029398677870631218 2023-01-24 06:48:12.758888: step: 802/466, loss: 0.00013305842003319412 2023-01-24 06:48:13.566169: step: 804/466, loss: 0.00010052655125036836 2023-01-24 06:48:14.348784: step: 806/466, loss: 0.02093655802309513 2023-01-24 06:48:15.041861: step: 808/466, loss: 0.0018348832381889224 2023-01-24 06:48:15.719861: step: 810/466, loss: 0.01457955688238144 2023-01-24 06:48:16.518928: step: 812/466, loss: 0.032794274389743805 2023-01-24 06:48:17.306170: step: 814/466, loss: 0.35781222581863403 2023-01-24 06:48:18.055129: step: 816/466, loss: 0.0023967118468135595 2023-01-24 06:48:18.703987: step: 818/466, loss: 0.01247766800224781 2023-01-24 06:48:19.486679: step: 820/466, loss: 0.004071477800607681 2023-01-24 06:48:20.176594: step: 822/466, loss: 3.02156840916723e-05 2023-01-24 06:48:20.870062: step: 824/466, loss: 0.02758164145052433 2023-01-24 06:48:21.616296: step: 826/466, loss: 0.11011549085378647 2023-01-24 06:48:22.482705: step: 828/466, loss: 0.0051184226758778095 2023-01-24 06:48:23.284919: step: 830/466, loss: 0.003817720804363489 2023-01-24 06:48:24.013019: step: 832/466, loss: 0.00034997283364646137 2023-01-24 06:48:24.761936: step: 834/466, loss: 0.00047167122829705477 2023-01-24 06:48:25.514821: step: 836/466, loss: 0.005143610294908285 2023-01-24 06:48:26.279835: step: 838/466, loss: 0.01582680456340313 2023-01-24 06:48:27.021816: step: 840/466, loss: 0.0280031468719244 2023-01-24 06:48:27.940338: step: 842/466, loss: 0.0013992044841870666 2023-01-24 06:48:28.698659: step: 844/466, loss: 0.0012508267536759377 2023-01-24 06:48:29.479228: step: 846/466, loss: 0.005863756872713566 2023-01-24 06:48:30.144060: step: 848/466, loss: 0.0010132716270163655 2023-01-24 06:48:30.974689: step: 850/466, loss: 0.0030189540702849627 2023-01-24 06:48:31.709021: step: 852/466, loss: 0.03408302739262581 2023-01-24 06:48:32.558846: step: 854/466, loss: 0.005213402211666107 2023-01-24 06:48:33.260173: step: 856/466, loss: 0.03750526160001755 2023-01-24 06:48:34.010556: step: 858/466, loss: 0.019979437813162804 2023-01-24 06:48:34.708379: step: 860/466, loss: 0.010737068951129913 2023-01-24 06:48:35.439544: step: 862/466, loss: 0.003372674575075507 2023-01-24 06:48:36.176737: step: 864/466, loss: 0.029829924926161766 2023-01-24 06:48:36.926380: step: 866/466, loss: 0.00047801234177313745 2023-01-24 06:48:37.617929: step: 868/466, loss: 0.002308440860360861 2023-01-24 06:48:38.406803: step: 870/466, loss: 0.007953722961246967 2023-01-24 06:48:39.194771: step: 872/466, loss: 0.00415385514497757 2023-01-24 06:48:39.970220: step: 874/466, loss: 0.002917190082371235 2023-01-24 06:48:40.710736: step: 876/466, loss: 0.10013389587402344 2023-01-24 06:48:41.348415: step: 878/466, loss: 0.00955923367291689 2023-01-24 06:48:42.069752: step: 880/466, loss: 0.0020114178769290447 2023-01-24 06:48:42.959161: step: 882/466, loss: 0.02671821415424347 2023-01-24 06:48:43.683801: step: 884/466, loss: 0.00014646655472461134 2023-01-24 06:48:44.421383: step: 886/466, loss: 0.0035071747843176126 2023-01-24 06:48:45.150247: step: 888/466, loss: 0.00027901571593247354 2023-01-24 06:48:45.854078: step: 890/466, loss: 0.007693085819482803 2023-01-24 06:48:46.726883: step: 892/466, loss: 0.008314241655170918 2023-01-24 06:48:47.437766: step: 894/466, loss: 0.0002768787962850183 2023-01-24 06:48:48.306472: step: 896/466, loss: 0.045164354145526886 2023-01-24 06:48:49.133928: step: 898/466, loss: 0.0007990582962520421 2023-01-24 06:48:49.838371: step: 900/466, loss: 0.014751252718269825 2023-01-24 06:48:50.606245: step: 902/466, loss: 0.0013847840018570423 2023-01-24 06:48:51.340600: step: 904/466, loss: 0.006584350951015949 2023-01-24 06:48:52.061271: step: 906/466, loss: 0.00011031017493223771 2023-01-24 06:48:52.860078: step: 908/466, loss: 0.0004208995378576219 2023-01-24 06:48:53.691142: step: 910/466, loss: 0.0004874985897913575 2023-01-24 06:48:54.437560: step: 912/466, loss: 0.26059654355049133 2023-01-24 06:48:55.307512: step: 914/466, loss: 0.03792070969939232 2023-01-24 06:48:56.246069: step: 916/466, loss: 0.0002478975220583379 2023-01-24 06:48:57.133502: step: 918/466, loss: 0.0019585671834647655 2023-01-24 06:48:57.867873: step: 920/466, loss: 0.0002386308478889987 2023-01-24 06:48:58.571232: step: 922/466, loss: 0.014447817578911781 2023-01-24 06:48:59.289656: step: 924/466, loss: 0.017603475600481033 2023-01-24 06:49:00.150092: step: 926/466, loss: 0.03096870332956314 2023-01-24 06:49:00.891622: step: 928/466, loss: 0.016626933589577675 2023-01-24 06:49:01.657870: step: 930/466, loss: 0.001796972006559372 2023-01-24 06:49:02.440272: step: 932/466, loss: 0.011337809264659882 ================================================== Loss: 0.041 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35766666666666663, 'r': 0.3223750790638836, 'f1': 0.339105123087159}, 'combined': 0.24986693280106453, 'epoch': 36} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.38034323772559026, 'r': 0.2946506538359426, 'f1': 0.33205747512370876}, 'combined': 0.20409386275896246, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33043321205998016, 'r': 0.3285521880824091, 'f1': 0.3294900154508651}, 'combined': 0.24278211664800584, 'epoch': 36} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35885318259317184, 'r': 0.2913738579634333, 'f1': 0.321612082343187}, 'combined': 0.1976737676841052, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37147302904564317, 'r': 0.339753320683112, 'f1': 0.35490584737363723}, 'combined': 0.26150957174899586, 'epoch': 36} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38138922621752097, 'r': 0.28918160566753104, 'f1': 0.32894585799934034}, 'combined': 0.20317244170547497, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32407407407407407, 'r': 0.25, 'f1': 0.282258064516129}, 'combined': 0.18817204301075266, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3425925925925926, 'r': 0.40217391304347827, 'f1': 0.37}, 'combined': 0.185, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 36} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3561785714285714, 'r': 0.3210338167525074, 'f1': 0.33769425434844597}, 'combined': 0.24882734530938122, 'epoch': 33} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37857267048883675, 'r': 0.28679747764305813, 'f1': 0.32635575042141096}, 'combined': 0.20058938806389162, 'epoch': 33} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.325, 'f1': 0.3729508196721312}, 'combined': 0.24863387978142076, 'epoch': 33} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33043321205998016, 'r': 0.3285521880824091, 'f1': 0.3294900154508651}, 'combined': 0.24278211664800584, 'epoch': 36} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35885318259317184, 'r': 0.2913738579634333, 'f1': 0.321612082343187}, 'combined': 0.1976737676841052, 'epoch': 36} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3425925925925926, 'r': 0.40217391304347827, 'f1': 0.37}, 'combined': 0.185, 'epoch': 36} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37147302904564317, 'r': 0.339753320683112, 'f1': 0.35490584737363723}, 'combined': 0.26150957174899586, 'epoch': 36} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38138922621752097, 'r': 0.28918160566753104, 'f1': 0.32894585799934034}, 'combined': 0.20317244170547497, 'epoch': 36} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 36} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:52:07.564768: step: 2/466, loss: 0.0048588840290904045 2023-01-24 06:52:08.287646: step: 4/466, loss: 0.00700045982375741 2023-01-24 06:52:09.015053: step: 6/466, loss: 0.0010578229557722807 2023-01-24 06:52:09.821595: step: 8/466, loss: 0.019440356642007828 2023-01-24 06:52:10.591075: step: 10/466, loss: 0.01578276976943016 2023-01-24 06:52:11.367053: step: 12/466, loss: 0.00025084824301302433 2023-01-24 06:52:12.049149: step: 14/466, loss: 0.0112862978130579 2023-01-24 06:52:12.736011: step: 16/466, loss: 0.00468059116974473 2023-01-24 06:52:13.506394: step: 18/466, loss: 0.0021089408546686172 2023-01-24 06:52:14.275357: step: 20/466, loss: 0.00011019927478628233 2023-01-24 06:52:15.034048: step: 22/466, loss: 0.08023007959127426 2023-01-24 06:52:15.788250: step: 24/466, loss: 0.0026985728181898594 2023-01-24 06:52:16.558204: step: 26/466, loss: 0.0008987162145785987 2023-01-24 06:52:17.286498: step: 28/466, loss: 5.221124229137786e-05 2023-01-24 06:52:18.071931: step: 30/466, loss: 4.487218757276423e-05 2023-01-24 06:52:18.781369: step: 32/466, loss: 0.007680452428758144 2023-01-24 06:52:19.555623: step: 34/466, loss: 0.025444507598876953 2023-01-24 06:52:20.396381: step: 36/466, loss: 0.03113366663455963 2023-01-24 06:52:21.126819: step: 38/466, loss: 0.014559357427060604 2023-01-24 06:52:21.935917: step: 40/466, loss: 0.0157176461070776 2023-01-24 06:52:22.704048: step: 42/466, loss: 0.032705437391996384 2023-01-24 06:52:23.462271: step: 44/466, loss: 0.0012881396105512977 2023-01-24 06:52:24.256428: step: 46/466, loss: 0.0005634765257127583 2023-01-24 06:52:24.940493: step: 48/466, loss: 0.00015636030002497137 2023-01-24 06:52:25.646884: step: 50/466, loss: 0.0008369830320589244 2023-01-24 06:52:26.468972: step: 52/466, loss: 0.0010885618394240737 2023-01-24 06:52:27.248921: step: 54/466, loss: 0.017781171947717667 2023-01-24 06:52:28.003887: step: 56/466, loss: 0.03731287643313408 2023-01-24 06:52:28.748780: step: 58/466, loss: 0.00025140156503766775 2023-01-24 06:52:29.571319: step: 60/466, loss: 0.008262161165475845 2023-01-24 06:52:30.354178: step: 62/466, loss: 0.012372287921607494 2023-01-24 06:52:31.126463: step: 64/466, loss: 0.0029393951408565044 2023-01-24 06:52:31.916023: step: 66/466, loss: 0.003331736195832491 2023-01-24 06:52:32.643904: step: 68/466, loss: 0.0029272697865962982 2023-01-24 06:52:33.321979: step: 70/466, loss: 0.010165790095925331 2023-01-24 06:52:34.068441: step: 72/466, loss: 0.17820614576339722 2023-01-24 06:52:34.805301: step: 74/466, loss: 0.03329198807477951 2023-01-24 06:52:35.568448: step: 76/466, loss: 0.0048468150198459625 2023-01-24 06:52:36.332540: step: 78/466, loss: 0.0005752610741183162 2023-01-24 06:52:37.110317: step: 80/466, loss: 0.0031736392993479967 2023-01-24 06:52:37.858139: step: 82/466, loss: 0.009133272804319859 2023-01-24 06:52:38.596899: step: 84/466, loss: 4.2883766582235694e-05 2023-01-24 06:52:39.356591: step: 86/466, loss: 0.004588339943438768 2023-01-24 06:52:40.090766: step: 88/466, loss: 0.005838914308696985 2023-01-24 06:52:40.813115: step: 90/466, loss: 0.028857816010713577 2023-01-24 06:52:41.569711: step: 92/466, loss: 0.001227057189680636 2023-01-24 06:52:42.274105: step: 94/466, loss: 0.001945412135683 2023-01-24 06:52:42.964040: step: 96/466, loss: 0.006568763870745897 2023-01-24 06:52:43.750489: step: 98/466, loss: 0.00024487273185513914 2023-01-24 06:52:44.509890: step: 100/466, loss: 0.0056565627455711365 2023-01-24 06:52:45.156261: step: 102/466, loss: 0.0008967267931438982 2023-01-24 06:52:45.818670: step: 104/466, loss: 0.007367671467363834 2023-01-24 06:52:46.603019: step: 106/466, loss: 0.024896690621972084 2023-01-24 06:52:47.348386: step: 108/466, loss: 0.026141609996557236 2023-01-24 06:52:48.048471: step: 110/466, loss: 0.0013241652632132173 2023-01-24 06:52:48.817558: step: 112/466, loss: 0.0025367976631969213 2023-01-24 06:52:49.501359: step: 114/466, loss: 0.004370294511318207 2023-01-24 06:52:50.281559: step: 116/466, loss: 0.006824452430009842 2023-01-24 06:52:51.024969: step: 118/466, loss: 0.2507859766483307 2023-01-24 06:52:51.706137: step: 120/466, loss: 0.0023692583199590445 2023-01-24 06:52:52.517658: step: 122/466, loss: 0.003840988501906395 2023-01-24 06:52:53.256942: step: 124/466, loss: 0.0017793461447581649 2023-01-24 06:52:54.075667: step: 126/466, loss: 0.0038497764617204666 2023-01-24 06:52:54.770645: step: 128/466, loss: 0.017459649592638016 2023-01-24 06:52:55.586406: step: 130/466, loss: 0.01397742610424757 2023-01-24 06:52:56.329551: step: 132/466, loss: 0.0017786064418032765 2023-01-24 06:52:57.124505: step: 134/466, loss: 0.019823169335722923 2023-01-24 06:52:57.840124: step: 136/466, loss: 9.185528324451298e-05 2023-01-24 06:52:58.639780: step: 138/466, loss: 0.0022579084616154432 2023-01-24 06:52:59.398139: step: 140/466, loss: 0.003933996427804232 2023-01-24 06:53:00.127616: step: 142/466, loss: 0.016194989904761314 2023-01-24 06:53:00.785879: step: 144/466, loss: 0.003658501198515296 2023-01-24 06:53:01.512567: step: 146/466, loss: 0.0016831440152600408 2023-01-24 06:53:02.269231: step: 148/466, loss: 0.007074498571455479 2023-01-24 06:53:02.997935: step: 150/466, loss: 0.02480948716402054 2023-01-24 06:53:03.719217: step: 152/466, loss: 4.1526051063556224e-05 2023-01-24 06:53:04.457382: step: 154/466, loss: 0.0003189579292666167 2023-01-24 06:53:05.237322: step: 156/466, loss: 0.005066257435828447 2023-01-24 06:53:06.041220: step: 158/466, loss: 0.02136784978210926 2023-01-24 06:53:06.730888: step: 160/466, loss: 0.0036368719302117825 2023-01-24 06:53:07.483565: step: 162/466, loss: 0.024072490632534027 2023-01-24 06:53:08.239637: step: 164/466, loss: 0.0027644268702715635 2023-01-24 06:53:08.894288: step: 166/466, loss: 0.0003109975077677518 2023-01-24 06:53:09.606664: step: 168/466, loss: 0.004258118104189634 2023-01-24 06:53:10.320293: step: 170/466, loss: 0.002872900338843465 2023-01-24 06:53:11.028579: step: 172/466, loss: 0.012874235399067402 2023-01-24 06:53:11.833138: step: 174/466, loss: 0.015394738875329494 2023-01-24 06:53:12.491298: step: 176/466, loss: 0.008090341463685036 2023-01-24 06:53:13.209945: step: 178/466, loss: 0.12440593540668488 2023-01-24 06:53:13.902173: step: 180/466, loss: 0.07188346982002258 2023-01-24 06:53:14.730774: step: 182/466, loss: 0.0001309849467361346 2023-01-24 06:53:15.451375: step: 184/466, loss: 0.031471334397792816 2023-01-24 06:53:16.146800: step: 186/466, loss: 0.08152177929878235 2023-01-24 06:53:16.915471: step: 188/466, loss: 0.0021092984825372696 2023-01-24 06:53:17.644493: step: 190/466, loss: 0.00014183452003635466 2023-01-24 06:53:18.415840: step: 192/466, loss: 0.006583390291780233 2023-01-24 06:53:19.111272: step: 194/466, loss: 7.404685311485082e-05 2023-01-24 06:53:19.801102: step: 196/466, loss: 0.025401262566447258 2023-01-24 06:53:20.675259: step: 198/466, loss: 0.009899438358843327 2023-01-24 06:53:21.550194: step: 200/466, loss: 0.008677742443978786 2023-01-24 06:53:22.258341: step: 202/466, loss: 0.0013968941057100892 2023-01-24 06:53:23.044208: step: 204/466, loss: 0.07518845051527023 2023-01-24 06:53:23.826214: step: 206/466, loss: 0.001217787736095488 2023-01-24 06:53:24.571965: step: 208/466, loss: 0.01642589643597603 2023-01-24 06:53:25.251851: step: 210/466, loss: 0.0003180429630447179 2023-01-24 06:53:25.983803: step: 212/466, loss: 0.004087543115019798 2023-01-24 06:53:26.695731: step: 214/466, loss: 0.002301363507285714 2023-01-24 06:53:27.445466: step: 216/466, loss: 0.0014160939026623964 2023-01-24 06:53:28.098631: step: 218/466, loss: 0.036056406795978546 2023-01-24 06:53:28.833197: step: 220/466, loss: 0.06661252677440643 2023-01-24 06:53:29.558254: step: 222/466, loss: 0.0015557609731331468 2023-01-24 06:53:30.278422: step: 224/466, loss: 0.0019310928182676435 2023-01-24 06:53:31.126115: step: 226/466, loss: 0.05399719625711441 2023-01-24 06:53:31.930207: step: 228/466, loss: 0.002947665983811021 2023-01-24 06:53:32.642458: step: 230/466, loss: 0.014477964490652084 2023-01-24 06:53:33.411588: step: 232/466, loss: 0.019763614982366562 2023-01-24 06:53:34.168347: step: 234/466, loss: 0.006132103502750397 2023-01-24 06:53:34.926649: step: 236/466, loss: 0.010499351657927036 2023-01-24 06:53:35.676989: step: 238/466, loss: 0.0016905704978853464 2023-01-24 06:53:36.406341: step: 240/466, loss: 0.025621794164180756 2023-01-24 06:53:37.210642: step: 242/466, loss: 0.013779666274785995 2023-01-24 06:53:37.949139: step: 244/466, loss: 0.0009651576983742416 2023-01-24 06:53:38.751884: step: 246/466, loss: 0.022902846336364746 2023-01-24 06:53:39.483564: step: 248/466, loss: 0.009824266657233238 2023-01-24 06:53:40.178160: step: 250/466, loss: 0.0001699442946119234 2023-01-24 06:53:40.904732: step: 252/466, loss: 0.0057142386212944984 2023-01-24 06:53:41.646422: step: 254/466, loss: 0.021286005154252052 2023-01-24 06:53:42.364198: step: 256/466, loss: 0.001488046138547361 2023-01-24 06:53:43.023104: step: 258/466, loss: 0.0026199682615697384 2023-01-24 06:53:43.768648: step: 260/466, loss: 0.012011692859232426 2023-01-24 06:53:44.531277: step: 262/466, loss: 0.035835057497024536 2023-01-24 06:53:45.211457: step: 264/466, loss: 0.0019202068215236068 2023-01-24 06:53:45.959425: step: 266/466, loss: 0.02158491127192974 2023-01-24 06:53:46.683449: step: 268/466, loss: 0.00046308664605021477 2023-01-24 06:53:47.366008: step: 270/466, loss: 0.02859680913388729 2023-01-24 06:53:48.014582: step: 272/466, loss: 0.00014170895155984908 2023-01-24 06:53:48.773609: step: 274/466, loss: 0.00041168491588905454 2023-01-24 06:53:49.536930: step: 276/466, loss: 0.003559098346158862 2023-01-24 06:53:50.349870: step: 278/466, loss: 0.0903628021478653 2023-01-24 06:53:51.151196: step: 280/466, loss: 0.017044108361005783 2023-01-24 06:53:51.891136: step: 282/466, loss: 0.011135498993098736 2023-01-24 06:53:52.635222: step: 284/466, loss: 0.16792574524879456 2023-01-24 06:53:53.486255: step: 286/466, loss: 0.0021677750628441572 2023-01-24 06:53:54.222045: step: 288/466, loss: 0.003123561153188348 2023-01-24 06:53:54.966571: step: 290/466, loss: 0.001427959301508963 2023-01-24 06:53:55.711565: step: 292/466, loss: 0.0020495569333434105 2023-01-24 06:53:56.405476: step: 294/466, loss: 0.0013727025361731648 2023-01-24 06:53:57.139740: step: 296/466, loss: 0.07193455845117569 2023-01-24 06:53:57.882395: step: 298/466, loss: 0.002000251319259405 2023-01-24 06:53:58.856042: step: 300/466, loss: 0.0009960554307326674 2023-01-24 06:53:59.630280: step: 302/466, loss: 0.0006270164158195257 2023-01-24 06:54:00.396872: step: 304/466, loss: 0.005108023062348366 2023-01-24 06:54:01.154238: step: 306/466, loss: 0.004545687232166529 2023-01-24 06:54:01.945467: step: 308/466, loss: 0.21969875693321228 2023-01-24 06:54:02.684598: step: 310/466, loss: 0.013188119977712631 2023-01-24 06:54:03.460131: step: 312/466, loss: 0.04590911045670509 2023-01-24 06:54:04.190607: step: 314/466, loss: 0.0009027286432683468 2023-01-24 06:54:05.016797: step: 316/466, loss: 0.1313788741827011 2023-01-24 06:54:05.750643: step: 318/466, loss: 0.0030443756841123104 2023-01-24 06:54:06.520865: step: 320/466, loss: 0.01420830562710762 2023-01-24 06:54:07.220703: step: 322/466, loss: 0.029201209545135498 2023-01-24 06:54:07.974461: step: 324/466, loss: 1.162377953529358 2023-01-24 06:54:08.703822: step: 326/466, loss: 0.0018761560786515474 2023-01-24 06:54:09.478518: step: 328/466, loss: 0.03066885843873024 2023-01-24 06:54:10.240786: step: 330/466, loss: 0.002510959981009364 2023-01-24 06:54:10.961125: step: 332/466, loss: 0.00010499132622499019 2023-01-24 06:54:11.686399: step: 334/466, loss: 0.009663326665759087 2023-01-24 06:54:12.385500: step: 336/466, loss: 0.00018944129988085479 2023-01-24 06:54:13.160564: step: 338/466, loss: 0.0010537714697420597 2023-01-24 06:54:13.994671: step: 340/466, loss: 0.0030080315191298723 2023-01-24 06:54:14.799221: step: 342/466, loss: 0.07661273330450058 2023-01-24 06:54:15.562356: step: 344/466, loss: 0.02163214050233364 2023-01-24 06:54:16.332129: step: 346/466, loss: 0.0030055749230086803 2023-01-24 06:54:17.077567: step: 348/466, loss: 0.01387910358607769 2023-01-24 06:54:17.722975: step: 350/466, loss: 0.002649215515702963 2023-01-24 06:54:18.481913: step: 352/466, loss: 0.00022997992346063256 2023-01-24 06:54:19.270116: step: 354/466, loss: 0.0016065045492723584 2023-01-24 06:54:19.968516: step: 356/466, loss: 0.0029339250177145004 2023-01-24 06:54:20.647257: step: 358/466, loss: 0.004379142541438341 2023-01-24 06:54:21.342989: step: 360/466, loss: 0.00419607711955905 2023-01-24 06:54:22.099811: step: 362/466, loss: 0.11295860260725021 2023-01-24 06:54:22.868399: step: 364/466, loss: 0.0018885949393734336 2023-01-24 06:54:23.563370: step: 366/466, loss: 0.001850526430644095 2023-01-24 06:54:24.302761: step: 368/466, loss: 0.015120510943233967 2023-01-24 06:54:25.059267: step: 370/466, loss: 0.009983471594750881 2023-01-24 06:54:25.888461: step: 372/466, loss: 0.011431551538407803 2023-01-24 06:54:26.574208: step: 374/466, loss: 0.009747300297021866 2023-01-24 06:54:27.246720: step: 376/466, loss: 0.0008253143751062453 2023-01-24 06:54:27.944463: step: 378/466, loss: 0.000229777317144908 2023-01-24 06:54:28.792005: step: 380/466, loss: 0.04102977365255356 2023-01-24 06:54:29.589781: step: 382/466, loss: 0.001671002828516066 2023-01-24 06:54:30.348353: step: 384/466, loss: 0.00015503703616559505 2023-01-24 06:54:31.132597: step: 386/466, loss: 3.818309778580442e-05 2023-01-24 06:54:31.921185: step: 388/466, loss: 0.008570864796638489 2023-01-24 06:54:32.704953: step: 390/466, loss: 0.004652679897844791 2023-01-24 06:54:33.418102: step: 392/466, loss: 0.00042334620957262814 2023-01-24 06:54:34.142957: step: 394/466, loss: 0.0036026700399816036 2023-01-24 06:54:34.901283: step: 396/466, loss: 0.00144184532109648 2023-01-24 06:54:35.649053: step: 398/466, loss: 0.009980946779251099 2023-01-24 06:54:36.385209: step: 400/466, loss: 0.000343825900927186 2023-01-24 06:54:37.185045: step: 402/466, loss: 0.021086854860186577 2023-01-24 06:54:37.896912: step: 404/466, loss: 0.0005226695793680847 2023-01-24 06:54:38.624353: step: 406/466, loss: 0.03247930482029915 2023-01-24 06:54:39.329431: step: 408/466, loss: 0.0001630575570743531 2023-01-24 06:54:40.174207: step: 410/466, loss: 0.016545020043849945 2023-01-24 06:54:40.898575: step: 412/466, loss: 0.0021113858092576265 2023-01-24 06:54:41.608611: step: 414/466, loss: 0.013716679066419601 2023-01-24 06:54:42.379828: step: 416/466, loss: 0.01107280608266592 2023-01-24 06:54:43.184874: step: 418/466, loss: 0.0002520505804568529 2023-01-24 06:54:44.145139: step: 420/466, loss: 0.034739550203084946 2023-01-24 06:54:44.803625: step: 422/466, loss: 0.006240678019821644 2023-01-24 06:54:45.501879: step: 424/466, loss: 0.011412478983402252 2023-01-24 06:54:46.190729: step: 426/466, loss: 0.008855105377733707 2023-01-24 06:54:46.941531: step: 428/466, loss: 0.013068322092294693 2023-01-24 06:54:47.596372: step: 430/466, loss: 0.0018111380049958825 2023-01-24 06:54:48.337187: step: 432/466, loss: 0.0008868640870787203 2023-01-24 06:54:49.092617: step: 434/466, loss: 0.00042411615140736103 2023-01-24 06:54:49.821636: step: 436/466, loss: 0.15449778735637665 2023-01-24 06:54:50.602454: step: 438/466, loss: 0.010791739448904991 2023-01-24 06:54:51.400199: step: 440/466, loss: 0.0001165992216556333 2023-01-24 06:54:52.120502: step: 442/466, loss: 0.00017539116379339248 2023-01-24 06:54:52.936410: step: 444/466, loss: 0.004423872102051973 2023-01-24 06:54:53.568988: step: 446/466, loss: 0.00048000356764532626 2023-01-24 06:54:54.413792: step: 448/466, loss: 0.007207691669464111 2023-01-24 06:54:55.239111: step: 450/466, loss: 0.05481301620602608 2023-01-24 06:54:55.918879: step: 452/466, loss: 0.1583072394132614 2023-01-24 06:54:56.618308: step: 454/466, loss: 0.0013135368935763836 2023-01-24 06:54:57.304748: step: 456/466, loss: 0.02863306924700737 2023-01-24 06:54:58.040963: step: 458/466, loss: 0.010624400340020657 2023-01-24 06:54:58.810653: step: 460/466, loss: 0.0007287510670721531 2023-01-24 06:54:59.651578: step: 462/466, loss: 0.006504683755338192 2023-01-24 06:55:00.359818: step: 464/466, loss: 0.008223538286983967 2023-01-24 06:55:01.125113: step: 466/466, loss: 0.01025502197444439 2023-01-24 06:55:01.911539: step: 468/466, loss: 0.0026410671416670084 2023-01-24 06:55:02.722051: step: 470/466, loss: 0.0350828617811203 2023-01-24 06:55:03.537548: step: 472/466, loss: 0.01333890575915575 2023-01-24 06:55:04.375328: step: 474/466, loss: 0.04505704715847969 2023-01-24 06:55:05.057707: step: 476/466, loss: 0.052273474633693695 2023-01-24 06:55:05.801523: step: 478/466, loss: 9.465384937357157e-05 2023-01-24 06:55:06.524055: step: 480/466, loss: 0.027441969141364098 2023-01-24 06:55:07.276556: step: 482/466, loss: 0.001578064518980682 2023-01-24 06:55:08.006818: step: 484/466, loss: 0.0018492471426725388 2023-01-24 06:55:08.718418: step: 486/466, loss: 0.016192801296710968 2023-01-24 06:55:09.499405: step: 488/466, loss: 0.005125532392412424 2023-01-24 06:55:10.336811: step: 490/466, loss: 0.0839591696858406 2023-01-24 06:55:11.152065: step: 492/466, loss: 0.0011096717789769173 2023-01-24 06:55:11.880447: step: 494/466, loss: 0.08878828585147858 2023-01-24 06:55:12.803973: step: 496/466, loss: 0.00310177868232131 2023-01-24 06:55:13.522336: step: 498/466, loss: 0.017978543415665627 2023-01-24 06:55:14.210585: step: 500/466, loss: 0.001203813822939992 2023-01-24 06:55:14.963853: step: 502/466, loss: 0.0003878503921441734 2023-01-24 06:55:15.681872: step: 504/466, loss: 0.0007280244608409703 2023-01-24 06:55:16.413325: step: 506/466, loss: 0.018997440114617348 2023-01-24 06:55:17.110230: step: 508/466, loss: 0.00015789296594448388 2023-01-24 06:55:17.909771: step: 510/466, loss: 0.00040656939381733537 2023-01-24 06:55:18.622160: step: 512/466, loss: 0.04028857499361038 2023-01-24 06:55:19.339403: step: 514/466, loss: 0.014796644449234009 2023-01-24 06:55:20.116152: step: 516/466, loss: 0.020934315398335457 2023-01-24 06:55:20.831501: step: 518/466, loss: 0.0004342313332017511 2023-01-24 06:55:21.625173: step: 520/466, loss: 0.0016563318204134703 2023-01-24 06:55:22.394646: step: 522/466, loss: 0.011757887899875641 2023-01-24 06:55:23.150319: step: 524/466, loss: 0.016990801319479942 2023-01-24 06:55:23.958445: step: 526/466, loss: 0.013895918615162373 2023-01-24 06:55:24.670062: step: 528/466, loss: 0.002518326509743929 2023-01-24 06:55:25.392522: step: 530/466, loss: 0.00970179121941328 2023-01-24 06:55:26.211159: step: 532/466, loss: 0.02301209419965744 2023-01-24 06:55:27.003645: step: 534/466, loss: 0.03104967251420021 2023-01-24 06:55:27.752751: step: 536/466, loss: 0.0026576619129627943 2023-01-24 06:55:28.509222: step: 538/466, loss: 0.004054277669638395 2023-01-24 06:55:29.263912: step: 540/466, loss: 0.7004632949829102 2023-01-24 06:55:29.979933: step: 542/466, loss: 0.0006315643549896777 2023-01-24 06:55:30.723499: step: 544/466, loss: 0.01206926442682743 2023-01-24 06:55:31.501984: step: 546/466, loss: 0.006809778045862913 2023-01-24 06:55:32.265036: step: 548/466, loss: 0.0025723904836922884 2023-01-24 06:55:33.088560: step: 550/466, loss: 0.0011128417681902647 2023-01-24 06:55:33.872152: step: 552/466, loss: 0.0028901181649416685 2023-01-24 06:55:34.631595: step: 554/466, loss: 0.012472089380025864 2023-01-24 06:55:35.407064: step: 556/466, loss: 0.03356686979532242 2023-01-24 06:55:36.175411: step: 558/466, loss: 0.00046890118392184377 2023-01-24 06:55:37.007604: step: 560/466, loss: 0.08391924202442169 2023-01-24 06:55:37.788282: step: 562/466, loss: 0.006775359157472849 2023-01-24 06:55:38.530222: step: 564/466, loss: 0.011288279667496681 2023-01-24 06:55:39.381585: step: 566/466, loss: 0.03128872066736221 2023-01-24 06:55:40.103927: step: 568/466, loss: 0.00021249732526484877 2023-01-24 06:55:40.832711: step: 570/466, loss: 0.007754562888294458 2023-01-24 06:55:41.508947: step: 572/466, loss: 0.00020670304365921766 2023-01-24 06:55:42.303127: step: 574/466, loss: 0.00020100167603231966 2023-01-24 06:55:43.030696: step: 576/466, loss: 0.0014069074532017112 2023-01-24 06:55:43.834207: step: 578/466, loss: 0.05859680846333504 2023-01-24 06:55:44.578337: step: 580/466, loss: 0.017105508595705032 2023-01-24 06:55:45.262260: step: 582/466, loss: 0.0008416476775892079 2023-01-24 06:55:46.009555: step: 584/466, loss: 0.00212100800126791 2023-01-24 06:55:46.842693: step: 586/466, loss: 0.007836099714040756 2023-01-24 06:55:47.618696: step: 588/466, loss: 0.006854955572634935 2023-01-24 06:55:48.366179: step: 590/466, loss: 0.0015867829788476229 2023-01-24 06:55:49.126240: step: 592/466, loss: 1.9550052456906997e-06 2023-01-24 06:55:49.885703: step: 594/466, loss: 0.00018021200958173722 2023-01-24 06:55:50.647142: step: 596/466, loss: 0.03990541771054268 2023-01-24 06:55:51.410615: step: 598/466, loss: 0.0500204972922802 2023-01-24 06:55:52.101962: step: 600/466, loss: 0.1216036006808281 2023-01-24 06:55:52.895605: step: 602/466, loss: 0.010651436634361744 2023-01-24 06:55:53.642524: step: 604/466, loss: 0.002248160308226943 2023-01-24 06:55:54.375606: step: 606/466, loss: 0.07864465564489365 2023-01-24 06:55:55.194362: step: 608/466, loss: 0.007119298912584782 2023-01-24 06:55:55.905982: step: 610/466, loss: 0.0018475898541510105 2023-01-24 06:55:56.651480: step: 612/466, loss: 0.0014052045298740268 2023-01-24 06:55:57.611519: step: 614/466, loss: 0.0003670759324450046 2023-01-24 06:55:58.377833: step: 616/466, loss: 0.047144003212451935 2023-01-24 06:55:59.101393: step: 618/466, loss: 0.014714747667312622 2023-01-24 06:55:59.846138: step: 620/466, loss: 0.0009839566191658378 2023-01-24 06:56:00.696232: step: 622/466, loss: 0.0046821883879601955 2023-01-24 06:56:01.478111: step: 624/466, loss: 0.01558632217347622 2023-01-24 06:56:02.238631: step: 626/466, loss: 0.022430801764130592 2023-01-24 06:56:03.010275: step: 628/466, loss: 4.678280674852431e-05 2023-01-24 06:56:03.736319: step: 630/466, loss: 0.008924842812120914 2023-01-24 06:56:04.484913: step: 632/466, loss: 0.36126741766929626 2023-01-24 06:56:05.301155: step: 634/466, loss: 0.0009075882844626904 2023-01-24 06:56:06.013289: step: 636/466, loss: 0.0497039258480072 2023-01-24 06:56:06.834183: step: 638/466, loss: 0.0003013135283254087 2023-01-24 06:56:07.567704: step: 640/466, loss: 0.003686620155349374 2023-01-24 06:56:08.298040: step: 642/466, loss: 9.751777542987838e-05 2023-01-24 06:56:09.117338: step: 644/466, loss: 0.0009560614125803113 2023-01-24 06:56:10.014689: step: 646/466, loss: 0.002088146051391959 2023-01-24 06:56:10.737184: step: 648/466, loss: 0.003502498846501112 2023-01-24 06:56:11.477577: step: 650/466, loss: 0.001366930897347629 2023-01-24 06:56:12.224329: step: 652/466, loss: 0.020780501887202263 2023-01-24 06:56:12.947431: step: 654/466, loss: 0.0038583616260439157 2023-01-24 06:56:13.635592: step: 656/466, loss: 0.0011789824347943068 2023-01-24 06:56:14.447185: step: 658/466, loss: 0.003159801010042429 2023-01-24 06:56:15.195598: step: 660/466, loss: 0.013112317770719528 2023-01-24 06:56:15.992175: step: 662/466, loss: 0.0060791620053350925 2023-01-24 06:56:16.725383: step: 664/466, loss: 0.0005515529774129391 2023-01-24 06:56:17.447376: step: 666/466, loss: 0.010279749520123005 2023-01-24 06:56:18.290542: step: 668/466, loss: 0.02576580084860325 2023-01-24 06:56:19.037058: step: 670/466, loss: 0.004443008918315172 2023-01-24 06:56:19.737760: step: 672/466, loss: 0.0020656739361584187 2023-01-24 06:56:20.368097: step: 674/466, loss: 0.059588123112916946 2023-01-24 06:56:21.190466: step: 676/466, loss: 0.06571090221405029 2023-01-24 06:56:21.907200: step: 678/466, loss: 0.0033334565814584494 2023-01-24 06:56:22.580909: step: 680/466, loss: 3.0374652851605788e-05 2023-01-24 06:56:23.348076: step: 682/466, loss: 0.002010779222473502 2023-01-24 06:56:24.028027: step: 684/466, loss: 0.00045487828901968896 2023-01-24 06:56:24.731766: step: 686/466, loss: 0.009617500938475132 2023-01-24 06:56:25.475966: step: 688/466, loss: 1.1015014933946077e-05 2023-01-24 06:56:26.186572: step: 690/466, loss: 0.01960671693086624 2023-01-24 06:56:26.981026: step: 692/466, loss: 0.004229302518069744 2023-01-24 06:56:27.740000: step: 694/466, loss: 0.0006667460547760129 2023-01-24 06:56:28.475028: step: 696/466, loss: 0.0023100117687135935 2023-01-24 06:56:29.179510: step: 698/466, loss: 0.010152764618396759 2023-01-24 06:56:29.850791: step: 700/466, loss: 0.03864798694849014 2023-01-24 06:56:30.657300: step: 702/466, loss: 0.08513421565294266 2023-01-24 06:56:31.431718: step: 704/466, loss: 0.08132486790418625 2023-01-24 06:56:32.260323: step: 706/466, loss: 0.0360155887901783 2023-01-24 06:56:33.046446: step: 708/466, loss: 0.029786350205540657 2023-01-24 06:56:33.766432: step: 710/466, loss: 0.00011806087422883138 2023-01-24 06:56:34.503126: step: 712/466, loss: 0.0663982704281807 2023-01-24 06:56:35.171246: step: 714/466, loss: 0.0035313130356371403 2023-01-24 06:56:35.882128: step: 716/466, loss: 0.00889684446156025 2023-01-24 06:56:36.572032: step: 718/466, loss: 0.0003816070093307644 2023-01-24 06:56:37.493962: step: 720/466, loss: 0.06195018067955971 2023-01-24 06:56:38.303050: step: 722/466, loss: 0.04165364056825638 2023-01-24 06:56:39.001401: step: 724/466, loss: 0.005327022168785334 2023-01-24 06:56:39.745544: step: 726/466, loss: 0.005947711877524853 2023-01-24 06:56:40.464606: step: 728/466, loss: 0.0034825638867914677 2023-01-24 06:56:41.225948: step: 730/466, loss: 0.018863987177610397 2023-01-24 06:56:41.935915: step: 732/466, loss: 0.016013626009225845 2023-01-24 06:56:42.614420: step: 734/466, loss: 0.0028963927179574966 2023-01-24 06:56:43.349876: step: 736/466, loss: 0.007359576877206564 2023-01-24 06:56:44.167490: step: 738/466, loss: 0.003223610110580921 2023-01-24 06:56:44.874208: step: 740/466, loss: 0.08692073076963425 2023-01-24 06:56:45.620453: step: 742/466, loss: 0.00045874129864387214 2023-01-24 06:56:46.381617: step: 744/466, loss: 0.00390154798515141 2023-01-24 06:56:47.120868: step: 746/466, loss: 0.1868094652891159 2023-01-24 06:56:47.897787: step: 748/466, loss: 0.058032043278217316 2023-01-24 06:56:48.751596: step: 750/466, loss: 0.0044331420212984085 2023-01-24 06:56:49.483433: step: 752/466, loss: 0.0005825747502967715 2023-01-24 06:56:50.327360: step: 754/466, loss: 0.019781772047281265 2023-01-24 06:56:51.087784: step: 756/466, loss: 0.013738803565502167 2023-01-24 06:56:51.838356: step: 758/466, loss: 0.0019082339713349938 2023-01-24 06:56:52.514378: step: 760/466, loss: 0.0057865045964717865 2023-01-24 06:56:53.248777: step: 762/466, loss: 0.002451360924169421 2023-01-24 06:56:53.948074: step: 764/466, loss: 0.005069708917289972 2023-01-24 06:56:54.672389: step: 766/466, loss: 0.0019848656374961138 2023-01-24 06:56:55.450443: step: 768/466, loss: 0.010547601617872715 2023-01-24 06:56:56.280971: step: 770/466, loss: 0.045725855976343155 2023-01-24 06:56:56.973112: step: 772/466, loss: 0.004928800743073225 2023-01-24 06:56:57.770909: step: 774/466, loss: 0.0019239893881604075 2023-01-24 06:56:58.519102: step: 776/466, loss: 0.1870308816432953 2023-01-24 06:56:59.264962: step: 778/466, loss: 0.08053610473871231 2023-01-24 06:57:00.079064: step: 780/466, loss: 0.006630890071392059 2023-01-24 06:57:00.913705: step: 782/466, loss: 0.006986789871007204 2023-01-24 06:57:01.605002: step: 784/466, loss: 0.001514837727881968 2023-01-24 06:57:02.347336: step: 786/466, loss: 0.0006803342257626355 2023-01-24 06:57:03.097752: step: 788/466, loss: 0.004624820314347744 2023-01-24 06:57:03.833872: step: 790/466, loss: 0.01107484195381403 2023-01-24 06:57:04.610120: step: 792/466, loss: 0.00035859248600900173 2023-01-24 06:57:05.523765: step: 794/466, loss: 0.004311860539019108 2023-01-24 06:57:06.237242: step: 796/466, loss: 0.006726352032274008 2023-01-24 06:57:06.945076: step: 798/466, loss: 0.02447984181344509 2023-01-24 06:57:07.686688: step: 800/466, loss: 0.026846712455153465 2023-01-24 06:57:08.469407: step: 802/466, loss: 0.031323280185461044 2023-01-24 06:57:09.279410: step: 804/466, loss: 0.008843212388455868 2023-01-24 06:57:10.078431: step: 806/466, loss: 0.0007169311284087598 2023-01-24 06:57:10.828207: step: 808/466, loss: 0.002318635117262602 2023-01-24 06:57:11.617919: step: 810/466, loss: 0.06627952307462692 2023-01-24 06:57:12.305085: step: 812/466, loss: 0.00226763472892344 2023-01-24 06:57:13.146554: step: 814/466, loss: 0.0014524434227496386 2023-01-24 06:57:13.990348: step: 816/466, loss: 0.000614943157415837 2023-01-24 06:57:14.751094: step: 818/466, loss: 0.009615597315132618 2023-01-24 06:57:15.493678: step: 820/466, loss: 0.058534231036901474 2023-01-24 06:57:16.290926: step: 822/466, loss: 0.037576254457235336 2023-01-24 06:57:17.043155: step: 824/466, loss: 0.5030120611190796 2023-01-24 06:57:17.831335: step: 826/466, loss: 0.026011312380433083 2023-01-24 06:57:18.589785: step: 828/466, loss: 0.012016969732940197 2023-01-24 06:57:19.263368: step: 830/466, loss: 0.000505308504216373 2023-01-24 06:57:19.964681: step: 832/466, loss: 0.013352626003324986 2023-01-24 06:57:20.643495: step: 834/466, loss: 0.004660810809582472 2023-01-24 06:57:21.436927: step: 836/466, loss: 0.013114354573190212 2023-01-24 06:57:22.195306: step: 838/466, loss: 0.021583227440714836 2023-01-24 06:57:22.952994: step: 840/466, loss: 0.0003550250257831067 2023-01-24 06:57:23.723029: step: 842/466, loss: 0.001051700091920793 2023-01-24 06:57:24.413341: step: 844/466, loss: 0.0002409262233413756 2023-01-24 06:57:25.121774: step: 846/466, loss: 0.0005547681939788163 2023-01-24 06:57:25.864163: step: 848/466, loss: 0.007193288300186396 2023-01-24 06:57:26.619778: step: 850/466, loss: 0.00018879900744650513 2023-01-24 06:57:27.441366: step: 852/466, loss: 0.00012669037096202374 2023-01-24 06:57:28.177964: step: 854/466, loss: 0.010259213857352734 2023-01-24 06:57:28.923648: step: 856/466, loss: 0.0011524403234943748 2023-01-24 06:57:29.637662: step: 858/466, loss: 0.42043399810791016 2023-01-24 06:57:30.366079: step: 860/466, loss: 0.0032950653694570065 2023-01-24 06:57:31.155109: step: 862/466, loss: 0.004293408710509539 2023-01-24 06:57:31.892580: step: 864/466, loss: 0.014959764666855335 2023-01-24 06:57:32.719251: step: 866/466, loss: 0.02132979966700077 2023-01-24 06:57:33.569059: step: 868/466, loss: 0.020063387230038643 2023-01-24 06:57:34.471546: step: 870/466, loss: 0.005825446918606758 2023-01-24 06:57:35.169287: step: 872/466, loss: 0.0006191516295075417 2023-01-24 06:57:35.945587: step: 874/466, loss: 0.04492638260126114 2023-01-24 06:57:36.717389: step: 876/466, loss: 0.0017336331075057387 2023-01-24 06:57:37.482138: step: 878/466, loss: 0.03967329487204552 2023-01-24 06:57:38.252047: step: 880/466, loss: 0.008279534988105297 2023-01-24 06:57:39.040075: step: 882/466, loss: 0.004151599947363138 2023-01-24 06:57:39.792236: step: 884/466, loss: 0.014451387338340282 2023-01-24 06:57:40.485206: step: 886/466, loss: 0.0016142098465934396 2023-01-24 06:57:41.345078: step: 888/466, loss: 0.009346477687358856 2023-01-24 06:57:42.099384: step: 890/466, loss: 0.010483842343091965 2023-01-24 06:57:42.841381: step: 892/466, loss: 3.388475670362823e-05 2023-01-24 06:57:43.656709: step: 894/466, loss: 0.007849263958632946 2023-01-24 06:57:44.424412: step: 896/466, loss: 0.06408103555440903 2023-01-24 06:57:45.280967: step: 898/466, loss: 0.008165200240910053 2023-01-24 06:57:46.011643: step: 900/466, loss: 0.02221984788775444 2023-01-24 06:57:46.801822: step: 902/466, loss: 0.0095894830301404 2023-01-24 06:57:47.572779: step: 904/466, loss: 0.015169389545917511 2023-01-24 06:57:48.348655: step: 906/466, loss: 0.001172103569842875 2023-01-24 06:57:49.058776: step: 908/466, loss: 0.009952932596206665 2023-01-24 06:57:49.919068: step: 910/466, loss: 0.007936015725135803 2023-01-24 06:57:50.702254: step: 912/466, loss: 0.011334918439388275 2023-01-24 06:57:51.482364: step: 914/466, loss: 0.004115479066967964 2023-01-24 06:57:52.179212: step: 916/466, loss: 5.65770678804256e-05 2023-01-24 06:57:52.899621: step: 918/466, loss: 0.004586285445839167 2023-01-24 06:57:53.620214: step: 920/466, loss: 0.000270793039817363 2023-01-24 06:57:54.322531: step: 922/466, loss: 0.00014199443103279918 2023-01-24 06:57:55.013268: step: 924/466, loss: 0.0424426905810833 2023-01-24 06:57:55.862630: step: 926/466, loss: 0.023249058052897453 2023-01-24 06:57:56.591157: step: 928/466, loss: 0.022267363965511322 2023-01-24 06:57:57.300823: step: 930/466, loss: 0.008111781440675259 2023-01-24 06:57:58.012384: step: 932/466, loss: 0.003675048239529133 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3379506641366224, 'r': 0.3379506641366224, 'f1': 0.3379506641366224}, 'combined': 0.24901627883751123, 'epoch': 37} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.367587125724285, 'r': 0.2917762627066248, 'f1': 0.3253234851820725}, 'combined': 0.19995492259971284, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31579340819345414, 'r': 0.3451556036421814, 'f1': 0.32982230846678073}, 'combined': 0.24302696413341737, 'epoch': 37} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35567784140697833, 'r': 0.2961927258163832, 'f1': 0.3232211873211406}, 'combined': 0.19866277854860348, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3562906309751434, 'r': 0.3535863377609108, 'f1': 0.3549333333333333}, 'combined': 0.2615298245614035, 'epoch': 37} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37270231967819284, 'r': 0.29163795032011103, 'f1': 0.32722430206067876}, 'combined': 0.20210912774336046, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3161764705882353, 'r': 0.30714285714285716, 'f1': 0.3115942028985507}, 'combined': 0.2077294685990338, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2905405405405405, 'r': 0.4673913043478261, 'f1': 0.3583333333333333}, 'combined': 0.17916666666666664, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3561785714285714, 'r': 0.3210338167525074, 'f1': 0.33769425434844597}, 'combined': 0.24882734530938122, 'epoch': 33} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37857267048883675, 'r': 0.28679747764305813, 'f1': 0.32635575042141096}, 'combined': 0.20058938806389162, 'epoch': 33} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.325, 'f1': 0.3729508196721312}, 'combined': 0.24863387978142076, 'epoch': 33} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33043321205998016, 'r': 0.3285521880824091, 'f1': 0.3294900154508651}, 'combined': 0.24278211664800584, 'epoch': 36} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35885318259317184, 'r': 0.2913738579634333, 'f1': 0.321612082343187}, 'combined': 0.1976737676841052, 'epoch': 36} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3425925925925926, 'r': 0.40217391304347827, 'f1': 0.37}, 'combined': 0.185, 'epoch': 36} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37147302904564317, 'r': 0.339753320683112, 'f1': 0.35490584737363723}, 'combined': 0.26150957174899586, 'epoch': 36} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38138922621752097, 'r': 0.28918160566753104, 'f1': 0.32894585799934034}, 'combined': 0.20317244170547497, 'epoch': 36} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 36} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 07:00:42.800505: step: 2/466, loss: 0.00015852558135520667 2023-01-24 07:00:43.473736: step: 4/466, loss: 0.00033445178996771574 2023-01-24 07:00:44.275960: step: 6/466, loss: 0.0018999316962435842 2023-01-24 07:00:45.103146: step: 8/466, loss: 0.002631555078551173 2023-01-24 07:00:45.774977: step: 10/466, loss: 0.01319513376802206 2023-01-24 07:00:46.531792: step: 12/466, loss: 0.004481269977986813 2023-01-24 07:00:47.355858: step: 14/466, loss: 0.00275377556681633 2023-01-24 07:00:48.085243: step: 16/466, loss: 0.003091811668127775 2023-01-24 07:00:48.781116: step: 18/466, loss: 2.619748830795288 2023-01-24 07:00:49.518796: step: 20/466, loss: 0.012885436415672302 2023-01-24 07:00:50.218282: step: 22/466, loss: 0.04020577296614647 2023-01-24 07:00:50.903987: step: 24/466, loss: 0.015233595855534077 2023-01-24 07:00:51.753423: step: 26/466, loss: 0.0010679669212549925 2023-01-24 07:00:52.517390: step: 28/466, loss: 0.014175614342093468 2023-01-24 07:00:53.257659: step: 30/466, loss: 0.0001996166247408837 2023-01-24 07:00:53.976047: step: 32/466, loss: 0.011988112702965736 2023-01-24 07:00:54.646660: step: 34/466, loss: 0.00021556120191235095 2023-01-24 07:00:55.350280: step: 36/466, loss: 0.0013716488610953093 2023-01-24 07:00:56.108406: step: 38/466, loss: 0.001971025485545397 2023-01-24 07:00:56.868079: step: 40/466, loss: 9.164853690890595e-05 2023-01-24 07:00:57.626053: step: 42/466, loss: 0.0010934865567833185 2023-01-24 07:00:58.341931: step: 44/466, loss: 0.022326651960611343 2023-01-24 07:00:59.118723: step: 46/466, loss: 0.0021874187514185905 2023-01-24 07:00:59.898256: step: 48/466, loss: 0.0008808678830973804 2023-01-24 07:01:00.639175: step: 50/466, loss: 0.10775995254516602 2023-01-24 07:01:01.359887: step: 52/466, loss: 0.00046264220145531 2023-01-24 07:01:02.094515: step: 54/466, loss: 0.006234243977814913 2023-01-24 07:01:02.852274: step: 56/466, loss: 0.010173077695071697 2023-01-24 07:01:03.651658: step: 58/466, loss: 0.04318838566541672 2023-01-24 07:01:04.354867: step: 60/466, loss: 0.0006960787577554584 2023-01-24 07:01:05.084773: step: 62/466, loss: 0.45456135272979736 2023-01-24 07:01:05.856640: step: 64/466, loss: 0.007998891174793243 2023-01-24 07:01:06.685122: step: 66/466, loss: 0.116494320333004 2023-01-24 07:01:07.453338: step: 68/466, loss: 0.0003614113375078887 2023-01-24 07:01:08.181705: step: 70/466, loss: 0.0005324063822627068 2023-01-24 07:01:09.085030: step: 72/466, loss: 0.0016135982004925609 2023-01-24 07:01:09.768455: step: 74/466, loss: 0.0055488417856395245 2023-01-24 07:01:10.539758: step: 76/466, loss: 0.05743572115898132 2023-01-24 07:01:11.303395: step: 78/466, loss: 0.28396084904670715 2023-01-24 07:01:12.041893: step: 80/466, loss: 0.010283631272614002 2023-01-24 07:01:12.841183: step: 82/466, loss: 0.008233075961470604 2023-01-24 07:01:13.541781: step: 84/466, loss: 0.03372426703572273 2023-01-24 07:01:14.260942: step: 86/466, loss: 0.00029308663215488195 2023-01-24 07:01:15.043051: step: 88/466, loss: 0.07326936721801758 2023-01-24 07:01:15.922155: step: 90/466, loss: 0.008953921496868134 2023-01-24 07:01:16.637222: step: 92/466, loss: 5.050439722253941e-05 2023-01-24 07:01:17.441327: step: 94/466, loss: 0.008332867175340652 2023-01-24 07:01:18.161955: step: 96/466, loss: 0.06297744810581207 2023-01-24 07:01:18.868186: step: 98/466, loss: 0.002277067629620433 2023-01-24 07:01:19.691508: step: 100/466, loss: 0.020023813471198082 2023-01-24 07:01:20.460939: step: 102/466, loss: 0.0022804755717515945 2023-01-24 07:01:21.207121: step: 104/466, loss: 0.004442311357706785 2023-01-24 07:01:21.963201: step: 106/466, loss: 0.0021702053491026163 2023-01-24 07:01:22.650265: step: 108/466, loss: 9.901211342366878e-06 2023-01-24 07:01:23.440888: step: 110/466, loss: 0.012853569351136684 2023-01-24 07:01:24.242219: step: 112/466, loss: 0.0051322742365300655 2023-01-24 07:01:24.948701: step: 114/466, loss: 0.0065353913232684135 2023-01-24 07:01:25.745775: step: 116/466, loss: 0.00728453928604722 2023-01-24 07:01:26.413499: step: 118/466, loss: 0.003233555005863309 2023-01-24 07:01:27.083808: step: 120/466, loss: 0.0018335931235924363 2023-01-24 07:01:27.832037: step: 122/466, loss: 0.0021310176234692335 2023-01-24 07:01:28.569284: step: 124/466, loss: 3.200375067535788e-05 2023-01-24 07:01:29.248763: step: 126/466, loss: 0.046245038509368896 2023-01-24 07:01:29.971852: step: 128/466, loss: 0.03275144472718239 2023-01-24 07:01:30.751481: step: 130/466, loss: 0.012383184395730495 2023-01-24 07:01:31.481792: step: 132/466, loss: 0.6921976804733276 2023-01-24 07:01:32.292042: step: 134/466, loss: 0.023298662155866623 2023-01-24 07:01:33.058256: step: 136/466, loss: 0.0016130059957504272 2023-01-24 07:01:33.856136: step: 138/466, loss: 0.0016224872088059783 2023-01-24 07:01:34.576864: step: 140/466, loss: 0.0008939092513173819 2023-01-24 07:01:35.348538: step: 142/466, loss: 0.0006672238232567906 2023-01-24 07:01:36.069042: step: 144/466, loss: 0.00451664999127388 2023-01-24 07:01:36.735768: step: 146/466, loss: 0.00010222888522548601 2023-01-24 07:01:37.423873: step: 148/466, loss: 0.014285118319094181 2023-01-24 07:01:38.186966: step: 150/466, loss: 0.007723046466708183 2023-01-24 07:01:38.984950: step: 152/466, loss: 0.013501619920134544 2023-01-24 07:01:39.783449: step: 154/466, loss: 0.010910639539361 2023-01-24 07:01:40.535627: step: 156/466, loss: 0.9693259596824646 2023-01-24 07:01:41.213991: step: 158/466, loss: 0.13675321638584137 2023-01-24 07:01:41.984872: step: 160/466, loss: 0.0007152618491090834 2023-01-24 07:01:42.724018: step: 162/466, loss: 0.009600437246263027 2023-01-24 07:01:43.478519: step: 164/466, loss: 0.0026017329655587673 2023-01-24 07:01:44.170234: step: 166/466, loss: 0.0006253659958019853 2023-01-24 07:01:44.866202: step: 168/466, loss: 0.0031114050652831793 2023-01-24 07:01:45.542199: step: 170/466, loss: 0.002069795271381736 2023-01-24 07:01:46.289430: step: 172/466, loss: 0.006407567299902439 2023-01-24 07:01:47.001321: step: 174/466, loss: 0.005875298287719488 2023-01-24 07:01:47.672039: step: 176/466, loss: 0.014285593293607235 2023-01-24 07:01:48.464294: step: 178/466, loss: 0.011498566716909409 2023-01-24 07:01:49.179940: step: 180/466, loss: 0.004969505127519369 2023-01-24 07:01:49.911775: step: 182/466, loss: 0.0030212486162781715 2023-01-24 07:01:50.663232: step: 184/466, loss: 0.0011963235447183251 2023-01-24 07:01:51.447186: step: 186/466, loss: 0.0015245783142745495 2023-01-24 07:01:52.201375: step: 188/466, loss: 0.0013877090532332659 2023-01-24 07:01:53.041180: step: 190/466, loss: 0.03716769814491272 2023-01-24 07:01:53.808723: step: 192/466, loss: 0.00022803526371717453 2023-01-24 07:01:54.574235: step: 194/466, loss: 0.019137857481837273 2023-01-24 07:01:55.242222: step: 196/466, loss: 0.0002453289635013789 2023-01-24 07:01:55.978538: step: 198/466, loss: 0.003092587925493717 2023-01-24 07:01:56.847842: step: 200/466, loss: 0.0006169404368847609 2023-01-24 07:01:57.560784: step: 202/466, loss: 0.004191730171442032 2023-01-24 07:01:58.391332: step: 204/466, loss: 0.00016001469339244068 2023-01-24 07:01:59.179882: step: 206/466, loss: 0.0030550588853657246 2023-01-24 07:01:59.857887: step: 208/466, loss: 0.0962376520037651 2023-01-24 07:02:00.559190: step: 210/466, loss: 0.02584371156990528 2023-01-24 07:02:01.278375: step: 212/466, loss: 0.01125494297593832 2023-01-24 07:02:02.048983: step: 214/466, loss: 4.2713403672678396e-05 2023-01-24 07:02:02.843474: step: 216/466, loss: 0.039989691227674484 2023-01-24 07:02:03.585585: step: 218/466, loss: 0.00022126472322270274 2023-01-24 07:02:04.398655: step: 220/466, loss: 0.012783754616975784 2023-01-24 07:02:05.127133: step: 222/466, loss: 0.5585067868232727 2023-01-24 07:02:05.863810: step: 224/466, loss: 0.14413070678710938 2023-01-24 07:02:06.542169: step: 226/466, loss: 0.04216240718960762 2023-01-24 07:02:07.252625: step: 228/466, loss: 0.0006263788091018796 2023-01-24 07:02:08.008056: step: 230/466, loss: 0.00011739307956304401 2023-01-24 07:02:08.681610: step: 232/466, loss: 0.0006011001532897353 2023-01-24 07:02:09.368475: step: 234/466, loss: 0.00026434441679157317 2023-01-24 07:02:10.144384: step: 236/466, loss: 0.03465807065367699 2023-01-24 07:02:10.943381: step: 238/466, loss: 0.03530780225992203 2023-01-24 07:02:11.729751: step: 240/466, loss: 0.007487792056053877 2023-01-24 07:02:12.498425: step: 242/466, loss: 0.027928482741117477 2023-01-24 07:02:13.207372: step: 244/466, loss: 0.0005281373159959912 2023-01-24 07:02:13.947699: step: 246/466, loss: 2.9038224965916015e-05 2023-01-24 07:02:14.720958: step: 248/466, loss: 0.015432695858180523 2023-01-24 07:02:15.583824: step: 250/466, loss: 0.0829966738820076 2023-01-24 07:02:16.239652: step: 252/466, loss: 0.0011326514650136232 2023-01-24 07:02:16.979292: step: 254/466, loss: 0.001102382899262011 2023-01-24 07:02:17.721239: step: 256/466, loss: 5.16331747348886e-05 2023-01-24 07:02:18.475720: step: 258/466, loss: 0.002248742850497365 2023-01-24 07:02:19.239753: step: 260/466, loss: 0.0016092954901978374 2023-01-24 07:02:19.933688: step: 262/466, loss: 0.0007166212890297174 2023-01-24 07:02:20.638890: step: 264/466, loss: 6.099096208345145e-05 2023-01-24 07:02:21.388152: step: 266/466, loss: 0.04940228536725044 2023-01-24 07:02:22.231507: step: 268/466, loss: 0.00033056834945455194 2023-01-24 07:02:22.938357: step: 270/466, loss: 0.00013981727533973753 2023-01-24 07:02:23.668715: step: 272/466, loss: 0.004971823655068874 2023-01-24 07:02:24.433010: step: 274/466, loss: 0.000589183415286243 2023-01-24 07:02:25.157566: step: 276/466, loss: 0.0033571096137166023 2023-01-24 07:02:25.883702: step: 278/466, loss: 0.02178746648132801 2023-01-24 07:02:26.682440: step: 280/466, loss: 1.2804764537577285e-06 2023-01-24 07:02:27.436241: step: 282/466, loss: 0.004462054930627346 2023-01-24 07:02:28.176443: step: 284/466, loss: 0.003304133890196681 2023-01-24 07:02:28.852566: step: 286/466, loss: 0.007589017506688833 2023-01-24 07:02:29.529085: step: 288/466, loss: 5.5421369324903935e-06 2023-01-24 07:02:30.345101: step: 290/466, loss: 0.05665628984570503 2023-01-24 07:02:31.148683: step: 292/466, loss: 0.04041202366352081 2023-01-24 07:02:32.001032: step: 294/466, loss: 0.425844669342041 2023-01-24 07:02:32.746387: step: 296/466, loss: 0.010454821400344372 2023-01-24 07:02:33.496734: step: 298/466, loss: 0.0012415708042681217 2023-01-24 07:02:34.237378: step: 300/466, loss: 0.049156442284584045 2023-01-24 07:02:35.056365: step: 302/466, loss: 0.0007102354429662228 2023-01-24 07:02:35.762135: step: 304/466, loss: 0.0025498925242573023 2023-01-24 07:02:36.486056: step: 306/466, loss: 0.0020394527819007635 2023-01-24 07:02:37.225836: step: 308/466, loss: 0.0024311868473887444 2023-01-24 07:02:37.958554: step: 310/466, loss: 0.026064248755574226 2023-01-24 07:02:38.760785: step: 312/466, loss: 0.05080139636993408 2023-01-24 07:02:39.493082: step: 314/466, loss: 0.003608688246458769 2023-01-24 07:02:40.209755: step: 316/466, loss: 4.1068644350161776e-05 2023-01-24 07:02:40.927413: step: 318/466, loss: 0.04948783665895462 2023-01-24 07:02:41.677690: step: 320/466, loss: 0.00613086624071002 2023-01-24 07:02:42.370199: step: 322/466, loss: 0.014376579783856869 2023-01-24 07:02:43.118960: step: 324/466, loss: 0.007867695763707161 2023-01-24 07:02:43.785141: step: 326/466, loss: 0.011521144770085812 2023-01-24 07:02:44.541537: step: 328/466, loss: 0.005872808862477541 2023-01-24 07:02:45.328580: step: 330/466, loss: 0.0047467658296227455 2023-01-24 07:02:46.229849: step: 332/466, loss: 0.003558957949280739 2023-01-24 07:02:47.022643: step: 334/466, loss: 0.006062633357942104 2023-01-24 07:02:47.849452: step: 336/466, loss: 0.0010586964199319482 2023-01-24 07:02:48.579679: step: 338/466, loss: 0.0005861878162249923 2023-01-24 07:02:49.388418: step: 340/466, loss: 0.0028245861176401377 2023-01-24 07:02:50.306046: step: 342/466, loss: 0.03435346484184265 2023-01-24 07:02:51.053163: step: 344/466, loss: 0.022072521969676018 2023-01-24 07:02:51.786063: step: 346/466, loss: 0.00022341452131513506 2023-01-24 07:02:52.554956: step: 348/466, loss: 0.025059375911951065 2023-01-24 07:02:53.347023: step: 350/466, loss: 0.00031253520864993334 2023-01-24 07:02:54.128584: step: 352/466, loss: 0.00012384621368255466 2023-01-24 07:02:54.913920: step: 354/466, loss: 0.004707323852926493 2023-01-24 07:02:55.677667: step: 356/466, loss: 0.1190241277217865 2023-01-24 07:02:56.424226: step: 358/466, loss: 0.00039719868800602853 2023-01-24 07:02:57.189591: step: 360/466, loss: 0.029429111629724503 2023-01-24 07:02:57.957204: step: 362/466, loss: 0.01712273247539997 2023-01-24 07:02:58.745591: step: 364/466, loss: 0.009178683161735535 2023-01-24 07:02:59.505763: step: 366/466, loss: 0.00032894068863242865 2023-01-24 07:03:00.189484: step: 368/466, loss: 0.0008436114294454455 2023-01-24 07:03:01.003866: step: 370/466, loss: 0.0006354791112244129 2023-01-24 07:03:01.849572: step: 372/466, loss: 0.05993903428316116 2023-01-24 07:03:02.638434: step: 374/466, loss: 0.00040928684757091105 2023-01-24 07:03:03.427566: step: 376/466, loss: 0.0014766182284802198 2023-01-24 07:03:04.141663: step: 378/466, loss: 3.832852598861791e-05 2023-01-24 07:03:04.941155: step: 380/466, loss: 0.06406915932893753 2023-01-24 07:03:05.662211: step: 382/466, loss: 0.00010840618779184297 2023-01-24 07:03:06.433521: step: 384/466, loss: 0.007566460873931646 2023-01-24 07:03:07.196877: step: 386/466, loss: 0.0007533340249210596 2023-01-24 07:03:07.926140: step: 388/466, loss: 0.00020608464546967298 2023-01-24 07:03:08.671261: step: 390/466, loss: 0.03024054691195488 2023-01-24 07:03:09.504719: step: 392/466, loss: 0.04676214978098869 2023-01-24 07:03:10.201582: step: 394/466, loss: 0.0019371528178453445 2023-01-24 07:03:10.979367: step: 396/466, loss: 0.00275464728474617 2023-01-24 07:03:11.660822: step: 398/466, loss: 0.0007575393537990749 2023-01-24 07:03:12.373121: step: 400/466, loss: 0.02955714613199234 2023-01-24 07:03:13.170239: step: 402/466, loss: 0.021825818344950676 2023-01-24 07:03:13.984903: step: 404/466, loss: 0.0011155270040035248 2023-01-24 07:03:14.795085: step: 406/466, loss: 7.650330371689051e-05 2023-01-24 07:03:15.550216: step: 408/466, loss: 0.01209378894418478 2023-01-24 07:03:16.292437: step: 410/466, loss: 0.09773796796798706 2023-01-24 07:03:17.102016: step: 412/466, loss: 0.007413296960294247 2023-01-24 07:03:17.844574: step: 414/466, loss: 0.010011570528149605 2023-01-24 07:03:18.636131: step: 416/466, loss: 0.0029658616986125708 2023-01-24 07:03:19.452668: step: 418/466, loss: 0.018716806545853615 2023-01-24 07:03:20.188646: step: 420/466, loss: 3.0199102184269577e-05 2023-01-24 07:03:20.924395: step: 422/466, loss: 0.0003793942742049694 2023-01-24 07:03:21.671869: step: 424/466, loss: 0.0004991032765246928 2023-01-24 07:03:22.427985: step: 426/466, loss: 0.00234232097864151 2023-01-24 07:03:23.165491: step: 428/466, loss: 0.015742633491754532 2023-01-24 07:03:23.886370: step: 430/466, loss: 0.022508902475237846 2023-01-24 07:03:24.587962: step: 432/466, loss: 6.772443884983659e-05 2023-01-24 07:03:25.375074: step: 434/466, loss: 0.0015357910888269544 2023-01-24 07:03:26.124178: step: 436/466, loss: 0.0032131534535437822 2023-01-24 07:03:26.867424: step: 438/466, loss: 0.0007034862646833062 2023-01-24 07:03:27.609932: step: 440/466, loss: 0.0008740437333472073 2023-01-24 07:03:28.363927: step: 442/466, loss: 0.9496735334396362 2023-01-24 07:03:29.142342: step: 444/466, loss: 0.016295237466692924 2023-01-24 07:03:29.867725: step: 446/466, loss: 0.0013185566058382392 2023-01-24 07:03:30.550945: step: 448/466, loss: 0.01054754201322794 2023-01-24 07:03:31.276784: step: 450/466, loss: 0.01293019950389862 2023-01-24 07:03:32.162704: step: 452/466, loss: 0.0074695199728012085 2023-01-24 07:03:32.857450: step: 454/466, loss: 0.01076709944754839 2023-01-24 07:03:33.623453: step: 456/466, loss: 0.005442376714199781 2023-01-24 07:03:34.344389: step: 458/466, loss: 0.005675592925399542 2023-01-24 07:03:35.158076: step: 460/466, loss: 0.005998608190566301 2023-01-24 07:03:35.880909: step: 462/466, loss: 0.0031393063254654408 2023-01-24 07:03:36.594837: step: 464/466, loss: 0.0003654154425021261 2023-01-24 07:03:37.361941: step: 466/466, loss: 0.007295468356460333 2023-01-24 07:03:38.055895: step: 468/466, loss: 0.002882494358345866 2023-01-24 07:03:38.912840: step: 470/466, loss: 0.002005321439355612 2023-01-24 07:03:39.681191: step: 472/466, loss: 0.10671700537204742 2023-01-24 07:03:40.394531: step: 474/466, loss: 0.00037021772004663944 2023-01-24 07:03:41.148899: step: 476/466, loss: 0.001452124328352511 2023-01-24 07:03:41.907299: step: 478/466, loss: 0.012873172760009766 2023-01-24 07:03:42.626490: step: 480/466, loss: 0.00556617695838213 2023-01-24 07:03:43.361943: step: 482/466, loss: 0.003965499345213175 2023-01-24 07:03:44.162520: step: 484/466, loss: 0.026193661615252495 2023-01-24 07:03:44.900287: step: 486/466, loss: 0.03303632140159607 2023-01-24 07:03:45.662734: step: 488/466, loss: 0.006419027224183083 2023-01-24 07:03:46.428306: step: 490/466, loss: 0.011763244867324829 2023-01-24 07:03:47.129465: step: 492/466, loss: 0.0004500457434915006 2023-01-24 07:03:47.924033: step: 494/466, loss: 0.000631829840131104 2023-01-24 07:03:48.692064: step: 496/466, loss: 0.01301645953208208 2023-01-24 07:03:49.369531: step: 498/466, loss: 0.002552577992901206 2023-01-24 07:03:50.066516: step: 500/466, loss: 0.002179771428927779 2023-01-24 07:03:50.777664: step: 502/466, loss: 0.003267711028456688 2023-01-24 07:03:51.477981: step: 504/466, loss: 0.003001777222380042 2023-01-24 07:03:52.232684: step: 506/466, loss: 0.2626116871833801 2023-01-24 07:03:53.027775: step: 508/466, loss: 4.068400085088797e-05 2023-01-24 07:03:53.778233: step: 510/466, loss: 0.0008522938587702811 2023-01-24 07:03:54.590507: step: 512/466, loss: 0.004108110908418894 2023-01-24 07:03:55.261613: step: 514/466, loss: 0.0018746658461168408 2023-01-24 07:03:56.045086: step: 516/466, loss: 0.1399819254875183 2023-01-24 07:03:56.742002: step: 518/466, loss: 0.008463362231850624 2023-01-24 07:03:57.554222: step: 520/466, loss: 0.0003495319979265332 2023-01-24 07:03:58.373414: step: 522/466, loss: 0.011042834259569645 2023-01-24 07:03:59.230879: step: 524/466, loss: 0.026595329865813255 2023-01-24 07:04:00.020683: step: 526/466, loss: 0.00015499211440328509 2023-01-24 07:04:00.746852: step: 528/466, loss: 0.0020827208645641804 2023-01-24 07:04:01.520829: step: 530/466, loss: 0.000841870962176472 2023-01-24 07:04:02.358401: step: 532/466, loss: 0.0030897376127541065 2023-01-24 07:04:03.166245: step: 534/466, loss: 0.017255224287509918 2023-01-24 07:04:03.934182: step: 536/466, loss: 0.026173541322350502 2023-01-24 07:04:04.662555: step: 538/466, loss: 0.0008655735873617232 2023-01-24 07:04:05.508678: step: 540/466, loss: 0.002662037266418338 2023-01-24 07:04:06.269789: step: 542/466, loss: 0.6328471899032593 2023-01-24 07:04:06.991441: step: 544/466, loss: 0.002544648479670286 2023-01-24 07:04:07.810059: step: 546/466, loss: 0.0013098662020638585 2023-01-24 07:04:08.491609: step: 548/466, loss: 0.0001053504747687839 2023-01-24 07:04:09.287709: step: 550/466, loss: 3.780534098041244e-05 2023-01-24 07:04:09.996607: step: 552/466, loss: 0.014702056534588337 2023-01-24 07:04:10.799928: step: 554/466, loss: 0.0015839324332773685 2023-01-24 07:04:11.608596: step: 556/466, loss: 0.009505736641585827 2023-01-24 07:04:12.322095: step: 558/466, loss: 0.011248605325818062 2023-01-24 07:04:13.027465: step: 560/466, loss: 0.0002523681614547968 2023-01-24 07:04:13.763631: step: 562/466, loss: 0.008334793150424957 2023-01-24 07:04:14.470291: step: 564/466, loss: 0.0003578077012207359 2023-01-24 07:04:15.320493: step: 566/466, loss: 0.039999186992645264 2023-01-24 07:04:16.113581: step: 568/466, loss: 0.004132232163101435 2023-01-24 07:04:16.890010: step: 570/466, loss: 0.0009728502482175827 2023-01-24 07:04:17.718107: step: 572/466, loss: 0.01512494869530201 2023-01-24 07:04:18.478928: step: 574/466, loss: 3.994491999037564e-05 2023-01-24 07:04:19.321749: step: 576/466, loss: 0.005956718698143959 2023-01-24 07:04:20.040149: step: 578/466, loss: 0.018015773966908455 2023-01-24 07:04:20.691932: step: 580/466, loss: 0.0006239361246116459 2023-01-24 07:04:21.399507: step: 582/466, loss: 0.027943609282374382 2023-01-24 07:04:22.169562: step: 584/466, loss: 0.1084119901061058 2023-01-24 07:04:22.938461: step: 586/466, loss: 0.008794408291578293 2023-01-24 07:04:23.689688: step: 588/466, loss: 0.0004625521833077073 2023-01-24 07:04:24.635457: step: 590/466, loss: 0.018595660105347633 2023-01-24 07:04:25.403512: step: 592/466, loss: 0.002094178693369031 2023-01-24 07:04:26.290027: step: 594/466, loss: 0.0010547454003244638 2023-01-24 07:04:26.955110: step: 596/466, loss: 0.05371860787272453 2023-01-24 07:04:27.731838: step: 598/466, loss: 0.0032484375406056643 2023-01-24 07:04:28.440827: step: 600/466, loss: 0.01112589705735445 2023-01-24 07:04:29.208233: step: 602/466, loss: 0.0007691808859817684 2023-01-24 07:04:29.939029: step: 604/466, loss: 0.015428180806338787 2023-01-24 07:04:30.739126: step: 606/466, loss: 0.0009691608138382435 2023-01-24 07:04:31.455814: step: 608/466, loss: 0.0001645991433179006 2023-01-24 07:04:32.143036: step: 610/466, loss: 0.009261749684810638 2023-01-24 07:04:32.877789: step: 612/466, loss: 0.0037737779784947634 2023-01-24 07:04:33.616435: step: 614/466, loss: 0.0011043829144909978 2023-01-24 07:04:34.306309: step: 616/466, loss: 0.0018476293189451098 2023-01-24 07:04:35.063178: step: 618/466, loss: 0.00040778619586490095 2023-01-24 07:04:35.841105: step: 620/466, loss: 0.007349770981818438 2023-01-24 07:04:36.607670: step: 622/466, loss: 0.0008918479434214532 2023-01-24 07:04:37.357702: step: 624/466, loss: 0.0003366429591551423 2023-01-24 07:04:38.065280: step: 626/466, loss: 0.003023396944627166 2023-01-24 07:04:38.929982: step: 628/466, loss: 0.016576239839196205 2023-01-24 07:04:39.710631: step: 630/466, loss: 0.04712849110364914 2023-01-24 07:04:40.475849: step: 632/466, loss: 0.20594191551208496 2023-01-24 07:04:41.244265: step: 634/466, loss: 0.00019367921049706638 2023-01-24 07:04:42.041517: step: 636/466, loss: 0.006996245123445988 2023-01-24 07:04:42.830199: step: 638/466, loss: 0.0027225457597523928 2023-01-24 07:04:43.609047: step: 640/466, loss: 0.042146023362874985 2023-01-24 07:04:44.344132: step: 642/466, loss: 0.06693723797798157 2023-01-24 07:04:45.085529: step: 644/466, loss: 0.007613023277372122 2023-01-24 07:04:45.814003: step: 646/466, loss: 0.008608619682490826 2023-01-24 07:04:46.560660: step: 648/466, loss: 0.016024397686123848 2023-01-24 07:04:47.365073: step: 650/466, loss: 0.0009257309720851481 2023-01-24 07:04:48.085458: step: 652/466, loss: 0.08387546241283417 2023-01-24 07:04:48.813649: step: 654/466, loss: 0.0019786653574556112 2023-01-24 07:04:49.490175: step: 656/466, loss: 0.0012632932048290968 2023-01-24 07:04:50.222539: step: 658/466, loss: 0.0005319062620401382 2023-01-24 07:04:51.111768: step: 660/466, loss: 0.016213281080126762 2023-01-24 07:04:51.897088: step: 662/466, loss: 0.01797870174050331 2023-01-24 07:04:52.708451: step: 664/466, loss: 0.0009241351508535445 2023-01-24 07:04:53.503613: step: 666/466, loss: 0.0023297558072954416 2023-01-24 07:04:54.314997: step: 668/466, loss: 0.0005481429398059845 2023-01-24 07:04:55.073478: step: 670/466, loss: 0.0035553527995944023 2023-01-24 07:04:55.852447: step: 672/466, loss: 0.03022400662302971 2023-01-24 07:04:56.563977: step: 674/466, loss: 5.107448669150472e-05 2023-01-24 07:04:57.238151: step: 676/466, loss: 7.539623038610443e-05 2023-01-24 07:04:57.973634: step: 678/466, loss: 0.00021213498257566243 2023-01-24 07:04:58.738345: step: 680/466, loss: 0.025467250496149063 2023-01-24 07:04:59.488861: step: 682/466, loss: 0.06051236391067505 2023-01-24 07:05:00.151887: step: 684/466, loss: 0.0003113978891633451 2023-01-24 07:05:00.891007: step: 686/466, loss: 0.00044558229274116457 2023-01-24 07:05:01.651428: step: 688/466, loss: 0.009315641596913338 2023-01-24 07:05:02.504760: step: 690/466, loss: 0.0009563063504174352 2023-01-24 07:05:03.209936: step: 692/466, loss: 0.0002467456506565213 2023-01-24 07:05:04.026929: step: 694/466, loss: 0.2347729653120041 2023-01-24 07:05:04.827213: step: 696/466, loss: 0.00039162777829915285 2023-01-24 07:05:05.636365: step: 698/466, loss: 0.26982438564300537 2023-01-24 07:05:06.560081: step: 700/466, loss: 0.08687514811754227 2023-01-24 07:05:07.385943: step: 702/466, loss: 0.02728988230228424 2023-01-24 07:05:08.115691: step: 704/466, loss: 0.012942968867719173 2023-01-24 07:05:08.907946: step: 706/466, loss: 0.006773222703486681 2023-01-24 07:05:09.636773: step: 708/466, loss: 0.00023235747357830405 2023-01-24 07:05:10.435966: step: 710/466, loss: 0.0025265063159167767 2023-01-24 07:05:11.164348: step: 712/466, loss: 0.002385400701314211 2023-01-24 07:05:11.873798: step: 714/466, loss: 0.005350553430616856 2023-01-24 07:05:12.683479: step: 716/466, loss: 0.5845271944999695 2023-01-24 07:05:13.476675: step: 718/466, loss: 0.0005377520574256778 2023-01-24 07:05:14.205236: step: 720/466, loss: 0.0005495331133715808 2023-01-24 07:05:15.015877: step: 722/466, loss: 0.0012658092891797423 2023-01-24 07:05:15.661979: step: 724/466, loss: 0.00034427945502102375 2023-01-24 07:05:16.389176: step: 726/466, loss: 0.018899939954280853 2023-01-24 07:05:17.104150: step: 728/466, loss: 0.03183247894048691 2023-01-24 07:05:17.876982: step: 730/466, loss: 0.02796352282166481 2023-01-24 07:05:18.628866: step: 732/466, loss: 0.017680644989013672 2023-01-24 07:05:19.336427: step: 734/466, loss: 0.000501900096423924 2023-01-24 07:05:20.082396: step: 736/466, loss: 0.08719105273485184 2023-01-24 07:05:20.944007: step: 738/466, loss: 0.19477348029613495 2023-01-24 07:05:21.684977: step: 740/466, loss: 0.005550151690840721 2023-01-24 07:05:22.444485: step: 742/466, loss: 0.009086458012461662 2023-01-24 07:05:23.195733: step: 744/466, loss: 0.018472852185368538 2023-01-24 07:05:23.951799: step: 746/466, loss: 0.007351238746196032 2023-01-24 07:05:24.831411: step: 748/466, loss: 0.002588978037238121 2023-01-24 07:05:25.584904: step: 750/466, loss: 0.10875791311264038 2023-01-24 07:05:26.399601: step: 752/466, loss: 0.06240355968475342 2023-01-24 07:05:27.205349: step: 754/466, loss: 0.48639747500419617 2023-01-24 07:05:27.903098: step: 756/466, loss: 0.0014076440129429102 2023-01-24 07:05:28.642510: step: 758/466, loss: 0.00018148223171010613 2023-01-24 07:05:29.355808: step: 760/466, loss: 0.018693551421165466 2023-01-24 07:05:30.060326: step: 762/466, loss: 0.000626800290774554 2023-01-24 07:05:30.841447: step: 764/466, loss: 0.0006411468493752182 2023-01-24 07:05:31.489321: step: 766/466, loss: 0.00027139694429934025 2023-01-24 07:05:32.281204: step: 768/466, loss: 0.000939077464863658 2023-01-24 07:05:33.057897: step: 770/466, loss: 0.00024035456590354443 2023-01-24 07:05:33.782085: step: 772/466, loss: 0.007670735474675894 2023-01-24 07:05:34.568420: step: 774/466, loss: 0.0011248189257457852 2023-01-24 07:05:35.362887: step: 776/466, loss: 0.0717734843492508 2023-01-24 07:05:36.045173: step: 778/466, loss: 0.02742578461766243 2023-01-24 07:05:36.814667: step: 780/466, loss: 0.007576515898108482 2023-01-24 07:05:37.539303: step: 782/466, loss: 0.0005250229733064771 2023-01-24 07:05:38.373791: step: 784/466, loss: 0.012105558067560196 2023-01-24 07:05:39.126409: step: 786/466, loss: 0.006359845399856567 2023-01-24 07:05:39.821827: step: 788/466, loss: 0.24488550424575806 2023-01-24 07:05:40.575607: step: 790/466, loss: 0.0007657191599719226 2023-01-24 07:05:41.409693: step: 792/466, loss: 0.036015670746564865 2023-01-24 07:05:42.167973: step: 794/466, loss: 0.001616243040189147 2023-01-24 07:05:42.889958: step: 796/466, loss: 0.0015849809860810637 2023-01-24 07:05:43.636906: step: 798/466, loss: 0.0016199310775846243 2023-01-24 07:05:44.457291: step: 800/466, loss: 0.0002885865105781704 2023-01-24 07:05:45.162592: step: 802/466, loss: 0.009428664110600948 2023-01-24 07:05:45.932633: step: 804/466, loss: 0.009232708252966404 2023-01-24 07:05:46.608089: step: 806/466, loss: 0.005409192759543657 2023-01-24 07:05:47.427815: step: 808/466, loss: 0.0034889320377260447 2023-01-24 07:05:48.216889: step: 810/466, loss: 0.017431536689400673 2023-01-24 07:05:48.978901: step: 812/466, loss: 0.01968550868332386 2023-01-24 07:05:49.673488: step: 814/466, loss: 0.33501455187797546 2023-01-24 07:05:50.449246: step: 816/466, loss: 0.05295717343688011 2023-01-24 07:05:51.172784: step: 818/466, loss: 0.017188014462590218 2023-01-24 07:05:51.986457: step: 820/466, loss: 0.07719063013792038 2023-01-24 07:05:52.696632: step: 822/466, loss: 0.0049071419052779675 2023-01-24 07:05:53.457533: step: 824/466, loss: 0.006922825239598751 2023-01-24 07:05:54.203216: step: 826/466, loss: 0.000999518553726375 2023-01-24 07:05:54.929878: step: 828/466, loss: 0.0007073359447531402 2023-01-24 07:05:55.576208: step: 830/466, loss: 0.01538429781794548 2023-01-24 07:05:56.327404: step: 832/466, loss: 0.031232627108693123 2023-01-24 07:05:57.083418: step: 834/466, loss: 0.006306334864348173 2023-01-24 07:05:57.811154: step: 836/466, loss: 0.028609514236450195 2023-01-24 07:05:58.564559: step: 838/466, loss: 0.019720768555998802 2023-01-24 07:05:59.193141: step: 840/466, loss: 0.006836059037595987 2023-01-24 07:05:59.981434: step: 842/466, loss: 0.0017908071167767048 2023-01-24 07:06:00.695530: step: 844/466, loss: 0.015740016475319862 2023-01-24 07:06:01.472396: step: 846/466, loss: 0.002415057271718979 2023-01-24 07:06:02.307242: step: 848/466, loss: 0.20730531215667725 2023-01-24 07:06:03.053389: step: 850/466, loss: 0.0008651576936244965 2023-01-24 07:06:03.850173: step: 852/466, loss: 0.030669640749692917 2023-01-24 07:06:04.553956: step: 854/466, loss: 0.14007040858268738 2023-01-24 07:06:05.375260: step: 856/466, loss: 0.0048508713953197 2023-01-24 07:06:05.987071: step: 858/466, loss: 0.0003013678069692105 2023-01-24 07:06:06.709300: step: 860/466, loss: 0.02404596656560898 2023-01-24 07:06:07.483105: step: 862/466, loss: 0.006639343686401844 2023-01-24 07:06:08.211214: step: 864/466, loss: 0.007302514743059874 2023-01-24 07:06:08.920873: step: 866/466, loss: 5.850410889252089e-05 2023-01-24 07:06:09.667390: step: 868/466, loss: 0.029857581481337547 2023-01-24 07:06:10.386349: step: 870/466, loss: 1.3937670701125171e-05 2023-01-24 07:06:11.214890: step: 872/466, loss: 0.004357927944511175 2023-01-24 07:06:11.936996: step: 874/466, loss: 0.04697030410170555 2023-01-24 07:06:12.684767: step: 876/466, loss: 0.0013890161644667387 2023-01-24 07:06:13.400509: step: 878/466, loss: 0.0010503502562642097 2023-01-24 07:06:14.146206: step: 880/466, loss: 0.010377208702266216 2023-01-24 07:06:14.884441: step: 882/466, loss: 0.0020709082018584013 2023-01-24 07:06:15.620430: step: 884/466, loss: 0.001266932813450694 2023-01-24 07:06:16.329636: step: 886/466, loss: 0.008883432485163212 2023-01-24 07:06:17.029940: step: 888/466, loss: 0.011375979520380497 2023-01-24 07:06:17.817264: step: 890/466, loss: 0.01430260669440031 2023-01-24 07:06:18.644377: step: 892/466, loss: 0.0017887807916849852 2023-01-24 07:06:19.364594: step: 894/466, loss: 0.0024639118928462267 2023-01-24 07:06:20.182915: step: 896/466, loss: 0.00041694813990034163 2023-01-24 07:06:20.919154: step: 898/466, loss: 0.0011450136080384254 2023-01-24 07:06:21.633670: step: 900/466, loss: 0.004664968233555555 2023-01-24 07:06:22.358982: step: 902/466, loss: 0.11045849323272705 2023-01-24 07:06:23.094362: step: 904/466, loss: 0.00037624945980496705 2023-01-24 07:06:23.804182: step: 906/466, loss: 0.03347666561603546 2023-01-24 07:06:24.561483: step: 908/466, loss: 0.0009400354465469718 2023-01-24 07:06:25.316560: step: 910/466, loss: 0.026599857956171036 2023-01-24 07:06:25.989246: step: 912/466, loss: 0.044138990342617035 2023-01-24 07:06:26.763397: step: 914/466, loss: 0.006451373919844627 2023-01-24 07:06:27.553082: step: 916/466, loss: 0.002460882533341646 2023-01-24 07:06:28.238082: step: 918/466, loss: 0.005053863860666752 2023-01-24 07:06:29.051109: step: 920/466, loss: 0.007943828590214252 2023-01-24 07:06:29.875966: step: 922/466, loss: 0.0019944910891354084 2023-01-24 07:06:30.516714: step: 924/466, loss: 1.8738961443887092e-05 2023-01-24 07:06:31.280943: step: 926/466, loss: 0.02110712230205536 2023-01-24 07:06:32.012065: step: 928/466, loss: 0.027644284069538116 2023-01-24 07:06:32.771530: step: 930/466, loss: 0.09916841238737106 2023-01-24 07:06:33.531179: step: 932/466, loss: 0.008947809226810932 ================================================== Loss: 0.036 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3416696075663467, 'r': 0.3280546896177826, 'f1': 0.334723758816208}, 'combined': 0.24663855912773217, 'epoch': 38} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37697397846053926, 'r': 0.28656550050939694, 'f1': 0.3256105785424038}, 'combined': 0.20013137998216035, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.320523728320127, 'r': 0.3332960210994869, 'f1': 0.32678512208265975}, 'combined': 0.24078903732406506, 'epoch': 38} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.36520170261838375, 'r': 0.28552133113800915, 'f1': 0.32048312678756125}, 'combined': 0.19697987304991568, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36623458118187036, 'r': 0.34608125508267823, 'f1': 0.35587282229965156}, 'combined': 0.26222207958921695, 'epoch': 38} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.3854102961599207, 'r': 0.2873058571373954, 'f1': 0.3292046279699323}, 'combined': 0.20333227021672293, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3446969696969697, 'r': 0.325, 'f1': 0.3345588235294118}, 'combined': 0.22303921568627452, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2661290322580645, 'r': 0.358695652173913, 'f1': 0.30555555555555547}, 'combined': 0.15277777777777773, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3561785714285714, 'r': 0.3210338167525074, 'f1': 0.33769425434844597}, 'combined': 0.24882734530938122, 'epoch': 33} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37857267048883675, 'r': 0.28679747764305813, 'f1': 0.32635575042141096}, 'combined': 0.20058938806389162, 'epoch': 33} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.325, 'f1': 0.3729508196721312}, 'combined': 0.24863387978142076, 'epoch': 33} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33043321205998016, 'r': 0.3285521880824091, 'f1': 0.3294900154508651}, 'combined': 0.24278211664800584, 'epoch': 36} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35885318259317184, 'r': 0.2913738579634333, 'f1': 0.321612082343187}, 'combined': 0.1976737676841052, 'epoch': 36} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3425925925925926, 'r': 0.40217391304347827, 'f1': 0.37}, 'combined': 0.185, 'epoch': 36} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37147302904564317, 'r': 0.339753320683112, 'f1': 0.35490584737363723}, 'combined': 0.26150957174899586, 'epoch': 36} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38138922621752097, 'r': 0.28918160566753104, 'f1': 0.32894585799934034}, 'combined': 0.20317244170547497, 'epoch': 36} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 36} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 07:09:17.274646: step: 2/466, loss: 0.0008740079356357455 2023-01-24 07:09:18.023059: step: 4/466, loss: 0.00029015541076660156 2023-01-24 07:09:18.859535: step: 6/466, loss: 0.005229489412158728 2023-01-24 07:09:19.711088: step: 8/466, loss: 0.005481514148414135 2023-01-24 07:09:20.516427: step: 10/466, loss: 0.0002174789842683822 2023-01-24 07:09:21.173338: step: 12/466, loss: 0.00012509390944615006 2023-01-24 07:09:21.876806: step: 14/466, loss: 0.019930889829993248 2023-01-24 07:09:22.534897: step: 16/466, loss: 0.00010953243327094242 2023-01-24 07:09:23.283300: step: 18/466, loss: 0.009445936419069767 2023-01-24 07:09:24.127211: step: 20/466, loss: 0.012281639501452446 2023-01-24 07:09:24.852239: step: 22/466, loss: 0.0023591876961290836 2023-01-24 07:09:25.658221: step: 24/466, loss: 0.008492972701787949 2023-01-24 07:09:26.393719: step: 26/466, loss: 6.964744898141362e-06 2023-01-24 07:09:27.199078: step: 28/466, loss: 0.00017814691818784922 2023-01-24 07:09:27.887478: step: 30/466, loss: 0.0022147567942738533 2023-01-24 07:09:28.630661: step: 32/466, loss: 0.0043442039750516415 2023-01-24 07:09:29.403173: step: 34/466, loss: 0.004584559705108404 2023-01-24 07:09:30.170191: step: 36/466, loss: 0.013966542668640614 2023-01-24 07:09:31.076302: step: 38/466, loss: 0.0012340530520305037 2023-01-24 07:09:31.785885: step: 40/466, loss: 0.028278250247240067 2023-01-24 07:09:32.604265: step: 42/466, loss: 0.4269455671310425 2023-01-24 07:09:33.396062: step: 44/466, loss: 0.008779562078416348 2023-01-24 07:09:34.152618: step: 46/466, loss: 0.00039897681563161314 2023-01-24 07:09:34.888451: step: 48/466, loss: 8.907252777135e-05 2023-01-24 07:09:35.667317: step: 50/466, loss: 0.007853885181248188 2023-01-24 07:09:36.414173: step: 52/466, loss: 0.00019616371719166636 2023-01-24 07:09:37.112908: step: 54/466, loss: 0.11887061595916748 2023-01-24 07:09:37.812238: step: 56/466, loss: 0.00011372471635695547 2023-01-24 07:09:38.517545: step: 58/466, loss: 0.013960963115096092 2023-01-24 07:09:39.353362: step: 60/466, loss: 0.03438699245452881 2023-01-24 07:09:40.113955: step: 62/466, loss: 0.011784059926867485 2023-01-24 07:09:40.844982: step: 64/466, loss: 0.014181561768054962 2023-01-24 07:09:41.541484: step: 66/466, loss: 0.05932268872857094 2023-01-24 07:09:42.264488: step: 68/466, loss: 0.0005954782827757299 2023-01-24 07:09:42.950218: step: 70/466, loss: 0.0001969360455404967 2023-01-24 07:09:43.681396: step: 72/466, loss: 0.007253388874232769 2023-01-24 07:09:44.379231: step: 74/466, loss: 0.001776547054760158 2023-01-24 07:09:45.150647: step: 76/466, loss: 0.0010972290765494108 2023-01-24 07:09:45.852405: step: 78/466, loss: 0.0016925893723964691 2023-01-24 07:09:46.582626: step: 80/466, loss: 0.0105961998924613 2023-01-24 07:09:47.308307: step: 82/466, loss: 0.001080236746929586 2023-01-24 07:09:48.054710: step: 84/466, loss: 0.00023807617253623903 2023-01-24 07:09:48.846117: step: 86/466, loss: 0.0045480867847800255 2023-01-24 07:09:49.606539: step: 88/466, loss: 0.007902023382484913 2023-01-24 07:09:50.321308: step: 90/466, loss: 0.1550598293542862 2023-01-24 07:09:51.076624: step: 92/466, loss: 0.0016592772444710135 2023-01-24 07:09:51.751761: step: 94/466, loss: 0.001391400583088398 2023-01-24 07:09:52.506484: step: 96/466, loss: 0.0004946725675836205 2023-01-24 07:09:53.238480: step: 98/466, loss: 0.008832229301333427 2023-01-24 07:09:53.966129: step: 100/466, loss: 0.013128861784934998 2023-01-24 07:09:54.747225: step: 102/466, loss: 0.000519815890584141 2023-01-24 07:09:55.493399: step: 104/466, loss: 0.001086760894395411 2023-01-24 07:09:56.203848: step: 106/466, loss: 0.0441858246922493 2023-01-24 07:09:57.011260: step: 108/466, loss: 0.0010125155095010996 2023-01-24 07:09:57.757161: step: 110/466, loss: 0.004666542634367943 2023-01-24 07:09:58.466946: step: 112/466, loss: 0.02774696797132492 2023-01-24 07:09:59.205832: step: 114/466, loss: 0.021488526836037636 2023-01-24 07:10:00.047460: step: 116/466, loss: 0.004335596691817045 2023-01-24 07:10:00.794071: step: 118/466, loss: 0.01922680251300335 2023-01-24 07:10:01.513656: step: 120/466, loss: 0.00018423274741508067 2023-01-24 07:10:02.274303: step: 122/466, loss: 1.6785366824478842e-05 2023-01-24 07:10:03.008600: step: 124/466, loss: 0.03141075372695923 2023-01-24 07:10:03.753291: step: 126/466, loss: 0.00644349679350853 2023-01-24 07:10:04.537114: step: 128/466, loss: 0.03776460886001587 2023-01-24 07:10:05.273805: step: 130/466, loss: 0.0003045888734050095 2023-01-24 07:10:06.021143: step: 132/466, loss: 0.04505644366145134 2023-01-24 07:10:06.829181: step: 134/466, loss: 0.00014811464643571526 2023-01-24 07:10:07.568146: step: 136/466, loss: 0.0005231269751675427 2023-01-24 07:10:08.277228: step: 138/466, loss: 0.002811503829434514 2023-01-24 07:10:08.986183: step: 140/466, loss: 0.0004333566757850349 2023-01-24 07:10:09.714118: step: 142/466, loss: 0.011612461879849434 2023-01-24 07:10:10.402811: step: 144/466, loss: 4.0755899135547224e-06 2023-01-24 07:10:11.434518: step: 146/466, loss: 0.04024361073970795 2023-01-24 07:10:12.157614: step: 148/466, loss: 0.013624968938529491 2023-01-24 07:10:12.864566: step: 150/466, loss: 0.009867950342595577 2023-01-24 07:10:13.597043: step: 152/466, loss: 0.0022369185462594032 2023-01-24 07:10:14.345244: step: 154/466, loss: 0.00014311866834759712 2023-01-24 07:10:15.251382: step: 156/466, loss: 2.0264893464627676e-05 2023-01-24 07:10:16.065520: step: 158/466, loss: 0.2612498700618744 2023-01-24 07:10:16.892109: step: 160/466, loss: 0.0024147075600922108 2023-01-24 07:10:17.611335: step: 162/466, loss: 0.0004107421846129 2023-01-24 07:10:18.352960: step: 164/466, loss: 0.000620778591837734 2023-01-24 07:10:19.074558: step: 166/466, loss: 0.47788673639297485 2023-01-24 07:10:19.871801: step: 168/466, loss: 0.010592091828584671 2023-01-24 07:10:20.668712: step: 170/466, loss: 0.0020647207275032997 2023-01-24 07:10:21.431501: step: 172/466, loss: 0.001345380092971027 2023-01-24 07:10:22.184690: step: 174/466, loss: 0.0003394253144506365 2023-01-24 07:10:22.988244: step: 176/466, loss: 0.014013102278113365 2023-01-24 07:10:23.723410: step: 178/466, loss: 0.001352597028017044 2023-01-24 07:10:24.532072: step: 180/466, loss: 0.015661533921957016 2023-01-24 07:10:25.350463: step: 182/466, loss: 0.0011373377637937665 2023-01-24 07:10:26.114332: step: 184/466, loss: 0.0079009048640728 2023-01-24 07:10:26.866020: step: 186/466, loss: 0.03485744819045067 2023-01-24 07:10:27.682168: step: 188/466, loss: 0.0019052948337048292 2023-01-24 07:10:28.417196: step: 190/466, loss: 0.0438397042453289 2023-01-24 07:10:29.115528: step: 192/466, loss: 0.0030667998362332582 2023-01-24 07:10:29.879076: step: 194/466, loss: 0.0016462020576000214 2023-01-24 07:10:30.625325: step: 196/466, loss: 0.0005860528908669949 2023-01-24 07:10:31.365080: step: 198/466, loss: 0.002719779498875141 2023-01-24 07:10:32.196004: step: 200/466, loss: 0.25626522302627563 2023-01-24 07:10:32.982559: step: 202/466, loss: 0.0031375677790492773 2023-01-24 07:10:33.757060: step: 204/466, loss: 0.009526461362838745 2023-01-24 07:10:34.442473: step: 206/466, loss: 0.0008482421399094164 2023-01-24 07:10:35.219479: step: 208/466, loss: 0.005725763738155365 2023-01-24 07:10:35.928196: step: 210/466, loss: 0.0013842338230460882 2023-01-24 07:10:36.610350: step: 212/466, loss: 0.024162253364920616 2023-01-24 07:10:37.356930: step: 214/466, loss: 0.04524444043636322 2023-01-24 07:10:38.098308: step: 216/466, loss: 0.014798237942159176 2023-01-24 07:10:38.853353: step: 218/466, loss: 0.008126954548060894 2023-01-24 07:10:39.579697: step: 220/466, loss: 0.0012192793656140566 2023-01-24 07:10:40.329770: step: 222/466, loss: 0.00041413892176933587 2023-01-24 07:10:41.045427: step: 224/466, loss: 0.011747845448553562 2023-01-24 07:10:41.827151: step: 226/466, loss: 0.00561458058655262 2023-01-24 07:10:42.605196: step: 228/466, loss: 9.295693598687649e-05 2023-01-24 07:10:43.373843: step: 230/466, loss: 0.011560462415218353 2023-01-24 07:10:44.088917: step: 232/466, loss: 0.009240617975592613 2023-01-24 07:10:44.803284: step: 234/466, loss: 0.009746174328029156 2023-01-24 07:10:45.512441: step: 236/466, loss: 8.335171878570691e-05 2023-01-24 07:10:46.082764: step: 238/466, loss: 0.005442628636956215 2023-01-24 07:10:46.937423: step: 240/466, loss: 0.04420081153512001 2023-01-24 07:10:47.734096: step: 242/466, loss: 0.0012216203613206744 2023-01-24 07:10:48.456348: step: 244/466, loss: 0.0006596589810214937 2023-01-24 07:10:49.173985: step: 246/466, loss: 2.5175178961944766e-05 2023-01-24 07:10:49.882392: step: 248/466, loss: 0.00014090738841332495 2023-01-24 07:10:50.594196: step: 250/466, loss: 0.012872757390141487 2023-01-24 07:10:51.425789: step: 252/466, loss: 0.0034725533332675695 2023-01-24 07:10:52.279831: step: 254/466, loss: 0.0004913858720101416 2023-01-24 07:10:53.017076: step: 256/466, loss: 0.0009157611057162285 2023-01-24 07:10:53.801899: step: 258/466, loss: 0.031524062156677246 2023-01-24 07:10:54.620730: step: 260/466, loss: 0.018843840807676315 2023-01-24 07:10:55.371266: step: 262/466, loss: 0.00017298573220614344 2023-01-24 07:10:56.117026: step: 264/466, loss: 0.0018737587379291654 2023-01-24 07:10:56.832161: step: 266/466, loss: 0.0009831200586631894 2023-01-24 07:10:57.607882: step: 268/466, loss: 0.0018375710351392627 2023-01-24 07:10:58.386533: step: 270/466, loss: 0.02300839126110077 2023-01-24 07:10:59.260028: step: 272/466, loss: 0.04054649546742439 2023-01-24 07:11:00.034366: step: 274/466, loss: 0.003835452953353524 2023-01-24 07:11:00.784201: step: 276/466, loss: 0.004952685441821814 2023-01-24 07:11:01.504502: step: 278/466, loss: 0.1168748065829277 2023-01-24 07:11:02.211655: step: 280/466, loss: 0.0005180786829441786 2023-01-24 07:11:03.043061: step: 282/466, loss: 0.04683459550142288 2023-01-24 07:11:03.748725: step: 284/466, loss: 0.0003267234133090824 2023-01-24 07:11:04.403889: step: 286/466, loss: 0.0005253761191852391 2023-01-24 07:11:05.135219: step: 288/466, loss: 0.0009431827929802239 2023-01-24 07:11:05.846871: step: 290/466, loss: 0.00791383907198906 2023-01-24 07:11:06.625287: step: 292/466, loss: 0.06768930703401566 2023-01-24 07:11:07.452776: step: 294/466, loss: 0.006540664006024599 2023-01-24 07:11:08.248164: step: 296/466, loss: 0.005072426982223988 2023-01-24 07:11:08.967410: step: 298/466, loss: 0.002139729680493474 2023-01-24 07:11:09.711647: step: 300/466, loss: 0.032697562128305435 2023-01-24 07:11:10.450659: step: 302/466, loss: 0.08820069581270218 2023-01-24 07:11:11.172262: step: 304/466, loss: 0.00013937368930783123 2023-01-24 07:11:11.963609: step: 306/466, loss: 0.0013572914758697152 2023-01-24 07:11:12.620899: step: 308/466, loss: 0.0005084231379441917 2023-01-24 07:11:13.341438: step: 310/466, loss: 0.006255794316530228 2023-01-24 07:11:14.095900: step: 312/466, loss: 0.00024360780662391335 2023-01-24 07:11:14.791822: step: 314/466, loss: 0.0010025992523878813 2023-01-24 07:11:15.568502: step: 316/466, loss: 0.0012603605864569545 2023-01-24 07:11:16.420829: step: 318/466, loss: 0.0074287052266299725 2023-01-24 07:11:17.243800: step: 320/466, loss: 0.0029444722458720207 2023-01-24 07:11:17.996770: step: 322/466, loss: 0.005362970754504204 2023-01-24 07:11:18.774899: step: 324/466, loss: 0.016828326508402824 2023-01-24 07:11:19.521399: step: 326/466, loss: 0.09386395663022995 2023-01-24 07:11:20.257865: step: 328/466, loss: 0.000387120118830353 2023-01-24 07:11:21.084026: step: 330/466, loss: 0.004994371440261602 2023-01-24 07:11:21.827407: step: 332/466, loss: 0.013452321290969849 2023-01-24 07:11:22.594733: step: 334/466, loss: 0.00022745579190086573 2023-01-24 07:11:23.383911: step: 336/466, loss: 0.0002610105730127543 2023-01-24 07:11:24.187245: step: 338/466, loss: 0.12716002762317657 2023-01-24 07:11:24.990005: step: 340/466, loss: 0.061778996139764786 2023-01-24 07:11:25.832148: step: 342/466, loss: 0.010173936374485493 2023-01-24 07:11:26.521349: step: 344/466, loss: 0.0002518314286135137 2023-01-24 07:11:27.255066: step: 346/466, loss: 0.0007772438693791628 2023-01-24 07:11:28.055279: step: 348/466, loss: 9.038544521899894e-05 2023-01-24 07:11:28.823882: step: 350/466, loss: 0.00029960667598061264 2023-01-24 07:11:29.564583: step: 352/466, loss: 0.048271242529153824 2023-01-24 07:11:30.315876: step: 354/466, loss: 0.004015645012259483 2023-01-24 07:11:31.097273: step: 356/466, loss: 0.14496587216854095 2023-01-24 07:11:31.880648: step: 358/466, loss: 0.006087936460971832 2023-01-24 07:11:32.672571: step: 360/466, loss: 0.009134674444794655 2023-01-24 07:11:33.394716: step: 362/466, loss: 0.02341679111123085 2023-01-24 07:11:34.074541: step: 364/466, loss: 4.412142880028114e-05 2023-01-24 07:11:34.811167: step: 366/466, loss: 0.008231754414737225 2023-01-24 07:11:35.453349: step: 368/466, loss: 0.0006990503752604127 2023-01-24 07:11:36.174980: step: 370/466, loss: 0.00015528335643466562 2023-01-24 07:11:36.989619: step: 372/466, loss: 0.011209993623197079 2023-01-24 07:11:37.815246: step: 374/466, loss: 0.0003509388188831508 2023-01-24 07:11:38.649459: step: 376/466, loss: 0.0019555650651454926 2023-01-24 07:11:39.475560: step: 378/466, loss: 0.009965005330741405 2023-01-24 07:11:40.202212: step: 380/466, loss: 0.0002666927466634661 2023-01-24 07:11:41.014411: step: 382/466, loss: 0.05020932853221893 2023-01-24 07:11:41.762343: step: 384/466, loss: 0.018008515238761902 2023-01-24 07:11:42.550989: step: 386/466, loss: 5.9866510127903894e-05 2023-01-24 07:11:43.314360: step: 388/466, loss: 0.052257269620895386 2023-01-24 07:11:44.064922: step: 390/466, loss: 0.0010209325700998306 2023-01-24 07:11:44.810471: step: 392/466, loss: 0.008724289946258068 2023-01-24 07:11:45.623288: step: 394/466, loss: 0.05061984807252884 2023-01-24 07:11:46.376304: step: 396/466, loss: 0.2913358509540558 2023-01-24 07:11:47.115385: step: 398/466, loss: 0.004957552067935467 2023-01-24 07:11:47.822204: step: 400/466, loss: 0.0007999493391253054 2023-01-24 07:11:48.597633: step: 402/466, loss: 0.2070377767086029 2023-01-24 07:11:49.299949: step: 404/466, loss: 0.0007287136395461857 2023-01-24 07:11:50.064025: step: 406/466, loss: 2.076130112982355e-05 2023-01-24 07:11:50.870643: step: 408/466, loss: 0.04241722449660301 2023-01-24 07:11:51.735088: step: 410/466, loss: 0.06292695552110672 2023-01-24 07:11:52.526762: step: 412/466, loss: 0.0008323242655023932 2023-01-24 07:11:53.206432: step: 414/466, loss: 0.0010838143061846495 2023-01-24 07:11:53.962044: step: 416/466, loss: 0.004325313027948141 2023-01-24 07:11:54.710652: step: 418/466, loss: 0.007513652089983225 2023-01-24 07:11:55.522974: step: 420/466, loss: 0.023051241412758827 2023-01-24 07:11:56.264780: step: 422/466, loss: 0.00151598802767694 2023-01-24 07:11:57.014851: step: 424/466, loss: 0.08067677170038223 2023-01-24 07:11:57.764706: step: 426/466, loss: 0.00105815299320966 2023-01-24 07:11:58.413026: step: 428/466, loss: 0.011436041444540024 2023-01-24 07:11:59.135821: step: 430/466, loss: 0.0014418803621083498 2023-01-24 07:11:59.848581: step: 432/466, loss: 0.0001352071121800691 2023-01-24 07:12:00.600210: step: 434/466, loss: 0.0011846505803987384 2023-01-24 07:12:01.290913: step: 436/466, loss: 1.4939187167328782e-05 2023-01-24 07:12:02.083731: step: 438/466, loss: 0.0004720861034002155 2023-01-24 07:12:02.725346: step: 440/466, loss: 9.890823275782168e-05 2023-01-24 07:12:03.427887: step: 442/466, loss: 1.8691593140829355e-05 2023-01-24 07:12:04.193349: step: 444/466, loss: 0.006000523455440998 2023-01-24 07:12:04.973226: step: 446/466, loss: 0.014063859358429909 2023-01-24 07:12:05.778674: step: 448/466, loss: 0.002362149301916361 2023-01-24 07:12:06.546146: step: 450/466, loss: 0.007006255444139242 2023-01-24 07:12:07.301393: step: 452/466, loss: 0.041615139693021774 2023-01-24 07:12:08.010536: step: 454/466, loss: 0.0002590412914287299 2023-01-24 07:12:08.751773: step: 456/466, loss: 0.000590867770370096 2023-01-24 07:12:09.500797: step: 458/466, loss: 0.0014481049729511142 2023-01-24 07:12:10.248703: step: 460/466, loss: 0.014301631599664688 2023-01-24 07:12:11.050238: step: 462/466, loss: 0.002033855300396681 2023-01-24 07:12:11.808075: step: 464/466, loss: 0.003797616111114621 2023-01-24 07:12:12.585137: step: 466/466, loss: 8.198487921617925e-05 2023-01-24 07:12:13.341583: step: 468/466, loss: 0.015080071054399014 2023-01-24 07:12:14.028640: step: 470/466, loss: 0.0005228759837336838 2023-01-24 07:12:14.803643: step: 472/466, loss: 0.0002708770043682307 2023-01-24 07:12:15.558555: step: 474/466, loss: 0.07632824778556824 2023-01-24 07:12:16.291906: step: 476/466, loss: 0.010276122018694878 2023-01-24 07:12:17.059275: step: 478/466, loss: 0.0030062233563512564 2023-01-24 07:12:17.811899: step: 480/466, loss: 0.08177800476551056 2023-01-24 07:12:18.544203: step: 482/466, loss: 0.0020473485346883535 2023-01-24 07:12:19.274830: step: 484/466, loss: 0.017333390191197395 2023-01-24 07:12:20.068877: step: 486/466, loss: 0.0031148327980190516 2023-01-24 07:12:20.762110: step: 488/466, loss: 9.554430289426818e-05 2023-01-24 07:12:21.465156: step: 490/466, loss: 0.00386765762232244 2023-01-24 07:12:22.196087: step: 492/466, loss: 0.012081998400390148 2023-01-24 07:12:22.954565: step: 494/466, loss: 4.6635068429168314e-05 2023-01-24 07:12:23.645889: step: 496/466, loss: 0.016525747254490852 2023-01-24 07:12:24.376041: step: 498/466, loss: 0.04223182424902916 2023-01-24 07:12:25.178830: step: 500/466, loss: 0.0017167312325909734 2023-01-24 07:12:25.890590: step: 502/466, loss: 0.002390617271885276 2023-01-24 07:12:26.701016: step: 504/466, loss: 0.010442443192005157 2023-01-24 07:12:27.451446: step: 506/466, loss: 0.004288220778107643 2023-01-24 07:12:28.166790: step: 508/466, loss: 0.0003184019587934017 2023-01-24 07:12:28.905199: step: 510/466, loss: 0.0012993603013455868 2023-01-24 07:12:29.661590: step: 512/466, loss: 0.014843190088868141 2023-01-24 07:12:30.416398: step: 514/466, loss: 0.006183616816997528 2023-01-24 07:12:31.131529: step: 516/466, loss: 0.00975726917386055 2023-01-24 07:12:31.993292: step: 518/466, loss: 0.007801082916557789 2023-01-24 07:12:32.842724: step: 520/466, loss: 0.00010823424236150458 2023-01-24 07:12:33.607906: step: 522/466, loss: 0.027075359597802162 2023-01-24 07:12:34.360635: step: 524/466, loss: 0.022378744557499886 2023-01-24 07:12:35.070806: step: 526/466, loss: 0.001080716261640191 2023-01-24 07:12:35.741535: step: 528/466, loss: 0.0001632870698813349 2023-01-24 07:12:36.520813: step: 530/466, loss: 1.4522252058668528e-05 2023-01-24 07:12:37.294665: step: 532/466, loss: 0.0007839349564164877 2023-01-24 07:12:37.959483: step: 534/466, loss: 1.9176708519808017e-05 2023-01-24 07:12:38.699739: step: 536/466, loss: 9.760348802956287e-06 2023-01-24 07:12:39.459032: step: 538/466, loss: 0.0004075799079146236 2023-01-24 07:12:40.245895: step: 540/466, loss: 0.0017076395452022552 2023-01-24 07:12:41.047088: step: 542/466, loss: 0.030729996040463448 2023-01-24 07:12:41.782748: step: 544/466, loss: 0.0003406803007237613 2023-01-24 07:12:42.532747: step: 546/466, loss: 0.02445288561284542 2023-01-24 07:12:43.187089: step: 548/466, loss: 0.024324113503098488 2023-01-24 07:12:43.953333: step: 550/466, loss: 0.11394055932760239 2023-01-24 07:12:44.763925: step: 552/466, loss: 0.01220634113997221 2023-01-24 07:12:45.478634: step: 554/466, loss: 0.00687334593385458 2023-01-24 07:12:46.176412: step: 556/466, loss: 0.0009453383972868323 2023-01-24 07:12:46.918201: step: 558/466, loss: 0.00467744842171669 2023-01-24 07:12:47.706232: step: 560/466, loss: 0.001324250246398151 2023-01-24 07:12:48.493143: step: 562/466, loss: 0.005736398510634899 2023-01-24 07:12:49.201352: step: 564/466, loss: 0.00043739372631534934 2023-01-24 07:12:49.998474: step: 566/466, loss: 1.2006373405456543 2023-01-24 07:12:50.706506: step: 568/466, loss: 0.0038406013045459986 2023-01-24 07:12:51.444673: step: 570/466, loss: 0.00694508571177721 2023-01-24 07:12:52.182909: step: 572/466, loss: 0.05925419181585312 2023-01-24 07:12:52.912996: step: 574/466, loss: 0.0008464656420983374 2023-01-24 07:12:53.755741: step: 576/466, loss: 0.004853491205722094 2023-01-24 07:12:54.480627: step: 578/466, loss: 0.06622689217329025 2023-01-24 07:12:55.220750: step: 580/466, loss: 0.002883708104491234 2023-01-24 07:12:55.986554: step: 582/466, loss: 0.007973263040184975 2023-01-24 07:12:56.865247: step: 584/466, loss: 0.004370575770735741 2023-01-24 07:12:57.574700: step: 586/466, loss: 0.001321446499787271 2023-01-24 07:12:58.246675: step: 588/466, loss: 0.2518007457256317 2023-01-24 07:12:58.947314: step: 590/466, loss: 0.0006912379176355898 2023-01-24 07:12:59.673085: step: 592/466, loss: 0.007234994322061539 2023-01-24 07:13:00.379999: step: 594/466, loss: 0.001353597966954112 2023-01-24 07:13:01.077529: step: 596/466, loss: 0.00021362333791330457 2023-01-24 07:13:01.770476: step: 598/466, loss: 0.005170780699700117 2023-01-24 07:13:02.689140: step: 600/466, loss: 0.001198322745040059 2023-01-24 07:13:03.467329: step: 602/466, loss: 0.017721228301525116 2023-01-24 07:13:04.163253: step: 604/466, loss: 0.0025628963485360146 2023-01-24 07:13:04.880785: step: 606/466, loss: 0.07840071618556976 2023-01-24 07:13:05.611949: step: 608/466, loss: 0.035325538367033005 2023-01-24 07:13:06.415855: step: 610/466, loss: 0.00014155724784359336 2023-01-24 07:13:07.142113: step: 612/466, loss: 0.004721387289464474 2023-01-24 07:13:07.929452: step: 614/466, loss: 0.020070061087608337 2023-01-24 07:13:08.779100: step: 616/466, loss: 0.0019762504380196333 2023-01-24 07:13:09.651312: step: 618/466, loss: 0.00040442211320623755 2023-01-24 07:13:10.349714: step: 620/466, loss: 0.8801062107086182 2023-01-24 07:13:11.181693: step: 622/466, loss: 0.0013993729371577501 2023-01-24 07:13:12.006012: step: 624/466, loss: 0.05443740636110306 2023-01-24 07:13:12.800572: step: 626/466, loss: 0.0011238008737564087 2023-01-24 07:13:13.511571: step: 628/466, loss: 0.0003994059225078672 2023-01-24 07:13:14.226209: step: 630/466, loss: 0.01006253995001316 2023-01-24 07:13:15.048047: step: 632/466, loss: 0.0012169163674116135 2023-01-24 07:13:15.799054: step: 634/466, loss: 0.14805126190185547 2023-01-24 07:13:16.540407: step: 636/466, loss: 0.0948682650923729 2023-01-24 07:13:17.372885: step: 638/466, loss: 0.0009406930766999722 2023-01-24 07:13:18.126483: step: 640/466, loss: 0.019608452916145325 2023-01-24 07:13:18.872322: step: 642/466, loss: 0.01036733016371727 2023-01-24 07:13:19.595657: step: 644/466, loss: 0.012805589474737644 2023-01-24 07:13:20.378653: step: 646/466, loss: 0.0072302124463021755 2023-01-24 07:13:21.076333: step: 648/466, loss: 0.0005278410390019417 2023-01-24 07:13:21.901215: step: 650/466, loss: 0.0015965031925588846 2023-01-24 07:13:22.600190: step: 652/466, loss: 0.00028344758902676404 2023-01-24 07:13:23.373221: step: 654/466, loss: 0.03176767751574516 2023-01-24 07:13:24.182136: step: 656/466, loss: 0.003983621019870043 2023-01-24 07:13:24.927201: step: 658/466, loss: 0.050124142318964005 2023-01-24 07:13:25.626312: step: 660/466, loss: 0.003998556639999151 2023-01-24 07:13:26.318363: step: 662/466, loss: 0.0013740364229306579 2023-01-24 07:13:26.984516: step: 664/466, loss: 0.005735581275075674 2023-01-24 07:13:27.723386: step: 666/466, loss: 0.0006358802784234285 2023-01-24 07:13:28.437505: step: 668/466, loss: 0.0014941692352294922 2023-01-24 07:13:29.143102: step: 670/466, loss: 0.001266355742700398 2023-01-24 07:13:29.864284: step: 672/466, loss: 0.004563577938824892 2023-01-24 07:13:30.543332: step: 674/466, loss: 0.0006881391745992005 2023-01-24 07:13:31.240901: step: 676/466, loss: 0.5590264797210693 2023-01-24 07:13:31.994446: step: 678/466, loss: 0.016854893416166306 2023-01-24 07:13:32.731695: step: 680/466, loss: 0.009256008081138134 2023-01-24 07:13:33.475264: step: 682/466, loss: 0.002191653475165367 2023-01-24 07:13:34.128432: step: 684/466, loss: 0.00032921944512054324 2023-01-24 07:13:34.822196: step: 686/466, loss: 0.0008804819080978632 2023-01-24 07:13:35.538981: step: 688/466, loss: 0.0013804353075101972 2023-01-24 07:13:36.295300: step: 690/466, loss: 0.002117349300533533 2023-01-24 07:13:37.154689: step: 692/466, loss: 0.0751693993806839 2023-01-24 07:13:37.907122: step: 694/466, loss: 0.005391803570091724 2023-01-24 07:13:38.659831: step: 696/466, loss: 0.0005898470990359783 2023-01-24 07:13:39.371113: step: 698/466, loss: 0.003721448592841625 2023-01-24 07:13:40.030476: step: 700/466, loss: 0.08029770106077194 2023-01-24 07:13:40.794057: step: 702/466, loss: 0.003063736716285348 2023-01-24 07:13:41.485768: step: 704/466, loss: 0.000615855969954282 2023-01-24 07:13:42.211808: step: 706/466, loss: 0.0162990503013134 2023-01-24 07:13:42.880405: step: 708/466, loss: 0.004094661679118872 2023-01-24 07:13:43.624643: step: 710/466, loss: 0.007667486555874348 2023-01-24 07:13:44.378060: step: 712/466, loss: 0.003508082590997219 2023-01-24 07:13:45.127752: step: 714/466, loss: 0.002244040835648775 2023-01-24 07:13:45.870198: step: 716/466, loss: 0.010504397563636303 2023-01-24 07:13:46.672139: step: 718/466, loss: 0.0021802643314003944 2023-01-24 07:13:47.409520: step: 720/466, loss: 1.154409646987915 2023-01-24 07:13:48.167484: step: 722/466, loss: 0.004091985523700714 2023-01-24 07:13:48.895876: step: 724/466, loss: 0.05520002916455269 2023-01-24 07:13:49.661552: step: 726/466, loss: 0.4075619876384735 2023-01-24 07:13:50.466112: step: 728/466, loss: 0.023832345381379128 2023-01-24 07:13:51.204966: step: 730/466, loss: 0.024165647104382515 2023-01-24 07:13:51.919146: step: 732/466, loss: 0.010060613043606281 2023-01-24 07:13:52.674649: step: 734/466, loss: 0.00020264496561139822 2023-01-24 07:13:53.411032: step: 736/466, loss: 0.11557676643133163 2023-01-24 07:13:54.204025: step: 738/466, loss: 0.00019353475363459438 2023-01-24 07:13:54.933405: step: 740/466, loss: 0.0036374281626194715 2023-01-24 07:13:55.636477: step: 742/466, loss: 0.0026445419061928988 2023-01-24 07:13:56.286065: step: 744/466, loss: 0.004334342200309038 2023-01-24 07:13:56.972354: step: 746/466, loss: 1.0070466995239258 2023-01-24 07:13:57.638104: step: 748/466, loss: 0.04262397810816765 2023-01-24 07:13:58.352859: step: 750/466, loss: 0.011616052128374577 2023-01-24 07:13:59.189671: step: 752/466, loss: 0.00277088675647974 2023-01-24 07:13:59.969427: step: 754/466, loss: 0.015158126130700111 2023-01-24 07:14:00.726300: step: 756/466, loss: 0.004447794985026121 2023-01-24 07:14:01.620559: step: 758/466, loss: 0.0005685980431735516 2023-01-24 07:14:02.345893: step: 760/466, loss: 0.00014420298975892365 2023-01-24 07:14:03.139716: step: 762/466, loss: 0.005476310383528471 2023-01-24 07:14:03.942419: step: 764/466, loss: 0.0002213385159848258 2023-01-24 07:14:04.667757: step: 766/466, loss: 0.2885030210018158 2023-01-24 07:14:05.426514: step: 768/466, loss: 0.0647197961807251 2023-01-24 07:14:06.209189: step: 770/466, loss: 0.002829147269949317 2023-01-24 07:14:06.912445: step: 772/466, loss: 0.0008479239768348634 2023-01-24 07:14:07.600326: step: 774/466, loss: 0.007131071761250496 2023-01-24 07:14:08.415580: step: 776/466, loss: 0.004595254082232714 2023-01-24 07:14:09.111053: step: 778/466, loss: 5.081822564534377e-06 2023-01-24 07:14:09.932207: step: 780/466, loss: 0.015721395611763 2023-01-24 07:14:10.645827: step: 782/466, loss: 3.051890598726459e-05 2023-01-24 07:14:11.393621: step: 784/466, loss: 0.000999096198938787 2023-01-24 07:14:12.128418: step: 786/466, loss: 0.025470739230513573 2023-01-24 07:14:12.817698: step: 788/466, loss: 0.0016792990500107408 2023-01-24 07:14:13.550129: step: 790/466, loss: 0.10520414263010025 2023-01-24 07:14:14.264159: step: 792/466, loss: 0.00962340272963047 2023-01-24 07:14:14.926335: step: 794/466, loss: 0.00011330540291965008 2023-01-24 07:14:15.670311: step: 796/466, loss: 0.0818401500582695 2023-01-24 07:14:16.474233: step: 798/466, loss: 0.012172805145382881 2023-01-24 07:14:17.269349: step: 800/466, loss: 0.08613215386867523 2023-01-24 07:14:18.105460: step: 802/466, loss: 0.0011135649401694536 2023-01-24 07:14:18.865229: step: 804/466, loss: 0.003869681851938367 2023-01-24 07:14:19.693578: step: 806/466, loss: 0.030655736103653908 2023-01-24 07:14:20.425205: step: 808/466, loss: 0.09402773529291153 2023-01-24 07:14:21.221282: step: 810/466, loss: 0.10320362448692322 2023-01-24 07:14:21.983506: step: 812/466, loss: 5.8628080296330154e-05 2023-01-24 07:14:22.763312: step: 814/466, loss: 0.007948066107928753 2023-01-24 07:14:23.546491: step: 816/466, loss: 0.0008257722365669906 2023-01-24 07:14:24.257966: step: 818/466, loss: 0.013069583103060722 2023-01-24 07:14:25.013338: step: 820/466, loss: 0.0013827694347128272 2023-01-24 07:14:25.782732: step: 822/466, loss: 0.019912388175725937 2023-01-24 07:14:26.533093: step: 824/466, loss: 0.014426201581954956 2023-01-24 07:14:27.365519: step: 826/466, loss: 0.057453930377960205 2023-01-24 07:14:28.176901: step: 828/466, loss: 0.014677576720714569 2023-01-24 07:14:28.920213: step: 830/466, loss: 0.004854255355894566 2023-01-24 07:14:29.770964: step: 832/466, loss: 0.001058097812347114 2023-01-24 07:14:30.599055: step: 834/466, loss: 0.0003384735609870404 2023-01-24 07:14:31.358959: step: 836/466, loss: 0.0007604488637298346 2023-01-24 07:14:32.082442: step: 838/466, loss: 0.0034899867605417967 2023-01-24 07:14:32.839064: step: 840/466, loss: 0.02199520915746689 2023-01-24 07:14:33.706349: step: 842/466, loss: 0.00045649081584997475 2023-01-24 07:14:34.416794: step: 844/466, loss: 0.0009873228846117854 2023-01-24 07:14:35.459500: step: 846/466, loss: 0.00015023874584585428 2023-01-24 07:14:36.228402: step: 848/466, loss: 0.03509344533085823 2023-01-24 07:14:37.067220: step: 850/466, loss: 0.025919271633028984 2023-01-24 07:14:37.757079: step: 852/466, loss: 0.00026613284717313945 2023-01-24 07:14:38.509914: step: 854/466, loss: 0.0004146205901633948 2023-01-24 07:14:39.293876: step: 856/466, loss: 0.015960287302732468 2023-01-24 07:14:40.074455: step: 858/466, loss: 0.014743988402187824 2023-01-24 07:14:40.860873: step: 860/466, loss: 0.00048502220306545496 2023-01-24 07:14:41.595872: step: 862/466, loss: 0.003604623256251216 2023-01-24 07:14:42.444443: step: 864/466, loss: 0.0007024814840406179 2023-01-24 07:14:43.198568: step: 866/466, loss: 0.025188926607370377 2023-01-24 07:14:43.945706: step: 868/466, loss: 0.0032895857002586126 2023-01-24 07:14:44.716246: step: 870/466, loss: 0.00014311580162029713 2023-01-24 07:14:45.397008: step: 872/466, loss: 0.01836252771317959 2023-01-24 07:14:46.107853: step: 874/466, loss: 0.0012512169778347015 2023-01-24 07:14:46.938065: step: 876/466, loss: 0.10940083861351013 2023-01-24 07:14:47.664485: step: 878/466, loss: 0.0006442145677283406 2023-01-24 07:14:48.424593: step: 880/466, loss: 0.000579558894969523 2023-01-24 07:14:49.207984: step: 882/466, loss: 0.0004485124663915485 2023-01-24 07:14:49.842850: step: 884/466, loss: 0.0012692167656496167 2023-01-24 07:14:50.604896: step: 886/466, loss: 0.4959481656551361 2023-01-24 07:14:51.368885: step: 888/466, loss: 0.031073397025465965 2023-01-24 07:14:52.140249: step: 890/466, loss: 0.001908604521304369 2023-01-24 07:14:52.808970: step: 892/466, loss: 0.0033289012499153614 2023-01-24 07:14:53.565434: step: 894/466, loss: 0.0019436075817793608 2023-01-24 07:14:54.337894: step: 896/466, loss: 0.0030539692379534245 2023-01-24 07:14:55.177233: step: 898/466, loss: 0.005594159010797739 2023-01-24 07:14:56.011957: step: 900/466, loss: 0.00043902089237235487 2023-01-24 07:14:56.781572: step: 902/466, loss: 0.0003640690119937062 2023-01-24 07:14:57.548464: step: 904/466, loss: 0.0005915475194342434 2023-01-24 07:14:58.291715: step: 906/466, loss: 0.013333105482161045 2023-01-24 07:14:58.986898: step: 908/466, loss: 0.00022897178132552654 2023-01-24 07:14:59.681354: step: 910/466, loss: 0.0003777458332479 2023-01-24 07:15:00.478948: step: 912/466, loss: 0.0017597827827557921 2023-01-24 07:15:01.199589: step: 914/466, loss: 3.888283026753925e-05 2023-01-24 07:15:01.966621: step: 916/466, loss: 2.9405186069197953e-05 2023-01-24 07:15:02.735221: step: 918/466, loss: 0.012713199481368065 2023-01-24 07:15:03.477960: step: 920/466, loss: 7.25022327969782e-05 2023-01-24 07:15:04.189896: step: 922/466, loss: 0.15175369381904602 2023-01-24 07:15:04.971381: step: 924/466, loss: 0.00019238461391068995 2023-01-24 07:15:05.800350: step: 926/466, loss: 0.01744076795876026 2023-01-24 07:15:06.510031: step: 928/466, loss: 0.0002738804614637047 2023-01-24 07:15:07.273290: step: 930/466, loss: 3.8340131141012534e-05 2023-01-24 07:15:08.088117: step: 932/466, loss: 0.15691609680652618 ================================================== Loss: 0.031 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3336493312464428, 'r': 0.3178215641095148, 'f1': 0.3255431764542552}, 'combined': 0.23987391949260906, 'epoch': 39} Test Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3711599758808273, 'r': 0.2913959602669233, 'f1': 0.3264766389786694}, 'combined': 0.2006636902990846, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3178581893926638, 'r': 0.3311274117202513, 'f1': 0.3243571486553391}, 'combined': 0.23900000427235513, 'epoch': 39} Test Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.3633884829407073, 'r': 0.2972605960970777, 'f1': 0.3270149932278624}, 'combined': 0.20099458120346667, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35187269241692704, 'r': 0.3358481295744104, 'f1': 0.3436737170596394}, 'combined': 0.25323326520183953, 'epoch': 39} Test Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.37335651059954456, 'r': 0.289561591842801, 'f1': 0.32616308149008527}, 'combined': 0.20145366797917036, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2905405405405405, 'r': 0.30714285714285716, 'f1': 0.29861111111111105}, 'combined': 0.19907407407407401, 'epoch': 39} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2857142857142857, 'r': 0.43478260869565216, 'f1': 0.3448275862068965}, 'combined': 0.17241379310344826, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 39} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3561785714285714, 'r': 0.3210338167525074, 'f1': 0.33769425434844597}, 'combined': 0.24882734530938122, 'epoch': 33} Test for Chinese: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.37857267048883675, 'r': 0.28679747764305813, 'f1': 0.32635575042141096}, 'combined': 0.20058938806389162, 'epoch': 33} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.325, 'f1': 0.3729508196721312}, 'combined': 0.24863387978142076, 'epoch': 33} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33043321205998016, 'r': 0.3285521880824091, 'f1': 0.3294900154508651}, 'combined': 0.24278211664800584, 'epoch': 36} Test for Korean: {'template': {'p': 0.8513513513513513, 'r': 0.48091603053435117, 'f1': 0.6146341463414634}, 'slot': {'p': 0.35885318259317184, 'r': 0.2913738579634333, 'f1': 0.321612082343187}, 'combined': 0.1976737676841052, 'epoch': 36} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3425925925925926, 'r': 0.40217391304347827, 'f1': 0.37}, 'combined': 0.185, 'epoch': 36} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37147302904564317, 'r': 0.339753320683112, 'f1': 0.35490584737363723}, 'combined': 0.26150957174899586, 'epoch': 36} Test for Russian: {'template': {'p': 0.863013698630137, 'r': 0.48091603053435117, 'f1': 0.6176470588235295}, 'slot': {'p': 0.38138922621752097, 'r': 0.28918160566753104, 'f1': 0.32894585799934034}, 'combined': 0.20317244170547497, 'epoch': 36} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 36}