Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1127]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 613743345, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:19:53.799829: step: 2/466, loss: 13.076241493225098 2023-01-24 00:19:54.361978: step: 4/466, loss: 12.855111122131348 2023-01-24 00:19:54.974088: step: 6/466, loss: 8.903879165649414 2023-01-24 00:19:55.669234: step: 8/466, loss: 14.036937713623047 2023-01-24 00:19:56.282972: step: 10/466, loss: 10.639755249023438 2023-01-24 00:19:56.901716: step: 12/466, loss: 12.28707218170166 2023-01-24 00:19:57.591756: step: 14/466, loss: 22.052391052246094 2023-01-24 00:19:58.137164: step: 16/466, loss: 14.403186798095703 2023-01-24 00:19:58.749343: step: 18/466, loss: 14.720062255859375 2023-01-24 00:19:59.363273: step: 20/466, loss: 14.548443794250488 2023-01-24 00:19:59.961714: step: 22/466, loss: 12.669960021972656 2023-01-24 00:20:00.631461: step: 24/466, loss: 26.453371047973633 2023-01-24 00:20:01.274379: step: 26/466, loss: 16.097545623779297 2023-01-24 00:20:01.903574: step: 28/466, loss: 10.801416397094727 2023-01-24 00:20:02.571573: step: 30/466, loss: 5.212574005126953 2023-01-24 00:20:03.224232: step: 32/466, loss: 22.528642654418945 2023-01-24 00:20:03.855388: step: 34/466, loss: 4.774672031402588 2023-01-24 00:20:04.419189: step: 36/466, loss: 17.159042358398438 2023-01-24 00:20:05.018511: step: 38/466, loss: 18.028188705444336 2023-01-24 00:20:05.633490: step: 40/466, loss: 5.712503910064697 2023-01-24 00:20:06.196096: step: 42/466, loss: 12.436981201171875 2023-01-24 00:20:06.778537: step: 44/466, loss: 7.921031475067139 2023-01-24 00:20:07.421495: step: 46/466, loss: 13.548808097839355 2023-01-24 00:20:08.010885: step: 48/466, loss: 5.939296722412109 2023-01-24 00:20:08.670749: step: 50/466, loss: 6.09789514541626 2023-01-24 00:20:09.274071: step: 52/466, loss: 5.971683502197266 2023-01-24 00:20:09.887479: step: 54/466, loss: 19.42380714416504 2023-01-24 00:20:10.457592: step: 56/466, loss: 19.322917938232422 2023-01-24 00:20:11.116919: step: 58/466, loss: 16.892105102539062 2023-01-24 00:20:11.757100: step: 60/466, loss: 13.79200267791748 2023-01-24 00:20:12.415431: step: 62/466, loss: 10.073508262634277 2023-01-24 00:20:13.034907: step: 64/466, loss: 9.672430992126465 2023-01-24 00:20:13.651829: step: 66/466, loss: 16.316783905029297 2023-01-24 00:20:14.265434: step: 68/466, loss: 15.566567420959473 2023-01-24 00:20:14.844264: step: 70/466, loss: 13.069356918334961 2023-01-24 00:20:15.543298: step: 72/466, loss: 6.8840131759643555 2023-01-24 00:20:16.117786: step: 74/466, loss: 12.612266540527344 2023-01-24 00:20:16.759967: step: 76/466, loss: 5.053008079528809 2023-01-24 00:20:17.362978: step: 78/466, loss: 25.458084106445312 2023-01-24 00:20:17.954957: step: 80/466, loss: 12.025644302368164 2023-01-24 00:20:18.595522: step: 82/466, loss: 18.070756912231445 2023-01-24 00:20:19.257834: step: 84/466, loss: 12.265995979309082 2023-01-24 00:20:19.922782: step: 86/466, loss: 5.9723334312438965 2023-01-24 00:20:20.575575: step: 88/466, loss: 23.769235610961914 2023-01-24 00:20:21.267624: step: 90/466, loss: 11.45355224609375 2023-01-24 00:20:21.893757: step: 92/466, loss: 19.209590911865234 2023-01-24 00:20:22.505405: step: 94/466, loss: 5.9348039627075195 2023-01-24 00:20:23.185434: step: 96/466, loss: 20.47209930419922 2023-01-24 00:20:23.874338: step: 98/466, loss: 15.648857116699219 2023-01-24 00:20:24.453159: step: 100/466, loss: 26.48772430419922 2023-01-24 00:20:25.109982: step: 102/466, loss: 23.994529724121094 2023-01-24 00:20:25.721499: step: 104/466, loss: 4.9366841316223145 2023-01-24 00:20:26.366328: step: 106/466, loss: 6.385857105255127 2023-01-24 00:20:26.970391: step: 108/466, loss: 16.829540252685547 2023-01-24 00:20:27.644425: step: 110/466, loss: 14.721445083618164 2023-01-24 00:20:28.241811: step: 112/466, loss: 15.021709442138672 2023-01-24 00:20:28.940097: step: 114/466, loss: 16.95585060119629 2023-01-24 00:20:29.637032: step: 116/466, loss: 5.885819435119629 2023-01-24 00:20:30.207769: step: 118/466, loss: 22.682077407836914 2023-01-24 00:20:30.786096: step: 120/466, loss: 8.589439392089844 2023-01-24 00:20:31.367472: step: 122/466, loss: 7.018434524536133 2023-01-24 00:20:31.929468: step: 124/466, loss: 15.75301742553711 2023-01-24 00:20:32.524893: step: 126/466, loss: 10.034443855285645 2023-01-24 00:20:33.161197: step: 128/466, loss: 7.172554969787598 2023-01-24 00:20:33.806027: step: 130/466, loss: 16.368778228759766 2023-01-24 00:20:34.453894: step: 132/466, loss: 7.45112419128418 2023-01-24 00:20:35.073328: step: 134/466, loss: 9.522209167480469 2023-01-24 00:20:35.732764: step: 136/466, loss: 17.63762664794922 2023-01-24 00:20:36.381976: step: 138/466, loss: 8.851346015930176 2023-01-24 00:20:37.035739: step: 140/466, loss: 14.376631736755371 2023-01-24 00:20:37.673531: step: 142/466, loss: 7.4392409324646 2023-01-24 00:20:38.311657: step: 144/466, loss: 4.238392353057861 2023-01-24 00:20:38.965395: step: 146/466, loss: 5.5457048416137695 2023-01-24 00:20:39.618283: step: 148/466, loss: 12.26491928100586 2023-01-24 00:20:40.274461: step: 150/466, loss: 15.939020156860352 2023-01-24 00:20:40.927638: step: 152/466, loss: 4.8080153465271 2023-01-24 00:20:41.539645: step: 154/466, loss: 17.984691619873047 2023-01-24 00:20:42.182788: step: 156/466, loss: 3.5370397567749023 2023-01-24 00:20:42.789240: step: 158/466, loss: 8.747307777404785 2023-01-24 00:20:43.443724: step: 160/466, loss: 6.944577217102051 2023-01-24 00:20:44.054317: step: 162/466, loss: 15.041013717651367 2023-01-24 00:20:44.692246: step: 164/466, loss: 17.815420150756836 2023-01-24 00:20:45.513180: step: 166/466, loss: 12.960419654846191 2023-01-24 00:20:46.161472: step: 168/466, loss: 16.44618797302246 2023-01-24 00:20:46.833478: step: 170/466, loss: 5.99251651763916 2023-01-24 00:20:47.444768: step: 172/466, loss: 3.8540897369384766 2023-01-24 00:20:48.048803: step: 174/466, loss: 11.568319320678711 2023-01-24 00:20:48.669971: step: 176/466, loss: 13.330349922180176 2023-01-24 00:20:49.278575: step: 178/466, loss: 15.725419998168945 2023-01-24 00:20:49.916194: step: 180/466, loss: 4.868077278137207 2023-01-24 00:20:50.551929: step: 182/466, loss: 4.35472297668457 2023-01-24 00:20:51.193563: step: 184/466, loss: 6.567266941070557 2023-01-24 00:20:51.804442: step: 186/466, loss: 3.5111114978790283 2023-01-24 00:20:52.451948: step: 188/466, loss: 11.336760520935059 2023-01-24 00:20:53.079812: step: 190/466, loss: 3.495755672454834 2023-01-24 00:20:53.675202: step: 192/466, loss: 3.3268911838531494 2023-01-24 00:20:54.331311: step: 194/466, loss: 12.559239387512207 2023-01-24 00:20:54.996882: step: 196/466, loss: 13.295437812805176 2023-01-24 00:20:55.622349: step: 198/466, loss: 12.328798294067383 2023-01-24 00:20:56.230051: step: 200/466, loss: 4.826976299285889 2023-01-24 00:20:56.849536: step: 202/466, loss: 8.107643127441406 2023-01-24 00:20:57.443286: step: 204/466, loss: 12.581511497497559 2023-01-24 00:20:58.054986: step: 206/466, loss: 10.175179481506348 2023-01-24 00:20:58.644013: step: 208/466, loss: 3.33925724029541 2023-01-24 00:20:59.263978: step: 210/466, loss: 12.030865669250488 2023-01-24 00:20:59.828462: step: 212/466, loss: 6.707439422607422 2023-01-24 00:21:00.416208: step: 214/466, loss: 6.418980121612549 2023-01-24 00:21:01.084631: step: 216/466, loss: 5.509450435638428 2023-01-24 00:21:01.728049: step: 218/466, loss: 7.686718940734863 2023-01-24 00:21:02.318240: step: 220/466, loss: 9.260184288024902 2023-01-24 00:21:02.992215: step: 222/466, loss: 7.044439792633057 2023-01-24 00:21:03.648821: step: 224/466, loss: 10.225696563720703 2023-01-24 00:21:04.303829: step: 226/466, loss: 3.3414080142974854 2023-01-24 00:21:05.020344: step: 228/466, loss: 4.594356060028076 2023-01-24 00:21:05.669353: step: 230/466, loss: 9.147964477539062 2023-01-24 00:21:06.291940: step: 232/466, loss: 10.93932819366455 2023-01-24 00:21:06.987177: step: 234/466, loss: 3.6889586448669434 2023-01-24 00:21:07.622540: step: 236/466, loss: 6.666989326477051 2023-01-24 00:21:08.252635: step: 238/466, loss: 8.69485092163086 2023-01-24 00:21:08.925723: step: 240/466, loss: 6.226055145263672 2023-01-24 00:21:09.522361: step: 242/466, loss: 5.682862281799316 2023-01-24 00:21:10.120514: step: 244/466, loss: 8.642372131347656 2023-01-24 00:21:10.751617: step: 246/466, loss: 4.897340297698975 2023-01-24 00:21:11.379311: step: 248/466, loss: 9.65620231628418 2023-01-24 00:21:11.980889: step: 250/466, loss: 9.824047088623047 2023-01-24 00:21:12.609634: step: 252/466, loss: 12.255440711975098 2023-01-24 00:21:13.193345: step: 254/466, loss: 8.427801132202148 2023-01-24 00:21:13.831299: step: 256/466, loss: 2.771171808242798 2023-01-24 00:21:14.424123: step: 258/466, loss: 2.793335437774658 2023-01-24 00:21:15.036650: step: 260/466, loss: 4.4932026863098145 2023-01-24 00:21:15.556093: step: 262/466, loss: 14.632492065429688 2023-01-24 00:21:16.194709: step: 264/466, loss: 6.25474739074707 2023-01-24 00:21:16.806486: step: 266/466, loss: 4.915351867675781 2023-01-24 00:21:17.416223: step: 268/466, loss: 2.20871901512146 2023-01-24 00:21:18.039730: step: 270/466, loss: 3.317594528198242 2023-01-24 00:21:18.702294: step: 272/466, loss: 2.623382329940796 2023-01-24 00:21:19.361822: step: 274/466, loss: 5.40425968170166 2023-01-24 00:21:19.964735: step: 276/466, loss: 5.922799110412598 2023-01-24 00:21:20.530475: step: 278/466, loss: 10.036717414855957 2023-01-24 00:21:21.212707: step: 280/466, loss: 7.337660789489746 2023-01-24 00:21:21.778746: step: 282/466, loss: 4.743040561676025 2023-01-24 00:21:22.470154: step: 284/466, loss: 11.601187705993652 2023-01-24 00:21:23.085336: step: 286/466, loss: 7.599787712097168 2023-01-24 00:21:23.682150: step: 288/466, loss: 4.125901222229004 2023-01-24 00:21:24.305510: step: 290/466, loss: 8.427830696105957 2023-01-24 00:21:24.944409: step: 292/466, loss: 4.56728458404541 2023-01-24 00:21:25.553217: step: 294/466, loss: 8.052621841430664 2023-01-24 00:21:26.224991: step: 296/466, loss: 12.73134708404541 2023-01-24 00:21:26.899845: step: 298/466, loss: 3.4680447578430176 2023-01-24 00:21:27.563307: step: 300/466, loss: 13.000292778015137 2023-01-24 00:21:28.220987: step: 302/466, loss: 1.799185872077942 2023-01-24 00:21:28.829928: step: 304/466, loss: 4.542043685913086 2023-01-24 00:21:29.438968: step: 306/466, loss: 1.7015113830566406 2023-01-24 00:21:30.072347: step: 308/466, loss: 8.83305549621582 2023-01-24 00:21:30.722409: step: 310/466, loss: 2.995100498199463 2023-01-24 00:21:31.283572: step: 312/466, loss: 3.73531436920166 2023-01-24 00:21:31.883815: step: 314/466, loss: 3.0925674438476562 2023-01-24 00:21:32.524701: step: 316/466, loss: 8.24208927154541 2023-01-24 00:21:33.130226: step: 318/466, loss: 5.052097797393799 2023-01-24 00:21:33.804814: step: 320/466, loss: 10.515788078308105 2023-01-24 00:21:34.484886: step: 322/466, loss: 2.801037073135376 2023-01-24 00:21:35.093960: step: 324/466, loss: 3.955794334411621 2023-01-24 00:21:35.697008: step: 326/466, loss: 8.783456802368164 2023-01-24 00:21:36.295837: step: 328/466, loss: 7.397550106048584 2023-01-24 00:21:36.933364: step: 330/466, loss: 2.2124977111816406 2023-01-24 00:21:37.546205: step: 332/466, loss: 2.6953563690185547 2023-01-24 00:21:38.150866: step: 334/466, loss: 3.194175958633423 2023-01-24 00:21:38.804499: step: 336/466, loss: 8.288835525512695 2023-01-24 00:21:39.493183: step: 338/466, loss: 5.7289719581604 2023-01-24 00:21:40.115740: step: 340/466, loss: 1.7479150295257568 2023-01-24 00:21:40.747953: step: 342/466, loss: 3.9976894855499268 2023-01-24 00:21:41.395154: step: 344/466, loss: 5.309848785400391 2023-01-24 00:21:42.022161: step: 346/466, loss: 9.174612998962402 2023-01-24 00:21:42.613551: step: 348/466, loss: 10.968244552612305 2023-01-24 00:21:43.216247: step: 350/466, loss: 7.314735412597656 2023-01-24 00:21:43.837291: step: 352/466, loss: 5.568917274475098 2023-01-24 00:21:44.486946: step: 354/466, loss: 5.966447830200195 2023-01-24 00:21:45.164382: step: 356/466, loss: 2.349154472351074 2023-01-24 00:21:45.768020: step: 358/466, loss: 1.817748785018921 2023-01-24 00:21:46.373512: step: 360/466, loss: 4.91948127746582 2023-01-24 00:21:47.028023: step: 362/466, loss: 5.156692981719971 2023-01-24 00:21:47.600306: step: 364/466, loss: 2.1112470626831055 2023-01-24 00:21:48.289142: step: 366/466, loss: 3.301403284072876 2023-01-24 00:21:48.887536: step: 368/466, loss: 2.6327786445617676 2023-01-24 00:21:49.573748: step: 370/466, loss: 11.900964736938477 2023-01-24 00:21:50.194251: step: 372/466, loss: 1.7852437496185303 2023-01-24 00:21:50.796035: step: 374/466, loss: 11.14040470123291 2023-01-24 00:21:51.372751: step: 376/466, loss: 6.314337253570557 2023-01-24 00:21:52.080152: step: 378/466, loss: 5.979366302490234 2023-01-24 00:21:52.711577: step: 380/466, loss: 2.11566162109375 2023-01-24 00:21:53.381721: step: 382/466, loss: 2.9155261516571045 2023-01-24 00:21:54.011504: step: 384/466, loss: 1.8489387035369873 2023-01-24 00:21:54.643527: step: 386/466, loss: 2.673495054244995 2023-01-24 00:21:55.236372: step: 388/466, loss: 2.064558744430542 2023-01-24 00:21:55.903951: step: 390/466, loss: 4.720061302185059 2023-01-24 00:21:56.534944: step: 392/466, loss: 0.9562152624130249 2023-01-24 00:21:57.142952: step: 394/466, loss: 3.733920097351074 2023-01-24 00:21:57.741891: step: 396/466, loss: 1.6869310140609741 2023-01-24 00:21:58.448869: step: 398/466, loss: 3.41373872756958 2023-01-24 00:21:59.044531: step: 400/466, loss: 8.510161399841309 2023-01-24 00:21:59.620885: step: 402/466, loss: 2.9869790077209473 2023-01-24 00:22:00.323151: step: 404/466, loss: 2.9523603916168213 2023-01-24 00:22:00.968399: step: 406/466, loss: 4.403825759887695 2023-01-24 00:22:01.647368: step: 408/466, loss: 6.967065811157227 2023-01-24 00:22:02.240436: step: 410/466, loss: 1.8065185546875 2023-01-24 00:22:02.849452: step: 412/466, loss: 2.615292549133301 2023-01-24 00:22:03.470561: step: 414/466, loss: 3.42132568359375 2023-01-24 00:22:04.067148: step: 416/466, loss: 1.5297431945800781 2023-01-24 00:22:04.751491: step: 418/466, loss: 2.7468690872192383 2023-01-24 00:22:05.427542: step: 420/466, loss: 1.3169325590133667 2023-01-24 00:22:06.025045: step: 422/466, loss: 1.277280330657959 2023-01-24 00:22:06.680615: step: 424/466, loss: 6.031007766723633 2023-01-24 00:22:07.333346: step: 426/466, loss: 0.8889445066452026 2023-01-24 00:22:08.009026: step: 428/466, loss: 3.3562746047973633 2023-01-24 00:22:08.658399: step: 430/466, loss: 1.2084641456604004 2023-01-24 00:22:09.226635: step: 432/466, loss: 6.203814506530762 2023-01-24 00:22:09.877819: step: 434/466, loss: 4.7271857261657715 2023-01-24 00:22:10.490830: step: 436/466, loss: 1.5970778465270996 2023-01-24 00:22:11.137866: step: 438/466, loss: 1.6236553192138672 2023-01-24 00:22:11.695665: step: 440/466, loss: 2.298182487487793 2023-01-24 00:22:12.344569: step: 442/466, loss: 2.216686248779297 2023-01-24 00:22:12.978696: step: 444/466, loss: 1.738180160522461 2023-01-24 00:22:13.572631: step: 446/466, loss: 0.6070207357406616 2023-01-24 00:22:14.255824: step: 448/466, loss: 6.839568138122559 2023-01-24 00:22:14.899885: step: 450/466, loss: 1.0888025760650635 2023-01-24 00:22:15.508748: step: 452/466, loss: 2.980574369430542 2023-01-24 00:22:16.194298: step: 454/466, loss: 3.1009440422058105 2023-01-24 00:22:16.778104: step: 456/466, loss: 3.6753101348876953 2023-01-24 00:22:17.354530: step: 458/466, loss: 0.9100843667984009 2023-01-24 00:22:17.997071: step: 460/466, loss: 1.9703644514083862 2023-01-24 00:22:18.682296: step: 462/466, loss: 1.3446106910705566 2023-01-24 00:22:19.315581: step: 464/466, loss: 1.59548020362854 2023-01-24 00:22:19.988335: step: 466/466, loss: 2.3122241497039795 2023-01-24 00:22:20.614875: step: 468/466, loss: 1.7087005376815796 2023-01-24 00:22:21.285756: step: 470/466, loss: 0.7805963754653931 2023-01-24 00:22:21.943064: step: 472/466, loss: 1.2716556787490845 2023-01-24 00:22:22.547374: step: 474/466, loss: 7.557900428771973 2023-01-24 00:22:23.245472: step: 476/466, loss: 7.787192344665527 2023-01-24 00:22:23.825174: step: 478/466, loss: 1.5486183166503906 2023-01-24 00:22:24.469339: step: 480/466, loss: 0.793107807636261 2023-01-24 00:22:25.054839: step: 482/466, loss: 4.602435111999512 2023-01-24 00:22:25.677758: step: 484/466, loss: 14.594005584716797 2023-01-24 00:22:26.352188: step: 486/466, loss: 0.6587408185005188 2023-01-24 00:22:26.983847: step: 488/466, loss: 1.9012621641159058 2023-01-24 00:22:27.613460: step: 490/466, loss: 1.7635666131973267 2023-01-24 00:22:28.216487: step: 492/466, loss: 5.529452800750732 2023-01-24 00:22:28.840637: step: 494/466, loss: 3.497648239135742 2023-01-24 00:22:29.483756: step: 496/466, loss: 1.3656432628631592 2023-01-24 00:22:30.079079: step: 498/466, loss: 1.1757444143295288 2023-01-24 00:22:30.684962: step: 500/466, loss: 11.79849624633789 2023-01-24 00:22:31.292954: step: 502/466, loss: 2.29689621925354 2023-01-24 00:22:31.916611: step: 504/466, loss: 3.479254722595215 2023-01-24 00:22:32.485606: step: 506/466, loss: 0.8649065494537354 2023-01-24 00:22:33.047130: step: 508/466, loss: 1.3104722499847412 2023-01-24 00:22:33.649484: step: 510/466, loss: 2.9688644409179688 2023-01-24 00:22:34.313499: step: 512/466, loss: 1.84807550907135 2023-01-24 00:22:34.970884: step: 514/466, loss: 2.0440447330474854 2023-01-24 00:22:35.626306: step: 516/466, loss: 1.0096993446350098 2023-01-24 00:22:36.333383: step: 518/466, loss: 1.8664580583572388 2023-01-24 00:22:36.952429: step: 520/466, loss: 2.7214341163635254 2023-01-24 00:22:37.559249: step: 522/466, loss: 1.2181735038757324 2023-01-24 00:22:38.220268: step: 524/466, loss: 2.78680419921875 2023-01-24 00:22:38.946145: step: 526/466, loss: 1.1728681325912476 2023-01-24 00:22:39.604548: step: 528/466, loss: 7.325881004333496 2023-01-24 00:22:40.202570: step: 530/466, loss: 0.7084908485412598 2023-01-24 00:22:40.798670: step: 532/466, loss: 4.042177677154541 2023-01-24 00:22:41.390686: step: 534/466, loss: 0.99375319480896 2023-01-24 00:22:42.067134: step: 536/466, loss: 8.10824203491211 2023-01-24 00:22:42.640669: step: 538/466, loss: 2.4628915786743164 2023-01-24 00:22:43.232500: step: 540/466, loss: 3.5445218086242676 2023-01-24 00:22:43.813409: step: 542/466, loss: 5.805854320526123 2023-01-24 00:22:44.446004: step: 544/466, loss: 2.3361477851867676 2023-01-24 00:22:45.030057: step: 546/466, loss: 1.2651429176330566 2023-01-24 00:22:45.704786: step: 548/466, loss: 6.927567958831787 2023-01-24 00:22:46.273455: step: 550/466, loss: 2.0593607425689697 2023-01-24 00:22:46.924591: step: 552/466, loss: 3.235837459564209 2023-01-24 00:22:47.568690: step: 554/466, loss: 1.5556988716125488 2023-01-24 00:22:48.175487: step: 556/466, loss: 1.3860385417938232 2023-01-24 00:22:48.828859: step: 558/466, loss: 3.06978178024292 2023-01-24 00:22:49.396956: step: 560/466, loss: 1.8661152124404907 2023-01-24 00:22:49.977819: step: 562/466, loss: 1.7554469108581543 2023-01-24 00:22:50.532550: step: 564/466, loss: 0.7988944053649902 2023-01-24 00:22:51.149937: step: 566/466, loss: 1.3624035120010376 2023-01-24 00:22:51.770372: step: 568/466, loss: 3.3787741661071777 2023-01-24 00:22:52.458735: step: 570/466, loss: 8.649011611938477 2023-01-24 00:22:53.116638: step: 572/466, loss: 0.8634949326515198 2023-01-24 00:22:53.718792: step: 574/466, loss: 8.739198684692383 2023-01-24 00:22:54.357993: step: 576/466, loss: 8.703192710876465 2023-01-24 00:22:55.008567: step: 578/466, loss: 1.8177729845046997 2023-01-24 00:22:55.606130: step: 580/466, loss: 1.5112245082855225 2023-01-24 00:22:56.263269: step: 582/466, loss: 3.063086986541748 2023-01-24 00:22:56.893127: step: 584/466, loss: 5.767726898193359 2023-01-24 00:22:57.497909: step: 586/466, loss: 2.0085208415985107 2023-01-24 00:22:58.120558: step: 588/466, loss: 6.870216369628906 2023-01-24 00:22:58.759165: step: 590/466, loss: 0.4155629873275757 2023-01-24 00:22:59.395548: step: 592/466, loss: 3.4884564876556396 2023-01-24 00:22:59.991152: step: 594/466, loss: 5.3875322341918945 2023-01-24 00:23:00.645451: step: 596/466, loss: 3.704777240753174 2023-01-24 00:23:01.284829: step: 598/466, loss: 3.0357890129089355 2023-01-24 00:23:01.888543: step: 600/466, loss: 2.5868122577667236 2023-01-24 00:23:02.514676: step: 602/466, loss: 2.622529983520508 2023-01-24 00:23:03.148047: step: 604/466, loss: 2.5356907844543457 2023-01-24 00:23:03.821119: step: 606/466, loss: 1.3839924335479736 2023-01-24 00:23:04.455182: step: 608/466, loss: 10.012450218200684 2023-01-24 00:23:05.120251: step: 610/466, loss: 4.111511707305908 2023-01-24 00:23:05.762925: step: 612/466, loss: 2.201719284057617 2023-01-24 00:23:06.340834: step: 614/466, loss: 4.48403263092041 2023-01-24 00:23:06.954487: step: 616/466, loss: 3.024829149246216 2023-01-24 00:23:07.607547: step: 618/466, loss: 3.7050843238830566 2023-01-24 00:23:08.213529: step: 620/466, loss: 1.3436836004257202 2023-01-24 00:23:08.825858: step: 622/466, loss: 1.4644038677215576 2023-01-24 00:23:09.419900: step: 624/466, loss: 1.202675700187683 2023-01-24 00:23:10.111532: step: 626/466, loss: 5.483269691467285 2023-01-24 00:23:10.767350: step: 628/466, loss: 1.0728418827056885 2023-01-24 00:23:11.376181: step: 630/466, loss: 6.990755558013916 2023-01-24 00:23:11.990869: step: 632/466, loss: 2.7628517150878906 2023-01-24 00:23:12.730130: step: 634/466, loss: 1.0182297229766846 2023-01-24 00:23:13.365783: step: 636/466, loss: 2.5928008556365967 2023-01-24 00:23:14.027059: step: 638/466, loss: 8.490153312683105 2023-01-24 00:23:14.759875: step: 640/466, loss: 0.6833595037460327 2023-01-24 00:23:15.400088: step: 642/466, loss: 0.8581178188323975 2023-01-24 00:23:16.056594: step: 644/466, loss: 2.870028495788574 2023-01-24 00:23:16.660901: step: 646/466, loss: 1.4932246208190918 2023-01-24 00:23:17.274225: step: 648/466, loss: 4.9424920082092285 2023-01-24 00:23:17.917919: step: 650/466, loss: 11.312755584716797 2023-01-24 00:23:18.531358: step: 652/466, loss: 3.2909493446350098 2023-01-24 00:23:19.182576: step: 654/466, loss: 2.3444089889526367 2023-01-24 00:23:19.813635: step: 656/466, loss: 1.102722406387329 2023-01-24 00:23:20.516748: step: 658/466, loss: 1.4511377811431885 2023-01-24 00:23:21.219401: step: 660/466, loss: 4.209312438964844 2023-01-24 00:23:21.854082: step: 662/466, loss: 1.1369702816009521 2023-01-24 00:23:22.547701: step: 664/466, loss: 3.2985424995422363 2023-01-24 00:23:23.141998: step: 666/466, loss: 0.7985827922821045 2023-01-24 00:23:23.775079: step: 668/466, loss: 0.6459063291549683 2023-01-24 00:23:24.396447: step: 670/466, loss: 2.3331503868103027 2023-01-24 00:23:25.032542: step: 672/466, loss: 5.232911109924316 2023-01-24 00:23:25.671303: step: 674/466, loss: 0.4341469705104828 2023-01-24 00:23:26.321229: step: 676/466, loss: 14.222973823547363 2023-01-24 00:23:26.940326: step: 678/466, loss: 0.7190119624137878 2023-01-24 00:23:27.583149: step: 680/466, loss: 2.441484212875366 2023-01-24 00:23:28.241453: step: 682/466, loss: 1.3511295318603516 2023-01-24 00:23:28.863365: step: 684/466, loss: 2.3318142890930176 2023-01-24 00:23:29.472624: step: 686/466, loss: 7.932394027709961 2023-01-24 00:23:30.135722: step: 688/466, loss: 3.3909051418304443 2023-01-24 00:23:30.780838: step: 690/466, loss: 5.007787704467773 2023-01-24 00:23:31.366498: step: 692/466, loss: 8.316167831420898 2023-01-24 00:23:31.957200: step: 694/466, loss: 2.1374423503875732 2023-01-24 00:23:32.524016: step: 696/466, loss: 2.0556044578552246 2023-01-24 00:23:33.175015: step: 698/466, loss: 3.3444528579711914 2023-01-24 00:23:33.902857: step: 700/466, loss: 3.3472328186035156 2023-01-24 00:23:34.474827: step: 702/466, loss: 5.110820770263672 2023-01-24 00:23:35.146983: step: 704/466, loss: 1.0950136184692383 2023-01-24 00:23:35.759997: step: 706/466, loss: 1.4175435304641724 2023-01-24 00:23:36.427049: step: 708/466, loss: 0.6698507070541382 2023-01-24 00:23:36.993821: step: 710/466, loss: 2.666046619415283 2023-01-24 00:23:37.584354: step: 712/466, loss: 0.7699897289276123 2023-01-24 00:23:38.172516: step: 714/466, loss: 4.566158294677734 2023-01-24 00:23:38.765221: step: 716/466, loss: 0.4597007632255554 2023-01-24 00:23:39.400233: step: 718/466, loss: 3.247370719909668 2023-01-24 00:23:40.046837: step: 720/466, loss: 1.869280219078064 2023-01-24 00:23:40.683133: step: 722/466, loss: 1.0909264087677002 2023-01-24 00:23:41.261509: step: 724/466, loss: 1.2511248588562012 2023-01-24 00:23:41.899051: step: 726/466, loss: 0.8433552980422974 2023-01-24 00:23:42.573391: step: 728/466, loss: 0.5003337264060974 2023-01-24 00:23:43.156734: step: 730/466, loss: 10.544416427612305 2023-01-24 00:23:43.763027: step: 732/466, loss: 1.9595627784729004 2023-01-24 00:23:44.320818: step: 734/466, loss: 0.4986826181411743 2023-01-24 00:23:44.909406: step: 736/466, loss: 2.4393367767333984 2023-01-24 00:23:45.723361: step: 738/466, loss: 0.43393364548683167 2023-01-24 00:23:46.310252: step: 740/466, loss: 4.039564609527588 2023-01-24 00:23:46.879924: step: 742/466, loss: 3.1970245838165283 2023-01-24 00:23:47.503104: step: 744/466, loss: 0.92037433385849 2023-01-24 00:23:48.211837: step: 746/466, loss: 1.3021045923233032 2023-01-24 00:23:48.864293: step: 748/466, loss: 1.1154721975326538 2023-01-24 00:23:49.467999: step: 750/466, loss: 3.194521188735962 2023-01-24 00:23:50.044143: step: 752/466, loss: 1.9175065755844116 2023-01-24 00:23:50.711843: step: 754/466, loss: 2.066864490509033 2023-01-24 00:23:51.390453: step: 756/466, loss: 2.003392219543457 2023-01-24 00:23:51.997378: step: 758/466, loss: 2.3764588832855225 2023-01-24 00:23:52.615283: step: 760/466, loss: 1.6392319202423096 2023-01-24 00:23:53.371267: step: 762/466, loss: 2.3279452323913574 2023-01-24 00:23:54.134613: step: 764/466, loss: 1.0867607593536377 2023-01-24 00:23:54.778354: step: 766/466, loss: 2.5018019676208496 2023-01-24 00:23:55.404822: step: 768/466, loss: 0.972590446472168 2023-01-24 00:23:56.008711: step: 770/466, loss: 2.3740994930267334 2023-01-24 00:23:56.618849: step: 772/466, loss: 1.1381977796554565 2023-01-24 00:23:57.269347: step: 774/466, loss: 1.6053963899612427 2023-01-24 00:23:57.853075: step: 776/466, loss: 2.5549421310424805 2023-01-24 00:23:58.487675: step: 778/466, loss: 6.050621509552002 2023-01-24 00:23:59.094471: step: 780/466, loss: 9.900514602661133 2023-01-24 00:23:59.688709: step: 782/466, loss: 2.7175397872924805 2023-01-24 00:24:00.301671: step: 784/466, loss: 6.7735700607299805 2023-01-24 00:24:00.943701: step: 786/466, loss: 4.265678405761719 2023-01-24 00:24:01.511756: step: 788/466, loss: 1.092139482498169 2023-01-24 00:24:02.153595: step: 790/466, loss: 3.1744885444641113 2023-01-24 00:24:02.736204: step: 792/466, loss: 1.2144368886947632 2023-01-24 00:24:03.367372: step: 794/466, loss: 1.2058383226394653 2023-01-24 00:24:04.001630: step: 796/466, loss: 3.056980609893799 2023-01-24 00:24:04.630281: step: 798/466, loss: 2.6981399059295654 2023-01-24 00:24:05.260663: step: 800/466, loss: 3.3247382640838623 2023-01-24 00:24:05.920055: step: 802/466, loss: 2.5999903678894043 2023-01-24 00:24:06.541992: step: 804/466, loss: 1.947007179260254 2023-01-24 00:24:07.213590: step: 806/466, loss: 3.425672769546509 2023-01-24 00:24:07.816092: step: 808/466, loss: 1.6140003204345703 2023-01-24 00:24:08.386435: step: 810/466, loss: 2.9465503692626953 2023-01-24 00:24:09.020093: step: 812/466, loss: 0.9249109625816345 2023-01-24 00:24:09.605626: step: 814/466, loss: 0.5993981957435608 2023-01-24 00:24:10.184332: step: 816/466, loss: 1.445728063583374 2023-01-24 00:24:10.807263: step: 818/466, loss: 2.0373096466064453 2023-01-24 00:24:11.411691: step: 820/466, loss: 0.901696503162384 2023-01-24 00:24:12.066426: step: 822/466, loss: 1.959995150566101 2023-01-24 00:24:12.684031: step: 824/466, loss: 0.6444437503814697 2023-01-24 00:24:13.320061: step: 826/466, loss: 1.7989659309387207 2023-01-24 00:24:13.966483: step: 828/466, loss: 1.0831447839736938 2023-01-24 00:24:14.658029: step: 830/466, loss: 0.9967033863067627 2023-01-24 00:24:15.258344: step: 832/466, loss: 3.739194631576538 2023-01-24 00:24:15.872115: step: 834/466, loss: 1.3975876569747925 2023-01-24 00:24:16.591375: step: 836/466, loss: 2.5847549438476562 2023-01-24 00:24:17.220233: step: 838/466, loss: 1.2046366930007935 2023-01-24 00:24:17.833133: step: 840/466, loss: 0.6176460981369019 2023-01-24 00:24:18.605614: step: 842/466, loss: 1.2880494594573975 2023-01-24 00:24:19.162676: step: 844/466, loss: 1.0232499837875366 2023-01-24 00:24:19.826765: step: 846/466, loss: 7.343748569488525 2023-01-24 00:24:20.511886: step: 848/466, loss: 2.252338171005249 2023-01-24 00:24:21.167888: step: 850/466, loss: 0.7068266272544861 2023-01-24 00:24:21.808316: step: 852/466, loss: 0.9141572713851929 2023-01-24 00:24:22.448989: step: 854/466, loss: 8.108214378356934 2023-01-24 00:24:23.049606: step: 856/466, loss: 6.887636184692383 2023-01-24 00:24:23.675810: step: 858/466, loss: 0.5420605540275574 2023-01-24 00:24:24.384718: step: 860/466, loss: 0.5500837564468384 2023-01-24 00:24:25.031199: step: 862/466, loss: 1.6031155586242676 2023-01-24 00:24:25.648816: step: 864/466, loss: 3.2517027854919434 2023-01-24 00:24:26.339158: step: 866/466, loss: 1.4873687028884888 2023-01-24 00:24:26.986008: step: 868/466, loss: 1.351752758026123 2023-01-24 00:24:27.641783: step: 870/466, loss: 1.4392306804656982 2023-01-24 00:24:28.298259: step: 872/466, loss: 3.2745513916015625 2023-01-24 00:24:28.904848: step: 874/466, loss: 1.054884672164917 2023-01-24 00:24:29.552013: step: 876/466, loss: 3.826343059539795 2023-01-24 00:24:30.227606: step: 878/466, loss: 1.9085838794708252 2023-01-24 00:24:30.866015: step: 880/466, loss: 0.7896057367324829 2023-01-24 00:24:31.460679: step: 882/466, loss: 1.2758047580718994 2023-01-24 00:24:32.078352: step: 884/466, loss: 8.894779205322266 2023-01-24 00:24:32.740272: step: 886/466, loss: 12.874110221862793 2023-01-24 00:24:33.433040: step: 888/466, loss: 4.848669052124023 2023-01-24 00:24:34.081628: step: 890/466, loss: 3.7010364532470703 2023-01-24 00:24:34.685733: step: 892/466, loss: 1.2354850769042969 2023-01-24 00:24:35.331907: step: 894/466, loss: 0.6324902176856995 2023-01-24 00:24:35.903142: step: 896/466, loss: 1.2102067470550537 2023-01-24 00:24:36.480247: step: 898/466, loss: 0.9676986336708069 2023-01-24 00:24:37.053569: step: 900/466, loss: 1.7856495380401611 2023-01-24 00:24:37.673817: step: 902/466, loss: 1.2424051761627197 2023-01-24 00:24:38.285146: step: 904/466, loss: 0.7720033526420593 2023-01-24 00:24:38.851861: step: 906/466, loss: 0.6424551010131836 2023-01-24 00:24:39.390261: step: 908/466, loss: 0.5964785218238831 2023-01-24 00:24:40.078041: step: 910/466, loss: 0.453046053647995 2023-01-24 00:24:40.707153: step: 912/466, loss: 2.552675724029541 2023-01-24 00:24:41.366416: step: 914/466, loss: 1.0938541889190674 2023-01-24 00:24:41.986636: step: 916/466, loss: 4.85699462890625 2023-01-24 00:24:42.582037: step: 918/466, loss: 13.960785865783691 2023-01-24 00:24:43.188759: step: 920/466, loss: 0.9788107872009277 2023-01-24 00:24:43.802599: step: 922/466, loss: 3.9030308723449707 2023-01-24 00:24:44.389185: step: 924/466, loss: 0.75482577085495 2023-01-24 00:24:45.004871: step: 926/466, loss: 0.974812388420105 2023-01-24 00:24:45.608449: step: 928/466, loss: 2.224337577819824 2023-01-24 00:24:46.236571: step: 930/466, loss: 4.5829362869262695 2023-01-24 00:24:46.937345: step: 932/466, loss: 2.171318292617798 ================================================== Loss: 5.563 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 0} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3229166666666667, 'r': 0.027852650494159928, 'f1': 0.05128205128205129}, 'combined': 0.03211119098969567, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 0} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3115131578947368, 'r': 0.02658917340521114, 'f1': 0.04899627483443708}, 'combined': 0.03036388862979199, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 0} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.30892857142857144, 'r': 0.019429469901168013, 'f1': 0.03655959425190194}, 'combined': 0.024252404107697325, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 0} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3229166666666667, 'r': 0.027852650494159928, 'f1': 0.05128205128205129}, 'combined': 0.03211119098969567, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 0} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3115131578947368, 'r': 0.02658917340521114, 'f1': 0.04899627483443708}, 'combined': 0.03036388862979199, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 0} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.30892857142857144, 'r': 0.019429469901168013, 'f1': 0.03655959425190194}, 'combined': 0.024252404107697325, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:27:40.620324: step: 2/466, loss: 4.929131507873535 2023-01-24 00:27:41.237185: step: 4/466, loss: 2.8415467739105225 2023-01-24 00:27:41.907748: step: 6/466, loss: 2.0225117206573486 2023-01-24 00:27:42.530674: step: 8/466, loss: 0.7062174081802368 2023-01-24 00:27:43.139364: step: 10/466, loss: 1.4165326356887817 2023-01-24 00:27:43.739231: step: 12/466, loss: 2.3219292163848877 2023-01-24 00:27:44.326095: step: 14/466, loss: 1.5617159605026245 2023-01-24 00:27:45.001127: step: 16/466, loss: 1.4706218242645264 2023-01-24 00:27:45.565471: step: 18/466, loss: 6.345338821411133 2023-01-24 00:27:46.205871: step: 20/466, loss: 6.110724925994873 2023-01-24 00:27:46.800381: step: 22/466, loss: 1.883397102355957 2023-01-24 00:27:47.473266: step: 24/466, loss: 12.628595352172852 2023-01-24 00:27:48.137851: step: 26/466, loss: 1.3402032852172852 2023-01-24 00:27:48.756004: step: 28/466, loss: 1.3088589906692505 2023-01-24 00:27:49.389756: step: 30/466, loss: 1.4807056188583374 2023-01-24 00:27:50.040807: step: 32/466, loss: 4.869627952575684 2023-01-24 00:27:50.699936: step: 34/466, loss: 1.9308644533157349 2023-01-24 00:27:51.323038: step: 36/466, loss: 2.5034115314483643 2023-01-24 00:27:51.927396: step: 38/466, loss: 1.6823831796646118 2023-01-24 00:27:52.492634: step: 40/466, loss: 0.5867857933044434 2023-01-24 00:27:53.123966: step: 42/466, loss: 2.1307899951934814 2023-01-24 00:27:53.697289: step: 44/466, loss: 0.2725277841091156 2023-01-24 00:27:54.257046: step: 46/466, loss: 2.647444009780884 2023-01-24 00:27:54.923859: step: 48/466, loss: 3.0356178283691406 2023-01-24 00:27:55.595652: step: 50/466, loss: 4.61689567565918 2023-01-24 00:27:56.154809: step: 52/466, loss: 1.5133917331695557 2023-01-24 00:27:56.812652: step: 54/466, loss: 0.7836480140686035 2023-01-24 00:27:57.429160: step: 56/466, loss: 1.1973618268966675 2023-01-24 00:27:58.080131: step: 58/466, loss: 1.8865139484405518 2023-01-24 00:27:58.678036: step: 60/466, loss: 0.727965235710144 2023-01-24 00:27:59.450817: step: 62/466, loss: 7.887497901916504 2023-01-24 00:28:00.099573: step: 64/466, loss: 2.089545726776123 2023-01-24 00:28:00.720543: step: 66/466, loss: 2.698453187942505 2023-01-24 00:28:01.360517: step: 68/466, loss: 3.0904152393341064 2023-01-24 00:28:02.029202: step: 70/466, loss: 2.1787655353546143 2023-01-24 00:28:02.674793: step: 72/466, loss: 0.7506197690963745 2023-01-24 00:28:03.298669: step: 74/466, loss: 1.150722622871399 2023-01-24 00:28:03.913638: step: 76/466, loss: 0.5888922810554504 2023-01-24 00:28:04.538941: step: 78/466, loss: 1.4027190208435059 2023-01-24 00:28:05.160830: step: 80/466, loss: 4.744175434112549 2023-01-24 00:28:05.771287: step: 82/466, loss: 0.45425620675086975 2023-01-24 00:28:06.369068: step: 84/466, loss: 1.6839087009429932 2023-01-24 00:28:07.019312: step: 86/466, loss: 0.9715327620506287 2023-01-24 00:28:07.616826: step: 88/466, loss: 1.4811819791793823 2023-01-24 00:28:08.168503: step: 90/466, loss: 0.8654317259788513 2023-01-24 00:28:08.858080: step: 92/466, loss: 0.7142919898033142 2023-01-24 00:28:09.473983: step: 94/466, loss: 1.9248844385147095 2023-01-24 00:28:10.084335: step: 96/466, loss: 8.593791007995605 2023-01-24 00:28:10.633129: step: 98/466, loss: 12.591300964355469 2023-01-24 00:28:11.275576: step: 100/466, loss: 0.34657251834869385 2023-01-24 00:28:11.924146: step: 102/466, loss: 4.688027858734131 2023-01-24 00:28:12.516430: step: 104/466, loss: 2.3076515197753906 2023-01-24 00:28:13.169146: step: 106/466, loss: 8.795105934143066 2023-01-24 00:28:13.809150: step: 108/466, loss: 10.504772186279297 2023-01-24 00:28:14.372056: step: 110/466, loss: 2.015002965927124 2023-01-24 00:28:14.960063: step: 112/466, loss: 0.8676893711090088 2023-01-24 00:28:15.576041: step: 114/466, loss: 2.728665828704834 2023-01-24 00:28:16.204878: step: 116/466, loss: 3.5228874683380127 2023-01-24 00:28:16.896712: step: 118/466, loss: 1.037108302116394 2023-01-24 00:28:17.499786: step: 120/466, loss: 4.725841045379639 2023-01-24 00:28:18.121571: step: 122/466, loss: 1.8643794059753418 2023-01-24 00:28:18.752016: step: 124/466, loss: 1.6684551239013672 2023-01-24 00:28:19.381117: step: 126/466, loss: 0.8428720235824585 2023-01-24 00:28:19.958239: step: 128/466, loss: 4.136777400970459 2023-01-24 00:28:20.650707: step: 130/466, loss: 1.1405422687530518 2023-01-24 00:28:21.309328: step: 132/466, loss: 1.033148169517517 2023-01-24 00:28:21.935029: step: 134/466, loss: 1.2923126220703125 2023-01-24 00:28:22.550663: step: 136/466, loss: 1.0092655420303345 2023-01-24 00:28:23.159479: step: 138/466, loss: 0.3972666263580322 2023-01-24 00:28:23.757748: step: 140/466, loss: 5.283818244934082 2023-01-24 00:28:24.392772: step: 142/466, loss: 0.5786647200584412 2023-01-24 00:28:25.099387: step: 144/466, loss: 1.15341055393219 2023-01-24 00:28:25.798199: step: 146/466, loss: 6.17548942565918 2023-01-24 00:28:26.387681: step: 148/466, loss: 2.5550308227539062 2023-01-24 00:28:27.067655: step: 150/466, loss: 1.2243494987487793 2023-01-24 00:28:27.743778: step: 152/466, loss: 0.4730167090892792 2023-01-24 00:28:28.432808: step: 154/466, loss: 5.324580192565918 2023-01-24 00:28:29.078016: step: 156/466, loss: 2.6694695949554443 2023-01-24 00:28:29.689931: step: 158/466, loss: 1.080947756767273 2023-01-24 00:28:30.312484: step: 160/466, loss: 1.998123288154602 2023-01-24 00:28:31.062592: step: 162/466, loss: 3.806281089782715 2023-01-24 00:28:31.687266: step: 164/466, loss: 1.2770198583602905 2023-01-24 00:28:32.297040: step: 166/466, loss: 1.6960055828094482 2023-01-24 00:28:33.003730: step: 168/466, loss: 1.8077749013900757 2023-01-24 00:28:33.641447: step: 170/466, loss: 2.616239309310913 2023-01-24 00:28:34.278742: step: 172/466, loss: 1.490473747253418 2023-01-24 00:28:34.958541: step: 174/466, loss: 6.659701824188232 2023-01-24 00:28:35.604739: step: 176/466, loss: 0.7965739369392395 2023-01-24 00:28:36.241339: step: 178/466, loss: 1.3609330654144287 2023-01-24 00:28:36.852218: step: 180/466, loss: 3.4460201263427734 2023-01-24 00:28:37.519866: step: 182/466, loss: 1.4271316528320312 2023-01-24 00:28:38.155124: step: 184/466, loss: 15.995522499084473 2023-01-24 00:28:38.816348: step: 186/466, loss: 1.9016940593719482 2023-01-24 00:28:39.427904: step: 188/466, loss: 1.519442081451416 2023-01-24 00:28:40.113253: step: 190/466, loss: 2.3379290103912354 2023-01-24 00:28:40.739733: step: 192/466, loss: 4.037121295928955 2023-01-24 00:28:41.379598: step: 194/466, loss: 1.0202337503433228 2023-01-24 00:28:41.988384: step: 196/466, loss: 1.505065679550171 2023-01-24 00:28:42.620900: step: 198/466, loss: 4.231905937194824 2023-01-24 00:28:43.195872: step: 200/466, loss: 0.5224823951721191 2023-01-24 00:28:43.834072: step: 202/466, loss: 1.1126139163970947 2023-01-24 00:28:44.484450: step: 204/466, loss: 2.1017184257507324 2023-01-24 00:28:45.103687: step: 206/466, loss: 1.1046385765075684 2023-01-24 00:28:45.704864: step: 208/466, loss: 0.5189461708068848 2023-01-24 00:28:46.283780: step: 210/466, loss: 1.4643187522888184 2023-01-24 00:28:47.058613: step: 212/466, loss: 2.6156485080718994 2023-01-24 00:28:47.745981: step: 214/466, loss: 1.7121052742004395 2023-01-24 00:28:48.336905: step: 216/466, loss: 1.8391915559768677 2023-01-24 00:28:48.955888: step: 218/466, loss: 0.9342567920684814 2023-01-24 00:28:49.599866: step: 220/466, loss: 3.362318754196167 2023-01-24 00:28:50.266412: step: 222/466, loss: 1.9302512407302856 2023-01-24 00:28:50.939978: step: 224/466, loss: 2.4147017002105713 2023-01-24 00:28:51.698476: step: 226/466, loss: 3.9908182621002197 2023-01-24 00:28:52.308822: step: 228/466, loss: 4.233687400817871 2023-01-24 00:28:53.014866: step: 230/466, loss: 2.4430644512176514 2023-01-24 00:28:53.630879: step: 232/466, loss: 2.7673659324645996 2023-01-24 00:28:54.224408: step: 234/466, loss: 1.1658360958099365 2023-01-24 00:28:54.882141: step: 236/466, loss: 0.8770806193351746 2023-01-24 00:28:55.500973: step: 238/466, loss: 2.096273899078369 2023-01-24 00:28:56.102020: step: 240/466, loss: 7.8668599128723145 2023-01-24 00:28:56.764245: step: 242/466, loss: 1.9725823402404785 2023-01-24 00:28:57.420215: step: 244/466, loss: 3.454176664352417 2023-01-24 00:28:58.084929: step: 246/466, loss: 4.818187713623047 2023-01-24 00:28:58.687389: step: 248/466, loss: 2.0286688804626465 2023-01-24 00:28:59.260600: step: 250/466, loss: 2.0410051345825195 2023-01-24 00:28:59.921023: step: 252/466, loss: 0.6237905025482178 2023-01-24 00:29:00.544819: step: 254/466, loss: 2.2543628215789795 2023-01-24 00:29:01.139576: step: 256/466, loss: 1.4204860925674438 2023-01-24 00:29:01.751746: step: 258/466, loss: 8.815642356872559 2023-01-24 00:29:02.362007: step: 260/466, loss: 2.6833858489990234 2023-01-24 00:29:03.064669: step: 262/466, loss: 0.5628165602684021 2023-01-24 00:29:03.637762: step: 264/466, loss: 2.933077096939087 2023-01-24 00:29:04.235476: step: 266/466, loss: 1.7693064212799072 2023-01-24 00:29:04.758718: step: 268/466, loss: 0.9090259075164795 2023-01-24 00:29:05.414373: step: 270/466, loss: 0.7030649781227112 2023-01-24 00:29:06.059408: step: 272/466, loss: 0.9038471579551697 2023-01-24 00:29:06.744919: step: 274/466, loss: 2.7512764930725098 2023-01-24 00:29:07.449698: step: 276/466, loss: 5.882218360900879 2023-01-24 00:29:08.035934: step: 278/466, loss: 2.691417694091797 2023-01-24 00:29:08.648119: step: 280/466, loss: 1.095444917678833 2023-01-24 00:29:09.273322: step: 282/466, loss: 5.419581890106201 2023-01-24 00:29:09.901501: step: 284/466, loss: 3.2140955924987793 2023-01-24 00:29:10.486465: step: 286/466, loss: 0.7479810118675232 2023-01-24 00:29:11.081712: step: 288/466, loss: 1.4674075841903687 2023-01-24 00:29:11.693154: step: 290/466, loss: 1.0882282257080078 2023-01-24 00:29:12.335252: step: 292/466, loss: 0.6354427337646484 2023-01-24 00:29:12.901242: step: 294/466, loss: 0.7517061233520508 2023-01-24 00:29:13.527215: step: 296/466, loss: 1.2195165157318115 2023-01-24 00:29:14.156078: step: 298/466, loss: 1.6127018928527832 2023-01-24 00:29:14.789449: step: 300/466, loss: 0.7257331609725952 2023-01-24 00:29:15.410473: step: 302/466, loss: 4.134901523590088 2023-01-24 00:29:16.179212: step: 304/466, loss: 5.65886116027832 2023-01-24 00:29:16.839216: step: 306/466, loss: 3.192089080810547 2023-01-24 00:29:17.427255: step: 308/466, loss: 0.6151542067527771 2023-01-24 00:29:18.211885: step: 310/466, loss: 0.7849242091178894 2023-01-24 00:29:18.847168: step: 312/466, loss: 3.01977276802063 2023-01-24 00:29:19.453630: step: 314/466, loss: 3.3342926502227783 2023-01-24 00:29:20.109920: step: 316/466, loss: 0.28638797998428345 2023-01-24 00:29:20.728488: step: 318/466, loss: 4.61793327331543 2023-01-24 00:29:21.332166: step: 320/466, loss: 1.9822195768356323 2023-01-24 00:29:21.924888: step: 322/466, loss: 1.9863197803497314 2023-01-24 00:29:22.464091: step: 324/466, loss: 2.362244129180908 2023-01-24 00:29:23.115256: step: 326/466, loss: 1.6751642227172852 2023-01-24 00:29:23.771841: step: 328/466, loss: 0.8643975257873535 2023-01-24 00:29:24.459589: step: 330/466, loss: 1.003990650177002 2023-01-24 00:29:25.099071: step: 332/466, loss: 2.7076058387756348 2023-01-24 00:29:25.769004: step: 334/466, loss: 1.2687891721725464 2023-01-24 00:29:26.364951: step: 336/466, loss: 0.9781487584114075 2023-01-24 00:29:27.024872: step: 338/466, loss: 1.5812121629714966 2023-01-24 00:29:27.684571: step: 340/466, loss: 0.7526068091392517 2023-01-24 00:29:28.316226: step: 342/466, loss: 0.7111249566078186 2023-01-24 00:29:28.959444: step: 344/466, loss: 1.1937557458877563 2023-01-24 00:29:29.599690: step: 346/466, loss: 1.871544599533081 2023-01-24 00:29:30.183467: step: 348/466, loss: 3.4902377128601074 2023-01-24 00:29:30.809256: step: 350/466, loss: 1.8830691576004028 2023-01-24 00:29:31.404648: step: 352/466, loss: 1.7916781902313232 2023-01-24 00:29:32.102681: step: 354/466, loss: 21.213960647583008 2023-01-24 00:29:32.781320: step: 356/466, loss: 3.97394061088562 2023-01-24 00:29:33.408194: step: 358/466, loss: 1.4885226488113403 2023-01-24 00:29:34.076759: step: 360/466, loss: 2.4368529319763184 2023-01-24 00:29:34.650623: step: 362/466, loss: 8.993447303771973 2023-01-24 00:29:35.257638: step: 364/466, loss: 1.512786865234375 2023-01-24 00:29:35.888566: step: 366/466, loss: 0.8020037412643433 2023-01-24 00:29:36.490159: step: 368/466, loss: 3.514601707458496 2023-01-24 00:29:37.087346: step: 370/466, loss: 1.5181677341461182 2023-01-24 00:29:37.774109: step: 372/466, loss: 1.298141598701477 2023-01-24 00:29:38.395957: step: 374/466, loss: 0.8693670034408569 2023-01-24 00:29:39.012263: step: 376/466, loss: 1.5451771020889282 2023-01-24 00:29:39.609968: step: 378/466, loss: 1.384331464767456 2023-01-24 00:29:40.226702: step: 380/466, loss: 1.4840786457061768 2023-01-24 00:29:40.902455: step: 382/466, loss: 0.9185431003570557 2023-01-24 00:29:41.481832: step: 384/466, loss: 1.204012393951416 2023-01-24 00:29:42.200339: step: 386/466, loss: 1.0616750717163086 2023-01-24 00:29:42.813365: step: 388/466, loss: 0.9257512092590332 2023-01-24 00:29:43.493053: step: 390/466, loss: 5.8560791015625 2023-01-24 00:29:44.168201: step: 392/466, loss: 0.400015652179718 2023-01-24 00:29:44.759507: step: 394/466, loss: 1.7335723638534546 2023-01-24 00:29:45.418642: step: 396/466, loss: 0.47278809547424316 2023-01-24 00:29:46.057576: step: 398/466, loss: 1.4631285667419434 2023-01-24 00:29:46.715706: step: 400/466, loss: 0.72065669298172 2023-01-24 00:29:47.296551: step: 402/466, loss: 2.369809865951538 2023-01-24 00:29:47.863451: step: 404/466, loss: 3.312589168548584 2023-01-24 00:29:48.490095: step: 406/466, loss: 4.5040974617004395 2023-01-24 00:29:49.072383: step: 408/466, loss: 1.723700761795044 2023-01-24 00:29:49.681193: step: 410/466, loss: 1.7383880615234375 2023-01-24 00:29:50.360730: step: 412/466, loss: 0.31944039463996887 2023-01-24 00:29:51.018724: step: 414/466, loss: 0.3767268657684326 2023-01-24 00:29:51.683814: step: 416/466, loss: 1.6418299674987793 2023-01-24 00:29:52.261785: step: 418/466, loss: 1.1182104349136353 2023-01-24 00:29:52.923677: step: 420/466, loss: 1.0689901113510132 2023-01-24 00:29:53.668214: step: 422/466, loss: 4.924126148223877 2023-01-24 00:29:54.301853: step: 424/466, loss: 1.012005090713501 2023-01-24 00:29:54.870662: step: 426/466, loss: 2.006622076034546 2023-01-24 00:29:55.410918: step: 428/466, loss: 2.862314224243164 2023-01-24 00:29:56.020248: step: 430/466, loss: 1.9691691398620605 2023-01-24 00:29:56.620493: step: 432/466, loss: 2.445025682449341 2023-01-24 00:29:57.289000: step: 434/466, loss: 1.6351045370101929 2023-01-24 00:29:57.949296: step: 436/466, loss: 3.27296781539917 2023-01-24 00:29:58.554495: step: 438/466, loss: 5.215020179748535 2023-01-24 00:29:59.215924: step: 440/466, loss: 1.332781195640564 2023-01-24 00:29:59.852017: step: 442/466, loss: 0.8536605834960938 2023-01-24 00:30:00.498131: step: 444/466, loss: 1.8220601081848145 2023-01-24 00:30:01.090363: step: 446/466, loss: 1.024617075920105 2023-01-24 00:30:01.702534: step: 448/466, loss: 1.2876169681549072 2023-01-24 00:30:02.312803: step: 450/466, loss: 3.4061126708984375 2023-01-24 00:30:02.909721: step: 452/466, loss: 0.49869418144226074 2023-01-24 00:30:03.518636: step: 454/466, loss: 0.6542998552322388 2023-01-24 00:30:04.094513: step: 456/466, loss: 4.0117998123168945 2023-01-24 00:30:04.763046: step: 458/466, loss: 2.689338207244873 2023-01-24 00:30:05.358719: step: 460/466, loss: 4.6095428466796875 2023-01-24 00:30:06.060300: step: 462/466, loss: 1.7618801593780518 2023-01-24 00:30:06.697067: step: 464/466, loss: 1.122097134590149 2023-01-24 00:30:07.330866: step: 466/466, loss: 0.6446213722229004 2023-01-24 00:30:07.934820: step: 468/466, loss: 1.0039113759994507 2023-01-24 00:30:08.556331: step: 470/466, loss: 9.731908798217773 2023-01-24 00:30:09.222141: step: 472/466, loss: 0.5952492952346802 2023-01-24 00:30:09.831000: step: 474/466, loss: 1.4649676084518433 2023-01-24 00:30:10.435050: step: 476/466, loss: 4.67943000793457 2023-01-24 00:30:11.121456: step: 478/466, loss: 0.49548181891441345 2023-01-24 00:30:11.733368: step: 480/466, loss: 0.4967639446258545 2023-01-24 00:30:12.381481: step: 482/466, loss: 0.6325618028640747 2023-01-24 00:30:13.137213: step: 484/466, loss: 1.9765273332595825 2023-01-24 00:30:13.783705: step: 486/466, loss: 0.9099281430244446 2023-01-24 00:30:14.423505: step: 488/466, loss: 3.319847583770752 2023-01-24 00:30:14.993083: step: 490/466, loss: 1.457308053970337 2023-01-24 00:30:15.653423: step: 492/466, loss: 2.3949124813079834 2023-01-24 00:30:16.273953: step: 494/466, loss: 1.507026195526123 2023-01-24 00:30:16.906309: step: 496/466, loss: 1.5553545951843262 2023-01-24 00:30:17.502146: step: 498/466, loss: 0.35252252221107483 2023-01-24 00:30:18.070511: step: 500/466, loss: 2.2092807292938232 2023-01-24 00:30:18.756801: step: 502/466, loss: 3.489619493484497 2023-01-24 00:30:19.380456: step: 504/466, loss: 0.6942618489265442 2023-01-24 00:30:20.109527: step: 506/466, loss: 4.0520758628845215 2023-01-24 00:30:20.728569: step: 508/466, loss: 2.635681629180908 2023-01-24 00:30:21.428092: step: 510/466, loss: 2.1636979579925537 2023-01-24 00:30:22.070842: step: 512/466, loss: 1.7167152166366577 2023-01-24 00:30:22.718990: step: 514/466, loss: 1.7931861877441406 2023-01-24 00:30:23.277829: step: 516/466, loss: 2.4630913734436035 2023-01-24 00:30:23.925736: step: 518/466, loss: 0.3835010528564453 2023-01-24 00:30:24.580563: step: 520/466, loss: 3.4767537117004395 2023-01-24 00:30:25.236816: step: 522/466, loss: 1.347123384475708 2023-01-24 00:30:25.857370: step: 524/466, loss: 5.978583335876465 2023-01-24 00:30:26.395681: step: 526/466, loss: 0.522865891456604 2023-01-24 00:30:27.051226: step: 528/466, loss: 0.9223169684410095 2023-01-24 00:30:27.662327: step: 530/466, loss: 0.24813520908355713 2023-01-24 00:30:28.254818: step: 532/466, loss: 4.144888401031494 2023-01-24 00:30:28.825643: step: 534/466, loss: 2.292142868041992 2023-01-24 00:30:29.400435: step: 536/466, loss: 2.742295980453491 2023-01-24 00:30:30.023585: step: 538/466, loss: 2.1504781246185303 2023-01-24 00:30:30.605909: step: 540/466, loss: 0.5144567489624023 2023-01-24 00:30:31.272436: step: 542/466, loss: 1.7471604347229004 2023-01-24 00:30:31.927589: step: 544/466, loss: 1.7209891080856323 2023-01-24 00:30:32.601380: step: 546/466, loss: 0.3878607749938965 2023-01-24 00:30:33.282794: step: 548/466, loss: 1.5918083190917969 2023-01-24 00:30:33.951988: step: 550/466, loss: 4.83439302444458 2023-01-24 00:30:34.549298: step: 552/466, loss: 0.20302298665046692 2023-01-24 00:30:35.259149: step: 554/466, loss: 2.577263832092285 2023-01-24 00:30:35.923883: step: 556/466, loss: 3.62603759765625 2023-01-24 00:30:36.581790: step: 558/466, loss: 1.6257869005203247 2023-01-24 00:30:37.184717: step: 560/466, loss: 0.7063632011413574 2023-01-24 00:30:37.897915: step: 562/466, loss: 3.2233450412750244 2023-01-24 00:30:38.462328: step: 564/466, loss: 1.3194761276245117 2023-01-24 00:30:39.062638: step: 566/466, loss: 0.6329874992370605 2023-01-24 00:30:39.780769: step: 568/466, loss: 1.4945679903030396 2023-01-24 00:30:40.398232: step: 570/466, loss: 0.9494670629501343 2023-01-24 00:30:41.020231: step: 572/466, loss: 5.421570301055908 2023-01-24 00:30:41.600614: step: 574/466, loss: 5.772930145263672 2023-01-24 00:30:42.232551: step: 576/466, loss: 5.787838459014893 2023-01-24 00:30:42.882065: step: 578/466, loss: 0.34725356101989746 2023-01-24 00:30:43.462147: step: 580/466, loss: 1.0084733963012695 2023-01-24 00:30:44.046436: step: 582/466, loss: 1.314733862876892 2023-01-24 00:30:44.728283: step: 584/466, loss: 1.7601927518844604 2023-01-24 00:30:45.265943: step: 586/466, loss: 1.2080614566802979 2023-01-24 00:30:45.897816: step: 588/466, loss: 0.6289536952972412 2023-01-24 00:30:46.559177: step: 590/466, loss: 11.007817268371582 2023-01-24 00:30:47.234560: step: 592/466, loss: 2.669806957244873 2023-01-24 00:30:47.805401: step: 594/466, loss: 0.7001340985298157 2023-01-24 00:30:48.418011: step: 596/466, loss: 3.2594616413116455 2023-01-24 00:30:49.017614: step: 598/466, loss: 0.4124808609485626 2023-01-24 00:30:49.679209: step: 600/466, loss: 4.088919639587402 2023-01-24 00:30:50.339043: step: 602/466, loss: 2.3946118354797363 2023-01-24 00:30:50.976741: step: 604/466, loss: 5.389073848724365 2023-01-24 00:30:51.623618: step: 606/466, loss: 5.529489994049072 2023-01-24 00:30:52.166147: step: 608/466, loss: 2.3979246616363525 2023-01-24 00:30:52.772150: step: 610/466, loss: 1.1010305881500244 2023-01-24 00:30:53.399884: step: 612/466, loss: 2.4431354999542236 2023-01-24 00:30:54.029793: step: 614/466, loss: 2.6164841651916504 2023-01-24 00:30:54.690525: step: 616/466, loss: 6.679588317871094 2023-01-24 00:30:55.273988: step: 618/466, loss: 1.1283159255981445 2023-01-24 00:30:55.936352: step: 620/466, loss: 0.9353471398353577 2023-01-24 00:30:56.586440: step: 622/466, loss: 1.9671612977981567 2023-01-24 00:30:57.184309: step: 624/466, loss: 0.5379169583320618 2023-01-24 00:30:57.834126: step: 626/466, loss: 0.6202490329742432 2023-01-24 00:30:58.426364: step: 628/466, loss: 0.6703006029129028 2023-01-24 00:30:59.081346: step: 630/466, loss: 3.3022804260253906 2023-01-24 00:30:59.632611: step: 632/466, loss: 1.3915318250656128 2023-01-24 00:31:00.248145: step: 634/466, loss: 8.410735130310059 2023-01-24 00:31:00.846050: step: 636/466, loss: 2.620047092437744 2023-01-24 00:31:01.410965: step: 638/466, loss: 0.33021292090415955 2023-01-24 00:31:02.063739: step: 640/466, loss: 0.47434836626052856 2023-01-24 00:31:02.695500: step: 642/466, loss: 0.6993430256843567 2023-01-24 00:31:03.445305: step: 644/466, loss: 6.348758697509766 2023-01-24 00:31:04.052171: step: 646/466, loss: 0.26549726724624634 2023-01-24 00:31:04.657295: step: 648/466, loss: 4.406766891479492 2023-01-24 00:31:05.267261: step: 650/466, loss: 0.6784960031509399 2023-01-24 00:31:05.910207: step: 652/466, loss: 0.45862019062042236 2023-01-24 00:31:06.468804: step: 654/466, loss: 2.127203941345215 2023-01-24 00:31:07.069319: step: 656/466, loss: 1.7913119792938232 2023-01-24 00:31:07.681365: step: 658/466, loss: 2.031116247177124 2023-01-24 00:31:08.333392: step: 660/466, loss: 2.6137585639953613 2023-01-24 00:31:08.994196: step: 662/466, loss: 2.5916874408721924 2023-01-24 00:31:09.584110: step: 664/466, loss: 0.9867198467254639 2023-01-24 00:31:10.218065: step: 666/466, loss: 4.332546710968018 2023-01-24 00:31:10.827402: step: 668/466, loss: 0.40057533979415894 2023-01-24 00:31:11.478875: step: 670/466, loss: 0.5043292045593262 2023-01-24 00:31:12.113033: step: 672/466, loss: 0.16987332701683044 2023-01-24 00:31:12.712499: step: 674/466, loss: 0.329906165599823 2023-01-24 00:31:13.375879: step: 676/466, loss: 1.4557455778121948 2023-01-24 00:31:14.020188: step: 678/466, loss: 1.384361743927002 2023-01-24 00:31:14.660460: step: 680/466, loss: 0.8367301225662231 2023-01-24 00:31:15.271591: step: 682/466, loss: 1.65016770362854 2023-01-24 00:31:15.927811: step: 684/466, loss: 0.9726030826568604 2023-01-24 00:31:16.596355: step: 686/466, loss: 9.812848091125488 2023-01-24 00:31:17.261791: step: 688/466, loss: 3.3158957958221436 2023-01-24 00:31:17.859525: step: 690/466, loss: 0.994577169418335 2023-01-24 00:31:18.520040: step: 692/466, loss: 6.210993766784668 2023-01-24 00:31:19.181488: step: 694/466, loss: 0.6562169790267944 2023-01-24 00:31:19.766119: step: 696/466, loss: 0.40056052803993225 2023-01-24 00:31:20.411640: step: 698/466, loss: 0.6570945382118225 2023-01-24 00:31:21.104862: step: 700/466, loss: 4.279087066650391 2023-01-24 00:31:21.722323: step: 702/466, loss: 0.7729417085647583 2023-01-24 00:31:22.396571: step: 704/466, loss: 0.7772341966629028 2023-01-24 00:31:23.049849: step: 706/466, loss: 2.9096169471740723 2023-01-24 00:31:23.740217: step: 708/466, loss: 1.0564897060394287 2023-01-24 00:31:24.435475: step: 710/466, loss: 0.5298444032669067 2023-01-24 00:31:25.087168: step: 712/466, loss: 0.607456624507904 2023-01-24 00:31:25.739448: step: 714/466, loss: 0.9207783937454224 2023-01-24 00:31:26.371441: step: 716/466, loss: 20.0753116607666 2023-01-24 00:31:26.951637: step: 718/466, loss: 1.4352962970733643 2023-01-24 00:31:27.556379: step: 720/466, loss: 1.1125893592834473 2023-01-24 00:31:28.136028: step: 722/466, loss: 2.0145692825317383 2023-01-24 00:31:28.764659: step: 724/466, loss: 1.474932312965393 2023-01-24 00:31:29.385007: step: 726/466, loss: 1.502770185470581 2023-01-24 00:31:30.025110: step: 728/466, loss: 0.8884294033050537 2023-01-24 00:31:30.643679: step: 730/466, loss: 0.4197545051574707 2023-01-24 00:31:31.254828: step: 732/466, loss: 2.686546802520752 2023-01-24 00:31:31.840938: step: 734/466, loss: 1.3755604028701782 2023-01-24 00:31:32.463205: step: 736/466, loss: 1.4181619882583618 2023-01-24 00:31:33.085641: step: 738/466, loss: 1.007577896118164 2023-01-24 00:31:33.726054: step: 740/466, loss: 1.4114456176757812 2023-01-24 00:31:34.304361: step: 742/466, loss: 1.0807565450668335 2023-01-24 00:31:34.908892: step: 744/466, loss: 0.7102230191230774 2023-01-24 00:31:35.536122: step: 746/466, loss: 1.7669364213943481 2023-01-24 00:31:36.129736: step: 748/466, loss: 0.5153622031211853 2023-01-24 00:31:36.755795: step: 750/466, loss: 6.274852752685547 2023-01-24 00:31:37.379882: step: 752/466, loss: 2.974384069442749 2023-01-24 00:31:38.003617: step: 754/466, loss: 1.164354920387268 2023-01-24 00:31:38.662353: step: 756/466, loss: 1.9302465915679932 2023-01-24 00:31:39.260414: step: 758/466, loss: 2.3095157146453857 2023-01-24 00:31:39.902371: step: 760/466, loss: 1.5579822063446045 2023-01-24 00:31:40.490189: step: 762/466, loss: 1.3251173496246338 2023-01-24 00:31:41.156894: step: 764/466, loss: 1.529748797416687 2023-01-24 00:31:41.735653: step: 766/466, loss: 0.7907923460006714 2023-01-24 00:31:42.378004: step: 768/466, loss: 0.675163984298706 2023-01-24 00:31:43.003737: step: 770/466, loss: 0.3156393766403198 2023-01-24 00:31:43.696792: step: 772/466, loss: 1.0035383701324463 2023-01-24 00:31:44.353722: step: 774/466, loss: 0.6493601202964783 2023-01-24 00:31:44.954462: step: 776/466, loss: 1.1197794675827026 2023-01-24 00:31:45.580448: step: 778/466, loss: 1.0847022533416748 2023-01-24 00:31:46.224094: step: 780/466, loss: 2.637948513031006 2023-01-24 00:31:46.926025: step: 782/466, loss: 0.8970670700073242 2023-01-24 00:31:47.505013: step: 784/466, loss: 2.524245023727417 2023-01-24 00:31:48.176008: step: 786/466, loss: 0.7719473242759705 2023-01-24 00:31:48.831646: step: 788/466, loss: 2.2784554958343506 2023-01-24 00:31:49.417693: step: 790/466, loss: 0.5189509987831116 2023-01-24 00:31:50.085945: step: 792/466, loss: 4.731783390045166 2023-01-24 00:31:50.664404: step: 794/466, loss: 1.8328797817230225 2023-01-24 00:31:51.271800: step: 796/466, loss: 0.6995648741722107 2023-01-24 00:31:51.922849: step: 798/466, loss: 0.4558100700378418 2023-01-24 00:31:52.601299: step: 800/466, loss: 0.5942729115486145 2023-01-24 00:31:53.280198: step: 802/466, loss: 3.3813037872314453 2023-01-24 00:31:53.878419: step: 804/466, loss: 0.9906265139579773 2023-01-24 00:31:54.455276: step: 806/466, loss: 0.8813824653625488 2023-01-24 00:31:55.077397: step: 808/466, loss: 1.026023268699646 2023-01-24 00:31:55.743238: step: 810/466, loss: 1.2499135732650757 2023-01-24 00:31:56.420928: step: 812/466, loss: 0.6142721772193909 2023-01-24 00:31:56.948077: step: 814/466, loss: 4.311723709106445 2023-01-24 00:31:57.588823: step: 816/466, loss: 1.9721442461013794 2023-01-24 00:31:58.203643: step: 818/466, loss: 0.46852439641952515 2023-01-24 00:31:58.839979: step: 820/466, loss: 0.37276071310043335 2023-01-24 00:31:59.469408: step: 822/466, loss: 0.5146880149841309 2023-01-24 00:32:00.143320: step: 824/466, loss: 0.17100459337234497 2023-01-24 00:32:00.764590: step: 826/466, loss: 0.8161599040031433 2023-01-24 00:32:01.372173: step: 828/466, loss: 3.149770736694336 2023-01-24 00:32:01.979007: step: 830/466, loss: 2.1626739501953125 2023-01-24 00:32:02.572076: step: 832/466, loss: 0.6332207918167114 2023-01-24 00:32:03.217437: step: 834/466, loss: 0.6228510141372681 2023-01-24 00:32:03.813898: step: 836/466, loss: 1.3058120012283325 2023-01-24 00:32:04.422386: step: 838/466, loss: 1.0543804168701172 2023-01-24 00:32:05.050757: step: 840/466, loss: 1.508296012878418 2023-01-24 00:32:05.738663: step: 842/466, loss: 0.8300260305404663 2023-01-24 00:32:06.472959: step: 844/466, loss: 2.9184799194335938 2023-01-24 00:32:07.110177: step: 846/466, loss: 0.8929555416107178 2023-01-24 00:32:07.817252: step: 848/466, loss: 0.19070546329021454 2023-01-24 00:32:08.411073: step: 850/466, loss: 2.262145519256592 2023-01-24 00:32:09.035744: step: 852/466, loss: 0.9234271049499512 2023-01-24 00:32:09.644387: step: 854/466, loss: 1.4189567565917969 2023-01-24 00:32:10.261205: step: 856/466, loss: 1.4584400653839111 2023-01-24 00:32:10.893975: step: 858/466, loss: 0.23545756936073303 2023-01-24 00:32:11.516463: step: 860/466, loss: 1.1982975006103516 2023-01-24 00:32:12.195027: step: 862/466, loss: 2.1789932250976562 2023-01-24 00:32:12.754927: step: 864/466, loss: 1.9957928657531738 2023-01-24 00:32:13.344434: step: 866/466, loss: 1.0068098306655884 2023-01-24 00:32:14.002187: step: 868/466, loss: 0.5375741720199585 2023-01-24 00:32:14.615745: step: 870/466, loss: 0.4122433662414551 2023-01-24 00:32:15.248863: step: 872/466, loss: 1.5965980291366577 2023-01-24 00:32:15.908463: step: 874/466, loss: 1.0646878480911255 2023-01-24 00:32:16.632914: step: 876/466, loss: 0.7865087389945984 2023-01-24 00:32:17.226725: step: 878/466, loss: 0.2480173259973526 2023-01-24 00:32:17.910364: step: 880/466, loss: 1.5218760967254639 2023-01-24 00:32:18.553218: step: 882/466, loss: 0.9669118523597717 2023-01-24 00:32:19.118173: step: 884/466, loss: 1.3069440126419067 2023-01-24 00:32:19.732564: step: 886/466, loss: 2.0562679767608643 2023-01-24 00:32:20.401380: step: 888/466, loss: 0.3450886905193329 2023-01-24 00:32:21.001832: step: 890/466, loss: 0.26835256814956665 2023-01-24 00:32:21.624039: step: 892/466, loss: 1.6029860973358154 2023-01-24 00:32:22.244716: step: 894/466, loss: 8.579366683959961 2023-01-24 00:32:22.802973: step: 896/466, loss: 3.9586119651794434 2023-01-24 00:32:23.401059: step: 898/466, loss: 7.937643527984619 2023-01-24 00:32:23.991654: step: 900/466, loss: 8.824544906616211 2023-01-24 00:32:24.579540: step: 902/466, loss: 0.5659041404724121 2023-01-24 00:32:25.225440: step: 904/466, loss: 0.9789327383041382 2023-01-24 00:32:25.830026: step: 906/466, loss: 2.2036490440368652 2023-01-24 00:32:26.429485: step: 908/466, loss: 1.389280915260315 2023-01-24 00:32:27.018930: step: 910/466, loss: 0.3952607810497284 2023-01-24 00:32:27.608642: step: 912/466, loss: 1.1622967720031738 2023-01-24 00:32:28.265640: step: 914/466, loss: 1.0768131017684937 2023-01-24 00:32:28.910991: step: 916/466, loss: 1.861532211303711 2023-01-24 00:32:29.549563: step: 918/466, loss: 1.5120859146118164 2023-01-24 00:32:30.198147: step: 920/466, loss: 0.650384247303009 2023-01-24 00:32:30.787806: step: 922/466, loss: 1.4881584644317627 2023-01-24 00:32:31.426063: step: 924/466, loss: 5.14249849319458 2023-01-24 00:32:32.039634: step: 926/466, loss: 6.586747646331787 2023-01-24 00:32:32.628636: step: 928/466, loss: 2.0965332984924316 2023-01-24 00:32:33.312478: step: 930/466, loss: 0.7308897376060486 2023-01-24 00:32:33.969130: step: 932/466, loss: 0.4634906053543091 ================================================== Loss: 2.258 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.38679331129366196, 'r': 0.2039987407390013, 'f1': 0.2671171629973556}, 'combined': 0.19682317273489358, 'epoch': 1} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32556260045666824, 'r': 0.15588254359106476, 'f1': 0.21082163517376934}, 'combined': 0.13200980894058456, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3622257161602919, 'r': 0.2095294312760857, 'f1': 0.26548759076658524}, 'combined': 0.19562243530169438, 'epoch': 1} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3311317945498736, 'r': 0.15854912795850576, 'f1': 0.2144280279341255}, 'combined': 0.1328849750577679, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.360659200968523, 'r': 0.2015046672077922, 'f1': 0.25855276861655957}, 'combined': 0.19051256634904387, 'epoch': 1} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.37830051778663865, 'r': 0.14920336306205872, 'f1': 0.21400301135632502}, 'combined': 0.1419623936720176, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.14285714285714285, 'f1': 0.22727272727272727}, 'combined': 0.1515151515151515, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39285714285714285, 'r': 0.09482758620689655, 'f1': 0.15277777777777776}, 'combined': 0.10185185185185183, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.38679331129366196, 'r': 0.2039987407390013, 'f1': 0.2671171629973556}, 'combined': 0.19682317273489358, 'epoch': 1} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32556260045666824, 'r': 0.15588254359106476, 'f1': 0.21082163517376934}, 'combined': 0.13200980894058456, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.14285714285714285, 'f1': 0.22727272727272727}, 'combined': 0.1515151515151515, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3622257161602919, 'r': 0.2095294312760857, 'f1': 0.26548759076658524}, 'combined': 0.19562243530169438, 'epoch': 1} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3311317945498736, 'r': 0.15854912795850576, 'f1': 0.2144280279341255}, 'combined': 0.1328849750577679, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.360659200968523, 'r': 0.2015046672077922, 'f1': 0.25855276861655957}, 'combined': 0.19051256634904387, 'epoch': 1} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.37830051778663865, 'r': 0.14920336306205872, 'f1': 0.21400301135632502}, 'combined': 0.1419623936720176, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39285714285714285, 'r': 0.09482758620689655, 'f1': 0.15277777777777776}, 'combined': 0.10185185185185183, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:35:33.968140: step: 2/466, loss: 1.0007905960083008 2023-01-24 00:35:34.592905: step: 4/466, loss: 1.2795113325119019 2023-01-24 00:35:35.271616: step: 6/466, loss: 1.0669078826904297 2023-01-24 00:35:35.893072: step: 8/466, loss: 1.4870511293411255 2023-01-24 00:35:36.475853: step: 10/466, loss: 0.9705473184585571 2023-01-24 00:35:37.212240: step: 12/466, loss: 1.5953006744384766 2023-01-24 00:35:37.870621: step: 14/466, loss: 0.5289072394371033 2023-01-24 00:35:38.492889: step: 16/466, loss: 3.811513900756836 2023-01-24 00:35:39.101585: step: 18/466, loss: 1.093912124633789 2023-01-24 00:35:39.743234: step: 20/466, loss: 1.7505240440368652 2023-01-24 00:35:40.392223: step: 22/466, loss: 0.5484552979469299 2023-01-24 00:35:41.055749: step: 24/466, loss: 0.9728226661682129 2023-01-24 00:35:41.672719: step: 26/466, loss: 2.6790177822113037 2023-01-24 00:35:42.365889: step: 28/466, loss: 0.8624144792556763 2023-01-24 00:35:43.032764: step: 30/466, loss: 1.5027284622192383 2023-01-24 00:35:43.627541: step: 32/466, loss: 1.7417893409729004 2023-01-24 00:35:44.277079: step: 34/466, loss: 2.7208542823791504 2023-01-24 00:35:44.877489: step: 36/466, loss: 0.5122177004814148 2023-01-24 00:35:45.460446: step: 38/466, loss: 2.3946847915649414 2023-01-24 00:35:46.110178: step: 40/466, loss: 0.323329895734787 2023-01-24 00:35:46.737374: step: 42/466, loss: 0.25720643997192383 2023-01-24 00:35:47.385062: step: 44/466, loss: 2.548123359680176 2023-01-24 00:35:48.046483: step: 46/466, loss: 1.8145484924316406 2023-01-24 00:35:48.717176: step: 48/466, loss: 0.6130473613739014 2023-01-24 00:35:49.419639: step: 50/466, loss: 1.706362247467041 2023-01-24 00:35:50.065700: step: 52/466, loss: 0.9382619261741638 2023-01-24 00:35:50.690712: step: 54/466, loss: 0.4019174575805664 2023-01-24 00:35:51.264429: step: 56/466, loss: 0.6776026487350464 2023-01-24 00:35:51.871992: step: 58/466, loss: 0.9875388741493225 2023-01-24 00:35:52.515131: step: 60/466, loss: 6.833479881286621 2023-01-24 00:35:53.092347: step: 62/466, loss: 1.4245370626449585 2023-01-24 00:35:53.663183: step: 64/466, loss: 2.217290163040161 2023-01-24 00:35:54.362128: step: 66/466, loss: 4.7894487380981445 2023-01-24 00:35:54.993545: step: 68/466, loss: 1.358396291732788 2023-01-24 00:35:55.612180: step: 70/466, loss: 0.8565866947174072 2023-01-24 00:35:56.252650: step: 72/466, loss: 4.278141498565674 2023-01-24 00:35:56.921221: step: 74/466, loss: 0.4032544791698456 2023-01-24 00:35:57.565585: step: 76/466, loss: 2.2673137187957764 2023-01-24 00:35:58.191481: step: 78/466, loss: 3.062309741973877 2023-01-24 00:35:58.787096: step: 80/466, loss: 5.96394681930542 2023-01-24 00:35:59.415910: step: 82/466, loss: 0.931776225566864 2023-01-24 00:36:00.070271: step: 84/466, loss: 1.801688551902771 2023-01-24 00:36:00.641112: step: 86/466, loss: 3.738779067993164 2023-01-24 00:36:01.283207: step: 88/466, loss: 0.6716089844703674 2023-01-24 00:36:01.860943: step: 90/466, loss: 1.2108234167099 2023-01-24 00:36:02.461906: step: 92/466, loss: 2.0988945960998535 2023-01-24 00:36:03.144701: step: 94/466, loss: 0.8946915864944458 2023-01-24 00:36:03.780154: step: 96/466, loss: 2.214448928833008 2023-01-24 00:36:04.406534: step: 98/466, loss: 1.5128629207611084 2023-01-24 00:36:05.007250: step: 100/466, loss: 0.4282916188240051 2023-01-24 00:36:05.653896: step: 102/466, loss: 2.5158019065856934 2023-01-24 00:36:06.329471: step: 104/466, loss: 0.8134267926216125 2023-01-24 00:36:06.935163: step: 106/466, loss: 1.235172986984253 2023-01-24 00:36:07.604080: step: 108/466, loss: 2.3581175804138184 2023-01-24 00:36:08.222210: step: 110/466, loss: 0.6340959668159485 2023-01-24 00:36:08.789942: step: 112/466, loss: 0.33954349160194397 2023-01-24 00:36:09.418704: step: 114/466, loss: 3.7588131427764893 2023-01-24 00:36:10.045453: step: 116/466, loss: 1.792893648147583 2023-01-24 00:36:10.753954: step: 118/466, loss: 1.8706941604614258 2023-01-24 00:36:11.375796: step: 120/466, loss: 0.8998050093650818 2023-01-24 00:36:12.016141: step: 122/466, loss: 2.079965829849243 2023-01-24 00:36:12.658107: step: 124/466, loss: 1.328823208808899 2023-01-24 00:36:13.302138: step: 126/466, loss: 0.9241964817047119 2023-01-24 00:36:13.820095: step: 128/466, loss: 2.9763731956481934 2023-01-24 00:36:14.400185: step: 130/466, loss: 0.8257662653923035 2023-01-24 00:36:15.074376: step: 132/466, loss: 4.552613258361816 2023-01-24 00:36:15.731341: step: 134/466, loss: 0.3572910726070404 2023-01-24 00:36:16.310266: step: 136/466, loss: 0.5996128916740417 2023-01-24 00:36:16.984645: step: 138/466, loss: 4.204519271850586 2023-01-24 00:36:17.615633: step: 140/466, loss: 0.29403766989707947 2023-01-24 00:36:18.309593: step: 142/466, loss: 5.7278218269348145 2023-01-24 00:36:18.941103: step: 144/466, loss: 3.6073617935180664 2023-01-24 00:36:19.544828: step: 146/466, loss: 0.6985445022583008 2023-01-24 00:36:20.199065: step: 148/466, loss: 5.286581993103027 2023-01-24 00:36:20.812475: step: 150/466, loss: 0.6464492678642273 2023-01-24 00:36:21.458087: step: 152/466, loss: 0.5447022914886475 2023-01-24 00:36:22.133061: step: 154/466, loss: 0.7505475878715515 2023-01-24 00:36:22.749140: step: 156/466, loss: 1.7825226783752441 2023-01-24 00:36:23.446535: step: 158/466, loss: 6.1614603996276855 2023-01-24 00:36:24.068276: step: 160/466, loss: 2.721989154815674 2023-01-24 00:36:24.735348: step: 162/466, loss: 0.7275187373161316 2023-01-24 00:36:25.289105: step: 164/466, loss: 2.613816499710083 2023-01-24 00:36:25.890931: step: 166/466, loss: 2.0387091636657715 2023-01-24 00:36:26.533873: step: 168/466, loss: 0.9608566761016846 2023-01-24 00:36:27.220218: step: 170/466, loss: 0.5621725916862488 2023-01-24 00:36:27.878052: step: 172/466, loss: 0.38025912642478943 2023-01-24 00:36:28.525929: step: 174/466, loss: 2.2258999347686768 2023-01-24 00:36:29.168428: step: 176/466, loss: 1.0006108283996582 2023-01-24 00:36:29.787628: step: 178/466, loss: 1.6930632591247559 2023-01-24 00:36:30.365220: step: 180/466, loss: 0.9917345643043518 2023-01-24 00:36:30.986392: step: 182/466, loss: 1.0644450187683105 2023-01-24 00:36:31.633344: step: 184/466, loss: 5.928044319152832 2023-01-24 00:36:32.279856: step: 186/466, loss: 1.5287227630615234 2023-01-24 00:36:32.927939: step: 188/466, loss: 0.4561346173286438 2023-01-24 00:36:33.536426: step: 190/466, loss: 1.8703564405441284 2023-01-24 00:36:34.127382: step: 192/466, loss: 0.7041752338409424 2023-01-24 00:36:34.734668: step: 194/466, loss: 0.954352855682373 2023-01-24 00:36:35.374885: step: 196/466, loss: 4.054781436920166 2023-01-24 00:36:35.995983: step: 198/466, loss: 2.8203413486480713 2023-01-24 00:36:36.653140: step: 200/466, loss: 1.3781147003173828 2023-01-24 00:36:37.286902: step: 202/466, loss: 0.9691088199615479 2023-01-24 00:36:37.908371: step: 204/466, loss: 0.4873339533805847 2023-01-24 00:36:38.498546: step: 206/466, loss: 0.4727945923805237 2023-01-24 00:36:39.094414: step: 208/466, loss: 1.238034963607788 2023-01-24 00:36:39.730546: step: 210/466, loss: 0.9873964786529541 2023-01-24 00:36:40.367085: step: 212/466, loss: 1.174421787261963 2023-01-24 00:36:40.992328: step: 214/466, loss: 0.6589047908782959 2023-01-24 00:36:41.567299: step: 216/466, loss: 0.8934488892555237 2023-01-24 00:36:42.226616: step: 218/466, loss: 0.25499317049980164 2023-01-24 00:36:42.885419: step: 220/466, loss: 2.4782047271728516 2023-01-24 00:36:43.523979: step: 222/466, loss: 2.103694438934326 2023-01-24 00:36:44.111756: step: 224/466, loss: 1.934326410293579 2023-01-24 00:36:44.763858: step: 226/466, loss: 1.7219918966293335 2023-01-24 00:36:45.434246: step: 228/466, loss: 6.521765232086182 2023-01-24 00:36:46.067868: step: 230/466, loss: 0.5097403526306152 2023-01-24 00:36:46.808729: step: 232/466, loss: 1.2184298038482666 2023-01-24 00:36:47.423672: step: 234/466, loss: 1.6723867654800415 2023-01-24 00:36:48.019528: step: 236/466, loss: 0.8256341814994812 2023-01-24 00:36:48.658048: step: 238/466, loss: 1.6084046363830566 2023-01-24 00:36:49.256257: step: 240/466, loss: 1.970080852508545 2023-01-24 00:36:49.831603: step: 242/466, loss: 7.471807479858398 2023-01-24 00:36:50.412297: step: 244/466, loss: 3.635483741760254 2023-01-24 00:36:51.083995: step: 246/466, loss: 6.248327732086182 2023-01-24 00:36:51.722924: step: 248/466, loss: 0.9783942699432373 2023-01-24 00:36:52.326245: step: 250/466, loss: 0.5623174905776978 2023-01-24 00:36:52.973345: step: 252/466, loss: 1.348527431488037 2023-01-24 00:36:53.614318: step: 254/466, loss: 5.015925407409668 2023-01-24 00:36:54.249042: step: 256/466, loss: 0.29038581252098083 2023-01-24 00:36:54.881943: step: 258/466, loss: 1.9894683361053467 2023-01-24 00:36:55.486785: step: 260/466, loss: 1.0170519351959229 2023-01-24 00:36:56.111663: step: 262/466, loss: 1.5105358362197876 2023-01-24 00:36:56.654522: step: 264/466, loss: 1.338410496711731 2023-01-24 00:36:57.495326: step: 266/466, loss: 1.3438609838485718 2023-01-24 00:36:58.165861: step: 268/466, loss: 1.1082217693328857 2023-01-24 00:36:58.801150: step: 270/466, loss: 0.8171354532241821 2023-01-24 00:36:59.368818: step: 272/466, loss: 1.6580418348312378 2023-01-24 00:37:00.015922: step: 274/466, loss: 0.27594417333602905 2023-01-24 00:37:00.655258: step: 276/466, loss: 1.3367584943771362 2023-01-24 00:37:01.261153: step: 278/466, loss: 7.378208160400391 2023-01-24 00:37:01.923456: step: 280/466, loss: 1.6158980131149292 2023-01-24 00:37:02.584993: step: 282/466, loss: 0.30594396591186523 2023-01-24 00:37:03.231477: step: 284/466, loss: 0.8640335202217102 2023-01-24 00:37:03.842751: step: 286/466, loss: 2.3039379119873047 2023-01-24 00:37:04.427935: step: 288/466, loss: 0.6274563074111938 2023-01-24 00:37:05.024975: step: 290/466, loss: 4.776246070861816 2023-01-24 00:37:05.658713: step: 292/466, loss: 2.09666109085083 2023-01-24 00:37:06.251052: step: 294/466, loss: 0.856423020362854 2023-01-24 00:37:06.903075: step: 296/466, loss: 0.7674209475517273 2023-01-24 00:37:07.510601: step: 298/466, loss: 1.4367313385009766 2023-01-24 00:37:08.171001: step: 300/466, loss: 7.290987014770508 2023-01-24 00:37:08.845029: step: 302/466, loss: 1.410239338874817 2023-01-24 00:37:09.484244: step: 304/466, loss: 2.5137248039245605 2023-01-24 00:37:10.087557: step: 306/466, loss: 3.087893009185791 2023-01-24 00:37:10.758808: step: 308/466, loss: 2.1465885639190674 2023-01-24 00:37:11.404484: step: 310/466, loss: 0.7133780121803284 2023-01-24 00:37:12.079780: step: 312/466, loss: 1.4325982332229614 2023-01-24 00:37:12.703279: step: 314/466, loss: 1.3673746585845947 2023-01-24 00:37:13.327948: step: 316/466, loss: 2.9142794609069824 2023-01-24 00:37:13.987680: step: 318/466, loss: 0.836786150932312 2023-01-24 00:37:14.522591: step: 320/466, loss: 3.114619731903076 2023-01-24 00:37:15.097899: step: 322/466, loss: 0.8955129384994507 2023-01-24 00:37:15.715296: step: 324/466, loss: 0.37643033266067505 2023-01-24 00:37:16.305690: step: 326/466, loss: 3.5487020015716553 2023-01-24 00:37:16.864803: step: 328/466, loss: 0.6180459856987 2023-01-24 00:37:17.541484: step: 330/466, loss: 1.730918526649475 2023-01-24 00:37:18.175701: step: 332/466, loss: 0.4487214982509613 2023-01-24 00:37:18.739191: step: 334/466, loss: 1.153430461883545 2023-01-24 00:37:19.395246: step: 336/466, loss: 4.707362174987793 2023-01-24 00:37:20.140399: step: 338/466, loss: 0.5890976190567017 2023-01-24 00:37:20.691205: step: 340/466, loss: 0.44397884607315063 2023-01-24 00:37:21.273129: step: 342/466, loss: 1.1188628673553467 2023-01-24 00:37:21.926230: step: 344/466, loss: 1.7424533367156982 2023-01-24 00:37:22.567784: step: 346/466, loss: 0.40749967098236084 2023-01-24 00:37:23.236563: step: 348/466, loss: 0.6263022422790527 2023-01-24 00:37:23.882432: step: 350/466, loss: 7.1182403564453125 2023-01-24 00:37:24.535576: step: 352/466, loss: 1.014331579208374 2023-01-24 00:37:25.108536: step: 354/466, loss: 0.2972826063632965 2023-01-24 00:37:25.774946: step: 356/466, loss: 2.3395979404449463 2023-01-24 00:37:26.435640: step: 358/466, loss: 0.20275095105171204 2023-01-24 00:37:27.011170: step: 360/466, loss: 1.164963960647583 2023-01-24 00:37:27.628133: step: 362/466, loss: 1.2121949195861816 2023-01-24 00:37:28.264315: step: 364/466, loss: 1.8604505062103271 2023-01-24 00:37:28.933434: step: 366/466, loss: 1.8596770763397217 2023-01-24 00:37:29.510951: step: 368/466, loss: 0.3896671533584595 2023-01-24 00:37:30.139077: step: 370/466, loss: 0.41178327798843384 2023-01-24 00:37:30.746717: step: 372/466, loss: 1.0141046047210693 2023-01-24 00:37:31.430629: step: 374/466, loss: 4.194189071655273 2023-01-24 00:37:32.079506: step: 376/466, loss: 0.7731515169143677 2023-01-24 00:37:32.703468: step: 378/466, loss: 1.2757165431976318 2023-01-24 00:37:33.379619: step: 380/466, loss: 0.7043675780296326 2023-01-24 00:37:34.034940: step: 382/466, loss: 0.7703564763069153 2023-01-24 00:37:34.729093: step: 384/466, loss: 4.547868728637695 2023-01-24 00:37:35.372275: step: 386/466, loss: 1.9865307807922363 2023-01-24 00:37:35.957557: step: 388/466, loss: 1.0352140665054321 2023-01-24 00:37:36.529125: step: 390/466, loss: 0.7538135647773743 2023-01-24 00:37:37.150811: step: 392/466, loss: 0.7600349187850952 2023-01-24 00:37:37.843279: step: 394/466, loss: 3.319429397583008 2023-01-24 00:37:38.457171: step: 396/466, loss: 1.6095876693725586 2023-01-24 00:37:39.153541: step: 398/466, loss: 0.44667908549308777 2023-01-24 00:37:39.750557: step: 400/466, loss: 2.0603199005126953 2023-01-24 00:37:40.339403: step: 402/466, loss: 0.6938265562057495 2023-01-24 00:37:40.920577: step: 404/466, loss: 1.0607950687408447 2023-01-24 00:37:41.587696: step: 406/466, loss: 2.9250197410583496 2023-01-24 00:37:42.276224: step: 408/466, loss: 0.7286068797111511 2023-01-24 00:37:42.906501: step: 410/466, loss: 0.30672362446784973 2023-01-24 00:37:43.534853: step: 412/466, loss: 4.567588806152344 2023-01-24 00:37:44.214255: step: 414/466, loss: 0.5163266658782959 2023-01-24 00:37:44.884417: step: 416/466, loss: 0.4423064589500427 2023-01-24 00:37:45.500526: step: 418/466, loss: 3.9670162200927734 2023-01-24 00:37:46.183880: step: 420/466, loss: 2.407794713973999 2023-01-24 00:37:46.873673: step: 422/466, loss: 1.1102468967437744 2023-01-24 00:37:47.537251: step: 424/466, loss: 0.608360230922699 2023-01-24 00:37:48.099513: step: 426/466, loss: 1.0862195491790771 2023-01-24 00:37:48.723040: step: 428/466, loss: 1.6679542064666748 2023-01-24 00:37:49.319073: step: 430/466, loss: 1.446641206741333 2023-01-24 00:37:49.956199: step: 432/466, loss: 0.6515058279037476 2023-01-24 00:37:50.555128: step: 434/466, loss: 1.2712185382843018 2023-01-24 00:37:51.134573: step: 436/466, loss: 0.557579517364502 2023-01-24 00:37:51.750473: step: 438/466, loss: 2.2417097091674805 2023-01-24 00:37:52.334924: step: 440/466, loss: 1.2886192798614502 2023-01-24 00:37:52.996168: step: 442/466, loss: 2.7027368545532227 2023-01-24 00:37:53.620572: step: 444/466, loss: 1.0164638757705688 2023-01-24 00:37:54.244083: step: 446/466, loss: 1.0663093328475952 2023-01-24 00:37:54.876481: step: 448/466, loss: 0.3144289553165436 2023-01-24 00:37:55.526960: step: 450/466, loss: 1.0528595447540283 2023-01-24 00:37:56.209918: step: 452/466, loss: 0.9924498200416565 2023-01-24 00:37:56.883661: step: 454/466, loss: 0.743733823299408 2023-01-24 00:37:57.464302: step: 456/466, loss: 1.4638175964355469 2023-01-24 00:37:58.066474: step: 458/466, loss: 2.861349105834961 2023-01-24 00:37:58.653470: step: 460/466, loss: 1.7835991382598877 2023-01-24 00:37:59.276750: step: 462/466, loss: 0.4124907851219177 2023-01-24 00:37:59.952602: step: 464/466, loss: 1.207486629486084 2023-01-24 00:38:00.607246: step: 466/466, loss: 0.31861570477485657 2023-01-24 00:38:01.279356: step: 468/466, loss: 1.9745697975158691 2023-01-24 00:38:01.862744: step: 470/466, loss: 2.2617297172546387 2023-01-24 00:38:02.465540: step: 472/466, loss: 2.919684410095215 2023-01-24 00:38:03.158886: step: 474/466, loss: 3.470987558364868 2023-01-24 00:38:03.784689: step: 476/466, loss: 0.8952714204788208 2023-01-24 00:38:04.464436: step: 478/466, loss: 2.6644530296325684 2023-01-24 00:38:05.100900: step: 480/466, loss: 2.006141185760498 2023-01-24 00:38:05.713148: step: 482/466, loss: 4.732282638549805 2023-01-24 00:38:06.356078: step: 484/466, loss: 1.192002773284912 2023-01-24 00:38:06.999378: step: 486/466, loss: 2.153209686279297 2023-01-24 00:38:07.572975: step: 488/466, loss: 1.0872876644134521 2023-01-24 00:38:08.214054: step: 490/466, loss: 2.7692017555236816 2023-01-24 00:38:08.820549: step: 492/466, loss: 0.38183367252349854 2023-01-24 00:38:09.416355: step: 494/466, loss: 1.6027545928955078 2023-01-24 00:38:10.025430: step: 496/466, loss: 0.4790688157081604 2023-01-24 00:38:10.648961: step: 498/466, loss: 0.7380142211914062 2023-01-24 00:38:11.339214: step: 500/466, loss: 3.60632586479187 2023-01-24 00:38:12.081336: step: 502/466, loss: 1.2529616355895996 2023-01-24 00:38:12.702000: step: 504/466, loss: 2.807490348815918 2023-01-24 00:38:13.285425: step: 506/466, loss: 1.3200613260269165 2023-01-24 00:38:13.917235: step: 508/466, loss: 5.073716163635254 2023-01-24 00:38:14.543680: step: 510/466, loss: 0.47445380687713623 2023-01-24 00:38:15.177174: step: 512/466, loss: 0.7447572350502014 2023-01-24 00:38:15.799917: step: 514/466, loss: 2.9716830253601074 2023-01-24 00:38:16.372781: step: 516/466, loss: 1.3143442869186401 2023-01-24 00:38:17.016409: step: 518/466, loss: 3.1109838485717773 2023-01-24 00:38:17.760042: step: 520/466, loss: 0.7308715581893921 2023-01-24 00:38:18.375388: step: 522/466, loss: 2.5015788078308105 2023-01-24 00:38:19.076127: step: 524/466, loss: 1.1616835594177246 2023-01-24 00:38:19.667880: step: 526/466, loss: 1.4940903186798096 2023-01-24 00:38:20.295816: step: 528/466, loss: 1.7208335399627686 2023-01-24 00:38:20.877528: step: 530/466, loss: 2.474591016769409 2023-01-24 00:38:21.487105: step: 532/466, loss: 2.384066581726074 2023-01-24 00:38:22.160220: step: 534/466, loss: 0.5469551086425781 2023-01-24 00:38:22.789024: step: 536/466, loss: 0.5962178707122803 2023-01-24 00:38:23.442832: step: 538/466, loss: 1.3938179016113281 2023-01-24 00:38:24.103814: step: 540/466, loss: 1.20782470703125 2023-01-24 00:38:24.727311: step: 542/466, loss: 0.7852320075035095 2023-01-24 00:38:25.349513: step: 544/466, loss: 0.7039216160774231 2023-01-24 00:38:25.991254: step: 546/466, loss: 0.3611880838871002 2023-01-24 00:38:26.589750: step: 548/466, loss: 0.287416934967041 2023-01-24 00:38:27.257188: step: 550/466, loss: 0.48172226548194885 2023-01-24 00:38:27.826513: step: 552/466, loss: 1.6074293851852417 2023-01-24 00:38:28.426818: step: 554/466, loss: 0.7348114848136902 2023-01-24 00:38:29.062642: step: 556/466, loss: 1.5510615110397339 2023-01-24 00:38:29.707105: step: 558/466, loss: 1.1155942678451538 2023-01-24 00:38:30.335631: step: 560/466, loss: 0.3913913667201996 2023-01-24 00:38:30.989513: step: 562/466, loss: 0.7518240809440613 2023-01-24 00:38:31.596810: step: 564/466, loss: 0.8426482677459717 2023-01-24 00:38:32.219130: step: 566/466, loss: 0.9242572784423828 2023-01-24 00:38:32.808009: step: 568/466, loss: 1.272059440612793 2023-01-24 00:38:33.388524: step: 570/466, loss: 0.3866838216781616 2023-01-24 00:38:34.023836: step: 572/466, loss: 1.8356395959854126 2023-01-24 00:38:34.628818: step: 574/466, loss: 1.0556504726409912 2023-01-24 00:38:35.227047: step: 576/466, loss: 0.3894895613193512 2023-01-24 00:38:35.858823: step: 578/466, loss: 1.6263072490692139 2023-01-24 00:38:36.534856: step: 580/466, loss: 0.590576708316803 2023-01-24 00:38:37.186337: step: 582/466, loss: 0.2043585479259491 2023-01-24 00:38:37.820433: step: 584/466, loss: 0.3871784806251526 2023-01-24 00:38:38.438937: step: 586/466, loss: 0.532533586025238 2023-01-24 00:38:39.012293: step: 588/466, loss: 0.3990240693092346 2023-01-24 00:38:39.610964: step: 590/466, loss: 1.0591825246810913 2023-01-24 00:38:40.262690: step: 592/466, loss: 0.41628965735435486 2023-01-24 00:38:40.894907: step: 594/466, loss: 0.5125265121459961 2023-01-24 00:38:41.470980: step: 596/466, loss: 0.779667317867279 2023-01-24 00:38:42.064057: step: 598/466, loss: 0.9149830341339111 2023-01-24 00:38:42.652792: step: 600/466, loss: 0.9261771440505981 2023-01-24 00:38:43.338110: step: 602/466, loss: 0.8956090211868286 2023-01-24 00:38:43.958348: step: 604/466, loss: 1.623434066772461 2023-01-24 00:38:44.576484: step: 606/466, loss: 0.27385449409484863 2023-01-24 00:38:45.158579: step: 608/466, loss: 0.6099010109901428 2023-01-24 00:38:45.758566: step: 610/466, loss: 13.654857635498047 2023-01-24 00:38:46.365003: step: 612/466, loss: 0.9391835331916809 2023-01-24 00:38:47.010678: step: 614/466, loss: 1.1547439098358154 2023-01-24 00:38:47.621227: step: 616/466, loss: 0.310383677482605 2023-01-24 00:38:48.200638: step: 618/466, loss: 1.902803897857666 2023-01-24 00:38:48.763864: step: 620/466, loss: 1.2837306261062622 2023-01-24 00:38:49.431554: step: 622/466, loss: 0.4341290593147278 2023-01-24 00:38:50.052534: step: 624/466, loss: 0.219343900680542 2023-01-24 00:38:50.694602: step: 626/466, loss: 0.5104776620864868 2023-01-24 00:38:51.325678: step: 628/466, loss: 1.1290161609649658 2023-01-24 00:38:52.001952: step: 630/466, loss: 0.6613913178443909 2023-01-24 00:38:52.560805: step: 632/466, loss: 0.8362389802932739 2023-01-24 00:38:53.136887: step: 634/466, loss: 0.9189055562019348 2023-01-24 00:38:53.757325: step: 636/466, loss: 1.5628864765167236 2023-01-24 00:38:54.399698: step: 638/466, loss: 0.7641006708145142 2023-01-24 00:38:54.995323: step: 640/466, loss: 0.5106600522994995 2023-01-24 00:38:55.618082: step: 642/466, loss: 0.38074567914009094 2023-01-24 00:38:56.402882: step: 644/466, loss: 1.1373883485794067 2023-01-24 00:38:57.010585: step: 646/466, loss: 1.6016144752502441 2023-01-24 00:38:57.615193: step: 648/466, loss: 0.6494314670562744 2023-01-24 00:38:58.211002: step: 650/466, loss: 1.4632269144058228 2023-01-24 00:38:58.848237: step: 652/466, loss: 0.897118866443634 2023-01-24 00:38:59.456406: step: 654/466, loss: 4.597709655761719 2023-01-24 00:39:00.050432: step: 656/466, loss: 1.9993581771850586 2023-01-24 00:39:00.770331: step: 658/466, loss: 0.49946463108062744 2023-01-24 00:39:01.360821: step: 660/466, loss: 1.2993544340133667 2023-01-24 00:39:02.025357: step: 662/466, loss: 0.7454898357391357 2023-01-24 00:39:02.618636: step: 664/466, loss: 1.7169160842895508 2023-01-24 00:39:03.239378: step: 666/466, loss: 1.1759848594665527 2023-01-24 00:39:03.905081: step: 668/466, loss: 8.05697250366211 2023-01-24 00:39:04.509426: step: 670/466, loss: 0.5740476250648499 2023-01-24 00:39:05.201734: step: 672/466, loss: 2.474055290222168 2023-01-24 00:39:05.797797: step: 674/466, loss: 0.395702600479126 2023-01-24 00:39:06.367509: step: 676/466, loss: 1.6007153987884521 2023-01-24 00:39:06.970168: step: 678/466, loss: 0.2818623185157776 2023-01-24 00:39:07.557952: step: 680/466, loss: 4.446594715118408 2023-01-24 00:39:08.246649: step: 682/466, loss: 1.9578866958618164 2023-01-24 00:39:08.890479: step: 684/466, loss: 1.037970781326294 2023-01-24 00:39:09.474748: step: 686/466, loss: 0.7141876220703125 2023-01-24 00:39:10.094322: step: 688/466, loss: 2.255034923553467 2023-01-24 00:39:10.767866: step: 690/466, loss: 10.886655807495117 2023-01-24 00:39:11.403325: step: 692/466, loss: 1.1650443077087402 2023-01-24 00:39:12.079126: step: 694/466, loss: 5.413592338562012 2023-01-24 00:39:12.783811: step: 696/466, loss: 0.4792379140853882 2023-01-24 00:39:13.588838: step: 698/466, loss: 2.427898645401001 2023-01-24 00:39:14.215325: step: 700/466, loss: 3.009459972381592 2023-01-24 00:39:14.838824: step: 702/466, loss: 1.1028814315795898 2023-01-24 00:39:15.433531: step: 704/466, loss: 1.290452241897583 2023-01-24 00:39:16.046719: step: 706/466, loss: 1.0856568813323975 2023-01-24 00:39:16.699358: step: 708/466, loss: 5.001242160797119 2023-01-24 00:39:17.286022: step: 710/466, loss: 0.9065005779266357 2023-01-24 00:39:17.937960: step: 712/466, loss: 3.011390209197998 2023-01-24 00:39:18.586440: step: 714/466, loss: 1.2671661376953125 2023-01-24 00:39:19.306377: step: 716/466, loss: 1.3516714572906494 2023-01-24 00:39:19.939946: step: 718/466, loss: 0.7642638087272644 2023-01-24 00:39:20.528545: step: 720/466, loss: 2.3630306720733643 2023-01-24 00:39:21.130539: step: 722/466, loss: 4.939443588256836 2023-01-24 00:39:21.785411: step: 724/466, loss: 3.4752659797668457 2023-01-24 00:39:22.431793: step: 726/466, loss: 0.9839559197425842 2023-01-24 00:39:23.037350: step: 728/466, loss: 0.7253423929214478 2023-01-24 00:39:23.581139: step: 730/466, loss: 0.8835292458534241 2023-01-24 00:39:24.164085: step: 732/466, loss: 12.320352554321289 2023-01-24 00:39:24.740711: step: 734/466, loss: 0.40207937359809875 2023-01-24 00:39:25.392809: step: 736/466, loss: 1.4247490167617798 2023-01-24 00:39:26.047772: step: 738/466, loss: 1.3991518020629883 2023-01-24 00:39:26.672818: step: 740/466, loss: 0.4203867018222809 2023-01-24 00:39:27.315125: step: 742/466, loss: 1.7171530723571777 2023-01-24 00:39:27.986787: step: 744/466, loss: 0.23868979513645172 2023-01-24 00:39:28.649976: step: 746/466, loss: 0.8822340965270996 2023-01-24 00:39:29.246043: step: 748/466, loss: 1.2991575002670288 2023-01-24 00:39:29.838353: step: 750/466, loss: 0.4201277196407318 2023-01-24 00:39:30.560732: step: 752/466, loss: 0.22441129386425018 2023-01-24 00:39:31.179811: step: 754/466, loss: 0.4418560266494751 2023-01-24 00:39:31.862381: step: 756/466, loss: 1.5701426267623901 2023-01-24 00:39:32.478814: step: 758/466, loss: 1.8360600471496582 2023-01-24 00:39:33.090333: step: 760/466, loss: 1.4463838338851929 2023-01-24 00:39:33.713549: step: 762/466, loss: 0.6107949018478394 2023-01-24 00:39:34.316052: step: 764/466, loss: 0.6653521060943604 2023-01-24 00:39:34.934422: step: 766/466, loss: 0.731359601020813 2023-01-24 00:39:35.559608: step: 768/466, loss: 1.046613097190857 2023-01-24 00:39:36.166977: step: 770/466, loss: 0.967674970626831 2023-01-24 00:39:36.813652: step: 772/466, loss: 0.46457409858703613 2023-01-24 00:39:37.456884: step: 774/466, loss: 0.9775880575180054 2023-01-24 00:39:38.065085: step: 776/466, loss: 1.8870127201080322 2023-01-24 00:39:38.654905: step: 778/466, loss: 0.9271981716156006 2023-01-24 00:39:39.309099: step: 780/466, loss: 0.6026844382286072 2023-01-24 00:39:39.973390: step: 782/466, loss: 0.5312908887863159 2023-01-24 00:39:40.577917: step: 784/466, loss: 1.074860692024231 2023-01-24 00:39:41.170883: step: 786/466, loss: 0.8712797164916992 2023-01-24 00:39:41.761273: step: 788/466, loss: 0.5918893814086914 2023-01-24 00:39:42.466257: step: 790/466, loss: 4.984042644500732 2023-01-24 00:39:43.068556: step: 792/466, loss: 1.0119539499282837 2023-01-24 00:39:43.677851: step: 794/466, loss: 0.4847181439399719 2023-01-24 00:39:44.333493: step: 796/466, loss: 1.4674978256225586 2023-01-24 00:39:44.996171: step: 798/466, loss: 1.7132019996643066 2023-01-24 00:39:45.610811: step: 800/466, loss: 1.2459526062011719 2023-01-24 00:39:46.226987: step: 802/466, loss: 1.554520606994629 2023-01-24 00:39:46.890632: step: 804/466, loss: 0.8004475831985474 2023-01-24 00:39:47.456907: step: 806/466, loss: 1.0957187414169312 2023-01-24 00:39:48.102977: step: 808/466, loss: 1.0078699588775635 2023-01-24 00:39:48.749471: step: 810/466, loss: 0.3722212314605713 2023-01-24 00:39:49.294534: step: 812/466, loss: 1.91087806224823 2023-01-24 00:39:49.905272: step: 814/466, loss: 0.80485600233078 2023-01-24 00:39:50.499368: step: 816/466, loss: 0.5591756701469421 2023-01-24 00:39:51.128985: step: 818/466, loss: 1.6538398265838623 2023-01-24 00:39:51.741998: step: 820/466, loss: 3.7490296363830566 2023-01-24 00:39:52.420102: step: 822/466, loss: 2.7190322875976562 2023-01-24 00:39:53.052387: step: 824/466, loss: 0.6726736426353455 2023-01-24 00:39:53.631439: step: 826/466, loss: 2.7653231620788574 2023-01-24 00:39:54.261640: step: 828/466, loss: 1.475852131843567 2023-01-24 00:39:54.960541: step: 830/466, loss: 2.943328857421875 2023-01-24 00:39:55.619001: step: 832/466, loss: 1.0336472988128662 2023-01-24 00:39:56.233502: step: 834/466, loss: 0.3038322627544403 2023-01-24 00:39:56.816669: step: 836/466, loss: 1.8001176118850708 2023-01-24 00:39:57.385853: step: 838/466, loss: 0.42354637384414673 2023-01-24 00:39:57.991075: step: 840/466, loss: 0.46030494570732117 2023-01-24 00:39:58.601283: step: 842/466, loss: 0.9527333378791809 2023-01-24 00:39:59.217098: step: 844/466, loss: 0.7411099076271057 2023-01-24 00:39:59.898518: step: 846/466, loss: 1.9437055587768555 2023-01-24 00:40:00.733353: step: 848/466, loss: 2.4826438426971436 2023-01-24 00:40:01.277111: step: 850/466, loss: 1.0268150568008423 2023-01-24 00:40:01.897227: step: 852/466, loss: 0.6188070178031921 2023-01-24 00:40:02.552705: step: 854/466, loss: 5.253035068511963 2023-01-24 00:40:03.186875: step: 856/466, loss: 0.48597168922424316 2023-01-24 00:40:03.809424: step: 858/466, loss: 0.34880343079566956 2023-01-24 00:40:04.420780: step: 860/466, loss: 2.08416485786438 2023-01-24 00:40:05.054865: step: 862/466, loss: 0.24188533425331116 2023-01-24 00:40:05.694820: step: 864/466, loss: 0.2954680323600769 2023-01-24 00:40:06.367368: step: 866/466, loss: 0.2146918773651123 2023-01-24 00:40:06.931192: step: 868/466, loss: 3.6382498741149902 2023-01-24 00:40:07.616249: step: 870/466, loss: 0.46276894211769104 2023-01-24 00:40:08.302358: step: 872/466, loss: 2.3691139221191406 2023-01-24 00:40:08.963819: step: 874/466, loss: 1.9170887470245361 2023-01-24 00:40:09.529646: step: 876/466, loss: 0.5025712251663208 2023-01-24 00:40:10.146566: step: 878/466, loss: 1.055784821510315 2023-01-24 00:40:10.746071: step: 880/466, loss: 1.6783256530761719 2023-01-24 00:40:11.344894: step: 882/466, loss: 0.9137502312660217 2023-01-24 00:40:12.084942: step: 884/466, loss: 0.7934946417808533 2023-01-24 00:40:12.746657: step: 886/466, loss: 1.179140567779541 2023-01-24 00:40:13.464973: step: 888/466, loss: 0.5907734036445618 2023-01-24 00:40:14.056505: step: 890/466, loss: 0.8271014094352722 2023-01-24 00:40:14.660254: step: 892/466, loss: 1.8222496509552002 2023-01-24 00:40:15.278088: step: 894/466, loss: 0.6691303253173828 2023-01-24 00:40:15.866206: step: 896/466, loss: 1.140409231185913 2023-01-24 00:40:16.501701: step: 898/466, loss: 0.9140684604644775 2023-01-24 00:40:17.065519: step: 900/466, loss: 0.8692017197608948 2023-01-24 00:40:17.669164: step: 902/466, loss: 0.9234033226966858 2023-01-24 00:40:18.294912: step: 904/466, loss: 3.5146474838256836 2023-01-24 00:40:18.927969: step: 906/466, loss: 0.4510563313961029 2023-01-24 00:40:19.622584: step: 908/466, loss: 1.0838690996170044 2023-01-24 00:40:20.253954: step: 910/466, loss: 1.114465355873108 2023-01-24 00:40:20.860020: step: 912/466, loss: 0.6830224394798279 2023-01-24 00:40:21.439545: step: 914/466, loss: 0.6976163983345032 2023-01-24 00:40:22.013884: step: 916/466, loss: 2.7160091400146484 2023-01-24 00:40:22.674273: step: 918/466, loss: 0.525048017501831 2023-01-24 00:40:23.304016: step: 920/466, loss: 1.2110346555709839 2023-01-24 00:40:23.912556: step: 922/466, loss: 0.9209542870521545 2023-01-24 00:40:24.521764: step: 924/466, loss: 1.5772969722747803 2023-01-24 00:40:25.174465: step: 926/466, loss: 1.398496389389038 2023-01-24 00:40:25.896894: step: 928/466, loss: 1.5231181383132935 2023-01-24 00:40:26.555610: step: 930/466, loss: 0.5613966584205627 2023-01-24 00:40:27.189553: step: 932/466, loss: 1.8047528266906738 ================================================== Loss: 1.637 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3201661933886705, 'r': 0.1982847447691198, 'f1': 0.2448990531885269}, 'combined': 0.18045193392838824, 'epoch': 2} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3076271777699269, 'r': 0.20767610918041995, 'f1': 0.24795811311627725}, 'combined': 0.15526349139056614, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29765266726159495, 'r': 0.19146917305821762, 'f1': 0.23303522910318866}, 'combined': 0.17171016881287585, 'epoch': 2} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.32252191694282595, 'r': 0.2131727368071158, 'f1': 0.25668682423354117}, 'combined': 0.1590735248771241, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2728565523989473, 'r': 0.18898034464063712, 'f1': 0.22330188705295015}, 'combined': 0.16453823256533168, 'epoch': 2} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3617649968001654, 'r': 0.21274526912096448, 'f1': 0.26792834234005425}, 'combined': 0.17773464293845184, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.17142857142857143, 'f1': 0.24000000000000002}, 'combined': 0.16, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36764705882352944, 'r': 0.2717391304347826, 'f1': 0.3125}, 'combined': 0.15625, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.06896551724137931, 'f1': 0.1081081081081081}, 'combined': 0.07207207207207206, 'epoch': 2} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.38679331129366196, 'r': 0.2039987407390013, 'f1': 0.2671171629973556}, 'combined': 0.19682317273489358, 'epoch': 1} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32556260045666824, 'r': 0.15588254359106476, 'f1': 0.21082163517376934}, 'combined': 0.13200980894058456, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.14285714285714285, 'f1': 0.22727272727272727}, 'combined': 0.1515151515151515, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29765266726159495, 'r': 0.19146917305821762, 'f1': 0.23303522910318866}, 'combined': 0.17171016881287585, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.32252191694282595, 'r': 0.2131727368071158, 'f1': 0.25668682423354117}, 'combined': 0.1590735248771241, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36764705882352944, 'r': 0.2717391304347826, 'f1': 0.3125}, 'combined': 0.15625, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.360659200968523, 'r': 0.2015046672077922, 'f1': 0.25855276861655957}, 'combined': 0.19051256634904387, 'epoch': 1} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.37830051778663865, 'r': 0.14920336306205872, 'f1': 0.21400301135632502}, 'combined': 0.1419623936720176, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39285714285714285, 'r': 0.09482758620689655, 'f1': 0.15277777777777776}, 'combined': 0.10185185185185183, 'epoch': 1} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:43:09.624658: step: 2/466, loss: 3.664102077484131 2023-01-24 00:43:10.180666: step: 4/466, loss: 0.40151816606521606 2023-01-24 00:43:10.759036: step: 6/466, loss: 2.1475913524627686 2023-01-24 00:43:11.330642: step: 8/466, loss: 0.7339213490486145 2023-01-24 00:43:11.987510: step: 10/466, loss: 4.051681041717529 2023-01-24 00:43:12.590011: step: 12/466, loss: 1.5498143434524536 2023-01-24 00:43:13.287303: step: 14/466, loss: 2.397935390472412 2023-01-24 00:43:13.887569: step: 16/466, loss: 1.6067132949829102 2023-01-24 00:43:14.511008: step: 18/466, loss: 3.1149888038635254 2023-01-24 00:43:15.109618: step: 20/466, loss: 1.5309289693832397 2023-01-24 00:43:15.683332: step: 22/466, loss: 0.772601842880249 2023-01-24 00:43:16.291398: step: 24/466, loss: 0.8743709921836853 2023-01-24 00:43:16.910759: step: 26/466, loss: 0.5453883409500122 2023-01-24 00:43:17.557556: step: 28/466, loss: 0.5393946766853333 2023-01-24 00:43:18.215420: step: 30/466, loss: 0.6002148389816284 2023-01-24 00:43:18.809647: step: 32/466, loss: 0.5188323259353638 2023-01-24 00:43:19.451970: step: 34/466, loss: 0.7406638264656067 2023-01-24 00:43:20.079702: step: 36/466, loss: 1.0719877481460571 2023-01-24 00:43:20.759208: step: 38/466, loss: 0.2701267898082733 2023-01-24 00:43:21.412021: step: 40/466, loss: 1.4338196516036987 2023-01-24 00:43:21.941469: step: 42/466, loss: 0.8116973638534546 2023-01-24 00:43:22.524977: step: 44/466, loss: 1.9013493061065674 2023-01-24 00:43:23.134833: step: 46/466, loss: 0.285178005695343 2023-01-24 00:43:23.758726: step: 48/466, loss: 1.4673213958740234 2023-01-24 00:43:24.379244: step: 50/466, loss: 0.21668323874473572 2023-01-24 00:43:25.015992: step: 52/466, loss: 0.46653103828430176 2023-01-24 00:43:25.666713: step: 54/466, loss: 0.36020365357398987 2023-01-24 00:43:26.292261: step: 56/466, loss: 1.5513995885849 2023-01-24 00:43:26.905156: step: 58/466, loss: 2.431891441345215 2023-01-24 00:43:27.537161: step: 60/466, loss: 1.670435905456543 2023-01-24 00:43:28.137868: step: 62/466, loss: 0.7893364429473877 2023-01-24 00:43:28.730956: step: 64/466, loss: 16.907676696777344 2023-01-24 00:43:29.428555: step: 66/466, loss: 2.427928924560547 2023-01-24 00:43:30.043939: step: 68/466, loss: 0.8623694777488708 2023-01-24 00:43:30.641370: step: 70/466, loss: 0.2605237066745758 2023-01-24 00:43:31.237187: step: 72/466, loss: 2.150390625 2023-01-24 00:43:31.868003: step: 74/466, loss: 1.1520700454711914 2023-01-24 00:43:32.579854: step: 76/466, loss: 0.7585049867630005 2023-01-24 00:43:33.265288: step: 78/466, loss: 14.417007446289062 2023-01-24 00:43:33.873985: step: 80/466, loss: 1.6159945726394653 2023-01-24 00:43:34.438464: step: 82/466, loss: 0.515458345413208 2023-01-24 00:43:35.084426: step: 84/466, loss: 1.3202629089355469 2023-01-24 00:43:35.717859: step: 86/466, loss: 1.8619874715805054 2023-01-24 00:43:36.327273: step: 88/466, loss: 0.9497697949409485 2023-01-24 00:43:36.971025: step: 90/466, loss: 2.008118152618408 2023-01-24 00:43:37.669035: step: 92/466, loss: 2.1800649166107178 2023-01-24 00:43:38.312658: step: 94/466, loss: 0.2916567325592041 2023-01-24 00:43:38.960934: step: 96/466, loss: 0.7603704929351807 2023-01-24 00:43:39.652288: step: 98/466, loss: 1.2155827283859253 2023-01-24 00:43:40.250295: step: 100/466, loss: 0.31005045771598816 2023-01-24 00:43:40.883641: step: 102/466, loss: 0.2987905442714691 2023-01-24 00:43:41.536664: step: 104/466, loss: 0.3084142208099365 2023-01-24 00:43:42.189428: step: 106/466, loss: 0.5264069437980652 2023-01-24 00:43:42.843164: step: 108/466, loss: 2.4641854763031006 2023-01-24 00:43:43.469478: step: 110/466, loss: 2.1395678520202637 2023-01-24 00:43:44.098854: step: 112/466, loss: 1.2254267930984497 2023-01-24 00:43:44.862248: step: 114/466, loss: 0.19523349404335022 2023-01-24 00:43:45.485903: step: 116/466, loss: 0.5350965857505798 2023-01-24 00:43:46.123976: step: 118/466, loss: 0.7030841112136841 2023-01-24 00:43:46.793232: step: 120/466, loss: 3.22244930267334 2023-01-24 00:43:47.523572: step: 122/466, loss: 2.011723518371582 2023-01-24 00:43:48.124414: step: 124/466, loss: 0.3849104344844818 2023-01-24 00:43:48.708412: step: 126/466, loss: 1.2131569385528564 2023-01-24 00:43:49.349405: step: 128/466, loss: 3.6725645065307617 2023-01-24 00:43:49.958422: step: 130/466, loss: 0.6502686142921448 2023-01-24 00:43:50.563093: step: 132/466, loss: 0.8727032542228699 2023-01-24 00:43:51.106582: step: 134/466, loss: 0.8338615894317627 2023-01-24 00:43:51.851320: step: 136/466, loss: 0.6537538170814514 2023-01-24 00:43:52.433079: step: 138/466, loss: 0.39510753750801086 2023-01-24 00:43:53.050834: step: 140/466, loss: 0.3558765947818756 2023-01-24 00:43:53.680981: step: 142/466, loss: 1.0830470323562622 2023-01-24 00:43:54.321465: step: 144/466, loss: 2.405442953109741 2023-01-24 00:43:55.010035: step: 146/466, loss: 1.432064175605774 2023-01-24 00:43:55.667221: step: 148/466, loss: 1.0634498596191406 2023-01-24 00:43:56.295095: step: 150/466, loss: 0.53861403465271 2023-01-24 00:43:56.911342: step: 152/466, loss: 0.768478274345398 2023-01-24 00:43:57.540725: step: 154/466, loss: 2.361758232116699 2023-01-24 00:43:58.219803: step: 156/466, loss: 1.7955210208892822 2023-01-24 00:43:58.885663: step: 158/466, loss: 0.30507999658584595 2023-01-24 00:43:59.552259: step: 160/466, loss: 0.5709603428840637 2023-01-24 00:44:00.158621: step: 162/466, loss: 0.5453020334243774 2023-01-24 00:44:00.777865: step: 164/466, loss: 0.49675092101097107 2023-01-24 00:44:01.397428: step: 166/466, loss: 3.5595688819885254 2023-01-24 00:44:02.034745: step: 168/466, loss: 0.6276785135269165 2023-01-24 00:44:02.659997: step: 170/466, loss: 0.7200762629508972 2023-01-24 00:44:03.338306: step: 172/466, loss: 0.27785107493400574 2023-01-24 00:44:03.939016: step: 174/466, loss: 4.538939952850342 2023-01-24 00:44:04.590739: step: 176/466, loss: 0.8747923374176025 2023-01-24 00:44:05.227167: step: 178/466, loss: 1.369985580444336 2023-01-24 00:44:05.870711: step: 180/466, loss: 0.858673632144928 2023-01-24 00:44:06.521067: step: 182/466, loss: 0.38743600249290466 2023-01-24 00:44:07.154123: step: 184/466, loss: 1.7733240127563477 2023-01-24 00:44:07.798474: step: 186/466, loss: 1.5448582172393799 2023-01-24 00:44:08.414017: step: 188/466, loss: 2.079050064086914 2023-01-24 00:44:09.091540: step: 190/466, loss: 0.4500921964645386 2023-01-24 00:44:09.700455: step: 192/466, loss: 0.7650596499443054 2023-01-24 00:44:10.357084: step: 194/466, loss: 0.4266965389251709 2023-01-24 00:44:11.030240: step: 196/466, loss: 0.25606733560562134 2023-01-24 00:44:11.650173: step: 198/466, loss: 1.3332277536392212 2023-01-24 00:44:12.217552: step: 200/466, loss: 2.5043814182281494 2023-01-24 00:44:12.851947: step: 202/466, loss: 0.5503528714179993 2023-01-24 00:44:13.456150: step: 204/466, loss: 0.5619066953659058 2023-01-24 00:44:14.112866: step: 206/466, loss: 4.957601070404053 2023-01-24 00:44:14.777392: step: 208/466, loss: 2.668531894683838 2023-01-24 00:44:15.369135: step: 210/466, loss: 2.5844016075134277 2023-01-24 00:44:16.011297: step: 212/466, loss: 0.9975607395172119 2023-01-24 00:44:16.647662: step: 214/466, loss: 2.120330333709717 2023-01-24 00:44:17.254270: step: 216/466, loss: 0.9294872879981995 2023-01-24 00:44:17.870196: step: 218/466, loss: 2.3738186359405518 2023-01-24 00:44:18.456107: step: 220/466, loss: 0.3995184302330017 2023-01-24 00:44:19.136376: step: 222/466, loss: 0.6052391529083252 2023-01-24 00:44:19.797838: step: 224/466, loss: 0.3948894739151001 2023-01-24 00:44:20.427741: step: 226/466, loss: 0.21116848289966583 2023-01-24 00:44:21.110007: step: 228/466, loss: 3.6097004413604736 2023-01-24 00:44:21.670605: step: 230/466, loss: 0.32661372423171997 2023-01-24 00:44:22.282961: step: 232/466, loss: 0.31234103441238403 2023-01-24 00:44:22.912707: step: 234/466, loss: 1.1091272830963135 2023-01-24 00:44:23.575540: step: 236/466, loss: 0.518258810043335 2023-01-24 00:44:24.178644: step: 238/466, loss: 1.1867866516113281 2023-01-24 00:44:24.853797: step: 240/466, loss: 0.2828126847743988 2023-01-24 00:44:25.525913: step: 242/466, loss: 2.3363542556762695 2023-01-24 00:44:26.170371: step: 244/466, loss: 0.9250749349594116 2023-01-24 00:44:26.804906: step: 246/466, loss: 5.13469123840332 2023-01-24 00:44:27.397403: step: 248/466, loss: 2.2560393810272217 2023-01-24 00:44:28.019427: step: 250/466, loss: 1.9248523712158203 2023-01-24 00:44:28.716870: step: 252/466, loss: 2.239680528640747 2023-01-24 00:44:29.343185: step: 254/466, loss: 1.234459400177002 2023-01-24 00:44:29.960807: step: 256/466, loss: 2.088820219039917 2023-01-24 00:44:30.561668: step: 258/466, loss: 0.6357910633087158 2023-01-24 00:44:31.214968: step: 260/466, loss: 0.7092756032943726 2023-01-24 00:44:31.876991: step: 262/466, loss: 0.9330479502677917 2023-01-24 00:44:32.522530: step: 264/466, loss: 1.0234520435333252 2023-01-24 00:44:33.244766: step: 266/466, loss: 1.4933377504348755 2023-01-24 00:44:33.845400: step: 268/466, loss: 0.2533833980560303 2023-01-24 00:44:34.553912: step: 270/466, loss: 1.2477463483810425 2023-01-24 00:44:35.122323: step: 272/466, loss: 1.2700772285461426 2023-01-24 00:44:35.805535: step: 274/466, loss: 0.3964977562427521 2023-01-24 00:44:36.392252: step: 276/466, loss: 0.6327396035194397 2023-01-24 00:44:37.032046: step: 278/466, loss: 1.403305172920227 2023-01-24 00:44:37.605466: step: 280/466, loss: 0.45391255617141724 2023-01-24 00:44:38.192832: step: 282/466, loss: 3.441476345062256 2023-01-24 00:44:38.887713: step: 284/466, loss: 0.4469655454158783 2023-01-24 00:44:39.573003: step: 286/466, loss: 0.7628692984580994 2023-01-24 00:44:40.211576: step: 288/466, loss: 0.2860978841781616 2023-01-24 00:44:40.871167: step: 290/466, loss: 1.103793740272522 2023-01-24 00:44:41.468014: step: 292/466, loss: 1.050951361656189 2023-01-24 00:44:42.077185: step: 294/466, loss: 0.6237990260124207 2023-01-24 00:44:42.704959: step: 296/466, loss: 2.5325663089752197 2023-01-24 00:44:43.323090: step: 298/466, loss: 3.6511988639831543 2023-01-24 00:44:43.925713: step: 300/466, loss: 0.3682664632797241 2023-01-24 00:44:44.603005: step: 302/466, loss: 0.960863471031189 2023-01-24 00:44:45.187040: step: 304/466, loss: 1.7748119831085205 2023-01-24 00:44:45.851060: step: 306/466, loss: 1.5512523651123047 2023-01-24 00:44:46.488307: step: 308/466, loss: 0.5298848152160645 2023-01-24 00:44:47.031888: step: 310/466, loss: 0.7937417030334473 2023-01-24 00:44:47.674736: step: 312/466, loss: 3.202949047088623 2023-01-24 00:44:48.271620: step: 314/466, loss: 1.4959990978240967 2023-01-24 00:44:48.872656: step: 316/466, loss: 0.8022971153259277 2023-01-24 00:44:49.508669: step: 318/466, loss: 0.6434112191200256 2023-01-24 00:44:50.115533: step: 320/466, loss: 1.8489172458648682 2023-01-24 00:44:50.731079: step: 322/466, loss: 3.028909921646118 2023-01-24 00:44:51.333274: step: 324/466, loss: 6.65090274810791 2023-01-24 00:44:51.917514: step: 326/466, loss: 0.27499574422836304 2023-01-24 00:44:52.558360: step: 328/466, loss: 0.5359196066856384 2023-01-24 00:44:53.221610: step: 330/466, loss: 0.7141134738922119 2023-01-24 00:44:53.848274: step: 332/466, loss: 1.1901698112487793 2023-01-24 00:44:54.493741: step: 334/466, loss: 0.29804643988609314 2023-01-24 00:44:55.142932: step: 336/466, loss: 1.2590359449386597 2023-01-24 00:44:55.803395: step: 338/466, loss: 0.5071896910667419 2023-01-24 00:44:56.376848: step: 340/466, loss: 0.5313383340835571 2023-01-24 00:44:56.979444: step: 342/466, loss: 0.27631691098213196 2023-01-24 00:44:57.631929: step: 344/466, loss: 5.005047798156738 2023-01-24 00:44:58.273592: step: 346/466, loss: 1.0587619543075562 2023-01-24 00:44:58.934163: step: 348/466, loss: 1.04067063331604 2023-01-24 00:44:59.591708: step: 350/466, loss: 0.7760108709335327 2023-01-24 00:45:00.280274: step: 352/466, loss: 0.7545616626739502 2023-01-24 00:45:00.918393: step: 354/466, loss: 0.33428868651390076 2023-01-24 00:45:01.519941: step: 356/466, loss: 1.632312297821045 2023-01-24 00:45:02.091748: step: 358/466, loss: 0.4681653678417206 2023-01-24 00:45:02.713738: step: 360/466, loss: 1.2167729139328003 2023-01-24 00:45:03.345723: step: 362/466, loss: 1.0824363231658936 2023-01-24 00:45:03.973837: step: 364/466, loss: 0.20817844569683075 2023-01-24 00:45:04.553688: step: 366/466, loss: 0.18479155004024506 2023-01-24 00:45:05.229773: step: 368/466, loss: 1.523261547088623 2023-01-24 00:45:05.867612: step: 370/466, loss: 1.6464836597442627 2023-01-24 00:45:06.449170: step: 372/466, loss: 0.3085121810436249 2023-01-24 00:45:07.092490: step: 374/466, loss: 1.1452617645263672 2023-01-24 00:45:07.783746: step: 376/466, loss: 2.8488006591796875 2023-01-24 00:45:08.372598: step: 378/466, loss: 1.1419049501419067 2023-01-24 00:45:08.978782: step: 380/466, loss: 1.8636515140533447 2023-01-24 00:45:09.558791: step: 382/466, loss: 0.4585052728652954 2023-01-24 00:45:10.206439: step: 384/466, loss: 0.5190721154212952 2023-01-24 00:45:10.855101: step: 386/466, loss: 2.541865348815918 2023-01-24 00:45:11.452203: step: 388/466, loss: 2.4935693740844727 2023-01-24 00:45:12.130532: step: 390/466, loss: 1.0227413177490234 2023-01-24 00:45:12.699527: step: 392/466, loss: 0.1942005157470703 2023-01-24 00:45:13.421259: step: 394/466, loss: 1.268861174583435 2023-01-24 00:45:14.011159: step: 396/466, loss: 1.6738542318344116 2023-01-24 00:45:14.847959: step: 398/466, loss: 0.8321402668952942 2023-01-24 00:45:15.492436: step: 400/466, loss: 0.4387907087802887 2023-01-24 00:45:16.070626: step: 402/466, loss: 0.31822243332862854 2023-01-24 00:45:16.669187: step: 404/466, loss: 0.49491021037101746 2023-01-24 00:45:17.316859: step: 406/466, loss: 1.4946244955062866 2023-01-24 00:45:17.971340: step: 408/466, loss: 1.4691944122314453 2023-01-24 00:45:18.575015: step: 410/466, loss: 0.5321866273880005 2023-01-24 00:45:19.209090: step: 412/466, loss: 0.9545515775680542 2023-01-24 00:45:19.832119: step: 414/466, loss: 1.2634650468826294 2023-01-24 00:45:20.507600: step: 416/466, loss: 0.5948762893676758 2023-01-24 00:45:21.175338: step: 418/466, loss: 1.9317556619644165 2023-01-24 00:45:21.826589: step: 420/466, loss: 1.3317902088165283 2023-01-24 00:45:22.534046: step: 422/466, loss: 3.525007724761963 2023-01-24 00:45:23.108236: step: 424/466, loss: 1.9378377199172974 2023-01-24 00:45:23.723044: step: 426/466, loss: 0.842423677444458 2023-01-24 00:45:24.389323: step: 428/466, loss: 0.8884983658790588 2023-01-24 00:45:25.051842: step: 430/466, loss: 1.428268551826477 2023-01-24 00:45:25.700266: step: 432/466, loss: 1.537893295288086 2023-01-24 00:45:26.342670: step: 434/466, loss: 1.5906682014465332 2023-01-24 00:45:27.043741: step: 436/466, loss: 0.4472990334033966 2023-01-24 00:45:27.668130: step: 438/466, loss: 5.959825038909912 2023-01-24 00:45:28.334425: step: 440/466, loss: 0.9203157424926758 2023-01-24 00:45:28.951604: step: 442/466, loss: 0.6418544054031372 2023-01-24 00:45:29.611705: step: 444/466, loss: 1.447567343711853 2023-01-24 00:45:30.196207: step: 446/466, loss: 1.3381247520446777 2023-01-24 00:45:30.850502: step: 448/466, loss: 0.4526243507862091 2023-01-24 00:45:31.455809: step: 450/466, loss: 0.39733874797821045 2023-01-24 00:45:32.110176: step: 452/466, loss: 0.9563053250312805 2023-01-24 00:45:32.706986: step: 454/466, loss: 0.2351032793521881 2023-01-24 00:45:33.322561: step: 456/466, loss: 1.4261122941970825 2023-01-24 00:45:33.863573: step: 458/466, loss: 0.7196982502937317 2023-01-24 00:45:34.487554: step: 460/466, loss: 2.4283785820007324 2023-01-24 00:45:35.086042: step: 462/466, loss: 1.0949922800064087 2023-01-24 00:45:35.742395: step: 464/466, loss: 0.6088125705718994 2023-01-24 00:45:36.460735: step: 466/466, loss: 0.16389231383800507 2023-01-24 00:45:37.082073: step: 468/466, loss: 1.042517066001892 2023-01-24 00:45:37.758710: step: 470/466, loss: 0.8288591504096985 2023-01-24 00:45:38.352743: step: 472/466, loss: 1.2409992218017578 2023-01-24 00:45:39.019189: step: 474/466, loss: 1.3684990406036377 2023-01-24 00:45:39.712645: step: 476/466, loss: 0.4185815155506134 2023-01-24 00:45:40.240353: step: 478/466, loss: 1.293931245803833 2023-01-24 00:45:40.887430: step: 480/466, loss: 1.0922472476959229 2023-01-24 00:45:41.530110: step: 482/466, loss: 1.0459352731704712 2023-01-24 00:45:42.180835: step: 484/466, loss: 0.6630339622497559 2023-01-24 00:45:42.829697: step: 486/466, loss: 2.040816307067871 2023-01-24 00:45:43.522182: step: 488/466, loss: 0.9988837838172913 2023-01-24 00:45:44.196605: step: 490/466, loss: 0.9036743640899658 2023-01-24 00:45:44.912904: step: 492/466, loss: 3.2113442420959473 2023-01-24 00:45:45.553205: step: 494/466, loss: 1.0254572629928589 2023-01-24 00:45:46.234889: step: 496/466, loss: 0.71244215965271 2023-01-24 00:45:46.838107: step: 498/466, loss: 0.4137183427810669 2023-01-24 00:45:47.493091: step: 500/466, loss: 0.9377108812332153 2023-01-24 00:45:48.121009: step: 502/466, loss: 0.9592562317848206 2023-01-24 00:45:48.737344: step: 504/466, loss: 0.9918196797370911 2023-01-24 00:45:49.390756: step: 506/466, loss: 3.5560927391052246 2023-01-24 00:45:49.985861: step: 508/466, loss: 2.3743810653686523 2023-01-24 00:45:50.591376: step: 510/466, loss: 1.9230139255523682 2023-01-24 00:45:51.177128: step: 512/466, loss: 1.1159083843231201 2023-01-24 00:45:51.788688: step: 514/466, loss: 1.7958159446716309 2023-01-24 00:45:52.408763: step: 516/466, loss: 0.5810664296150208 2023-01-24 00:45:53.049207: step: 518/466, loss: 0.9423800706863403 2023-01-24 00:45:53.623338: step: 520/466, loss: 0.35207512974739075 2023-01-24 00:45:54.236639: step: 522/466, loss: 2.6577353477478027 2023-01-24 00:45:54.878601: step: 524/466, loss: 1.07778799533844 2023-01-24 00:45:55.534370: step: 526/466, loss: 2.047811985015869 2023-01-24 00:45:56.105501: step: 528/466, loss: 0.7667275071144104 2023-01-24 00:45:56.742126: step: 530/466, loss: 2.874121904373169 2023-01-24 00:45:57.366627: step: 532/466, loss: 0.4974350035190582 2023-01-24 00:45:57.988570: step: 534/466, loss: 0.31851255893707275 2023-01-24 00:45:58.617177: step: 536/466, loss: 1.2831884622573853 2023-01-24 00:45:59.236759: step: 538/466, loss: 0.3160209655761719 2023-01-24 00:45:59.790503: step: 540/466, loss: 0.5134939551353455 2023-01-24 00:46:00.408014: step: 542/466, loss: 0.2660251259803772 2023-01-24 00:46:01.045237: step: 544/466, loss: 1.1474089622497559 2023-01-24 00:46:01.665486: step: 546/466, loss: 0.7397912740707397 2023-01-24 00:46:02.257563: step: 548/466, loss: 1.9008302688598633 2023-01-24 00:46:02.899188: step: 550/466, loss: 0.5208633542060852 2023-01-24 00:46:03.548111: step: 552/466, loss: 0.4891583323478699 2023-01-24 00:46:04.137516: step: 554/466, loss: 1.0825510025024414 2023-01-24 00:46:04.750188: step: 556/466, loss: 0.7252593040466309 2023-01-24 00:46:05.330972: step: 558/466, loss: 0.3966256380081177 2023-01-24 00:46:06.073552: step: 560/466, loss: 0.15954799950122833 2023-01-24 00:46:06.712746: step: 562/466, loss: 1.3751274347305298 2023-01-24 00:46:07.339752: step: 564/466, loss: 0.7945005297660828 2023-01-24 00:46:07.939209: step: 566/466, loss: 11.734745025634766 2023-01-24 00:46:08.588290: step: 568/466, loss: 0.872058629989624 2023-01-24 00:46:09.220140: step: 570/466, loss: 2.14370059967041 2023-01-24 00:46:09.949722: step: 572/466, loss: 1.0840165615081787 2023-01-24 00:46:10.570577: step: 574/466, loss: 0.42503124475479126 2023-01-24 00:46:11.173680: step: 576/466, loss: 1.4932385683059692 2023-01-24 00:46:11.888425: step: 578/466, loss: 1.268698811531067 2023-01-24 00:46:12.497384: step: 580/466, loss: 0.6315069198608398 2023-01-24 00:46:13.061372: step: 582/466, loss: 1.204034686088562 2023-01-24 00:46:13.767198: step: 584/466, loss: 4.475171089172363 2023-01-24 00:46:14.403196: step: 586/466, loss: 0.7657732367515564 2023-01-24 00:46:15.000813: step: 588/466, loss: 0.6819864511489868 2023-01-24 00:46:15.639592: step: 590/466, loss: 0.8920553922653198 2023-01-24 00:46:16.243552: step: 592/466, loss: 1.0124226808547974 2023-01-24 00:46:16.808032: step: 594/466, loss: 0.6939584016799927 2023-01-24 00:46:17.472680: step: 596/466, loss: 0.49833014607429504 2023-01-24 00:46:18.086920: step: 598/466, loss: 1.2310301065444946 2023-01-24 00:46:18.729800: step: 600/466, loss: 0.3144940435886383 2023-01-24 00:46:19.333844: step: 602/466, loss: 2.4529874324798584 2023-01-24 00:46:19.935207: step: 604/466, loss: 1.0824910402297974 2023-01-24 00:46:20.538865: step: 606/466, loss: 0.5533326864242554 2023-01-24 00:46:21.117649: step: 608/466, loss: 0.390047162771225 2023-01-24 00:46:21.709253: step: 610/466, loss: 1.4454784393310547 2023-01-24 00:46:22.314409: step: 612/466, loss: 2.1368541717529297 2023-01-24 00:46:22.894718: step: 614/466, loss: 1.6841633319854736 2023-01-24 00:46:23.516557: step: 616/466, loss: 0.9814568161964417 2023-01-24 00:46:24.149410: step: 618/466, loss: 0.7741979360580444 2023-01-24 00:46:24.835375: step: 620/466, loss: 0.892448902130127 2023-01-24 00:46:25.518300: step: 622/466, loss: 0.2893030643463135 2023-01-24 00:46:26.118026: step: 624/466, loss: 1.7950413227081299 2023-01-24 00:46:26.755888: step: 626/466, loss: 0.40497180819511414 2023-01-24 00:46:27.426546: step: 628/466, loss: 1.072169303894043 2023-01-24 00:46:28.057967: step: 630/466, loss: 0.420509397983551 2023-01-24 00:46:28.717959: step: 632/466, loss: 0.37140047550201416 2023-01-24 00:46:29.422735: step: 634/466, loss: 0.42654693126678467 2023-01-24 00:46:29.949260: step: 636/466, loss: 0.6957866549491882 2023-01-24 00:46:30.565917: step: 638/466, loss: 0.48537787795066833 2023-01-24 00:46:31.218329: step: 640/466, loss: 2.398995876312256 2023-01-24 00:46:31.864798: step: 642/466, loss: 1.7987191677093506 2023-01-24 00:46:32.464690: step: 644/466, loss: 0.6381231546401978 2023-01-24 00:46:33.108513: step: 646/466, loss: 2.920368194580078 2023-01-24 00:46:33.754083: step: 648/466, loss: 0.937603235244751 2023-01-24 00:46:34.400501: step: 650/466, loss: 0.8973060846328735 2023-01-24 00:46:35.082537: step: 652/466, loss: 0.3030257225036621 2023-01-24 00:46:35.672345: step: 654/466, loss: 0.23769119381904602 2023-01-24 00:46:36.262322: step: 656/466, loss: 7.699764251708984 2023-01-24 00:46:36.931048: step: 658/466, loss: 0.7156766057014465 2023-01-24 00:46:37.566033: step: 660/466, loss: 0.5250595211982727 2023-01-24 00:46:38.193934: step: 662/466, loss: 0.7946426868438721 2023-01-24 00:46:38.831989: step: 664/466, loss: 0.6435459852218628 2023-01-24 00:46:39.482007: step: 666/466, loss: 1.4385056495666504 2023-01-24 00:46:40.122732: step: 668/466, loss: 0.6356618404388428 2023-01-24 00:46:40.795486: step: 670/466, loss: 0.76902174949646 2023-01-24 00:46:41.412770: step: 672/466, loss: 0.5062452554702759 2023-01-24 00:46:42.071282: step: 674/466, loss: 1.047088861465454 2023-01-24 00:46:42.709960: step: 676/466, loss: 0.5389862656593323 2023-01-24 00:46:43.393108: step: 678/466, loss: 0.564977765083313 2023-01-24 00:46:44.064779: step: 680/466, loss: 1.376876950263977 2023-01-24 00:46:44.725858: step: 682/466, loss: 0.615401029586792 2023-01-24 00:46:45.341240: step: 684/466, loss: 1.184968113899231 2023-01-24 00:46:45.968172: step: 686/466, loss: 0.5089195370674133 2023-01-24 00:46:46.647434: step: 688/466, loss: 1.1755729913711548 2023-01-24 00:46:47.309009: step: 690/466, loss: 0.7577053308486938 2023-01-24 00:46:47.965261: step: 692/466, loss: 0.29133859276771545 2023-01-24 00:46:48.583898: step: 694/466, loss: 0.8675771951675415 2023-01-24 00:46:49.203543: step: 696/466, loss: 0.32761046290397644 2023-01-24 00:46:49.819168: step: 698/466, loss: 1.0314862728118896 2023-01-24 00:46:50.421628: step: 700/466, loss: 1.219602346420288 2023-01-24 00:46:51.094664: step: 702/466, loss: 0.7685061097145081 2023-01-24 00:46:51.770216: step: 704/466, loss: 2.6815826892852783 2023-01-24 00:46:52.417602: step: 706/466, loss: 0.17379489541053772 2023-01-24 00:46:53.063159: step: 708/466, loss: 1.164201259613037 2023-01-24 00:46:53.643370: step: 710/466, loss: 0.624973475933075 2023-01-24 00:46:54.254128: step: 712/466, loss: 1.5552754402160645 2023-01-24 00:46:54.946716: step: 714/466, loss: 0.37641841173171997 2023-01-24 00:46:55.522601: step: 716/466, loss: 0.8614609837532043 2023-01-24 00:46:56.208732: step: 718/466, loss: 0.3931480944156647 2023-01-24 00:46:56.819000: step: 720/466, loss: 0.5416122078895569 2023-01-24 00:46:57.422290: step: 722/466, loss: 0.3402988314628601 2023-01-24 00:46:58.057268: step: 724/466, loss: 5.0532941818237305 2023-01-24 00:46:58.771797: step: 726/466, loss: 2.229910373687744 2023-01-24 00:46:59.445312: step: 728/466, loss: 1.0713385343551636 2023-01-24 00:47:00.045476: step: 730/466, loss: 2.3561525344848633 2023-01-24 00:47:00.682282: step: 732/466, loss: 0.47308316826820374 2023-01-24 00:47:01.297495: step: 734/466, loss: 0.5159525275230408 2023-01-24 00:47:01.905223: step: 736/466, loss: 0.6816383600234985 2023-01-24 00:47:02.559296: step: 738/466, loss: 3.2255191802978516 2023-01-24 00:47:03.206623: step: 740/466, loss: 0.5438644886016846 2023-01-24 00:47:03.916879: step: 742/466, loss: 1.8124189376831055 2023-01-24 00:47:04.574003: step: 744/466, loss: 0.43985387682914734 2023-01-24 00:47:05.371381: step: 746/466, loss: 2.1252360343933105 2023-01-24 00:47:05.986204: step: 748/466, loss: 1.001505732536316 2023-01-24 00:47:06.624382: step: 750/466, loss: 0.34168243408203125 2023-01-24 00:47:07.244597: step: 752/466, loss: 0.82155442237854 2023-01-24 00:47:07.877093: step: 754/466, loss: 0.3661108911037445 2023-01-24 00:47:08.618347: step: 756/466, loss: 0.38035377860069275 2023-01-24 00:47:09.170323: step: 758/466, loss: 0.4773160517215729 2023-01-24 00:47:09.796662: step: 760/466, loss: 0.8049483895301819 2023-01-24 00:47:10.403906: step: 762/466, loss: 1.2205617427825928 2023-01-24 00:47:11.031152: step: 764/466, loss: 0.9722318053245544 2023-01-24 00:47:11.631874: step: 766/466, loss: 3.272855043411255 2023-01-24 00:47:12.230156: step: 768/466, loss: 1.0239043235778809 2023-01-24 00:47:12.898765: step: 770/466, loss: 0.6385643482208252 2023-01-24 00:47:13.535909: step: 772/466, loss: 2.9943175315856934 2023-01-24 00:47:14.235747: step: 774/466, loss: 2.038254976272583 2023-01-24 00:47:14.823752: step: 776/466, loss: 0.8814162015914917 2023-01-24 00:47:15.439088: step: 778/466, loss: 3.2607007026672363 2023-01-24 00:47:16.040773: step: 780/466, loss: 3.3752236366271973 2023-01-24 00:47:16.779028: step: 782/466, loss: 7.300093650817871 2023-01-24 00:47:17.381269: step: 784/466, loss: 0.7274892330169678 2023-01-24 00:47:18.010267: step: 786/466, loss: 0.9277554750442505 2023-01-24 00:47:18.687288: step: 788/466, loss: 1.0070232152938843 2023-01-24 00:47:19.367679: step: 790/466, loss: 0.9627074003219604 2023-01-24 00:47:20.055611: step: 792/466, loss: 3.5604138374328613 2023-01-24 00:47:20.682403: step: 794/466, loss: 0.5762374401092529 2023-01-24 00:47:21.310398: step: 796/466, loss: 1.2100515365600586 2023-01-24 00:47:22.064162: step: 798/466, loss: 0.7598632574081421 2023-01-24 00:47:22.695265: step: 800/466, loss: 0.3530119061470032 2023-01-24 00:47:23.342475: step: 802/466, loss: 0.5504021048545837 2023-01-24 00:47:23.993527: step: 804/466, loss: 1.1767265796661377 2023-01-24 00:47:24.619839: step: 806/466, loss: 1.640741229057312 2023-01-24 00:47:25.180515: step: 808/466, loss: 0.6865524649620056 2023-01-24 00:47:25.733152: step: 810/466, loss: 3.889336109161377 2023-01-24 00:47:26.392904: step: 812/466, loss: 0.7453906536102295 2023-01-24 00:47:27.045923: step: 814/466, loss: 0.771681010723114 2023-01-24 00:47:27.690801: step: 816/466, loss: 1.5986517667770386 2023-01-24 00:47:28.336504: step: 818/466, loss: 1.1174575090408325 2023-01-24 00:47:29.025614: step: 820/466, loss: 0.34714770317077637 2023-01-24 00:47:29.622380: step: 822/466, loss: 2.073472499847412 2023-01-24 00:47:30.361845: step: 824/466, loss: 0.419218510389328 2023-01-24 00:47:31.002771: step: 826/466, loss: 0.43204182386398315 2023-01-24 00:47:31.596142: step: 828/466, loss: 0.655705451965332 2023-01-24 00:47:32.216376: step: 830/466, loss: 0.9431020617485046 2023-01-24 00:47:32.817766: step: 832/466, loss: 2.661907196044922 2023-01-24 00:47:33.441682: step: 834/466, loss: 0.28297674655914307 2023-01-24 00:47:34.083132: step: 836/466, loss: 0.3602883815765381 2023-01-24 00:47:34.685941: step: 838/466, loss: 0.5022509694099426 2023-01-24 00:47:35.347635: step: 840/466, loss: 0.3109540641307831 2023-01-24 00:47:35.950927: step: 842/466, loss: 9.10473918914795 2023-01-24 00:47:36.554443: step: 844/466, loss: 0.667523980140686 2023-01-24 00:47:37.224698: step: 846/466, loss: 2.73602032661438 2023-01-24 00:47:37.864710: step: 848/466, loss: 0.6200502514839172 2023-01-24 00:47:38.428443: step: 850/466, loss: 1.79677152633667 2023-01-24 00:47:38.973670: step: 852/466, loss: 0.5190490484237671 2023-01-24 00:47:39.678258: step: 854/466, loss: 0.5402984619140625 2023-01-24 00:47:40.286078: step: 856/466, loss: 1.1196444034576416 2023-01-24 00:47:40.939266: step: 858/466, loss: 0.24323970079421997 2023-01-24 00:47:41.607592: step: 860/466, loss: 1.542398452758789 2023-01-24 00:47:42.259895: step: 862/466, loss: 1.082638144493103 2023-01-24 00:47:42.903254: step: 864/466, loss: 1.3859745264053345 2023-01-24 00:47:43.493258: step: 866/466, loss: 0.6537830829620361 2023-01-24 00:47:44.100665: step: 868/466, loss: 8.41200065612793 2023-01-24 00:47:44.787553: step: 870/466, loss: 1.0243313312530518 2023-01-24 00:47:45.443973: step: 872/466, loss: 1.7913784980773926 2023-01-24 00:47:46.074999: step: 874/466, loss: 0.897002100944519 2023-01-24 00:47:46.612372: step: 876/466, loss: 0.3358590602874756 2023-01-24 00:47:47.224069: step: 878/466, loss: 0.19971369206905365 2023-01-24 00:47:47.889063: step: 880/466, loss: 1.4521043300628662 2023-01-24 00:47:48.541776: step: 882/466, loss: 0.7002207040786743 2023-01-24 00:47:49.203061: step: 884/466, loss: 0.8930506110191345 2023-01-24 00:47:49.890544: step: 886/466, loss: 1.1874725818634033 2023-01-24 00:47:50.509903: step: 888/466, loss: 1.0502787828445435 2023-01-24 00:47:51.177497: step: 890/466, loss: 0.3964206576347351 2023-01-24 00:47:51.830710: step: 892/466, loss: 1.472832441329956 2023-01-24 00:47:52.480639: step: 894/466, loss: 0.9696194529533386 2023-01-24 00:47:53.118985: step: 896/466, loss: 0.9544723629951477 2023-01-24 00:47:53.790720: step: 898/466, loss: 0.18317130208015442 2023-01-24 00:47:54.425441: step: 900/466, loss: 2.1016006469726562 2023-01-24 00:47:55.089022: step: 902/466, loss: 1.6702429056167603 2023-01-24 00:47:55.721213: step: 904/466, loss: 0.48005521297454834 2023-01-24 00:47:56.430368: step: 906/466, loss: 1.803809642791748 2023-01-24 00:47:57.062704: step: 908/466, loss: 1.603433609008789 2023-01-24 00:47:57.681244: step: 910/466, loss: 3.0145821571350098 2023-01-24 00:47:58.436965: step: 912/466, loss: 1.1885614395141602 2023-01-24 00:47:59.087245: step: 914/466, loss: 0.4076838791370392 2023-01-24 00:47:59.801314: step: 916/466, loss: 0.33982279896736145 2023-01-24 00:48:00.486805: step: 918/466, loss: 0.5260677337646484 2023-01-24 00:48:01.201993: step: 920/466, loss: 1.7159318923950195 2023-01-24 00:48:01.911185: step: 922/466, loss: 3.7175893783569336 2023-01-24 00:48:02.472325: step: 924/466, loss: 0.6074461936950684 2023-01-24 00:48:03.099186: step: 926/466, loss: 1.825785756111145 2023-01-24 00:48:03.717437: step: 928/466, loss: 0.3798740804195404 2023-01-24 00:48:04.439410: step: 930/466, loss: 0.7044768333435059 2023-01-24 00:48:05.063592: step: 932/466, loss: 1.870772361755371 ================================================== Loss: 1.351 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3434528006267137, 'r': 0.24067715195432585, 'f1': 0.28302346599528744}, 'combined': 0.20854360652284337, 'epoch': 3} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3391664863325753, 'r': 0.23915977665651386, 'f1': 0.28051633242948054}, 'combined': 0.1756504137642542, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32806827557755774, 'r': 0.2510219381313131, 'f1': 0.2844197067238912}, 'combined': 0.20957241548076194, 'epoch': 3} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.34589165886158363, 'r': 0.2423424877686659, 'f1': 0.2850029893270949}, 'combined': 0.176621570850594, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31270884270578647, 'r': 0.24223090277777778, 'f1': 0.2729944859480612}, 'combined': 0.20115383175120297, 'epoch': 3} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3810161274971447, 'r': 0.2402432312493288, 'f1': 0.29468061716676675}, 'combined': 0.19548120148686507, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.25, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.13043478260869565, 'f1': 0.1714285714285714}, 'combined': 0.0857142857142857, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.06896551724137931, 'f1': 0.1111111111111111}, 'combined': 0.07407407407407407, 'epoch': 3} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3434528006267137, 'r': 0.24067715195432585, 'f1': 0.28302346599528744}, 'combined': 0.20854360652284337, 'epoch': 3} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3391664863325753, 'r': 0.23915977665651386, 'f1': 0.28051633242948054}, 'combined': 0.1756504137642542, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.25, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29765266726159495, 'r': 0.19146917305821762, 'f1': 0.23303522910318866}, 'combined': 0.17171016881287585, 'epoch': 2} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.32252191694282595, 'r': 0.2131727368071158, 'f1': 0.25668682423354117}, 'combined': 0.1590735248771241, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36764705882352944, 'r': 0.2717391304347826, 'f1': 0.3125}, 'combined': 0.15625, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.360659200968523, 'r': 0.2015046672077922, 'f1': 0.25855276861655957}, 'combined': 0.19051256634904387, 'epoch': 1} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.37830051778663865, 'r': 0.14920336306205872, 'f1': 0.21400301135632502}, 'combined': 0.1419623936720176, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39285714285714285, 'r': 0.09482758620689655, 'f1': 0.15277777777777776}, 'combined': 0.10185185185185183, 'epoch': 1} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:50:46.868886: step: 2/466, loss: 0.8234193325042725 2023-01-24 00:50:47.497865: step: 4/466, loss: 0.5527621507644653 2023-01-24 00:50:48.113636: step: 6/466, loss: 0.5562136173248291 2023-01-24 00:50:48.708266: step: 8/466, loss: 0.6088646054267883 2023-01-24 00:50:49.298550: step: 10/466, loss: 3.758753776550293 2023-01-24 00:50:49.842204: step: 12/466, loss: 0.39627838134765625 2023-01-24 00:50:50.475983: step: 14/466, loss: 0.7602285146713257 2023-01-24 00:50:51.116194: step: 16/466, loss: 0.8169283866882324 2023-01-24 00:50:51.731310: step: 18/466, loss: 1.195311188697815 2023-01-24 00:50:52.329735: step: 20/466, loss: 0.9847410917282104 2023-01-24 00:50:52.992039: step: 22/466, loss: 0.3099139928817749 2023-01-24 00:50:53.630066: step: 24/466, loss: 0.53386390209198 2023-01-24 00:50:54.245428: step: 26/466, loss: 2.3639395236968994 2023-01-24 00:50:54.873402: step: 28/466, loss: 1.3981447219848633 2023-01-24 00:50:55.565131: step: 30/466, loss: 0.45659926533699036 2023-01-24 00:50:56.190499: step: 32/466, loss: 0.33734163641929626 2023-01-24 00:50:56.813381: step: 34/466, loss: 1.5848418474197388 2023-01-24 00:50:57.484162: step: 36/466, loss: 0.6364235281944275 2023-01-24 00:50:58.116086: step: 38/466, loss: 0.6908300518989563 2023-01-24 00:50:58.799009: step: 40/466, loss: 0.4133012294769287 2023-01-24 00:50:59.493761: step: 42/466, loss: 0.43771636486053467 2023-01-24 00:51:00.105991: step: 44/466, loss: 1.4768908023834229 2023-01-24 00:51:00.757287: step: 46/466, loss: 1.4073878526687622 2023-01-24 00:51:01.359009: step: 48/466, loss: 1.0671855211257935 2023-01-24 00:51:01.982327: step: 50/466, loss: 0.3492785692214966 2023-01-24 00:51:02.605138: step: 52/466, loss: 0.7020103335380554 2023-01-24 00:51:03.138031: step: 54/466, loss: 0.5150268077850342 2023-01-24 00:51:03.740457: step: 56/466, loss: 0.7967656254768372 2023-01-24 00:51:04.355041: step: 58/466, loss: 0.5937609672546387 2023-01-24 00:51:05.001094: step: 60/466, loss: 1.9360209703445435 2023-01-24 00:51:05.668307: step: 62/466, loss: 0.5878614187240601 2023-01-24 00:51:06.337802: step: 64/466, loss: 0.24815517663955688 2023-01-24 00:51:07.000659: step: 66/466, loss: 0.32944440841674805 2023-01-24 00:51:07.590211: step: 68/466, loss: 0.6284695863723755 2023-01-24 00:51:08.229491: step: 70/466, loss: 1.988524317741394 2023-01-24 00:51:08.827656: step: 72/466, loss: 1.0196870565414429 2023-01-24 00:51:09.480785: step: 74/466, loss: 0.7447625398635864 2023-01-24 00:51:10.071564: step: 76/466, loss: 0.29737746715545654 2023-01-24 00:51:10.707936: step: 78/466, loss: 2.552250385284424 2023-01-24 00:51:11.361943: step: 80/466, loss: 1.034410834312439 2023-01-24 00:51:12.064969: step: 82/466, loss: 1.1705375909805298 2023-01-24 00:51:12.728992: step: 84/466, loss: 2.288651704788208 2023-01-24 00:51:13.354589: step: 86/466, loss: 4.031686305999756 2023-01-24 00:51:13.985217: step: 88/466, loss: 9.568753242492676 2023-01-24 00:51:14.557008: step: 90/466, loss: 1.692314863204956 2023-01-24 00:51:15.132156: step: 92/466, loss: 1.6322171688079834 2023-01-24 00:51:15.772125: step: 94/466, loss: 0.867580771446228 2023-01-24 00:51:16.414850: step: 96/466, loss: 0.3122093379497528 2023-01-24 00:51:17.002208: step: 98/466, loss: 1.1003360748291016 2023-01-24 00:51:17.662875: step: 100/466, loss: 0.5618632435798645 2023-01-24 00:51:18.271483: step: 102/466, loss: 0.3283842206001282 2023-01-24 00:51:18.832124: step: 104/466, loss: 0.6832338571548462 2023-01-24 00:51:19.513337: step: 106/466, loss: 0.2865106165409088 2023-01-24 00:51:20.140695: step: 108/466, loss: 3.1510965824127197 2023-01-24 00:51:20.779682: step: 110/466, loss: 0.6936209201812744 2023-01-24 00:51:21.353147: step: 112/466, loss: 1.8776599168777466 2023-01-24 00:51:22.002957: step: 114/466, loss: 2.3227715492248535 2023-01-24 00:51:22.653464: step: 116/466, loss: 0.4241205155849457 2023-01-24 00:51:23.269172: step: 118/466, loss: 0.759576678276062 2023-01-24 00:51:23.909788: step: 120/466, loss: 0.23246560990810394 2023-01-24 00:51:24.508423: step: 122/466, loss: 0.2546248137950897 2023-01-24 00:51:25.127714: step: 124/466, loss: 0.7287163734436035 2023-01-24 00:51:25.827021: step: 126/466, loss: 8.166577339172363 2023-01-24 00:51:26.443731: step: 128/466, loss: 1.5923850536346436 2023-01-24 00:51:27.138638: step: 130/466, loss: 0.37262099981307983 2023-01-24 00:51:27.781557: step: 132/466, loss: 1.4601284265518188 2023-01-24 00:51:28.354898: step: 134/466, loss: 0.18822559714317322 2023-01-24 00:51:28.912567: step: 136/466, loss: 1.4293346405029297 2023-01-24 00:51:29.482787: step: 138/466, loss: 0.3131538927555084 2023-01-24 00:51:30.099490: step: 140/466, loss: 0.611158013343811 2023-01-24 00:51:30.702600: step: 142/466, loss: 0.7587867379188538 2023-01-24 00:51:31.376854: step: 144/466, loss: 1.0173994302749634 2023-01-24 00:51:32.031245: step: 146/466, loss: 1.2000371217727661 2023-01-24 00:51:32.670893: step: 148/466, loss: 0.832184374332428 2023-01-24 00:51:33.296766: step: 150/466, loss: 0.9794172048568726 2023-01-24 00:51:33.927750: step: 152/466, loss: 0.5464068651199341 2023-01-24 00:51:34.560263: step: 154/466, loss: 1.0173841714859009 2023-01-24 00:51:35.168030: step: 156/466, loss: 3.6597390174865723 2023-01-24 00:51:35.757163: step: 158/466, loss: 5.955989837646484 2023-01-24 00:51:36.365504: step: 160/466, loss: 0.3543793559074402 2023-01-24 00:51:36.998123: step: 162/466, loss: 2.2218308448791504 2023-01-24 00:51:37.638652: step: 164/466, loss: 1.0682404041290283 2023-01-24 00:51:38.268641: step: 166/466, loss: 1.0099315643310547 2023-01-24 00:51:38.993500: step: 168/466, loss: 1.0879361629486084 2023-01-24 00:51:39.585044: step: 170/466, loss: 0.2668413519859314 2023-01-24 00:51:40.232947: step: 172/466, loss: 0.5412415862083435 2023-01-24 00:51:40.937961: step: 174/466, loss: 1.0546385049819946 2023-01-24 00:51:41.571371: step: 176/466, loss: 0.1540793776512146 2023-01-24 00:51:42.215539: step: 178/466, loss: 0.30003786087036133 2023-01-24 00:51:42.813675: step: 180/466, loss: 0.7003176808357239 2023-01-24 00:51:43.436925: step: 182/466, loss: 0.372450053691864 2023-01-24 00:51:44.137185: step: 184/466, loss: 6.132852554321289 2023-01-24 00:51:44.749584: step: 186/466, loss: 0.26928791403770447 2023-01-24 00:51:45.382795: step: 188/466, loss: 0.7455117106437683 2023-01-24 00:51:46.038052: step: 190/466, loss: 2.13204288482666 2023-01-24 00:51:46.775330: step: 192/466, loss: 1.1307880878448486 2023-01-24 00:51:47.427038: step: 194/466, loss: 1.6704050302505493 2023-01-24 00:51:48.119011: step: 196/466, loss: 0.37431788444519043 2023-01-24 00:51:48.722095: step: 198/466, loss: 1.84345281124115 2023-01-24 00:51:49.387130: step: 200/466, loss: 0.3616538941860199 2023-01-24 00:51:49.941528: step: 202/466, loss: 0.933368444442749 2023-01-24 00:51:50.599196: step: 204/466, loss: 5.045001029968262 2023-01-24 00:51:51.211573: step: 206/466, loss: 0.23763899505138397 2023-01-24 00:51:51.832322: step: 208/466, loss: 2.020751476287842 2023-01-24 00:51:52.440793: step: 210/466, loss: 0.3125147819519043 2023-01-24 00:51:53.054651: step: 212/466, loss: 1.8077600002288818 2023-01-24 00:51:53.724120: step: 214/466, loss: 2.1511735916137695 2023-01-24 00:51:54.376445: step: 216/466, loss: 15.920232772827148 2023-01-24 00:51:55.013492: step: 218/466, loss: 4.202001094818115 2023-01-24 00:51:55.681581: step: 220/466, loss: 1.3560482263565063 2023-01-24 00:51:56.339077: step: 222/466, loss: 1.4759902954101562 2023-01-24 00:51:56.981959: step: 224/466, loss: 0.2740112245082855 2023-01-24 00:51:57.579388: step: 226/466, loss: 1.1533056497573853 2023-01-24 00:51:58.259450: step: 228/466, loss: 1.33697509765625 2023-01-24 00:51:59.050551: step: 230/466, loss: 2.2029709815979004 2023-01-24 00:51:59.644087: step: 232/466, loss: 0.7078940868377686 2023-01-24 00:52:00.335890: step: 234/466, loss: 0.8712655305862427 2023-01-24 00:52:00.924838: step: 236/466, loss: 0.7303901314735413 2023-01-24 00:52:01.596443: step: 238/466, loss: 2.421600341796875 2023-01-24 00:52:02.215953: step: 240/466, loss: 0.9266837239265442 2023-01-24 00:52:02.899175: step: 242/466, loss: 1.455971121788025 2023-01-24 00:52:03.506265: step: 244/466, loss: 0.9863547086715698 2023-01-24 00:52:04.151451: step: 246/466, loss: 0.3293907642364502 2023-01-24 00:52:04.844057: step: 248/466, loss: 0.6340574026107788 2023-01-24 00:52:05.392896: step: 250/466, loss: 0.7300431728363037 2023-01-24 00:52:05.977772: step: 252/466, loss: 7.62088680267334 2023-01-24 00:52:06.622935: step: 254/466, loss: 1.5418038368225098 2023-01-24 00:52:07.266716: step: 256/466, loss: 1.0736063718795776 2023-01-24 00:52:07.921289: step: 258/466, loss: 0.27653467655181885 2023-01-24 00:52:08.522674: step: 260/466, loss: 1.1074532270431519 2023-01-24 00:52:09.170552: step: 262/466, loss: 1.368119239807129 2023-01-24 00:52:09.803066: step: 264/466, loss: 1.368348479270935 2023-01-24 00:52:10.480065: step: 266/466, loss: 0.3171231150627136 2023-01-24 00:52:11.265481: step: 268/466, loss: 0.7048443555831909 2023-01-24 00:52:11.924569: step: 270/466, loss: 1.8752702474594116 2023-01-24 00:52:12.572825: step: 272/466, loss: 0.34659889340400696 2023-01-24 00:52:13.192956: step: 274/466, loss: 0.3979450464248657 2023-01-24 00:52:13.819558: step: 276/466, loss: 0.4131494164466858 2023-01-24 00:52:14.412749: step: 278/466, loss: 1.8385015726089478 2023-01-24 00:52:15.052543: step: 280/466, loss: 0.54398512840271 2023-01-24 00:52:15.719528: step: 282/466, loss: 0.26953792572021484 2023-01-24 00:52:16.346837: step: 284/466, loss: 0.14041590690612793 2023-01-24 00:52:16.929856: step: 286/466, loss: 0.2797807455062866 2023-01-24 00:52:17.552880: step: 288/466, loss: 1.2565838098526 2023-01-24 00:52:18.151782: step: 290/466, loss: 0.25621214509010315 2023-01-24 00:52:18.748357: step: 292/466, loss: 0.23986800014972687 2023-01-24 00:52:19.391127: step: 294/466, loss: 1.2190638780593872 2023-01-24 00:52:20.080123: step: 296/466, loss: 0.9308200478553772 2023-01-24 00:52:20.766392: step: 298/466, loss: 0.9362515211105347 2023-01-24 00:52:21.420204: step: 300/466, loss: 0.7603223323822021 2023-01-24 00:52:22.038077: step: 302/466, loss: 0.5132754445075989 2023-01-24 00:52:22.651024: step: 304/466, loss: 0.25197291374206543 2023-01-24 00:52:23.279690: step: 306/466, loss: 1.2909669876098633 2023-01-24 00:52:23.942313: step: 308/466, loss: 1.3695316314697266 2023-01-24 00:52:24.531262: step: 310/466, loss: 0.8792002201080322 2023-01-24 00:52:25.136016: step: 312/466, loss: 0.6641198992729187 2023-01-24 00:52:25.770409: step: 314/466, loss: 1.4005305767059326 2023-01-24 00:52:26.377399: step: 316/466, loss: 0.47896987199783325 2023-01-24 00:52:27.029784: step: 318/466, loss: 0.6251979470252991 2023-01-24 00:52:27.636187: step: 320/466, loss: 0.3861716687679291 2023-01-24 00:52:28.233391: step: 322/466, loss: 0.2425888180732727 2023-01-24 00:52:28.851789: step: 324/466, loss: 0.9359714984893799 2023-01-24 00:52:29.574875: step: 326/466, loss: 0.6627274751663208 2023-01-24 00:52:30.217501: step: 328/466, loss: 1.0809791088104248 2023-01-24 00:52:30.835702: step: 330/466, loss: 0.7750211358070374 2023-01-24 00:52:31.473575: step: 332/466, loss: 0.6194950938224792 2023-01-24 00:52:32.173905: step: 334/466, loss: 0.20308737456798553 2023-01-24 00:52:32.789660: step: 336/466, loss: 2.0507476329803467 2023-01-24 00:52:33.427323: step: 338/466, loss: 1.2626909017562866 2023-01-24 00:52:34.003392: step: 340/466, loss: 0.49783921241760254 2023-01-24 00:52:34.672989: step: 342/466, loss: 2.4868323802948 2023-01-24 00:52:35.312895: step: 344/466, loss: 0.5746316909790039 2023-01-24 00:52:35.920871: step: 346/466, loss: 0.9443038105964661 2023-01-24 00:52:36.523652: step: 348/466, loss: 0.4447798430919647 2023-01-24 00:52:37.211226: step: 350/466, loss: 0.6863400936126709 2023-01-24 00:52:37.861294: step: 352/466, loss: 0.35676565766334534 2023-01-24 00:52:38.473445: step: 354/466, loss: 0.5012394189834595 2023-01-24 00:52:39.057027: step: 356/466, loss: 1.7943192720413208 2023-01-24 00:52:39.695497: step: 358/466, loss: 0.8492233157157898 2023-01-24 00:52:40.330304: step: 360/466, loss: 0.7463604211807251 2023-01-24 00:52:40.928887: step: 362/466, loss: 0.9617664813995361 2023-01-24 00:52:41.511964: step: 364/466, loss: 0.5592828989028931 2023-01-24 00:52:42.209693: step: 366/466, loss: 1.3846055269241333 2023-01-24 00:52:42.828385: step: 368/466, loss: 0.6915057301521301 2023-01-24 00:52:43.441131: step: 370/466, loss: 0.8128398656845093 2023-01-24 00:52:44.127754: step: 372/466, loss: 1.2673065662384033 2023-01-24 00:52:44.783990: step: 374/466, loss: 0.8283820748329163 2023-01-24 00:52:45.428058: step: 376/466, loss: 1.3461519479751587 2023-01-24 00:52:46.021550: step: 378/466, loss: 0.9876441359519958 2023-01-24 00:52:46.622181: step: 380/466, loss: 1.5725579261779785 2023-01-24 00:52:47.232074: step: 382/466, loss: 0.7976436614990234 2023-01-24 00:52:47.812131: step: 384/466, loss: 1.8078581094741821 2023-01-24 00:52:48.492664: step: 386/466, loss: 1.8934369087219238 2023-01-24 00:52:49.205868: step: 388/466, loss: 0.28402432799339294 2023-01-24 00:52:49.909202: step: 390/466, loss: 1.4807446002960205 2023-01-24 00:52:50.569608: step: 392/466, loss: 0.7380787134170532 2023-01-24 00:52:51.234372: step: 394/466, loss: 0.47656989097595215 2023-01-24 00:52:51.840676: step: 396/466, loss: 0.4588451385498047 2023-01-24 00:52:52.440531: step: 398/466, loss: 0.6063854694366455 2023-01-24 00:52:53.094389: step: 400/466, loss: 0.9722875952720642 2023-01-24 00:52:53.729766: step: 402/466, loss: 0.24728664755821228 2023-01-24 00:52:54.377827: step: 404/466, loss: 0.202083557844162 2023-01-24 00:52:54.971666: step: 406/466, loss: 1.6964472532272339 2023-01-24 00:52:55.588826: step: 408/466, loss: 1.0508222579956055 2023-01-24 00:52:56.174616: step: 410/466, loss: 0.32941722869873047 2023-01-24 00:52:56.809599: step: 412/466, loss: 1.6181867122650146 2023-01-24 00:52:57.443103: step: 414/466, loss: 0.5365049242973328 2023-01-24 00:52:58.050280: step: 416/466, loss: 0.25431028008461 2023-01-24 00:52:58.617916: step: 418/466, loss: 0.9074522852897644 2023-01-24 00:52:59.181206: step: 420/466, loss: 4.191633224487305 2023-01-24 00:52:59.808098: step: 422/466, loss: 1.4547134637832642 2023-01-24 00:53:00.511412: step: 424/466, loss: 2.038376569747925 2023-01-24 00:53:01.249361: step: 426/466, loss: 2.3599841594696045 2023-01-24 00:53:01.862882: step: 428/466, loss: 0.7275699973106384 2023-01-24 00:53:02.479503: step: 430/466, loss: 0.6593179702758789 2023-01-24 00:53:03.103938: step: 432/466, loss: 0.6755656003952026 2023-01-24 00:53:03.784736: step: 434/466, loss: 3.1001334190368652 2023-01-24 00:53:04.464341: step: 436/466, loss: 0.731447696685791 2023-01-24 00:53:05.038165: step: 438/466, loss: 0.3785054385662079 2023-01-24 00:53:05.632219: step: 440/466, loss: 0.9702725410461426 2023-01-24 00:53:06.214249: step: 442/466, loss: 0.6594955921173096 2023-01-24 00:53:06.809955: step: 444/466, loss: 0.7122897505760193 2023-01-24 00:53:07.431812: step: 446/466, loss: 0.36549654603004456 2023-01-24 00:53:08.055330: step: 448/466, loss: 0.6760743856430054 2023-01-24 00:53:08.666389: step: 450/466, loss: 1.709719181060791 2023-01-24 00:53:09.207648: step: 452/466, loss: 1.601709246635437 2023-01-24 00:53:09.843943: step: 454/466, loss: 1.7167366743087769 2023-01-24 00:53:10.517547: step: 456/466, loss: 0.5454731583595276 2023-01-24 00:53:11.171833: step: 458/466, loss: 0.9240479469299316 2023-01-24 00:53:11.891404: step: 460/466, loss: 2.346407890319824 2023-01-24 00:53:12.547774: step: 462/466, loss: 3.4541802406311035 2023-01-24 00:53:13.126113: step: 464/466, loss: 0.7184671759605408 2023-01-24 00:53:13.773638: step: 466/466, loss: 1.4777228832244873 2023-01-24 00:53:14.389872: step: 468/466, loss: 0.3892070949077606 2023-01-24 00:53:15.002542: step: 470/466, loss: 0.7117869853973389 2023-01-24 00:53:15.632836: step: 472/466, loss: 0.46897411346435547 2023-01-24 00:53:16.283358: step: 474/466, loss: 0.5022502541542053 2023-01-24 00:53:16.933121: step: 476/466, loss: 9.41117000579834 2023-01-24 00:53:17.491581: step: 478/466, loss: 0.522497296333313 2023-01-24 00:53:18.098572: step: 480/466, loss: 0.438643217086792 2023-01-24 00:53:18.770643: step: 482/466, loss: 0.21643486618995667 2023-01-24 00:53:19.405564: step: 484/466, loss: 0.9862494468688965 2023-01-24 00:53:20.012381: step: 486/466, loss: 1.7793397903442383 2023-01-24 00:53:20.692885: step: 488/466, loss: 0.77402263879776 2023-01-24 00:53:21.305700: step: 490/466, loss: 1.1911898851394653 2023-01-24 00:53:21.946541: step: 492/466, loss: 3.382206678390503 2023-01-24 00:53:22.572833: step: 494/466, loss: 1.221908688545227 2023-01-24 00:53:23.147917: step: 496/466, loss: 1.3005276918411255 2023-01-24 00:53:23.813088: step: 498/466, loss: 0.49820587038993835 2023-01-24 00:53:24.371278: step: 500/466, loss: 0.5385996103286743 2023-01-24 00:53:25.049361: step: 502/466, loss: 0.3212437033653259 2023-01-24 00:53:25.679145: step: 504/466, loss: 1.192125678062439 2023-01-24 00:53:26.323034: step: 506/466, loss: 4.746476173400879 2023-01-24 00:53:26.991760: step: 508/466, loss: 0.5380405783653259 2023-01-24 00:53:27.579051: step: 510/466, loss: 2.430771589279175 2023-01-24 00:53:28.128469: step: 512/466, loss: 0.5627239942550659 2023-01-24 00:53:28.755725: step: 514/466, loss: 1.427639365196228 2023-01-24 00:53:29.374204: step: 516/466, loss: 1.5477802753448486 2023-01-24 00:53:29.927679: step: 518/466, loss: 1.1179890632629395 2023-01-24 00:53:30.606389: step: 520/466, loss: 0.30601030588150024 2023-01-24 00:53:31.191719: step: 522/466, loss: 0.421135812997818 2023-01-24 00:53:31.782273: step: 524/466, loss: 2.8707714080810547 2023-01-24 00:53:32.409815: step: 526/466, loss: 2.263228416442871 2023-01-24 00:53:33.057171: step: 528/466, loss: 0.7130519151687622 2023-01-24 00:53:33.701124: step: 530/466, loss: 1.3197085857391357 2023-01-24 00:53:34.246676: step: 532/466, loss: 0.6716258525848389 2023-01-24 00:53:34.886460: step: 534/466, loss: 0.5461893677711487 2023-01-24 00:53:35.536070: step: 536/466, loss: 0.38266992568969727 2023-01-24 00:53:36.114730: step: 538/466, loss: 0.4114155173301697 2023-01-24 00:53:36.731908: step: 540/466, loss: 1.5288643836975098 2023-01-24 00:53:37.310751: step: 542/466, loss: 2.4292304515838623 2023-01-24 00:53:37.960372: step: 544/466, loss: 0.3957027196884155 2023-01-24 00:53:38.537051: step: 546/466, loss: 0.5813397169113159 2023-01-24 00:53:39.132916: step: 548/466, loss: 1.0507454872131348 2023-01-24 00:53:39.834896: step: 550/466, loss: 1.3837651014328003 2023-01-24 00:53:40.497961: step: 552/466, loss: 1.1009317636489868 2023-01-24 00:53:41.195041: step: 554/466, loss: 0.4339676797389984 2023-01-24 00:53:41.878280: step: 556/466, loss: 1.4487650394439697 2023-01-24 00:53:42.473114: step: 558/466, loss: 0.7837366461753845 2023-01-24 00:53:43.157285: step: 560/466, loss: 0.9487995505332947 2023-01-24 00:53:43.847605: step: 562/466, loss: 2.581289768218994 2023-01-24 00:53:44.499531: step: 564/466, loss: 2.4496123790740967 2023-01-24 00:53:45.090791: step: 566/466, loss: 1.0049906969070435 2023-01-24 00:53:45.634506: step: 568/466, loss: 1.7165555953979492 2023-01-24 00:53:46.241043: step: 570/466, loss: 1.0403224229812622 2023-01-24 00:53:46.880455: step: 572/466, loss: 0.5541769862174988 2023-01-24 00:53:47.480737: step: 574/466, loss: 0.27094000577926636 2023-01-24 00:53:48.099585: step: 576/466, loss: 0.6843553185462952 2023-01-24 00:53:48.719994: step: 578/466, loss: 0.1781836748123169 2023-01-24 00:53:49.327675: step: 580/466, loss: 2.5594866275787354 2023-01-24 00:53:50.051400: step: 582/466, loss: 1.2028337717056274 2023-01-24 00:53:50.683174: step: 584/466, loss: 0.4208811819553375 2023-01-24 00:53:51.282900: step: 586/466, loss: 0.6839389204978943 2023-01-24 00:53:51.947606: step: 588/466, loss: 0.609345555305481 2023-01-24 00:53:52.524079: step: 590/466, loss: 0.9183725714683533 2023-01-24 00:53:53.145306: step: 592/466, loss: 0.50873863697052 2023-01-24 00:53:53.799542: step: 594/466, loss: 1.2759383916854858 2023-01-24 00:53:54.454228: step: 596/466, loss: 0.9638827443122864 2023-01-24 00:53:55.108802: step: 598/466, loss: 0.3420892059803009 2023-01-24 00:53:55.707985: step: 600/466, loss: 0.9437588453292847 2023-01-24 00:53:56.422235: step: 602/466, loss: 0.6606385707855225 2023-01-24 00:53:57.031291: step: 604/466, loss: 0.629491925239563 2023-01-24 00:53:57.649995: step: 606/466, loss: 1.1778596639633179 2023-01-24 00:53:58.268303: step: 608/466, loss: 0.7342720627784729 2023-01-24 00:53:58.834257: step: 610/466, loss: 0.17956723272800446 2023-01-24 00:53:59.503943: step: 612/466, loss: 0.9784295558929443 2023-01-24 00:54:00.157239: step: 614/466, loss: 3.4845693111419678 2023-01-24 00:54:00.799083: step: 616/466, loss: 0.5351054668426514 2023-01-24 00:54:01.397502: step: 618/466, loss: 1.9158273935317993 2023-01-24 00:54:02.041164: step: 620/466, loss: 0.2676509916782379 2023-01-24 00:54:02.707088: step: 622/466, loss: 0.2582094967365265 2023-01-24 00:54:03.328461: step: 624/466, loss: 0.6532222628593445 2023-01-24 00:54:03.905154: step: 626/466, loss: 0.21216334402561188 2023-01-24 00:54:04.576024: step: 628/466, loss: 2.3480544090270996 2023-01-24 00:54:05.201033: step: 630/466, loss: 1.132393717765808 2023-01-24 00:54:05.859840: step: 632/466, loss: 5.891961097717285 2023-01-24 00:54:06.466022: step: 634/466, loss: 0.527739405632019 2023-01-24 00:54:07.117660: step: 636/466, loss: 1.0703450441360474 2023-01-24 00:54:07.817978: step: 638/466, loss: 0.42220592498779297 2023-01-24 00:54:08.498845: step: 640/466, loss: 1.296844720840454 2023-01-24 00:54:09.251767: step: 642/466, loss: 0.9484662413597107 2023-01-24 00:54:09.830420: step: 644/466, loss: 1.2204694747924805 2023-01-24 00:54:10.453340: step: 646/466, loss: 1.5694851875305176 2023-01-24 00:54:11.099333: step: 648/466, loss: 0.7809709310531616 2023-01-24 00:54:11.745719: step: 650/466, loss: 0.31678450107574463 2023-01-24 00:54:12.375973: step: 652/466, loss: 0.7704044580459595 2023-01-24 00:54:13.019524: step: 654/466, loss: 0.8784146308898926 2023-01-24 00:54:13.662530: step: 656/466, loss: 1.0362260341644287 2023-01-24 00:54:14.340485: step: 658/466, loss: 0.5954376459121704 2023-01-24 00:54:15.049334: step: 660/466, loss: 0.31657710671424866 2023-01-24 00:54:15.701189: step: 662/466, loss: 0.3830662965774536 2023-01-24 00:54:16.282993: step: 664/466, loss: 1.6849455833435059 2023-01-24 00:54:16.872994: step: 666/466, loss: 1.2949471473693848 2023-01-24 00:54:17.549352: step: 668/466, loss: 2.7972347736358643 2023-01-24 00:54:18.204802: step: 670/466, loss: 0.7938665151596069 2023-01-24 00:54:18.754556: step: 672/466, loss: 0.7428727149963379 2023-01-24 00:54:19.346356: step: 674/466, loss: 0.5348787903785706 2023-01-24 00:54:20.161551: step: 676/466, loss: 1.3804330825805664 2023-01-24 00:54:20.759346: step: 678/466, loss: 2.004319190979004 2023-01-24 00:54:21.358145: step: 680/466, loss: 0.28002798557281494 2023-01-24 00:54:21.976476: step: 682/466, loss: 1.3072905540466309 2023-01-24 00:54:22.571776: step: 684/466, loss: 0.21807393431663513 2023-01-24 00:54:23.206614: step: 686/466, loss: 0.422049343585968 2023-01-24 00:54:23.806053: step: 688/466, loss: 0.12766236066818237 2023-01-24 00:54:24.390440: step: 690/466, loss: 0.21372602880001068 2023-01-24 00:54:25.031656: step: 692/466, loss: 0.6505768895149231 2023-01-24 00:54:25.639228: step: 694/466, loss: 1.8058565855026245 2023-01-24 00:54:26.290833: step: 696/466, loss: 1.6718077659606934 2023-01-24 00:54:26.963092: step: 698/466, loss: 0.3383273482322693 2023-01-24 00:54:27.672379: step: 700/466, loss: 0.31181371212005615 2023-01-24 00:54:28.267172: step: 702/466, loss: 0.33269938826560974 2023-01-24 00:54:28.928295: step: 704/466, loss: 1.0136563777923584 2023-01-24 00:54:29.449009: step: 706/466, loss: 0.47253674268722534 2023-01-24 00:54:30.062333: step: 708/466, loss: 0.7834235429763794 2023-01-24 00:54:30.682653: step: 710/466, loss: 0.43739745020866394 2023-01-24 00:54:31.286756: step: 712/466, loss: 0.7709540724754333 2023-01-24 00:54:31.922204: step: 714/466, loss: 1.3063597679138184 2023-01-24 00:54:32.535760: step: 716/466, loss: 0.9646741151809692 2023-01-24 00:54:33.105888: step: 718/466, loss: 0.3246005177497864 2023-01-24 00:54:33.763470: step: 720/466, loss: 0.23996220529079437 2023-01-24 00:54:34.326974: step: 722/466, loss: 0.6034432053565979 2023-01-24 00:54:34.943717: step: 724/466, loss: 0.6511892676353455 2023-01-24 00:54:35.566120: step: 726/466, loss: 0.30328404903411865 2023-01-24 00:54:36.138884: step: 728/466, loss: 0.8836084008216858 2023-01-24 00:54:36.758183: step: 730/466, loss: 0.3370964229106903 2023-01-24 00:54:37.458802: step: 732/466, loss: 0.6672161817550659 2023-01-24 00:54:38.156345: step: 734/466, loss: 0.6693808436393738 2023-01-24 00:54:38.805789: step: 736/466, loss: 0.3571440577507019 2023-01-24 00:54:39.443721: step: 738/466, loss: 0.9070135951042175 2023-01-24 00:54:40.083710: step: 740/466, loss: 1.1195652484893799 2023-01-24 00:54:40.728613: step: 742/466, loss: 0.1802312284708023 2023-01-24 00:54:41.379969: step: 744/466, loss: 0.5180309414863586 2023-01-24 00:54:41.977882: step: 746/466, loss: 0.132245272397995 2023-01-24 00:54:42.615073: step: 748/466, loss: 0.9627670049667358 2023-01-24 00:54:43.225717: step: 750/466, loss: 0.22905667126178741 2023-01-24 00:54:43.859139: step: 752/466, loss: 0.6364123821258545 2023-01-24 00:54:44.555546: step: 754/466, loss: 7.123708724975586 2023-01-24 00:54:45.205579: step: 756/466, loss: 0.43862396478652954 2023-01-24 00:54:45.826791: step: 758/466, loss: 0.3531898558139801 2023-01-24 00:54:46.408028: step: 760/466, loss: 0.9491121768951416 2023-01-24 00:54:47.080200: step: 762/466, loss: 0.4505407214164734 2023-01-24 00:54:47.730414: step: 764/466, loss: 0.48531121015548706 2023-01-24 00:54:48.381261: step: 766/466, loss: 1.101506233215332 2023-01-24 00:54:48.988742: step: 768/466, loss: 0.8574106693267822 2023-01-24 00:54:49.615158: step: 770/466, loss: 0.5556524395942688 2023-01-24 00:54:50.326938: step: 772/466, loss: 0.4280818700790405 2023-01-24 00:54:50.952135: step: 774/466, loss: 0.9926246404647827 2023-01-24 00:54:51.541199: step: 776/466, loss: 0.30997520685195923 2023-01-24 00:54:52.311263: step: 778/466, loss: 0.3498050272464752 2023-01-24 00:54:52.941522: step: 780/466, loss: 0.736324667930603 2023-01-24 00:54:53.632623: step: 782/466, loss: 0.3913474380970001 2023-01-24 00:54:54.249127: step: 784/466, loss: 0.7822198271751404 2023-01-24 00:54:54.961182: step: 786/466, loss: 1.2186057567596436 2023-01-24 00:54:55.537738: step: 788/466, loss: 1.2209784984588623 2023-01-24 00:54:56.168513: step: 790/466, loss: 0.35334426164627075 2023-01-24 00:54:56.752815: step: 792/466, loss: 1.4263582229614258 2023-01-24 00:54:57.410121: step: 794/466, loss: 0.28296440839767456 2023-01-24 00:54:57.947228: step: 796/466, loss: 1.9911689758300781 2023-01-24 00:54:58.585658: step: 798/466, loss: 1.1145910024642944 2023-01-24 00:54:59.221402: step: 800/466, loss: 0.26959559321403503 2023-01-24 00:54:59.841186: step: 802/466, loss: 0.5132648348808289 2023-01-24 00:55:00.455103: step: 804/466, loss: 9.53636646270752 2023-01-24 00:55:01.044664: step: 806/466, loss: 0.8781405091285706 2023-01-24 00:55:01.682846: step: 808/466, loss: 0.5908510088920593 2023-01-24 00:55:02.361137: step: 810/466, loss: 1.6838551759719849 2023-01-24 00:55:02.947936: step: 812/466, loss: 0.2623120844364166 2023-01-24 00:55:03.535121: step: 814/466, loss: 2.2486305236816406 2023-01-24 00:55:04.170523: step: 816/466, loss: 1.1749213933944702 2023-01-24 00:55:04.769220: step: 818/466, loss: 3.2100768089294434 2023-01-24 00:55:05.419998: step: 820/466, loss: 0.8678385019302368 2023-01-24 00:55:06.066774: step: 822/466, loss: 1.8635488748550415 2023-01-24 00:55:06.706431: step: 824/466, loss: 1.5381243228912354 2023-01-24 00:55:07.343205: step: 826/466, loss: 1.2123987674713135 2023-01-24 00:55:08.028891: step: 828/466, loss: 1.2208527326583862 2023-01-24 00:55:08.680542: step: 830/466, loss: 4.604278564453125 2023-01-24 00:55:09.257027: step: 832/466, loss: 0.23117735981941223 2023-01-24 00:55:09.889592: step: 834/466, loss: 1.1699235439300537 2023-01-24 00:55:10.583162: step: 836/466, loss: 2.572183132171631 2023-01-24 00:55:11.217124: step: 838/466, loss: 0.9678937792778015 2023-01-24 00:55:11.895894: step: 840/466, loss: 4.89470100402832 2023-01-24 00:55:12.538299: step: 842/466, loss: 0.48709937930107117 2023-01-24 00:55:13.218865: step: 844/466, loss: 0.44832372665405273 2023-01-24 00:55:13.801936: step: 846/466, loss: 0.7639361619949341 2023-01-24 00:55:14.433448: step: 848/466, loss: 2.037889003753662 2023-01-24 00:55:14.990280: step: 850/466, loss: 1.6136598587036133 2023-01-24 00:55:15.631170: step: 852/466, loss: 0.6163361072540283 2023-01-24 00:55:16.194916: step: 854/466, loss: 0.6797692775726318 2023-01-24 00:55:16.790697: step: 856/466, loss: 1.4036130905151367 2023-01-24 00:55:17.409435: step: 858/466, loss: 0.15858228504657745 2023-01-24 00:55:18.054396: step: 860/466, loss: 0.4420839846134186 2023-01-24 00:55:18.711724: step: 862/466, loss: 1.3558509349822998 2023-01-24 00:55:19.391211: step: 864/466, loss: 0.7650389671325684 2023-01-24 00:55:20.097096: step: 866/466, loss: 1.6276443004608154 2023-01-24 00:55:20.695307: step: 868/466, loss: 0.6392664909362793 2023-01-24 00:55:21.361641: step: 870/466, loss: 0.6262152194976807 2023-01-24 00:55:22.004216: step: 872/466, loss: 1.5029776096343994 2023-01-24 00:55:22.632520: step: 874/466, loss: 0.5986423492431641 2023-01-24 00:55:23.258878: step: 876/466, loss: 1.5892155170440674 2023-01-24 00:55:23.887009: step: 878/466, loss: 0.380433589220047 2023-01-24 00:55:24.529570: step: 880/466, loss: 1.5511170625686646 2023-01-24 00:55:25.094937: step: 882/466, loss: 0.8453248143196106 2023-01-24 00:55:25.755489: step: 884/466, loss: 1.493863821029663 2023-01-24 00:55:26.388889: step: 886/466, loss: 0.7681907415390015 2023-01-24 00:55:27.024777: step: 888/466, loss: 0.3795468509197235 2023-01-24 00:55:27.582073: step: 890/466, loss: 0.41383445262908936 2023-01-24 00:55:28.169287: step: 892/466, loss: 0.41684284806251526 2023-01-24 00:55:28.778498: step: 894/466, loss: 0.5345543026924133 2023-01-24 00:55:29.500370: step: 896/466, loss: 6.320236682891846 2023-01-24 00:55:30.124705: step: 898/466, loss: 1.5392639636993408 2023-01-24 00:55:30.791667: step: 900/466, loss: 0.6952849626541138 2023-01-24 00:55:31.460056: step: 902/466, loss: 0.41941577196121216 2023-01-24 00:55:32.072898: step: 904/466, loss: 5.019166946411133 2023-01-24 00:55:32.657533: step: 906/466, loss: 0.20151279866695404 2023-01-24 00:55:33.276196: step: 908/466, loss: 1.0805621147155762 2023-01-24 00:55:33.999831: step: 910/466, loss: 0.6773832440376282 2023-01-24 00:55:34.659929: step: 912/466, loss: 0.6606972217559814 2023-01-24 00:55:35.270041: step: 914/466, loss: 0.5137636065483093 2023-01-24 00:55:35.806902: step: 916/466, loss: 0.9369035363197327 2023-01-24 00:55:36.396704: step: 918/466, loss: 0.3441203832626343 2023-01-24 00:55:37.035026: step: 920/466, loss: 0.8426586389541626 2023-01-24 00:55:37.671256: step: 922/466, loss: 2.424509286880493 2023-01-24 00:55:38.294004: step: 924/466, loss: 1.6193931102752686 2023-01-24 00:55:38.880760: step: 926/466, loss: 2.54475998878479 2023-01-24 00:55:39.496705: step: 928/466, loss: 1.66229248046875 2023-01-24 00:55:40.078666: step: 930/466, loss: 0.48374447226524353 2023-01-24 00:55:40.784795: step: 932/466, loss: 0.8983269929885864 ================================================== Loss: 1.197 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35266479985310323, 'r': 0.2603161425860667, 'f1': 0.2995340767311292}, 'combined': 0.2207093196966215, 'epoch': 4} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35061171058748686, 'r': 0.22688501488377985, 'f1': 0.27549435231915403}, 'combined': 0.17250580939610582, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3436874568668047, 'r': 0.2699935619409054, 'f1': 0.30241574313040837}, 'combined': 0.22283265283293247, 'epoch': 4} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.360562971273461, 'r': 0.23343932675775023, 'f1': 0.283398153666497}, 'combined': 0.17562702480740658, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31668397009966776, 'r': 0.2583948902141502, 'f1': 0.28458538587848936}, 'combined': 0.20969449485783426, 'epoch': 4} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.39165853086307634, 'r': 0.23329840286067724, 'f1': 0.2924147402371384}, 'combined': 0.19397809500879482, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40865384615384615, 'r': 0.30357142857142855, 'f1': 0.3483606557377049}, 'combined': 0.23224043715846993, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3958333333333333, 'r': 0.20652173913043478, 'f1': 0.27142857142857146}, 'combined': 0.13571428571428573, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.1896551724137931, 'f1': 0.2619047619047619}, 'combined': 0.1746031746031746, 'epoch': 4} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35266479985310323, 'r': 0.2603161425860667, 'f1': 0.2995340767311292}, 'combined': 0.2207093196966215, 'epoch': 4} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35061171058748686, 'r': 0.22688501488377985, 'f1': 0.27549435231915403}, 'combined': 0.17250580939610582, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40865384615384615, 'r': 0.30357142857142855, 'f1': 0.3483606557377049}, 'combined': 0.23224043715846993, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3436874568668047, 'r': 0.2699935619409054, 'f1': 0.30241574313040837}, 'combined': 0.22283265283293247, 'epoch': 4} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.360562971273461, 'r': 0.23343932675775023, 'f1': 0.283398153666497}, 'combined': 0.17562702480740658, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3958333333333333, 'r': 0.20652173913043478, 'f1': 0.27142857142857146}, 'combined': 0.13571428571428573, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31668397009966776, 'r': 0.2583948902141502, 'f1': 0.28458538587848936}, 'combined': 0.20969449485783426, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.39165853086307634, 'r': 0.23329840286067724, 'f1': 0.2924147402371384}, 'combined': 0.19397809500879482, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.1896551724137931, 'f1': 0.2619047619047619}, 'combined': 0.1746031746031746, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:58:30.772720: step: 2/466, loss: 0.5471147298812866 2023-01-24 00:58:31.426282: step: 4/466, loss: 0.4550301134586334 2023-01-24 00:58:32.054708: step: 6/466, loss: 1.0673224925994873 2023-01-24 00:58:32.734471: step: 8/466, loss: 1.0695760250091553 2023-01-24 00:58:33.485899: step: 10/466, loss: 4.457903861999512 2023-01-24 00:58:34.081327: step: 12/466, loss: 0.25589051842689514 2023-01-24 00:58:34.803596: step: 14/466, loss: 0.45844224095344543 2023-01-24 00:58:35.391822: step: 16/466, loss: 0.7497357130050659 2023-01-24 00:58:36.039520: step: 18/466, loss: 0.3468421697616577 2023-01-24 00:58:36.633927: step: 20/466, loss: 0.6334825754165649 2023-01-24 00:58:37.295921: step: 22/466, loss: 0.2468576431274414 2023-01-24 00:58:37.981994: step: 24/466, loss: 1.0535274744033813 2023-01-24 00:58:38.660097: step: 26/466, loss: 0.5754486918449402 2023-01-24 00:58:39.264396: step: 28/466, loss: 0.5943377614021301 2023-01-24 00:58:39.917687: step: 30/466, loss: 0.04559643566608429 2023-01-24 00:58:40.534639: step: 32/466, loss: 0.26631873846054077 2023-01-24 00:58:41.123815: step: 34/466, loss: 0.13600394129753113 2023-01-24 00:58:41.721144: step: 36/466, loss: 0.40314531326293945 2023-01-24 00:58:42.412285: step: 38/466, loss: 0.6339420080184937 2023-01-24 00:58:43.026600: step: 40/466, loss: 0.7132548093795776 2023-01-24 00:58:43.737792: step: 42/466, loss: 1.5578649044036865 2023-01-24 00:58:44.395930: step: 44/466, loss: 0.669644832611084 2023-01-24 00:58:45.032809: step: 46/466, loss: 0.5667735934257507 2023-01-24 00:58:45.645829: step: 48/466, loss: 0.3961609899997711 2023-01-24 00:58:46.366380: step: 50/466, loss: 0.4249700605869293 2023-01-24 00:58:46.997539: step: 52/466, loss: 0.7943816781044006 2023-01-24 00:58:47.624533: step: 54/466, loss: 0.4422169327735901 2023-01-24 00:58:48.307598: step: 56/466, loss: 2.761477470397949 2023-01-24 00:58:48.967281: step: 58/466, loss: 0.96144700050354 2023-01-24 00:58:49.587207: step: 60/466, loss: 0.18480552732944489 2023-01-24 00:58:50.191885: step: 62/466, loss: 0.50814288854599 2023-01-24 00:58:50.787640: step: 64/466, loss: 1.698656439781189 2023-01-24 00:58:51.373747: step: 66/466, loss: 0.9207381010055542 2023-01-24 00:58:51.995310: step: 68/466, loss: 1.8269872665405273 2023-01-24 00:58:52.621554: step: 70/466, loss: 0.7651251554489136 2023-01-24 00:58:53.301062: step: 72/466, loss: 0.3633600175380707 2023-01-24 00:58:53.915007: step: 74/466, loss: 2.6156022548675537 2023-01-24 00:58:54.522182: step: 76/466, loss: 0.8331165313720703 2023-01-24 00:58:55.179806: step: 78/466, loss: 0.9168491363525391 2023-01-24 00:58:55.806712: step: 80/466, loss: 9.40099048614502 2023-01-24 00:58:56.442962: step: 82/466, loss: 0.814190149307251 2023-01-24 00:58:57.049765: step: 84/466, loss: 0.9722875952720642 2023-01-24 00:58:57.742943: step: 86/466, loss: 0.3626081049442291 2023-01-24 00:58:58.345202: step: 88/466, loss: 0.2767053544521332 2023-01-24 00:58:59.065402: step: 90/466, loss: 0.5099116563796997 2023-01-24 00:58:59.617122: step: 92/466, loss: 2.0675015449523926 2023-01-24 00:59:00.227435: step: 94/466, loss: 0.41941386461257935 2023-01-24 00:59:00.847007: step: 96/466, loss: 0.9571064114570618 2023-01-24 00:59:01.436912: step: 98/466, loss: 0.5113182663917542 2023-01-24 00:59:02.087755: step: 100/466, loss: 0.6901702880859375 2023-01-24 00:59:02.801776: step: 102/466, loss: 0.28122299909591675 2023-01-24 00:59:03.408932: step: 104/466, loss: 0.46844592690467834 2023-01-24 00:59:03.976928: step: 106/466, loss: 0.5279050469398499 2023-01-24 00:59:04.596476: step: 108/466, loss: 1.4865195751190186 2023-01-24 00:59:05.205454: step: 110/466, loss: 1.2874722480773926 2023-01-24 00:59:05.837794: step: 112/466, loss: 0.5957018136978149 2023-01-24 00:59:06.442646: step: 114/466, loss: 0.7692592740058899 2023-01-24 00:59:07.029118: step: 116/466, loss: 0.8668258786201477 2023-01-24 00:59:07.705705: step: 118/466, loss: 0.5483438372612 2023-01-24 00:59:08.370643: step: 120/466, loss: 0.26125210523605347 2023-01-24 00:59:09.038003: step: 122/466, loss: 0.9838550090789795 2023-01-24 00:59:09.590812: step: 124/466, loss: 0.8422924876213074 2023-01-24 00:59:10.188798: step: 126/466, loss: 0.402086079120636 2023-01-24 00:59:10.808155: step: 128/466, loss: 0.9473074674606323 2023-01-24 00:59:11.488589: step: 130/466, loss: 0.4960426688194275 2023-01-24 00:59:12.146271: step: 132/466, loss: 0.31938955187797546 2023-01-24 00:59:12.814731: step: 134/466, loss: 0.5474739670753479 2023-01-24 00:59:13.549472: step: 136/466, loss: 0.9406565427780151 2023-01-24 00:59:14.216730: step: 138/466, loss: 0.9508500695228577 2023-01-24 00:59:14.879925: step: 140/466, loss: 0.6886086463928223 2023-01-24 00:59:15.499342: step: 142/466, loss: 0.4171375036239624 2023-01-24 00:59:16.087401: step: 144/466, loss: 1.6779673099517822 2023-01-24 00:59:16.741023: step: 146/466, loss: 1.1907424926757812 2023-01-24 00:59:17.356235: step: 148/466, loss: 0.4422885775566101 2023-01-24 00:59:17.952890: step: 150/466, loss: 0.3932275176048279 2023-01-24 00:59:18.626044: step: 152/466, loss: 1.4827567338943481 2023-01-24 00:59:19.250430: step: 154/466, loss: 1.9700175523757935 2023-01-24 00:59:19.905220: step: 156/466, loss: 0.7869168519973755 2023-01-24 00:59:20.594403: step: 158/466, loss: 1.7329630851745605 2023-01-24 00:59:21.236617: step: 160/466, loss: 0.7041877508163452 2023-01-24 00:59:21.822805: step: 162/466, loss: 0.39406371116638184 2023-01-24 00:59:22.388302: step: 164/466, loss: 0.28115153312683105 2023-01-24 00:59:23.006017: step: 166/466, loss: 1.389836311340332 2023-01-24 00:59:23.685057: step: 168/466, loss: 1.2109633684158325 2023-01-24 00:59:24.214945: step: 170/466, loss: 0.3636434078216553 2023-01-24 00:59:24.867635: step: 172/466, loss: 0.9906993508338928 2023-01-24 00:59:25.373350: step: 174/466, loss: 0.27599167823791504 2023-01-24 00:59:25.993419: step: 176/466, loss: 0.22614003717899323 2023-01-24 00:59:26.654623: step: 178/466, loss: 0.966813325881958 2023-01-24 00:59:27.274849: step: 180/466, loss: 0.6766749024391174 2023-01-24 00:59:27.967926: step: 182/466, loss: 1.502366304397583 2023-01-24 00:59:28.588206: step: 184/466, loss: 0.789996325969696 2023-01-24 00:59:29.176131: step: 186/466, loss: 1.4622113704681396 2023-01-24 00:59:29.825725: step: 188/466, loss: 0.2746245265007019 2023-01-24 00:59:30.489765: step: 190/466, loss: 0.450369656085968 2023-01-24 00:59:31.086235: step: 192/466, loss: 0.6498920917510986 2023-01-24 00:59:31.746387: step: 194/466, loss: 0.5333251953125 2023-01-24 00:59:32.445840: step: 196/466, loss: 0.5039653182029724 2023-01-24 00:59:33.093354: step: 198/466, loss: 0.7706683874130249 2023-01-24 00:59:33.702696: step: 200/466, loss: 1.9963417053222656 2023-01-24 00:59:34.348121: step: 202/466, loss: 0.7868274450302124 2023-01-24 00:59:34.920421: step: 204/466, loss: 2.225334882736206 2023-01-24 00:59:35.627260: step: 206/466, loss: 1.7012133598327637 2023-01-24 00:59:36.237125: step: 208/466, loss: 1.375243902206421 2023-01-24 00:59:36.800332: step: 210/466, loss: 0.24912454187870026 2023-01-24 00:59:37.394155: step: 212/466, loss: 0.4540861248970032 2023-01-24 00:59:38.083610: step: 214/466, loss: 0.18188805878162384 2023-01-24 00:59:38.626960: step: 216/466, loss: 1.0026720762252808 2023-01-24 00:59:39.276555: step: 218/466, loss: 0.1911715269088745 2023-01-24 00:59:39.907836: step: 220/466, loss: 1.7427427768707275 2023-01-24 00:59:40.533981: step: 222/466, loss: 0.2735958397388458 2023-01-24 00:59:41.237424: step: 224/466, loss: 1.141740322113037 2023-01-24 00:59:41.830148: step: 226/466, loss: 0.9161017537117004 2023-01-24 00:59:42.450073: step: 228/466, loss: 0.6430374383926392 2023-01-24 00:59:43.122123: step: 230/466, loss: 0.2332015037536621 2023-01-24 00:59:43.695478: step: 232/466, loss: 0.7723779678344727 2023-01-24 00:59:44.312131: step: 234/466, loss: 1.0226749181747437 2023-01-24 00:59:44.931218: step: 236/466, loss: 1.4921715259552002 2023-01-24 00:59:45.622563: step: 238/466, loss: 0.9061295390129089 2023-01-24 00:59:46.231764: step: 240/466, loss: 0.632529616355896 2023-01-24 00:59:46.867347: step: 242/466, loss: 0.4292992651462555 2023-01-24 00:59:47.520162: step: 244/466, loss: 5.121511459350586 2023-01-24 00:59:48.116754: step: 246/466, loss: 0.35150039196014404 2023-01-24 00:59:48.643193: step: 248/466, loss: 0.8618891835212708 2023-01-24 00:59:49.194141: step: 250/466, loss: 0.5347373485565186 2023-01-24 00:59:49.795604: step: 252/466, loss: 1.1457524299621582 2023-01-24 00:59:50.434109: step: 254/466, loss: 0.5770044922828674 2023-01-24 00:59:51.039917: step: 256/466, loss: 0.24066293239593506 2023-01-24 00:59:51.662971: step: 258/466, loss: 1.601750135421753 2023-01-24 00:59:52.321586: step: 260/466, loss: 1.0965805053710938 2023-01-24 00:59:52.965939: step: 262/466, loss: 0.6310383677482605 2023-01-24 00:59:53.600255: step: 264/466, loss: 0.7483865022659302 2023-01-24 00:59:54.197325: step: 266/466, loss: 0.2918630540370941 2023-01-24 00:59:54.783560: step: 268/466, loss: 0.26249969005584717 2023-01-24 00:59:55.435467: step: 270/466, loss: 0.6872038245201111 2023-01-24 00:59:56.060624: step: 272/466, loss: 0.8737920522689819 2023-01-24 00:59:56.663357: step: 274/466, loss: 0.9862958192825317 2023-01-24 00:59:57.279907: step: 276/466, loss: 0.27447277307510376 2023-01-24 00:59:57.894345: step: 278/466, loss: 0.42368435859680176 2023-01-24 00:59:58.580937: step: 280/466, loss: 2.0328128337860107 2023-01-24 00:59:59.172942: step: 282/466, loss: 0.27252835035324097 2023-01-24 00:59:59.826670: step: 284/466, loss: 1.5242037773132324 2023-01-24 01:00:00.510235: step: 286/466, loss: 0.49943602085113525 2023-01-24 01:00:01.087385: step: 288/466, loss: 0.5066887140274048 2023-01-24 01:00:01.718556: step: 290/466, loss: 0.6561540365219116 2023-01-24 01:00:02.328147: step: 292/466, loss: 1.1317684650421143 2023-01-24 01:00:02.885657: step: 294/466, loss: 0.6730414032936096 2023-01-24 01:00:03.519291: step: 296/466, loss: 0.3380054533481598 2023-01-24 01:00:04.145655: step: 298/466, loss: 6.33916711807251 2023-01-24 01:00:04.820997: step: 300/466, loss: 1.1168296337127686 2023-01-24 01:00:05.444218: step: 302/466, loss: 0.9960553646087646 2023-01-24 01:00:06.082427: step: 304/466, loss: 2.274310827255249 2023-01-24 01:00:06.651534: step: 306/466, loss: 0.43179214000701904 2023-01-24 01:00:07.248324: step: 308/466, loss: 0.5681861042976379 2023-01-24 01:00:07.860109: step: 310/466, loss: 0.48849573731422424 2023-01-24 01:00:08.446669: step: 312/466, loss: 0.5110349059104919 2023-01-24 01:00:09.077437: step: 314/466, loss: 0.3079199194908142 2023-01-24 01:00:09.745237: step: 316/466, loss: 0.26568320393562317 2023-01-24 01:00:10.442211: step: 318/466, loss: 0.6109682321548462 2023-01-24 01:00:11.060750: step: 320/466, loss: 0.4193262755870819 2023-01-24 01:00:11.673982: step: 322/466, loss: 1.9041683673858643 2023-01-24 01:00:12.315554: step: 324/466, loss: 0.544060230255127 2023-01-24 01:00:12.857999: step: 326/466, loss: 0.18650314211845398 2023-01-24 01:00:13.457083: step: 328/466, loss: 0.8428384065628052 2023-01-24 01:00:14.067552: step: 330/466, loss: 0.7781572341918945 2023-01-24 01:00:14.692682: step: 332/466, loss: 0.1482735127210617 2023-01-24 01:00:15.255024: step: 334/466, loss: 0.6369302272796631 2023-01-24 01:00:15.918388: step: 336/466, loss: 3.364443302154541 2023-01-24 01:00:16.551962: step: 338/466, loss: 0.43260252475738525 2023-01-24 01:00:17.266535: step: 340/466, loss: 3.7939000129699707 2023-01-24 01:00:17.878950: step: 342/466, loss: 0.2519223392009735 2023-01-24 01:00:18.509725: step: 344/466, loss: 1.088708758354187 2023-01-24 01:00:19.187800: step: 346/466, loss: 0.42403557896614075 2023-01-24 01:00:19.836023: step: 348/466, loss: 0.7898201942443848 2023-01-24 01:00:20.451991: step: 350/466, loss: 0.32544323801994324 2023-01-24 01:00:21.088828: step: 352/466, loss: 1.1407803297042847 2023-01-24 01:00:21.702821: step: 354/466, loss: 0.7153778076171875 2023-01-24 01:00:22.310141: step: 356/466, loss: 0.13664700090885162 2023-01-24 01:00:22.919603: step: 358/466, loss: 0.6526727676391602 2023-01-24 01:00:23.577821: step: 360/466, loss: 2.440610885620117 2023-01-24 01:00:24.241025: step: 362/466, loss: 0.9656744003295898 2023-01-24 01:00:24.815126: step: 364/466, loss: 0.3585507869720459 2023-01-24 01:00:25.423141: step: 366/466, loss: 0.5643820762634277 2023-01-24 01:00:26.051872: step: 368/466, loss: 0.9318143725395203 2023-01-24 01:00:26.646298: step: 370/466, loss: 0.39900118112564087 2023-01-24 01:00:27.244141: step: 372/466, loss: 0.21007654070854187 2023-01-24 01:00:27.873620: step: 374/466, loss: 0.2853618264198303 2023-01-24 01:00:28.564807: step: 376/466, loss: 1.3852022886276245 2023-01-24 01:00:29.190746: step: 378/466, loss: 2.298675298690796 2023-01-24 01:00:29.817342: step: 380/466, loss: 0.691215991973877 2023-01-24 01:00:30.391052: step: 382/466, loss: 0.32657772302627563 2023-01-24 01:00:30.991388: step: 384/466, loss: 0.35438624024391174 2023-01-24 01:00:31.643923: step: 386/466, loss: 0.28829479217529297 2023-01-24 01:00:32.256119: step: 388/466, loss: 2.6161675453186035 2023-01-24 01:00:32.899389: step: 390/466, loss: 1.0138685703277588 2023-01-24 01:00:33.474078: step: 392/466, loss: 0.47145283222198486 2023-01-24 01:00:34.128335: step: 394/466, loss: 0.6214955449104309 2023-01-24 01:00:34.780674: step: 396/466, loss: 0.31575721502304077 2023-01-24 01:00:35.332567: step: 398/466, loss: 0.6875791549682617 2023-01-24 01:00:35.944193: step: 400/466, loss: 1.193183422088623 2023-01-24 01:00:36.633901: step: 402/466, loss: 0.32001224160194397 2023-01-24 01:00:37.234392: step: 404/466, loss: 0.4855941832065582 2023-01-24 01:00:37.869210: step: 406/466, loss: 0.5931132435798645 2023-01-24 01:00:38.479441: step: 408/466, loss: 0.324947327375412 2023-01-24 01:00:39.165228: step: 410/466, loss: 0.7642245292663574 2023-01-24 01:00:39.749958: step: 412/466, loss: 0.48168429732322693 2023-01-24 01:00:40.401030: step: 414/466, loss: 0.5540041923522949 2023-01-24 01:00:41.121385: step: 416/466, loss: 1.211597204208374 2023-01-24 01:00:41.712507: step: 418/466, loss: 3.63765549659729 2023-01-24 01:00:42.337477: step: 420/466, loss: 0.7998482584953308 2023-01-24 01:00:42.972929: step: 422/466, loss: 2.532569408416748 2023-01-24 01:00:43.688065: step: 424/466, loss: 1.9449009895324707 2023-01-24 01:00:44.345831: step: 426/466, loss: 0.1245807558298111 2023-01-24 01:00:44.977281: step: 428/466, loss: 1.633721947669983 2023-01-24 01:00:45.584895: step: 430/466, loss: 0.49564772844314575 2023-01-24 01:00:46.297992: step: 432/466, loss: 0.3808407783508301 2023-01-24 01:00:46.919588: step: 434/466, loss: 0.6128196716308594 2023-01-24 01:00:47.547149: step: 436/466, loss: 0.6539456844329834 2023-01-24 01:00:48.118358: step: 438/466, loss: 1.2176947593688965 2023-01-24 01:00:48.707284: step: 440/466, loss: 0.6139934062957764 2023-01-24 01:00:49.363300: step: 442/466, loss: 0.3103480041027069 2023-01-24 01:00:50.065634: step: 444/466, loss: 1.0488853454589844 2023-01-24 01:00:50.723900: step: 446/466, loss: 0.5255616903305054 2023-01-24 01:00:51.335122: step: 448/466, loss: 0.39966022968292236 2023-01-24 01:00:51.966892: step: 450/466, loss: 0.35611096024513245 2023-01-24 01:00:52.594542: step: 452/466, loss: 0.34888365864753723 2023-01-24 01:00:53.212907: step: 454/466, loss: 0.19215558469295502 2023-01-24 01:00:53.813311: step: 456/466, loss: 4.303584575653076 2023-01-24 01:00:54.436363: step: 458/466, loss: 1.0213844776153564 2023-01-24 01:00:55.047941: step: 460/466, loss: 0.5794013142585754 2023-01-24 01:00:55.660069: step: 462/466, loss: 0.6341109275817871 2023-01-24 01:00:56.303937: step: 464/466, loss: 0.953895628452301 2023-01-24 01:00:56.938692: step: 466/466, loss: 0.20153357088565826 2023-01-24 01:00:57.616174: step: 468/466, loss: 0.1921996772289276 2023-01-24 01:00:58.241511: step: 470/466, loss: 0.45410609245300293 2023-01-24 01:00:58.830300: step: 472/466, loss: 0.4120723009109497 2023-01-24 01:00:59.438841: step: 474/466, loss: 0.7399694323539734 2023-01-24 01:01:00.064260: step: 476/466, loss: 0.752868115901947 2023-01-24 01:01:00.720215: step: 478/466, loss: 0.5677819848060608 2023-01-24 01:01:01.371395: step: 480/466, loss: 0.5402620434761047 2023-01-24 01:01:02.007380: step: 482/466, loss: 0.481319785118103 2023-01-24 01:01:02.625699: step: 484/466, loss: 1.7091079950332642 2023-01-24 01:01:03.247449: step: 486/466, loss: 1.1242730617523193 2023-01-24 01:01:03.905259: step: 488/466, loss: 0.3815958499908447 2023-01-24 01:01:04.549410: step: 490/466, loss: 0.56468266248703 2023-01-24 01:01:05.157038: step: 492/466, loss: 0.29745882749557495 2023-01-24 01:01:05.741628: step: 494/466, loss: 0.17169640958309174 2023-01-24 01:01:06.331205: step: 496/466, loss: 0.3405081629753113 2023-01-24 01:01:06.932213: step: 498/466, loss: 0.503221869468689 2023-01-24 01:01:07.530714: step: 500/466, loss: 0.9509025812149048 2023-01-24 01:01:08.090645: step: 502/466, loss: 0.1658211201429367 2023-01-24 01:01:08.795051: step: 504/466, loss: 0.47876477241516113 2023-01-24 01:01:09.392934: step: 506/466, loss: 1.4690961837768555 2023-01-24 01:01:09.975511: step: 508/466, loss: 0.7093818187713623 2023-01-24 01:01:10.556954: step: 510/466, loss: 0.7716629505157471 2023-01-24 01:01:11.115937: step: 512/466, loss: 0.9928057193756104 2023-01-24 01:01:11.766449: step: 514/466, loss: 0.41204598546028137 2023-01-24 01:01:12.430572: step: 516/466, loss: 0.43097513914108276 2023-01-24 01:01:13.052518: step: 518/466, loss: 0.4565886855125427 2023-01-24 01:01:13.654318: step: 520/466, loss: 0.6934749484062195 2023-01-24 01:01:14.388095: step: 522/466, loss: 1.2867885828018188 2023-01-24 01:01:14.995312: step: 524/466, loss: 0.7034398317337036 2023-01-24 01:01:15.602628: step: 526/466, loss: 0.894005537033081 2023-01-24 01:01:16.193720: step: 528/466, loss: 0.2675049602985382 2023-01-24 01:01:16.834733: step: 530/466, loss: 0.5433415770530701 2023-01-24 01:01:17.474009: step: 532/466, loss: 0.2747874855995178 2023-01-24 01:01:18.093669: step: 534/466, loss: 0.5573550462722778 2023-01-24 01:01:18.665460: step: 536/466, loss: 3.402272939682007 2023-01-24 01:01:19.298129: step: 538/466, loss: 1.3509525060653687 2023-01-24 01:01:19.880010: step: 540/466, loss: 1.3163940906524658 2023-01-24 01:01:20.494966: step: 542/466, loss: 0.9249829053878784 2023-01-24 01:01:21.138723: step: 544/466, loss: 0.9020353555679321 2023-01-24 01:01:21.739298: step: 546/466, loss: 0.6468010544776917 2023-01-24 01:01:22.337866: step: 548/466, loss: 0.1497478038072586 2023-01-24 01:01:22.985733: step: 550/466, loss: 2.2226760387420654 2023-01-24 01:01:23.645954: step: 552/466, loss: 1.034489393234253 2023-01-24 01:01:24.295734: step: 554/466, loss: 1.2668101787567139 2023-01-24 01:01:24.864568: step: 556/466, loss: 0.41327354311943054 2023-01-24 01:01:25.540949: step: 558/466, loss: 0.5404168367385864 2023-01-24 01:01:26.151565: step: 560/466, loss: 0.5784534811973572 2023-01-24 01:01:26.754831: step: 562/466, loss: 1.1106534004211426 2023-01-24 01:01:27.437505: step: 564/466, loss: 0.853075385093689 2023-01-24 01:01:28.040239: step: 566/466, loss: 1.339813470840454 2023-01-24 01:01:28.641332: step: 568/466, loss: 0.4004724621772766 2023-01-24 01:01:29.216520: step: 570/466, loss: 0.26823610067367554 2023-01-24 01:01:29.859322: step: 572/466, loss: 2.8942439556121826 2023-01-24 01:01:30.516047: step: 574/466, loss: 0.3049118220806122 2023-01-24 01:01:31.207264: step: 576/466, loss: 1.6497358083724976 2023-01-24 01:01:31.886551: step: 578/466, loss: 0.47421208024024963 2023-01-24 01:01:32.524431: step: 580/466, loss: 1.4139649868011475 2023-01-24 01:01:33.170955: step: 582/466, loss: 0.91805100440979 2023-01-24 01:01:33.786183: step: 584/466, loss: 0.918079137802124 2023-01-24 01:01:34.437866: step: 586/466, loss: 1.3088068962097168 2023-01-24 01:01:35.013805: step: 588/466, loss: 1.1106313467025757 2023-01-24 01:01:35.622274: step: 590/466, loss: 0.6188311576843262 2023-01-24 01:01:36.234654: step: 592/466, loss: 0.36715516448020935 2023-01-24 01:01:36.819576: step: 594/466, loss: 0.8313745260238647 2023-01-24 01:01:37.445873: step: 596/466, loss: 0.2605927884578705 2023-01-24 01:01:38.038713: step: 598/466, loss: 1.2984670400619507 2023-01-24 01:01:38.645278: step: 600/466, loss: 0.2875438630580902 2023-01-24 01:01:39.273768: step: 602/466, loss: 0.7626654505729675 2023-01-24 01:01:39.929570: step: 604/466, loss: 0.46695560216903687 2023-01-24 01:01:40.573574: step: 606/466, loss: 0.5650315880775452 2023-01-24 01:01:41.159082: step: 608/466, loss: 0.2230704426765442 2023-01-24 01:01:41.748797: step: 610/466, loss: 0.7090718150138855 2023-01-24 01:01:42.385140: step: 612/466, loss: 1.0561984777450562 2023-01-24 01:01:43.042975: step: 614/466, loss: 0.34719061851501465 2023-01-24 01:01:43.639887: step: 616/466, loss: 2.2053418159484863 2023-01-24 01:01:44.245541: step: 618/466, loss: 2.624155044555664 2023-01-24 01:01:44.909547: step: 620/466, loss: 1.5863568782806396 2023-01-24 01:01:45.584970: step: 622/466, loss: 0.5461592674255371 2023-01-24 01:01:46.272606: step: 624/466, loss: 0.5462601184844971 2023-01-24 01:01:46.891606: step: 626/466, loss: 1.3564051389694214 2023-01-24 01:01:47.509547: step: 628/466, loss: 0.18615445494651794 2023-01-24 01:01:48.148321: step: 630/466, loss: 0.8325631022453308 2023-01-24 01:01:48.807468: step: 632/466, loss: 0.6906343102455139 2023-01-24 01:01:49.429511: step: 634/466, loss: 0.4399493336677551 2023-01-24 01:01:50.132356: step: 636/466, loss: 0.5468199253082275 2023-01-24 01:01:50.776166: step: 638/466, loss: 0.7452964186668396 2023-01-24 01:01:51.444854: step: 640/466, loss: 1.1611220836639404 2023-01-24 01:01:52.000520: step: 642/466, loss: 0.549014687538147 2023-01-24 01:01:52.629114: step: 644/466, loss: 0.3178991973400116 2023-01-24 01:01:53.269347: step: 646/466, loss: 1.5364816188812256 2023-01-24 01:01:53.877150: step: 648/466, loss: 1.1733858585357666 2023-01-24 01:01:54.516165: step: 650/466, loss: 0.8622576594352722 2023-01-24 01:01:55.167923: step: 652/466, loss: 0.8693770170211792 2023-01-24 01:01:55.788911: step: 654/466, loss: 0.4081299304962158 2023-01-24 01:01:56.440544: step: 656/466, loss: 0.7796466946601868 2023-01-24 01:01:57.093333: step: 658/466, loss: 0.8079643845558167 2023-01-24 01:01:57.678202: step: 660/466, loss: 1.3321342468261719 2023-01-24 01:01:58.255752: step: 662/466, loss: 0.5625272989273071 2023-01-24 01:01:58.902274: step: 664/466, loss: 0.6250572204589844 2023-01-24 01:01:59.486647: step: 666/466, loss: 0.3744121193885803 2023-01-24 01:02:00.063649: step: 668/466, loss: 0.9476857781410217 2023-01-24 01:02:00.679110: step: 670/466, loss: 0.280961275100708 2023-01-24 01:02:01.269282: step: 672/466, loss: 0.34791773557662964 2023-01-24 01:02:01.918119: step: 674/466, loss: 1.0845293998718262 2023-01-24 01:02:02.527142: step: 676/466, loss: 0.43291908502578735 2023-01-24 01:02:03.164707: step: 678/466, loss: 1.20071280002594 2023-01-24 01:02:03.764235: step: 680/466, loss: 0.8086950778961182 2023-01-24 01:02:04.349723: step: 682/466, loss: 0.5120579600334167 2023-01-24 01:02:04.986017: step: 684/466, loss: 0.47946423292160034 2023-01-24 01:02:05.587139: step: 686/466, loss: 0.5221784114837646 2023-01-24 01:02:06.185617: step: 688/466, loss: 7.150136947631836 2023-01-24 01:02:06.786955: step: 690/466, loss: 0.1715814769268036 2023-01-24 01:02:07.429587: step: 692/466, loss: 2.3639845848083496 2023-01-24 01:02:08.034508: step: 694/466, loss: 0.6342852115631104 2023-01-24 01:02:08.601549: step: 696/466, loss: 0.5407548546791077 2023-01-24 01:02:09.315289: step: 698/466, loss: 0.4505281448364258 2023-01-24 01:02:10.036285: step: 700/466, loss: 0.5976901054382324 2023-01-24 01:02:10.637318: step: 702/466, loss: 3.942974328994751 2023-01-24 01:02:11.313658: step: 704/466, loss: 0.6684309244155884 2023-01-24 01:02:11.932295: step: 706/466, loss: 0.8447902798652649 2023-01-24 01:02:12.549544: step: 708/466, loss: 1.9354099035263062 2023-01-24 01:02:13.181186: step: 710/466, loss: 0.29163914918899536 2023-01-24 01:02:13.845338: step: 712/466, loss: 0.6639055013656616 2023-01-24 01:02:14.560805: step: 714/466, loss: 0.558269739151001 2023-01-24 01:02:15.267447: step: 716/466, loss: 6.34437370300293 2023-01-24 01:02:15.895974: step: 718/466, loss: 0.8044198751449585 2023-01-24 01:02:16.587721: step: 720/466, loss: 0.1624743640422821 2023-01-24 01:02:17.171784: step: 722/466, loss: 1.1905999183654785 2023-01-24 01:02:17.842593: step: 724/466, loss: 0.8427300453186035 2023-01-24 01:02:18.464035: step: 726/466, loss: 0.21977035701274872 2023-01-24 01:02:19.112471: step: 728/466, loss: 0.23939011991024017 2023-01-24 01:02:19.784079: step: 730/466, loss: 1.2132254838943481 2023-01-24 01:02:20.486531: step: 732/466, loss: 1.0316905975341797 2023-01-24 01:02:21.317148: step: 734/466, loss: 1.0953068733215332 2023-01-24 01:02:21.941410: step: 736/466, loss: 0.48757675290107727 2023-01-24 01:02:22.542979: step: 738/466, loss: 0.36855441331863403 2023-01-24 01:02:23.181876: step: 740/466, loss: 0.9813733100891113 2023-01-24 01:02:23.848360: step: 742/466, loss: 0.45953619480133057 2023-01-24 01:02:24.457442: step: 744/466, loss: 1.2267102003097534 2023-01-24 01:02:25.067889: step: 746/466, loss: 0.2407502830028534 2023-01-24 01:02:25.724051: step: 748/466, loss: 0.585015058517456 2023-01-24 01:02:26.357151: step: 750/466, loss: 0.6010576486587524 2023-01-24 01:02:26.980407: step: 752/466, loss: 0.994664192199707 2023-01-24 01:02:27.560407: step: 754/466, loss: 0.9954037666320801 2023-01-24 01:02:28.179325: step: 756/466, loss: 0.8531153202056885 2023-01-24 01:02:28.852558: step: 758/466, loss: 0.7809121608734131 2023-01-24 01:02:29.549859: step: 760/466, loss: 1.1692357063293457 2023-01-24 01:02:30.141177: step: 762/466, loss: 0.8527080416679382 2023-01-24 01:02:30.751016: step: 764/466, loss: 1.1707797050476074 2023-01-24 01:02:31.374042: step: 766/466, loss: 5.228311061859131 2023-01-24 01:02:32.015562: step: 768/466, loss: 0.7698712944984436 2023-01-24 01:02:32.649974: step: 770/466, loss: 2.095247268676758 2023-01-24 01:02:33.280212: step: 772/466, loss: 0.5572216510772705 2023-01-24 01:02:33.915943: step: 774/466, loss: 1.3784372806549072 2023-01-24 01:02:34.520326: step: 776/466, loss: 0.3510480523109436 2023-01-24 01:02:35.227108: step: 778/466, loss: 1.1574081182479858 2023-01-24 01:02:36.023503: step: 780/466, loss: 0.28958892822265625 2023-01-24 01:02:36.662198: step: 782/466, loss: 0.5354122519493103 2023-01-24 01:02:37.263602: step: 784/466, loss: 0.5628578066825867 2023-01-24 01:02:37.914373: step: 786/466, loss: 0.34347057342529297 2023-01-24 01:02:38.550038: step: 788/466, loss: 0.30458784103393555 2023-01-24 01:02:39.239571: step: 790/466, loss: 0.7543378472328186 2023-01-24 01:02:39.955857: step: 792/466, loss: 0.5978944301605225 2023-01-24 01:02:40.576866: step: 794/466, loss: 0.8760815262794495 2023-01-24 01:02:41.193622: step: 796/466, loss: 2.8389697074890137 2023-01-24 01:02:41.772910: step: 798/466, loss: 1.1012701988220215 2023-01-24 01:02:42.449124: step: 800/466, loss: 0.6880276203155518 2023-01-24 01:02:43.067205: step: 802/466, loss: 0.4630478024482727 2023-01-24 01:02:43.753886: step: 804/466, loss: 0.2929562032222748 2023-01-24 01:02:44.379438: step: 806/466, loss: 0.42517170310020447 2023-01-24 01:02:44.998484: step: 808/466, loss: 0.29620039463043213 2023-01-24 01:02:45.650910: step: 810/466, loss: 1.1095701456069946 2023-01-24 01:02:46.211548: step: 812/466, loss: 0.15149930119514465 2023-01-24 01:02:46.783302: step: 814/466, loss: 0.45719343423843384 2023-01-24 01:02:47.387467: step: 816/466, loss: 0.2268422544002533 2023-01-24 01:02:48.074120: step: 818/466, loss: 0.35965633392333984 2023-01-24 01:02:48.789313: step: 820/466, loss: 0.3629857897758484 2023-01-24 01:02:49.470479: step: 822/466, loss: 0.45110806822776794 2023-01-24 01:02:50.093299: step: 824/466, loss: 4.482956409454346 2023-01-24 01:02:50.719768: step: 826/466, loss: 0.6074123382568359 2023-01-24 01:02:51.371351: step: 828/466, loss: 0.16443508863449097 2023-01-24 01:02:52.021731: step: 830/466, loss: 0.6019780039787292 2023-01-24 01:02:52.714882: step: 832/466, loss: 0.2932056486606598 2023-01-24 01:02:53.275569: step: 834/466, loss: 0.7100816965103149 2023-01-24 01:02:53.838323: step: 836/466, loss: 2.600250482559204 2023-01-24 01:02:54.465804: step: 838/466, loss: 2.4224514961242676 2023-01-24 01:02:55.082503: step: 840/466, loss: 0.4868653118610382 2023-01-24 01:02:55.704949: step: 842/466, loss: 2.266256332397461 2023-01-24 01:02:56.342115: step: 844/466, loss: 0.674579918384552 2023-01-24 01:02:56.924812: step: 846/466, loss: 0.46162062883377075 2023-01-24 01:02:57.563117: step: 848/466, loss: 0.2178461253643036 2023-01-24 01:02:58.256311: step: 850/466, loss: 0.7983039021492004 2023-01-24 01:02:58.913116: step: 852/466, loss: 1.6612443923950195 2023-01-24 01:02:59.480290: step: 854/466, loss: 4.634618759155273 2023-01-24 01:03:00.105228: step: 856/466, loss: 1.1053825616836548 2023-01-24 01:03:00.776684: step: 858/466, loss: 0.2669443190097809 2023-01-24 01:03:01.366740: step: 860/466, loss: 0.5047089457511902 2023-01-24 01:03:02.013789: step: 862/466, loss: 0.6695466637611389 2023-01-24 01:03:02.622812: step: 864/466, loss: 3.249666213989258 2023-01-24 01:03:03.267184: step: 866/466, loss: 0.6248589754104614 2023-01-24 01:03:03.896429: step: 868/466, loss: 2.089353084564209 2023-01-24 01:03:04.499567: step: 870/466, loss: 1.4833861589431763 2023-01-24 01:03:05.188198: step: 872/466, loss: 0.49304646253585815 2023-01-24 01:03:05.808885: step: 874/466, loss: 0.20364585518836975 2023-01-24 01:03:06.413743: step: 876/466, loss: 0.41799670457839966 2023-01-24 01:03:07.056302: step: 878/466, loss: 1.991162896156311 2023-01-24 01:03:07.685616: step: 880/466, loss: 0.585088849067688 2023-01-24 01:03:08.227781: step: 882/466, loss: 1.423997402191162 2023-01-24 01:03:08.940146: step: 884/466, loss: 0.5289191007614136 2023-01-24 01:03:09.522353: step: 886/466, loss: 0.7948544025421143 2023-01-24 01:03:10.151161: step: 888/466, loss: 0.3936300277709961 2023-01-24 01:03:10.735373: step: 890/466, loss: 4.33754301071167 2023-01-24 01:03:11.384944: step: 892/466, loss: 1.7852764129638672 2023-01-24 01:03:11.996634: step: 894/466, loss: 0.3783182203769684 2023-01-24 01:03:12.585777: step: 896/466, loss: 0.40759915113449097 2023-01-24 01:03:13.173271: step: 898/466, loss: 0.35211068391799927 2023-01-24 01:03:13.743190: step: 900/466, loss: 1.551925539970398 2023-01-24 01:03:14.441963: step: 902/466, loss: 0.40734782814979553 2023-01-24 01:03:15.112814: step: 904/466, loss: 1.77029287815094 2023-01-24 01:03:15.691694: step: 906/466, loss: 3.029491901397705 2023-01-24 01:03:16.333174: step: 908/466, loss: 0.45158347487449646 2023-01-24 01:03:16.959788: step: 910/466, loss: 2.3562979698181152 2023-01-24 01:03:17.596887: step: 912/466, loss: 1.232257604598999 2023-01-24 01:03:18.241731: step: 914/466, loss: 1.3863972425460815 2023-01-24 01:03:18.858083: step: 916/466, loss: 0.934714138507843 2023-01-24 01:03:19.491025: step: 918/466, loss: 0.5982561111450195 2023-01-24 01:03:20.122326: step: 920/466, loss: 0.21740776300430298 2023-01-24 01:03:20.794845: step: 922/466, loss: 0.20542573928833008 2023-01-24 01:03:21.415239: step: 924/466, loss: 0.2219095677137375 2023-01-24 01:03:22.017940: step: 926/466, loss: 1.0753498077392578 2023-01-24 01:03:22.670592: step: 928/466, loss: 0.4892817437648773 2023-01-24 01:03:23.318103: step: 930/466, loss: 1.2177451848983765 2023-01-24 01:03:23.966362: step: 932/466, loss: 0.2971642017364502 ================================================== Loss: 0.944 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3593968005404078, 'r': 0.2659672717471709, 'f1': 0.3057028401543273}, 'combined': 0.22525472432424115, 'epoch': 5} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3421585747424842, 'r': 0.21883312639356714, 'f1': 0.26694024343545875}, 'combined': 0.16714949822594147, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3629078025018788, 'r': 0.2782063609312316, 'f1': 0.3149618737073234}, 'combined': 0.23207717010013304, 'epoch': 5} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3524047155382967, 'r': 0.22899945414040943, 'f1': 0.2776054651942819}, 'combined': 0.17203718969786483, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.331226056567194, 'r': 0.26900332487810064, 'f1': 0.2968895334256734}, 'combined': 0.21876070883996987, 'epoch': 5} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3891944615679512, 'r': 0.22817272408093975, 'f1': 0.2876847443708432}, 'combined': 0.19084037497867817, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.23214285714285715, 'f1': 0.26639344262295084}, 'combined': 0.17759562841530055, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.39285714285714285, 'r': 0.2391304347826087, 'f1': 0.2972972972972973}, 'combined': 0.14864864864864866, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.10344827586206896, 'f1': 0.15384615384615385}, 'combined': 0.10256410256410256, 'epoch': 5} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35266479985310323, 'r': 0.2603161425860667, 'f1': 0.2995340767311292}, 'combined': 0.2207093196966215, 'epoch': 4} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35061171058748686, 'r': 0.22688501488377985, 'f1': 0.27549435231915403}, 'combined': 0.17250580939610582, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40865384615384615, 'r': 0.30357142857142855, 'f1': 0.3483606557377049}, 'combined': 0.23224043715846993, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3629078025018788, 'r': 0.2782063609312316, 'f1': 0.3149618737073234}, 'combined': 0.23207717010013304, 'epoch': 5} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3524047155382967, 'r': 0.22899945414040943, 'f1': 0.2776054651942819}, 'combined': 0.17203718969786483, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.39285714285714285, 'r': 0.2391304347826087, 'f1': 0.2972972972972973}, 'combined': 0.14864864864864866, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31668397009966776, 'r': 0.2583948902141502, 'f1': 0.28458538587848936}, 'combined': 0.20969449485783426, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.39165853086307634, 'r': 0.23329840286067724, 'f1': 0.2924147402371384}, 'combined': 0.19397809500879482, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.1896551724137931, 'f1': 0.2619047619047619}, 'combined': 0.1746031746031746, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:06:05.327999: step: 2/466, loss: 0.786810040473938 2023-01-24 01:06:05.956620: step: 4/466, loss: 1.206434726715088 2023-01-24 01:06:06.546125: step: 6/466, loss: 1.0188630819320679 2023-01-24 01:06:07.214350: step: 8/466, loss: 0.14964409172534943 2023-01-24 01:06:07.851165: step: 10/466, loss: 0.21679998934268951 2023-01-24 01:06:08.481247: step: 12/466, loss: 1.7508819103240967 2023-01-24 01:06:09.111840: step: 14/466, loss: 2.308197498321533 2023-01-24 01:06:09.689597: step: 16/466, loss: 0.463043212890625 2023-01-24 01:06:10.315126: step: 18/466, loss: 0.18147246539592743 2023-01-24 01:06:10.940444: step: 20/466, loss: 0.17629674077033997 2023-01-24 01:06:11.571683: step: 22/466, loss: 0.5790026783943176 2023-01-24 01:06:12.246606: step: 24/466, loss: 1.3718117475509644 2023-01-24 01:06:12.837217: step: 26/466, loss: 0.25628361105918884 2023-01-24 01:06:13.485123: step: 28/466, loss: 0.8798097372055054 2023-01-24 01:06:14.108552: step: 30/466, loss: 0.3664752244949341 2023-01-24 01:06:14.689063: step: 32/466, loss: 1.7821688652038574 2023-01-24 01:06:15.303621: step: 34/466, loss: 0.6296994090080261 2023-01-24 01:06:15.999482: step: 36/466, loss: 5.59828519821167 2023-01-24 01:06:16.594299: step: 38/466, loss: 0.8631159663200378 2023-01-24 01:06:17.167866: step: 40/466, loss: 0.21120579540729523 2023-01-24 01:06:17.813316: step: 42/466, loss: 2.49690580368042 2023-01-24 01:06:18.460253: step: 44/466, loss: 0.40758389234542847 2023-01-24 01:06:19.110066: step: 46/466, loss: 0.4912774860858917 2023-01-24 01:06:19.727662: step: 48/466, loss: 0.09327677637338638 2023-01-24 01:06:20.344138: step: 50/466, loss: 0.6095417737960815 2023-01-24 01:06:21.004811: step: 52/466, loss: 0.43752914667129517 2023-01-24 01:06:21.626805: step: 54/466, loss: 0.5212745070457458 2023-01-24 01:06:22.248652: step: 56/466, loss: 0.7521660327911377 2023-01-24 01:06:22.910278: step: 58/466, loss: 0.48838797211647034 2023-01-24 01:06:23.547183: step: 60/466, loss: 0.6393229961395264 2023-01-24 01:06:24.147644: step: 62/466, loss: 0.7894414663314819 2023-01-24 01:06:24.906155: step: 64/466, loss: 1.2035799026489258 2023-01-24 01:06:25.586270: step: 66/466, loss: 1.823659896850586 2023-01-24 01:06:26.199825: step: 68/466, loss: 1.1329340934753418 2023-01-24 01:06:26.928958: step: 70/466, loss: 0.5138535499572754 2023-01-24 01:06:27.505309: step: 72/466, loss: 0.24458928406238556 2023-01-24 01:06:28.142143: step: 74/466, loss: 0.41312578320503235 2023-01-24 01:06:28.797392: step: 76/466, loss: 0.9526811242103577 2023-01-24 01:06:29.408760: step: 78/466, loss: 0.23132620751857758 2023-01-24 01:06:30.065647: step: 80/466, loss: 2.6661341190338135 2023-01-24 01:06:30.758432: step: 82/466, loss: 0.8018032312393188 2023-01-24 01:06:31.432040: step: 84/466, loss: 0.6294028162956238 2023-01-24 01:06:32.081375: step: 86/466, loss: 0.5024424195289612 2023-01-24 01:06:32.699116: step: 88/466, loss: 1.4013025760650635 2023-01-24 01:06:33.245924: step: 90/466, loss: 5.564722061157227 2023-01-24 01:06:33.848670: step: 92/466, loss: 1.7727617025375366 2023-01-24 01:06:34.508238: step: 94/466, loss: 0.2613389790058136 2023-01-24 01:06:35.121125: step: 96/466, loss: 0.3380162715911865 2023-01-24 01:06:35.811601: step: 98/466, loss: 0.1322072297334671 2023-01-24 01:06:36.486931: step: 100/466, loss: 1.1087273359298706 2023-01-24 01:06:37.095364: step: 102/466, loss: 0.4166465103626251 2023-01-24 01:06:37.654877: step: 104/466, loss: 0.5383948683738708 2023-01-24 01:06:38.384625: step: 106/466, loss: 0.6129652857780457 2023-01-24 01:06:39.007300: step: 108/466, loss: 0.20186620950698853 2023-01-24 01:06:39.600906: step: 110/466, loss: 0.6196213960647583 2023-01-24 01:06:40.203589: step: 112/466, loss: 0.6186575889587402 2023-01-24 01:06:40.792000: step: 114/466, loss: 0.531226634979248 2023-01-24 01:06:41.389979: step: 116/466, loss: 0.40110477805137634 2023-01-24 01:06:42.060585: step: 118/466, loss: 0.6995194554328918 2023-01-24 01:06:42.626318: step: 120/466, loss: 0.1502196490764618 2023-01-24 01:06:43.373855: step: 122/466, loss: 0.43456703424453735 2023-01-24 01:06:44.020890: step: 124/466, loss: 0.8035438656806946 2023-01-24 01:06:44.634133: step: 126/466, loss: 2.1317715644836426 2023-01-24 01:06:45.292864: step: 128/466, loss: 0.7160360813140869 2023-01-24 01:06:45.866931: step: 130/466, loss: 0.1807565689086914 2023-01-24 01:06:46.498950: step: 132/466, loss: 0.5092267394065857 2023-01-24 01:06:47.155449: step: 134/466, loss: 0.30304017663002014 2023-01-24 01:06:47.801856: step: 136/466, loss: 0.3935609459877014 2023-01-24 01:06:48.424144: step: 138/466, loss: 0.5923236012458801 2023-01-24 01:06:48.975026: step: 140/466, loss: 0.5156057476997375 2023-01-24 01:06:49.632639: step: 142/466, loss: 0.33324167132377625 2023-01-24 01:06:50.287544: step: 144/466, loss: 0.276790589094162 2023-01-24 01:06:50.946244: step: 146/466, loss: 0.7008892893791199 2023-01-24 01:06:51.567040: step: 148/466, loss: 4.521030902862549 2023-01-24 01:06:52.195699: step: 150/466, loss: 0.36508435010910034 2023-01-24 01:06:52.820787: step: 152/466, loss: 0.21985134482383728 2023-01-24 01:06:53.443256: step: 154/466, loss: 3.0521111488342285 2023-01-24 01:06:54.094448: step: 156/466, loss: 0.5557551980018616 2023-01-24 01:06:54.689837: step: 158/466, loss: 0.9050538539886475 2023-01-24 01:06:55.285033: step: 160/466, loss: 0.48003241419792175 2023-01-24 01:06:55.952854: step: 162/466, loss: 9.193645477294922 2023-01-24 01:06:56.619751: step: 164/466, loss: 1.3696081638336182 2023-01-24 01:06:57.240339: step: 166/466, loss: 0.286681592464447 2023-01-24 01:06:57.921719: step: 168/466, loss: 0.6474204659461975 2023-01-24 01:06:58.591560: step: 170/466, loss: 1.1203362941741943 2023-01-24 01:06:59.203591: step: 172/466, loss: 0.9201343059539795 2023-01-24 01:06:59.805076: step: 174/466, loss: 0.40289899706840515 2023-01-24 01:07:00.418237: step: 176/466, loss: 0.31696873903274536 2023-01-24 01:07:01.031217: step: 178/466, loss: 0.8729555010795593 2023-01-24 01:07:01.666306: step: 180/466, loss: 0.2728038430213928 2023-01-24 01:07:02.307946: step: 182/466, loss: 0.4005033075809479 2023-01-24 01:07:02.893254: step: 184/466, loss: 0.15636737644672394 2023-01-24 01:07:03.454417: step: 186/466, loss: 0.2103724181652069 2023-01-24 01:07:04.095953: step: 188/466, loss: 0.8750123381614685 2023-01-24 01:07:04.744860: step: 190/466, loss: 0.23224452137947083 2023-01-24 01:07:05.388882: step: 192/466, loss: 0.7866896986961365 2023-01-24 01:07:06.032043: step: 194/466, loss: 0.43896278738975525 2023-01-24 01:07:06.641703: step: 196/466, loss: 0.2491196095943451 2023-01-24 01:07:07.262862: step: 198/466, loss: 0.5458095073699951 2023-01-24 01:07:07.982095: step: 200/466, loss: 0.5856198072433472 2023-01-24 01:07:08.618596: step: 202/466, loss: 1.415247917175293 2023-01-24 01:07:09.248755: step: 204/466, loss: 0.8848930597305298 2023-01-24 01:07:09.886286: step: 206/466, loss: 0.5740233659744263 2023-01-24 01:07:10.544023: step: 208/466, loss: 0.3295213282108307 2023-01-24 01:07:11.184053: step: 210/466, loss: 0.3278895318508148 2023-01-24 01:07:11.915054: step: 212/466, loss: 0.34687161445617676 2023-01-24 01:07:12.561362: step: 214/466, loss: 1.2130358219146729 2023-01-24 01:07:13.106866: step: 216/466, loss: 0.4145267605781555 2023-01-24 01:07:13.753287: step: 218/466, loss: 0.9429118037223816 2023-01-24 01:07:14.396558: step: 220/466, loss: 0.8307474255561829 2023-01-24 01:07:15.004375: step: 222/466, loss: 0.8328704237937927 2023-01-24 01:07:15.603521: step: 224/466, loss: 0.21130898594856262 2023-01-24 01:07:16.296261: step: 226/466, loss: 0.9046894907951355 2023-01-24 01:07:16.960625: step: 228/466, loss: 1.3835208415985107 2023-01-24 01:07:17.603744: step: 230/466, loss: 0.21962091326713562 2023-01-24 01:07:18.234499: step: 232/466, loss: 0.5466563105583191 2023-01-24 01:07:18.864154: step: 234/466, loss: 0.4586387574672699 2023-01-24 01:07:19.500444: step: 236/466, loss: 0.6743621826171875 2023-01-24 01:07:20.082515: step: 238/466, loss: 0.5060661435127258 2023-01-24 01:07:20.710810: step: 240/466, loss: 0.19885101914405823 2023-01-24 01:07:21.343956: step: 242/466, loss: 0.12861591577529907 2023-01-24 01:07:21.923958: step: 244/466, loss: 0.12938374280929565 2023-01-24 01:07:22.530775: step: 246/466, loss: 0.8911042213439941 2023-01-24 01:07:23.157318: step: 248/466, loss: 1.198577642440796 2023-01-24 01:07:23.778921: step: 250/466, loss: 1.077630877494812 2023-01-24 01:07:24.399720: step: 252/466, loss: 0.5633366107940674 2023-01-24 01:07:25.022710: step: 254/466, loss: 0.3750508427619934 2023-01-24 01:07:25.640730: step: 256/466, loss: 0.6132140755653381 2023-01-24 01:07:26.235009: step: 258/466, loss: 0.8923450708389282 2023-01-24 01:07:26.842629: step: 260/466, loss: 0.3184509575366974 2023-01-24 01:07:27.530154: step: 262/466, loss: 0.4965699315071106 2023-01-24 01:07:28.125127: step: 264/466, loss: 0.3206332325935364 2023-01-24 01:07:28.747452: step: 266/466, loss: 1.2157940864562988 2023-01-24 01:07:29.404667: step: 268/466, loss: 0.6410622596740723 2023-01-24 01:07:29.946777: step: 270/466, loss: 1.0094743967056274 2023-01-24 01:07:30.525675: step: 272/466, loss: 0.7588210105895996 2023-01-24 01:07:31.135144: step: 274/466, loss: 0.666674792766571 2023-01-24 01:07:31.773084: step: 276/466, loss: 0.6273010969161987 2023-01-24 01:07:32.409196: step: 278/466, loss: 0.34044626355171204 2023-01-24 01:07:33.012463: step: 280/466, loss: 0.17507882416248322 2023-01-24 01:07:33.669857: step: 282/466, loss: 0.2489982396364212 2023-01-24 01:07:34.289559: step: 284/466, loss: 0.7076356410980225 2023-01-24 01:07:34.903605: step: 286/466, loss: 0.5299989581108093 2023-01-24 01:07:35.496865: step: 288/466, loss: 0.1635860949754715 2023-01-24 01:07:36.122000: step: 290/466, loss: 0.4878239333629608 2023-01-24 01:07:36.772994: step: 292/466, loss: 0.6388272047042847 2023-01-24 01:07:37.378926: step: 294/466, loss: 0.09273450821638107 2023-01-24 01:07:38.087603: step: 296/466, loss: 0.43097102642059326 2023-01-24 01:07:38.869857: step: 298/466, loss: 1.4195430278778076 2023-01-24 01:07:39.478604: step: 300/466, loss: 0.3426002860069275 2023-01-24 01:07:40.183891: step: 302/466, loss: 0.6514154672622681 2023-01-24 01:07:40.884457: step: 304/466, loss: 0.2610836327075958 2023-01-24 01:07:41.673770: step: 306/466, loss: 0.43020790815353394 2023-01-24 01:07:42.275236: step: 308/466, loss: 0.16784794628620148 2023-01-24 01:07:42.921018: step: 310/466, loss: 0.5591654777526855 2023-01-24 01:07:43.552494: step: 312/466, loss: 0.4326792359352112 2023-01-24 01:07:44.173168: step: 314/466, loss: 0.2679749131202698 2023-01-24 01:07:44.799895: step: 316/466, loss: 0.8848189115524292 2023-01-24 01:07:45.386002: step: 318/466, loss: 1.574328899383545 2023-01-24 01:07:46.011170: step: 320/466, loss: 0.28649765253067017 2023-01-24 01:07:46.627537: step: 322/466, loss: 0.7625613808631897 2023-01-24 01:07:47.199453: step: 324/466, loss: 0.26245927810668945 2023-01-24 01:07:47.838214: step: 326/466, loss: 0.6206868886947632 2023-01-24 01:07:48.477042: step: 328/466, loss: 0.3276185691356659 2023-01-24 01:07:49.023001: step: 330/466, loss: 0.5720335245132446 2023-01-24 01:07:49.629854: step: 332/466, loss: 0.5923988819122314 2023-01-24 01:07:50.290253: step: 334/466, loss: 0.7126914858818054 2023-01-24 01:07:50.908108: step: 336/466, loss: 0.10740337520837784 2023-01-24 01:07:51.490022: step: 338/466, loss: 1.55208158493042 2023-01-24 01:07:52.214089: step: 340/466, loss: 0.47114670276641846 2023-01-24 01:07:52.830020: step: 342/466, loss: 0.34138184785842896 2023-01-24 01:07:53.410966: step: 344/466, loss: 0.8074330687522888 2023-01-24 01:07:54.034003: step: 346/466, loss: 0.24854327738285065 2023-01-24 01:07:54.663968: step: 348/466, loss: 0.2922949492931366 2023-01-24 01:07:55.308812: step: 350/466, loss: 0.2991713881492615 2023-01-24 01:07:55.944850: step: 352/466, loss: 0.21093149483203888 2023-01-24 01:07:56.665897: step: 354/466, loss: 0.19404518604278564 2023-01-24 01:07:57.292742: step: 356/466, loss: 0.6495528817176819 2023-01-24 01:07:58.024283: step: 358/466, loss: 0.9190727472305298 2023-01-24 01:07:58.629443: step: 360/466, loss: 0.4907245337963104 2023-01-24 01:07:59.305352: step: 362/466, loss: 1.6439244747161865 2023-01-24 01:07:59.952964: step: 364/466, loss: 0.8146404027938843 2023-01-24 01:08:00.568401: step: 366/466, loss: 0.2428710162639618 2023-01-24 01:08:01.147956: step: 368/466, loss: 0.2377680093050003 2023-01-24 01:08:01.796798: step: 370/466, loss: 0.24059906601905823 2023-01-24 01:08:02.488765: step: 372/466, loss: 0.218387633562088 2023-01-24 01:08:03.166665: step: 374/466, loss: 0.36093229055404663 2023-01-24 01:08:03.784621: step: 376/466, loss: 0.7442221641540527 2023-01-24 01:08:04.404413: step: 378/466, loss: 0.22716902196407318 2023-01-24 01:08:05.061142: step: 380/466, loss: 0.7637770175933838 2023-01-24 01:08:05.679145: step: 382/466, loss: 0.2453354448080063 2023-01-24 01:08:06.329065: step: 384/466, loss: 0.43727368116378784 2023-01-24 01:08:06.975563: step: 386/466, loss: 1.6836223602294922 2023-01-24 01:08:07.606648: step: 388/466, loss: 0.28373491764068604 2023-01-24 01:08:08.226004: step: 390/466, loss: 2.0264334678649902 2023-01-24 01:08:08.904567: step: 392/466, loss: 0.448641836643219 2023-01-24 01:08:09.540017: step: 394/466, loss: 1.5419176816940308 2023-01-24 01:08:10.171419: step: 396/466, loss: 0.6299686431884766 2023-01-24 01:08:10.767179: step: 398/466, loss: 1.0106375217437744 2023-01-24 01:08:11.377992: step: 400/466, loss: 0.420911967754364 2023-01-24 01:08:12.060991: step: 402/466, loss: 0.6072839498519897 2023-01-24 01:08:12.631828: step: 404/466, loss: 0.8522168397903442 2023-01-24 01:08:13.292923: step: 406/466, loss: 0.09603744745254517 2023-01-24 01:08:13.901830: step: 408/466, loss: 0.19519944489002228 2023-01-24 01:08:14.485336: step: 410/466, loss: 0.3113097846508026 2023-01-24 01:08:15.070347: step: 412/466, loss: 0.4202454090118408 2023-01-24 01:08:15.678536: step: 414/466, loss: 0.6679733395576477 2023-01-24 01:08:16.269695: step: 416/466, loss: 0.25027406215667725 2023-01-24 01:08:16.865972: step: 418/466, loss: 0.5896903276443481 2023-01-24 01:08:17.461884: step: 420/466, loss: 0.16288059949874878 2023-01-24 01:08:18.059296: step: 422/466, loss: 0.2254132181406021 2023-01-24 01:08:18.762850: step: 424/466, loss: 0.8087993264198303 2023-01-24 01:08:19.368160: step: 426/466, loss: 0.38509637117385864 2023-01-24 01:08:20.068892: step: 428/466, loss: 0.8836284875869751 2023-01-24 01:08:20.671445: step: 430/466, loss: 0.32001492381095886 2023-01-24 01:08:21.278792: step: 432/466, loss: 0.6224542260169983 2023-01-24 01:08:21.857612: step: 434/466, loss: 0.3766172528266907 2023-01-24 01:08:22.532093: step: 436/466, loss: 0.5045154094696045 2023-01-24 01:08:23.153836: step: 438/466, loss: 5.073431968688965 2023-01-24 01:08:23.795916: step: 440/466, loss: 0.3151794970035553 2023-01-24 01:08:24.396995: step: 442/466, loss: 0.426649272441864 2023-01-24 01:08:24.971602: step: 444/466, loss: 0.24568483233451843 2023-01-24 01:08:25.614813: step: 446/466, loss: 0.09146913141012192 2023-01-24 01:08:26.239112: step: 448/466, loss: 1.8122892379760742 2023-01-24 01:08:26.868873: step: 450/466, loss: 0.5065184831619263 2023-01-24 01:08:27.537464: step: 452/466, loss: 0.8282049894332886 2023-01-24 01:08:28.207969: step: 454/466, loss: 0.7290765643119812 2023-01-24 01:08:28.848891: step: 456/466, loss: 0.3378812074661255 2023-01-24 01:08:29.490205: step: 458/466, loss: 0.40172505378723145 2023-01-24 01:08:30.110269: step: 460/466, loss: 0.2633054554462433 2023-01-24 01:08:30.746546: step: 462/466, loss: 0.16881847381591797 2023-01-24 01:08:31.370540: step: 464/466, loss: 0.3000826835632324 2023-01-24 01:08:31.947256: step: 466/466, loss: 1.2336972951889038 2023-01-24 01:08:32.536184: step: 468/466, loss: 0.27715569734573364 2023-01-24 01:08:33.152638: step: 470/466, loss: 0.16088084876537323 2023-01-24 01:08:33.813262: step: 472/466, loss: 0.6458614468574524 2023-01-24 01:08:34.398281: step: 474/466, loss: 0.5212080478668213 2023-01-24 01:08:35.008465: step: 476/466, loss: 0.894993007183075 2023-01-24 01:08:35.652099: step: 478/466, loss: 1.0347660779953003 2023-01-24 01:08:36.487786: step: 480/466, loss: 1.368758201599121 2023-01-24 01:08:37.074583: step: 482/466, loss: 0.9775394201278687 2023-01-24 01:08:37.732608: step: 484/466, loss: 0.9914467930793762 2023-01-24 01:08:38.396370: step: 486/466, loss: 0.5039624571800232 2023-01-24 01:08:39.054046: step: 488/466, loss: 0.43688344955444336 2023-01-24 01:08:39.709917: step: 490/466, loss: 0.3364666998386383 2023-01-24 01:08:40.391943: step: 492/466, loss: 1.1753532886505127 2023-01-24 01:08:41.004447: step: 494/466, loss: 0.9748474955558777 2023-01-24 01:08:41.584810: step: 496/466, loss: 1.6335086822509766 2023-01-24 01:08:42.209062: step: 498/466, loss: 0.22400739789009094 2023-01-24 01:08:42.854090: step: 500/466, loss: 0.31947603821754456 2023-01-24 01:08:43.477737: step: 502/466, loss: 0.6949931979179382 2023-01-24 01:08:44.040017: step: 504/466, loss: 0.3621494472026825 2023-01-24 01:08:44.683739: step: 506/466, loss: 0.24993956089019775 2023-01-24 01:08:45.277487: step: 508/466, loss: 0.35971206426620483 2023-01-24 01:08:45.967898: step: 510/466, loss: 1.8766340017318726 2023-01-24 01:08:46.568070: step: 512/466, loss: 0.4962426424026489 2023-01-24 01:08:47.212501: step: 514/466, loss: 0.6077012419700623 2023-01-24 01:08:47.803435: step: 516/466, loss: 0.6568211317062378 2023-01-24 01:08:48.368485: step: 518/466, loss: 0.7788378000259399 2023-01-24 01:08:49.023943: step: 520/466, loss: 1.1048991680145264 2023-01-24 01:08:49.628501: step: 522/466, loss: 0.23610150814056396 2023-01-24 01:08:50.245218: step: 524/466, loss: 3.0489180088043213 2023-01-24 01:08:50.897271: step: 526/466, loss: 0.9111219048500061 2023-01-24 01:08:51.662480: step: 528/466, loss: 0.3445698618888855 2023-01-24 01:08:52.282935: step: 530/466, loss: 0.6594105362892151 2023-01-24 01:08:52.881477: step: 532/466, loss: 0.28699102997779846 2023-01-24 01:08:53.495549: step: 534/466, loss: 1.6701980829238892 2023-01-24 01:08:54.066862: step: 536/466, loss: 0.227378711104393 2023-01-24 01:08:54.688143: step: 538/466, loss: 0.6038797497749329 2023-01-24 01:08:55.290834: step: 540/466, loss: 1.1404021978378296 2023-01-24 01:08:55.900627: step: 542/466, loss: 1.1455515623092651 2023-01-24 01:08:56.514835: step: 544/466, loss: 1.6472821235656738 2023-01-24 01:08:57.169552: step: 546/466, loss: 0.8400754332542419 2023-01-24 01:08:57.782505: step: 548/466, loss: 0.5330002307891846 2023-01-24 01:08:58.345162: step: 550/466, loss: 1.4000282287597656 2023-01-24 01:08:58.944378: step: 552/466, loss: 0.5982736945152283 2023-01-24 01:08:59.630879: step: 554/466, loss: 0.345943808555603 2023-01-24 01:09:00.283950: step: 556/466, loss: 0.5705140233039856 2023-01-24 01:09:00.887125: step: 558/466, loss: 0.2651577591896057 2023-01-24 01:09:01.537689: step: 560/466, loss: 0.22759249806404114 2023-01-24 01:09:02.182434: step: 562/466, loss: 0.59922194480896 2023-01-24 01:09:02.800423: step: 564/466, loss: 1.2552974224090576 2023-01-24 01:09:03.462865: step: 566/466, loss: 0.19052596390247345 2023-01-24 01:09:04.102067: step: 568/466, loss: 1.6358355283737183 2023-01-24 01:09:04.720369: step: 570/466, loss: 0.7840304970741272 2023-01-24 01:09:05.383847: step: 572/466, loss: 0.6998308897018433 2023-01-24 01:09:05.942465: step: 574/466, loss: 0.5584572553634644 2023-01-24 01:09:06.581550: step: 576/466, loss: 0.4735950231552124 2023-01-24 01:09:07.158307: step: 578/466, loss: 4.024959564208984 2023-01-24 01:09:07.821026: step: 580/466, loss: 3.8568780422210693 2023-01-24 01:09:08.445026: step: 582/466, loss: 0.5000805854797363 2023-01-24 01:09:09.022754: step: 584/466, loss: 0.7963584065437317 2023-01-24 01:09:09.638275: step: 586/466, loss: 0.4287252426147461 2023-01-24 01:09:10.292885: step: 588/466, loss: 0.19504182040691376 2023-01-24 01:09:10.984046: step: 590/466, loss: 0.6054005026817322 2023-01-24 01:09:11.595219: step: 592/466, loss: 1.4158061742782593 2023-01-24 01:09:12.211856: step: 594/466, loss: 0.7032748460769653 2023-01-24 01:09:12.891337: step: 596/466, loss: 0.3883163332939148 2023-01-24 01:09:13.537918: step: 598/466, loss: 0.25828832387924194 2023-01-24 01:09:14.147713: step: 600/466, loss: 1.2799416780471802 2023-01-24 01:09:14.741466: step: 602/466, loss: 1.442213535308838 2023-01-24 01:09:15.402946: step: 604/466, loss: 0.40329328179359436 2023-01-24 01:09:16.015263: step: 606/466, loss: 0.14056351780891418 2023-01-24 01:09:16.663115: step: 608/466, loss: 0.1742161363363266 2023-01-24 01:09:17.203890: step: 610/466, loss: 0.31428566575050354 2023-01-24 01:09:17.805000: step: 612/466, loss: 5.002796173095703 2023-01-24 01:09:18.400321: step: 614/466, loss: 0.3916827440261841 2023-01-24 01:09:19.000441: step: 616/466, loss: 0.27184417843818665 2023-01-24 01:09:19.626748: step: 618/466, loss: 0.2802789807319641 2023-01-24 01:09:20.260461: step: 620/466, loss: 0.2814117670059204 2023-01-24 01:09:20.822474: step: 622/466, loss: 0.8114979267120361 2023-01-24 01:09:21.428560: step: 624/466, loss: 3.3512256145477295 2023-01-24 01:09:22.080489: step: 626/466, loss: 0.41930854320526123 2023-01-24 01:09:22.684565: step: 628/466, loss: 0.41478148102760315 2023-01-24 01:09:23.287886: step: 630/466, loss: 0.8603408336639404 2023-01-24 01:09:23.998793: step: 632/466, loss: 0.34210678935050964 2023-01-24 01:09:24.669568: step: 634/466, loss: 0.8705849051475525 2023-01-24 01:09:25.229435: step: 636/466, loss: 0.7041741013526917 2023-01-24 01:09:25.847444: step: 638/466, loss: 1.4362998008728027 2023-01-24 01:09:26.448413: step: 640/466, loss: 0.1915927529335022 2023-01-24 01:09:27.018425: step: 642/466, loss: 0.20950928330421448 2023-01-24 01:09:27.634355: step: 644/466, loss: 0.3953920602798462 2023-01-24 01:09:28.205812: step: 646/466, loss: 0.4086073637008667 2023-01-24 01:09:28.807009: step: 648/466, loss: 0.9524459838867188 2023-01-24 01:09:29.452416: step: 650/466, loss: 0.17150622606277466 2023-01-24 01:09:30.108261: step: 652/466, loss: 0.5794564485549927 2023-01-24 01:09:30.711034: step: 654/466, loss: 0.6404372453689575 2023-01-24 01:09:31.351817: step: 656/466, loss: 0.9542464017868042 2023-01-24 01:09:31.974610: step: 658/466, loss: 0.5697581171989441 2023-01-24 01:09:32.545912: step: 660/466, loss: 0.5123308300971985 2023-01-24 01:09:33.163105: step: 662/466, loss: 0.3932381868362427 2023-01-24 01:09:33.789752: step: 664/466, loss: 0.37858468294143677 2023-01-24 01:09:34.428786: step: 666/466, loss: 0.9859458208084106 2023-01-24 01:09:35.141486: step: 668/466, loss: 0.22349879145622253 2023-01-24 01:09:35.826664: step: 670/466, loss: 0.5277111530303955 2023-01-24 01:09:36.512932: step: 672/466, loss: 1.1844608783721924 2023-01-24 01:09:37.147500: step: 674/466, loss: 0.284910649061203 2023-01-24 01:09:37.857270: step: 676/466, loss: 0.6260483860969543 2023-01-24 01:09:38.481144: step: 678/466, loss: 0.1024300828576088 2023-01-24 01:09:39.192389: step: 680/466, loss: 0.19412025809288025 2023-01-24 01:09:39.794558: step: 682/466, loss: 0.19316069781780243 2023-01-24 01:09:40.393170: step: 684/466, loss: 0.3085916340351105 2023-01-24 01:09:41.053559: step: 686/466, loss: 0.31294581294059753 2023-01-24 01:09:41.761635: step: 688/466, loss: 5.219769477844238 2023-01-24 01:09:42.415634: step: 690/466, loss: 0.398385226726532 2023-01-24 01:09:43.048218: step: 692/466, loss: 0.7441683411598206 2023-01-24 01:09:43.665100: step: 694/466, loss: 0.45424970984458923 2023-01-24 01:09:44.341567: step: 696/466, loss: 0.5073054432868958 2023-01-24 01:09:44.943087: step: 698/466, loss: 1.23970365524292 2023-01-24 01:09:45.567211: step: 700/466, loss: 0.26997852325439453 2023-01-24 01:09:46.183290: step: 702/466, loss: 0.30107223987579346 2023-01-24 01:09:46.802055: step: 704/466, loss: 1.047650933265686 2023-01-24 01:09:47.402023: step: 706/466, loss: 1.2337769269943237 2023-01-24 01:09:48.010041: step: 708/466, loss: 0.1382889598608017 2023-01-24 01:09:48.693702: step: 710/466, loss: 0.5219071507453918 2023-01-24 01:09:49.317632: step: 712/466, loss: 0.8577562570571899 2023-01-24 01:09:49.871477: step: 714/466, loss: 2.7157881259918213 2023-01-24 01:09:50.512122: step: 716/466, loss: 1.589961290359497 2023-01-24 01:09:51.073741: step: 718/466, loss: 0.7895178198814392 2023-01-24 01:09:51.748294: step: 720/466, loss: 0.4477238655090332 2023-01-24 01:09:52.339297: step: 722/466, loss: 0.2571891248226166 2023-01-24 01:09:52.981747: step: 724/466, loss: 1.1215697526931763 2023-01-24 01:09:53.607944: step: 726/466, loss: 0.6022098660469055 2023-01-24 01:09:54.272512: step: 728/466, loss: 0.8486770391464233 2023-01-24 01:09:54.910283: step: 730/466, loss: 1.3985038995742798 2023-01-24 01:09:55.541351: step: 732/466, loss: 0.5383131504058838 2023-01-24 01:09:56.254493: step: 734/466, loss: 1.4918358325958252 2023-01-24 01:09:56.958103: step: 736/466, loss: 2.111380100250244 2023-01-24 01:09:57.546955: step: 738/466, loss: 0.23163145780563354 2023-01-24 01:09:58.218519: step: 740/466, loss: 1.5169225931167603 2023-01-24 01:09:58.923568: step: 742/466, loss: 0.9196684956550598 2023-01-24 01:09:59.543088: step: 744/466, loss: 1.2737724781036377 2023-01-24 01:10:00.178746: step: 746/466, loss: 0.5097503662109375 2023-01-24 01:10:00.735280: step: 748/466, loss: 0.37727874517440796 2023-01-24 01:10:01.358757: step: 750/466, loss: 0.6758705973625183 2023-01-24 01:10:01.963984: step: 752/466, loss: 0.6611981987953186 2023-01-24 01:10:02.634044: step: 754/466, loss: 0.43644487857818604 2023-01-24 01:10:03.210622: step: 756/466, loss: 1.3894761800765991 2023-01-24 01:10:03.822259: step: 758/466, loss: 0.591181218624115 2023-01-24 01:10:04.561306: step: 760/466, loss: 0.5741158127784729 2023-01-24 01:10:05.184968: step: 762/466, loss: 0.40604686737060547 2023-01-24 01:10:05.817489: step: 764/466, loss: 0.26860132813453674 2023-01-24 01:10:06.434506: step: 766/466, loss: 0.797225832939148 2023-01-24 01:10:07.058379: step: 768/466, loss: 1.1374201774597168 2023-01-24 01:10:07.739353: step: 770/466, loss: 0.7190713882446289 2023-01-24 01:10:08.398851: step: 772/466, loss: 0.26857054233551025 2023-01-24 01:10:09.018041: step: 774/466, loss: 0.5708498358726501 2023-01-24 01:10:09.609811: step: 776/466, loss: 0.14177720248699188 2023-01-24 01:10:10.192776: step: 778/466, loss: 0.42916902899742126 2023-01-24 01:10:10.805981: step: 780/466, loss: 0.49903401732444763 2023-01-24 01:10:11.411865: step: 782/466, loss: 0.45851343870162964 2023-01-24 01:10:12.069957: step: 784/466, loss: 0.22907648980617523 2023-01-24 01:10:12.720844: step: 786/466, loss: 1.1436830759048462 2023-01-24 01:10:13.341770: step: 788/466, loss: 0.5820797681808472 2023-01-24 01:10:14.071039: step: 790/466, loss: 2.55900502204895 2023-01-24 01:10:14.719632: step: 792/466, loss: 0.19003260135650635 2023-01-24 01:10:15.290964: step: 794/466, loss: 1.064763069152832 2023-01-24 01:10:15.960534: step: 796/466, loss: 1.7375653982162476 2023-01-24 01:10:16.514846: step: 798/466, loss: 0.31202900409698486 2023-01-24 01:10:17.158947: step: 800/466, loss: 0.576053261756897 2023-01-24 01:10:17.839738: step: 802/466, loss: 0.4688360095024109 2023-01-24 01:10:18.458502: step: 804/466, loss: 0.24730272591114044 2023-01-24 01:10:19.058023: step: 806/466, loss: 0.44804060459136963 2023-01-24 01:10:19.662661: step: 808/466, loss: 1.33967125415802 2023-01-24 01:10:20.364992: step: 810/466, loss: 8.95032024383545 2023-01-24 01:10:20.974597: step: 812/466, loss: 0.2827244699001312 2023-01-24 01:10:21.609802: step: 814/466, loss: 0.24904581904411316 2023-01-24 01:10:22.247605: step: 816/466, loss: 0.947492778301239 2023-01-24 01:10:22.849238: step: 818/466, loss: 0.24781152606010437 2023-01-24 01:10:23.430317: step: 820/466, loss: 0.4108606278896332 2023-01-24 01:10:24.127528: step: 822/466, loss: 0.0980253517627716 2023-01-24 01:10:24.733118: step: 824/466, loss: 0.6137410402297974 2023-01-24 01:10:25.332538: step: 826/466, loss: 2.0315511226654053 2023-01-24 01:10:25.949627: step: 828/466, loss: 0.8632413148880005 2023-01-24 01:10:26.647096: step: 830/466, loss: 0.3644121289253235 2023-01-24 01:10:27.332160: step: 832/466, loss: 0.39598798751831055 2023-01-24 01:10:27.934683: step: 834/466, loss: 0.6666589975357056 2023-01-24 01:10:28.535687: step: 836/466, loss: 4.266134262084961 2023-01-24 01:10:29.170141: step: 838/466, loss: 1.6588128805160522 2023-01-24 01:10:29.796040: step: 840/466, loss: 0.3272869884967804 2023-01-24 01:10:30.390474: step: 842/466, loss: 0.38514748215675354 2023-01-24 01:10:30.962207: step: 844/466, loss: 0.38480067253112793 2023-01-24 01:10:31.582330: step: 846/466, loss: 0.490105003118515 2023-01-24 01:10:32.196950: step: 848/466, loss: 2.017117500305176 2023-01-24 01:10:32.868909: step: 850/466, loss: 0.6285754442214966 2023-01-24 01:10:33.492172: step: 852/466, loss: 0.516296923160553 2023-01-24 01:10:34.086061: step: 854/466, loss: 0.4578474760055542 2023-01-24 01:10:34.660641: step: 856/466, loss: 0.76552814245224 2023-01-24 01:10:35.297855: step: 858/466, loss: 0.884811520576477 2023-01-24 01:10:35.887439: step: 860/466, loss: 0.34015825390815735 2023-01-24 01:10:36.555623: step: 862/466, loss: 0.2531778812408447 2023-01-24 01:10:37.192795: step: 864/466, loss: 0.1724013090133667 2023-01-24 01:10:37.816069: step: 866/466, loss: 0.5326557755470276 2023-01-24 01:10:38.456750: step: 868/466, loss: 1.0472228527069092 2023-01-24 01:10:39.077390: step: 870/466, loss: 1.7959599494934082 2023-01-24 01:10:39.721284: step: 872/466, loss: 0.6444715857505798 2023-01-24 01:10:40.417357: step: 874/466, loss: 0.26546746492385864 2023-01-24 01:10:41.003554: step: 876/466, loss: 0.48505330085754395 2023-01-24 01:10:41.633829: step: 878/466, loss: 1.3470640182495117 2023-01-24 01:10:42.290315: step: 880/466, loss: 1.7580037117004395 2023-01-24 01:10:42.892981: step: 882/466, loss: 0.28422167897224426 2023-01-24 01:10:43.481813: step: 884/466, loss: 0.5739858746528625 2023-01-24 01:10:44.152622: step: 886/466, loss: 1.6449081897735596 2023-01-24 01:10:44.819647: step: 888/466, loss: 0.3343558609485626 2023-01-24 01:10:45.441326: step: 890/466, loss: 0.4418397545814514 2023-01-24 01:10:46.091263: step: 892/466, loss: 0.8011448979377747 2023-01-24 01:10:46.676587: step: 894/466, loss: 0.329490065574646 2023-01-24 01:10:47.361993: step: 896/466, loss: 0.8715149164199829 2023-01-24 01:10:47.959018: step: 898/466, loss: 3.0846753120422363 2023-01-24 01:10:48.563447: step: 900/466, loss: 0.9813827276229858 2023-01-24 01:10:49.149903: step: 902/466, loss: 0.4854777753353119 2023-01-24 01:10:49.810652: step: 904/466, loss: 0.42504075169563293 2023-01-24 01:10:50.401122: step: 906/466, loss: 1.4318958520889282 2023-01-24 01:10:51.045136: step: 908/466, loss: 0.6176507472991943 2023-01-24 01:10:51.695571: step: 910/466, loss: 0.8056727647781372 2023-01-24 01:10:52.292494: step: 912/466, loss: 0.29409268498420715 2023-01-24 01:10:52.865336: step: 914/466, loss: 0.285684198141098 2023-01-24 01:10:53.546379: step: 916/466, loss: 0.5347999930381775 2023-01-24 01:10:54.148822: step: 918/466, loss: 0.6455689072608948 2023-01-24 01:10:54.856322: step: 920/466, loss: 0.2150963693857193 2023-01-24 01:10:55.517237: step: 922/466, loss: 0.9827973246574402 2023-01-24 01:10:56.090321: step: 924/466, loss: 0.4846271574497223 2023-01-24 01:10:56.708430: step: 926/466, loss: 1.0096116065979004 2023-01-24 01:10:57.303070: step: 928/466, loss: 0.7601162195205688 2023-01-24 01:10:57.940091: step: 930/466, loss: 0.5822792649269104 2023-01-24 01:10:58.666654: step: 932/466, loss: 0.4887627959251404 ================================================== Loss: 0.795 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3304354270090295, 'r': 0.26710719526726107, 'f1': 0.2954155129188806}, 'combined': 0.21767458846654356, 'epoch': 6} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3251667898698716, 'r': 0.24490039309118258, 'f1': 0.27938277115232546}, 'combined': 0.17494061371220382, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3300283307967587, 'r': 0.2836865917474985, 'f1': 0.3051078241855749}, 'combined': 0.22481629150516044, 'epoch': 6} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3325963507145691, 'r': 0.25312111812722843, 'f1': 0.28746678955770233}, 'combined': 0.17814843296533664, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3145673020104264, 'r': 0.2823345993376313, 'f1': 0.29758066770186337}, 'combined': 0.21926996567505722, 'epoch': 6} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3590313158877341, 'r': 0.2546918901514071, 'f1': 0.29799220093744355}, 'combined': 0.19767799468127445, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.325, 'r': 0.2785714285714286, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32894736842105265, 'r': 0.2717391304347826, 'f1': 0.2976190476190476}, 'combined': 0.1488095238095238, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35266479985310323, 'r': 0.2603161425860667, 'f1': 0.2995340767311292}, 'combined': 0.2207093196966215, 'epoch': 4} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35061171058748686, 'r': 0.22688501488377985, 'f1': 0.27549435231915403}, 'combined': 0.17250580939610582, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40865384615384615, 'r': 0.30357142857142855, 'f1': 0.3483606557377049}, 'combined': 0.23224043715846993, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3629078025018788, 'r': 0.2782063609312316, 'f1': 0.3149618737073234}, 'combined': 0.23207717010013304, 'epoch': 5} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3524047155382967, 'r': 0.22899945414040943, 'f1': 0.2776054651942819}, 'combined': 0.17203718969786483, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.39285714285714285, 'r': 0.2391304347826087, 'f1': 0.2972972972972973}, 'combined': 0.14864864864864866, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31668397009966776, 'r': 0.2583948902141502, 'f1': 0.28458538587848936}, 'combined': 0.20969449485783426, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.39165853086307634, 'r': 0.23329840286067724, 'f1': 0.2924147402371384}, 'combined': 0.19397809500879482, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.1896551724137931, 'f1': 0.2619047619047619}, 'combined': 0.1746031746031746, 'epoch': 4} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:13:35.810385: step: 2/466, loss: 0.9606571793556213 2023-01-24 01:13:36.365102: step: 4/466, loss: 0.3612016439437866 2023-01-24 01:13:37.046187: step: 6/466, loss: 0.9253258109092712 2023-01-24 01:13:37.706153: step: 8/466, loss: 0.4830124080181122 2023-01-24 01:13:38.356356: step: 10/466, loss: 0.38346290588378906 2023-01-24 01:13:38.963278: step: 12/466, loss: 0.8057071566581726 2023-01-24 01:13:39.623786: step: 14/466, loss: 0.3059096038341522 2023-01-24 01:13:40.353364: step: 16/466, loss: 0.22083964943885803 2023-01-24 01:13:40.932262: step: 18/466, loss: 0.30529630184173584 2023-01-24 01:13:41.605031: step: 20/466, loss: 0.5498822927474976 2023-01-24 01:13:42.218083: step: 22/466, loss: 1.5517122745513916 2023-01-24 01:13:42.871578: step: 24/466, loss: 0.7829954624176025 2023-01-24 01:13:43.425123: step: 26/466, loss: 0.20105233788490295 2023-01-24 01:13:44.058791: step: 28/466, loss: 0.6444220542907715 2023-01-24 01:13:44.676439: step: 30/466, loss: 1.7132608890533447 2023-01-24 01:13:45.273524: step: 32/466, loss: 0.9501598477363586 2023-01-24 01:13:45.867888: step: 34/466, loss: 0.34672775864601135 2023-01-24 01:13:46.465739: step: 36/466, loss: 0.5987630486488342 2023-01-24 01:13:47.075157: step: 38/466, loss: 0.2633925676345825 2023-01-24 01:13:47.649396: step: 40/466, loss: 1.5883678197860718 2023-01-24 01:13:48.288693: step: 42/466, loss: 0.2523646950721741 2023-01-24 01:13:48.931203: step: 44/466, loss: 0.32839834690093994 2023-01-24 01:13:49.500436: step: 46/466, loss: 0.4045436978340149 2023-01-24 01:13:50.095316: step: 48/466, loss: 0.4517868757247925 2023-01-24 01:13:50.765215: step: 50/466, loss: 0.22546136379241943 2023-01-24 01:13:51.447002: step: 52/466, loss: 0.23036596179008484 2023-01-24 01:13:52.037086: step: 54/466, loss: 0.28984159231185913 2023-01-24 01:13:52.635835: step: 56/466, loss: 0.4410346746444702 2023-01-24 01:13:53.326356: step: 58/466, loss: 0.21367600560188293 2023-01-24 01:13:53.955649: step: 60/466, loss: 0.35458019375801086 2023-01-24 01:13:54.582597: step: 62/466, loss: 0.17024670541286469 2023-01-24 01:13:55.217760: step: 64/466, loss: 0.14700041711330414 2023-01-24 01:13:55.880015: step: 66/466, loss: 0.5288400650024414 2023-01-24 01:13:56.530240: step: 68/466, loss: 0.13167166709899902 2023-01-24 01:13:57.166889: step: 70/466, loss: 0.6722766757011414 2023-01-24 01:13:57.765336: step: 72/466, loss: 0.1936565488576889 2023-01-24 01:13:58.402882: step: 74/466, loss: 0.46509358286857605 2023-01-24 01:13:58.973713: step: 76/466, loss: 0.4784170687198639 2023-01-24 01:13:59.616540: step: 78/466, loss: 0.36129406094551086 2023-01-24 01:14:00.259667: step: 80/466, loss: 3.51059889793396 2023-01-24 01:14:00.871472: step: 82/466, loss: 0.17317715287208557 2023-01-24 01:14:01.515979: step: 84/466, loss: 0.9708307385444641 2023-01-24 01:14:02.156072: step: 86/466, loss: 0.8890564441680908 2023-01-24 01:14:02.757818: step: 88/466, loss: 0.5933763384819031 2023-01-24 01:14:03.355224: step: 90/466, loss: 0.30360448360443115 2023-01-24 01:14:03.923157: step: 92/466, loss: 0.5512064099311829 2023-01-24 01:14:04.554823: step: 94/466, loss: 0.4373668432235718 2023-01-24 01:14:05.125917: step: 96/466, loss: 0.35091495513916016 2023-01-24 01:14:05.794238: step: 98/466, loss: 0.7135890126228333 2023-01-24 01:14:06.486846: step: 100/466, loss: 0.702728807926178 2023-01-24 01:14:07.148508: step: 102/466, loss: 0.22279851138591766 2023-01-24 01:14:07.779736: step: 104/466, loss: 0.3196096420288086 2023-01-24 01:14:08.429978: step: 106/466, loss: 0.3496376574039459 2023-01-24 01:14:09.106056: step: 108/466, loss: 0.23381787538528442 2023-01-24 01:14:09.758077: step: 110/466, loss: 2.360985279083252 2023-01-24 01:14:10.379717: step: 112/466, loss: 0.6244679689407349 2023-01-24 01:14:11.037853: step: 114/466, loss: 0.6387752294540405 2023-01-24 01:14:11.646785: step: 116/466, loss: 0.29985910654067993 2023-01-24 01:14:12.276247: step: 118/466, loss: 0.37929147481918335 2023-01-24 01:14:12.971394: step: 120/466, loss: 0.7739306092262268 2023-01-24 01:14:13.622354: step: 122/466, loss: 0.7082308530807495 2023-01-24 01:14:14.185232: step: 124/466, loss: 0.3670591115951538 2023-01-24 01:14:14.852671: step: 126/466, loss: 0.5902093648910522 2023-01-24 01:14:15.499294: step: 128/466, loss: 0.7791361808776855 2023-01-24 01:14:16.214060: step: 130/466, loss: 0.19285088777542114 2023-01-24 01:14:16.830037: step: 132/466, loss: 0.35721462965011597 2023-01-24 01:14:17.432939: step: 134/466, loss: 0.3081234395503998 2023-01-24 01:14:18.120267: step: 136/466, loss: 0.3913836181163788 2023-01-24 01:14:18.727619: step: 138/466, loss: 0.31764063239097595 2023-01-24 01:14:19.359301: step: 140/466, loss: 0.17015546560287476 2023-01-24 01:14:20.045408: step: 142/466, loss: 0.49441781640052795 2023-01-24 01:14:20.676564: step: 144/466, loss: 0.2210272252559662 2023-01-24 01:14:21.386251: step: 146/466, loss: 0.4163098633289337 2023-01-24 01:14:21.961871: step: 148/466, loss: 0.9439469575881958 2023-01-24 01:14:22.599385: step: 150/466, loss: 0.7482032179832458 2023-01-24 01:14:23.172632: step: 152/466, loss: 0.2315014749765396 2023-01-24 01:14:23.811246: step: 154/466, loss: 0.477324903011322 2023-01-24 01:14:24.418829: step: 156/466, loss: 0.31996142864227295 2023-01-24 01:14:25.065178: step: 158/466, loss: 0.3227391839027405 2023-01-24 01:14:25.740257: step: 160/466, loss: 1.9098715782165527 2023-01-24 01:14:26.401867: step: 162/466, loss: 0.15192030370235443 2023-01-24 01:14:26.985660: step: 164/466, loss: 2.4227092266082764 2023-01-24 01:14:27.604458: step: 166/466, loss: 0.9149259924888611 2023-01-24 01:14:28.274001: step: 168/466, loss: 0.28424474596977234 2023-01-24 01:14:28.850841: step: 170/466, loss: 0.2596474289894104 2023-01-24 01:14:29.522548: step: 172/466, loss: 0.6030668020248413 2023-01-24 01:14:30.142017: step: 174/466, loss: 0.4024707078933716 2023-01-24 01:14:30.716676: step: 176/466, loss: 0.11528413742780685 2023-01-24 01:14:31.375795: step: 178/466, loss: 0.1654813587665558 2023-01-24 01:14:32.021275: step: 180/466, loss: 0.16900886595249176 2023-01-24 01:14:32.676887: step: 182/466, loss: 0.5156384110450745 2023-01-24 01:14:33.295161: step: 184/466, loss: 0.42618176341056824 2023-01-24 01:14:33.937853: step: 186/466, loss: 0.6345421671867371 2023-01-24 01:14:34.591975: step: 188/466, loss: 0.24912512302398682 2023-01-24 01:14:35.183558: step: 190/466, loss: 0.7710665464401245 2023-01-24 01:14:35.751784: step: 192/466, loss: 0.3259478509426117 2023-01-24 01:14:36.393366: step: 194/466, loss: 0.216398224234581 2023-01-24 01:14:37.007024: step: 196/466, loss: 0.8208011984825134 2023-01-24 01:14:37.630965: step: 198/466, loss: 0.5244461894035339 2023-01-24 01:14:38.294021: step: 200/466, loss: 0.6144452691078186 2023-01-24 01:14:38.967792: step: 202/466, loss: 0.22315502166748047 2023-01-24 01:14:39.595798: step: 204/466, loss: 0.42204487323760986 2023-01-24 01:14:40.219163: step: 206/466, loss: 0.7389995455741882 2023-01-24 01:14:40.830227: step: 208/466, loss: 0.18414448201656342 2023-01-24 01:14:41.436159: step: 210/466, loss: 0.8658441305160522 2023-01-24 01:14:42.091980: step: 212/466, loss: 0.41797181963920593 2023-01-24 01:14:42.790132: step: 214/466, loss: 0.9090080261230469 2023-01-24 01:14:43.404231: step: 216/466, loss: 0.5187643766403198 2023-01-24 01:14:44.002520: step: 218/466, loss: 0.7451024651527405 2023-01-24 01:14:44.607836: step: 220/466, loss: 0.49195000529289246 2023-01-24 01:14:45.259003: step: 222/466, loss: 0.18547570705413818 2023-01-24 01:14:45.926260: step: 224/466, loss: 0.1988060474395752 2023-01-24 01:14:46.537501: step: 226/466, loss: 0.508165180683136 2023-01-24 01:14:47.123046: step: 228/466, loss: 0.11084049940109253 2023-01-24 01:14:47.770051: step: 230/466, loss: 0.9763489961624146 2023-01-24 01:14:48.400606: step: 232/466, loss: 0.5351693630218506 2023-01-24 01:14:48.912068: step: 234/466, loss: 0.5538202524185181 2023-01-24 01:14:49.548425: step: 236/466, loss: 0.3254033327102661 2023-01-24 01:14:50.153323: step: 238/466, loss: 0.32077309489250183 2023-01-24 01:14:50.803428: step: 240/466, loss: 0.9185691475868225 2023-01-24 01:14:51.431276: step: 242/466, loss: 0.11787169426679611 2023-01-24 01:14:52.012480: step: 244/466, loss: 0.5049479007720947 2023-01-24 01:14:52.617590: step: 246/466, loss: 0.09478340297937393 2023-01-24 01:14:53.246890: step: 248/466, loss: 0.15972957015037537 2023-01-24 01:14:53.819044: step: 250/466, loss: 3.33953595161438 2023-01-24 01:14:54.458241: step: 252/466, loss: 0.14158812165260315 2023-01-24 01:14:55.101800: step: 254/466, loss: 0.4418643116950989 2023-01-24 01:14:55.758955: step: 256/466, loss: 0.5265594124794006 2023-01-24 01:14:56.441043: step: 258/466, loss: 0.2596603035926819 2023-01-24 01:14:57.055393: step: 260/466, loss: 0.5344682931900024 2023-01-24 01:14:57.654501: step: 262/466, loss: 0.20869165658950806 2023-01-24 01:14:58.327734: step: 264/466, loss: 0.4390193819999695 2023-01-24 01:14:58.966250: step: 266/466, loss: 0.8909998536109924 2023-01-24 01:14:59.577260: step: 268/466, loss: 0.7148413062095642 2023-01-24 01:15:00.196838: step: 270/466, loss: 0.8363057374954224 2023-01-24 01:15:00.805998: step: 272/466, loss: 0.5471477508544922 2023-01-24 01:15:01.413558: step: 274/466, loss: 0.2999334931373596 2023-01-24 01:15:02.030146: step: 276/466, loss: 0.45258891582489014 2023-01-24 01:15:02.642109: step: 278/466, loss: 0.33410534262657166 2023-01-24 01:15:03.294919: step: 280/466, loss: 0.46963629126548767 2023-01-24 01:15:03.893493: step: 282/466, loss: 1.2168225049972534 2023-01-24 01:15:04.451992: step: 284/466, loss: 0.06262262910604477 2023-01-24 01:15:05.166838: step: 286/466, loss: 0.4550589919090271 2023-01-24 01:15:05.800844: step: 288/466, loss: 0.15784329175949097 2023-01-24 01:15:06.407277: step: 290/466, loss: 0.9966462254524231 2023-01-24 01:15:07.063851: step: 292/466, loss: 0.4710371494293213 2023-01-24 01:15:07.708351: step: 294/466, loss: 1.3634792566299438 2023-01-24 01:15:08.345246: step: 296/466, loss: 0.45720961689949036 2023-01-24 01:15:08.957457: step: 298/466, loss: 0.48143327236175537 2023-01-24 01:15:09.641253: step: 300/466, loss: 1.4690943956375122 2023-01-24 01:15:10.253002: step: 302/466, loss: 0.18070606887340546 2023-01-24 01:15:10.942087: step: 304/466, loss: 1.0625159740447998 2023-01-24 01:15:11.586215: step: 306/466, loss: 0.3338050842285156 2023-01-24 01:15:12.242277: step: 308/466, loss: 0.21788737177848816 2023-01-24 01:15:12.907470: step: 310/466, loss: 0.28946834802627563 2023-01-24 01:15:13.560483: step: 312/466, loss: 1.5697287321090698 2023-01-24 01:15:14.133861: step: 314/466, loss: 0.3977658748626709 2023-01-24 01:15:14.752808: step: 316/466, loss: 0.4918626844882965 2023-01-24 01:15:15.431739: step: 318/466, loss: 0.24856841564178467 2023-01-24 01:15:16.052793: step: 320/466, loss: 2.086498737335205 2023-01-24 01:15:16.640822: step: 322/466, loss: 1.2578928470611572 2023-01-24 01:15:17.307418: step: 324/466, loss: 0.12625814974308014 2023-01-24 01:15:17.933462: step: 326/466, loss: 0.34538987278938293 2023-01-24 01:15:18.546315: step: 328/466, loss: 1.0326175689697266 2023-01-24 01:15:19.145531: step: 330/466, loss: 0.5270153880119324 2023-01-24 01:15:19.770402: step: 332/466, loss: 0.5706387758255005 2023-01-24 01:15:20.407922: step: 334/466, loss: 0.5501885414123535 2023-01-24 01:15:21.120556: step: 336/466, loss: 0.5912823677062988 2023-01-24 01:15:21.722270: step: 338/466, loss: 0.9064117670059204 2023-01-24 01:15:22.346466: step: 340/466, loss: 0.10313384234905243 2023-01-24 01:15:22.907456: step: 342/466, loss: 0.20501954853534698 2023-01-24 01:15:23.589762: step: 344/466, loss: 0.305141806602478 2023-01-24 01:15:24.191379: step: 346/466, loss: 0.3541070818901062 2023-01-24 01:15:24.801481: step: 348/466, loss: 0.17641820013523102 2023-01-24 01:15:25.456896: step: 350/466, loss: 0.8985726237297058 2023-01-24 01:15:26.091237: step: 352/466, loss: 0.6228352189064026 2023-01-24 01:15:26.750973: step: 354/466, loss: 1.645764946937561 2023-01-24 01:15:27.343532: step: 356/466, loss: 0.4667787253856659 2023-01-24 01:15:28.048095: step: 358/466, loss: 1.009447455406189 2023-01-24 01:15:28.762183: step: 360/466, loss: 0.27950695157051086 2023-01-24 01:15:29.403721: step: 362/466, loss: 3.32397198677063 2023-01-24 01:15:30.011054: step: 364/466, loss: 0.46399447321891785 2023-01-24 01:15:30.641662: step: 366/466, loss: 0.3752869665622711 2023-01-24 01:15:31.315462: step: 368/466, loss: 0.6119229793548584 2023-01-24 01:15:31.957692: step: 370/466, loss: 0.27729475498199463 2023-01-24 01:15:32.583356: step: 372/466, loss: 1.4170265197753906 2023-01-24 01:15:33.258962: step: 374/466, loss: 0.18623074889183044 2023-01-24 01:15:33.873021: step: 376/466, loss: 0.678354024887085 2023-01-24 01:15:34.514438: step: 378/466, loss: 0.7824224829673767 2023-01-24 01:15:35.135974: step: 380/466, loss: 0.24425385892391205 2023-01-24 01:15:35.916666: step: 382/466, loss: 0.6537637114524841 2023-01-24 01:15:36.555666: step: 384/466, loss: 0.6355273127555847 2023-01-24 01:15:37.273188: step: 386/466, loss: 2.3607473373413086 2023-01-24 01:15:37.905210: step: 388/466, loss: 0.7117860317230225 2023-01-24 01:15:38.448357: step: 390/466, loss: 1.5877604484558105 2023-01-24 01:15:39.099759: step: 392/466, loss: 0.25026875734329224 2023-01-24 01:15:39.687105: step: 394/466, loss: 0.9659414291381836 2023-01-24 01:15:40.283668: step: 396/466, loss: 0.8510382175445557 2023-01-24 01:15:40.935525: step: 398/466, loss: 0.23068997263908386 2023-01-24 01:15:41.584878: step: 400/466, loss: 0.14895950257778168 2023-01-24 01:15:42.189037: step: 402/466, loss: 0.511879563331604 2023-01-24 01:15:42.769430: step: 404/466, loss: 0.562067985534668 2023-01-24 01:15:43.402382: step: 406/466, loss: 0.6167904734611511 2023-01-24 01:15:44.046713: step: 408/466, loss: 0.29697105288505554 2023-01-24 01:15:44.613242: step: 410/466, loss: 0.7155808210372925 2023-01-24 01:15:45.238073: step: 412/466, loss: 0.27304062247276306 2023-01-24 01:15:45.847388: step: 414/466, loss: 0.17526191473007202 2023-01-24 01:15:46.494514: step: 416/466, loss: 0.31682494282722473 2023-01-24 01:15:47.084216: step: 418/466, loss: 0.45163649320602417 2023-01-24 01:15:47.753126: step: 420/466, loss: 1.0491752624511719 2023-01-24 01:15:48.427868: step: 422/466, loss: 1.2448574304580688 2023-01-24 01:15:49.045479: step: 424/466, loss: 0.12224073708057404 2023-01-24 01:15:49.694268: step: 426/466, loss: 0.35593071579933167 2023-01-24 01:15:50.332275: step: 428/466, loss: 0.5212833285331726 2023-01-24 01:15:50.883447: step: 430/466, loss: 0.19980010390281677 2023-01-24 01:15:51.457571: step: 432/466, loss: 0.3044664263725281 2023-01-24 01:15:52.111724: step: 434/466, loss: 0.39862319827079773 2023-01-24 01:15:52.697336: step: 436/466, loss: 0.41108438372612 2023-01-24 01:15:53.307214: step: 438/466, loss: 0.9988893866539001 2023-01-24 01:15:53.924977: step: 440/466, loss: 0.6291717886924744 2023-01-24 01:15:54.600163: step: 442/466, loss: 2.1317265033721924 2023-01-24 01:15:55.192109: step: 444/466, loss: 0.6207007169723511 2023-01-24 01:15:55.817685: step: 446/466, loss: 0.38326412439346313 2023-01-24 01:15:56.443132: step: 448/466, loss: 1.4615135192871094 2023-01-24 01:15:57.024509: step: 450/466, loss: 0.1804785281419754 2023-01-24 01:15:57.656432: step: 452/466, loss: 0.6153715252876282 2023-01-24 01:15:58.261331: step: 454/466, loss: 0.3203708529472351 2023-01-24 01:15:58.865693: step: 456/466, loss: 0.32922083139419556 2023-01-24 01:15:59.511622: step: 458/466, loss: 0.3243230879306793 2023-01-24 01:16:00.178652: step: 460/466, loss: 0.3458571434020996 2023-01-24 01:16:00.785657: step: 462/466, loss: 0.3195928931236267 2023-01-24 01:16:01.457086: step: 464/466, loss: 0.8329210877418518 2023-01-24 01:16:02.013488: step: 466/466, loss: 0.23395095765590668 2023-01-24 01:16:02.634622: step: 468/466, loss: 0.47280454635620117 2023-01-24 01:16:03.250801: step: 470/466, loss: 0.4958782494068146 2023-01-24 01:16:03.898524: step: 472/466, loss: 0.2251265048980713 2023-01-24 01:16:04.557711: step: 474/466, loss: 0.2383415699005127 2023-01-24 01:16:05.137912: step: 476/466, loss: 0.2788371741771698 2023-01-24 01:16:05.719524: step: 478/466, loss: 0.35121840238571167 2023-01-24 01:16:06.310227: step: 480/466, loss: 0.35221993923187256 2023-01-24 01:16:06.887567: step: 482/466, loss: 0.4240202009677887 2023-01-24 01:16:07.502569: step: 484/466, loss: 0.23571814596652985 2023-01-24 01:16:08.143154: step: 486/466, loss: 0.5392071008682251 2023-01-24 01:16:08.844294: step: 488/466, loss: 0.16577844321727753 2023-01-24 01:16:09.472175: step: 490/466, loss: 0.4230649769306183 2023-01-24 01:16:10.065984: step: 492/466, loss: 0.36499860882759094 2023-01-24 01:16:10.665753: step: 494/466, loss: 1.428270936012268 2023-01-24 01:16:11.298747: step: 496/466, loss: 0.21719515323638916 2023-01-24 01:16:11.954861: step: 498/466, loss: 0.9568606615066528 2023-01-24 01:16:12.518972: step: 500/466, loss: 3.720993995666504 2023-01-24 01:16:13.151291: step: 502/466, loss: 0.5529990792274475 2023-01-24 01:16:13.745739: step: 504/466, loss: 0.5724399089813232 2023-01-24 01:16:14.353059: step: 506/466, loss: 1.9884328842163086 2023-01-24 01:16:14.990314: step: 508/466, loss: 0.5034530162811279 2023-01-24 01:16:15.637598: step: 510/466, loss: 0.8145545721054077 2023-01-24 01:16:16.248175: step: 512/466, loss: 0.5394100546836853 2023-01-24 01:16:16.833099: step: 514/466, loss: 0.15192024409770966 2023-01-24 01:16:17.517928: step: 516/466, loss: 0.5822650194168091 2023-01-24 01:16:18.175824: step: 518/466, loss: 0.8212950825691223 2023-01-24 01:16:18.878053: step: 520/466, loss: 0.11858353763818741 2023-01-24 01:16:19.501172: step: 522/466, loss: 0.2861880362033844 2023-01-24 01:16:20.098762: step: 524/466, loss: 0.18946318328380585 2023-01-24 01:16:20.734260: step: 526/466, loss: 0.36098793148994446 2023-01-24 01:16:21.370335: step: 528/466, loss: 0.6966142654418945 2023-01-24 01:16:22.094517: step: 530/466, loss: 1.6891040802001953 2023-01-24 01:16:22.692126: step: 532/466, loss: 0.21906842291355133 2023-01-24 01:16:23.312123: step: 534/466, loss: 0.3703289330005646 2023-01-24 01:16:23.860965: step: 536/466, loss: 0.22129729390144348 2023-01-24 01:16:24.463966: step: 538/466, loss: 0.41192248463630676 2023-01-24 01:16:25.067415: step: 540/466, loss: 0.44342806935310364 2023-01-24 01:16:25.664625: step: 542/466, loss: 1.316843032836914 2023-01-24 01:16:26.238286: step: 544/466, loss: 0.2924029231071472 2023-01-24 01:16:26.869158: step: 546/466, loss: 0.31457072496414185 2023-01-24 01:16:27.452935: step: 548/466, loss: 0.209502175450325 2023-01-24 01:16:28.047003: step: 550/466, loss: 0.3837663531303406 2023-01-24 01:16:28.674428: step: 552/466, loss: 0.2353142350912094 2023-01-24 01:16:29.285247: step: 554/466, loss: 0.4405522644519806 2023-01-24 01:16:29.983319: step: 556/466, loss: 0.3007507622241974 2023-01-24 01:16:30.660478: step: 558/466, loss: 0.7260340452194214 2023-01-24 01:16:31.241048: step: 560/466, loss: 0.26162004470825195 2023-01-24 01:16:31.885548: step: 562/466, loss: 0.3456798493862152 2023-01-24 01:16:32.511738: step: 564/466, loss: 0.5380363464355469 2023-01-24 01:16:33.082125: step: 566/466, loss: 2.494497299194336 2023-01-24 01:16:33.699378: step: 568/466, loss: 0.23229075968265533 2023-01-24 01:16:34.346718: step: 570/466, loss: 0.23848365247249603 2023-01-24 01:16:34.965344: step: 572/466, loss: 0.5783706307411194 2023-01-24 01:16:35.591786: step: 574/466, loss: 1.4762802124023438 2023-01-24 01:16:36.158932: step: 576/466, loss: 0.5708321332931519 2023-01-24 01:16:36.800252: step: 578/466, loss: 0.26208239793777466 2023-01-24 01:16:37.392659: step: 580/466, loss: 1.4005645513534546 2023-01-24 01:16:37.975430: step: 582/466, loss: 0.2656085193157196 2023-01-24 01:16:38.568335: step: 584/466, loss: 0.3431568741798401 2023-01-24 01:16:39.278523: step: 586/466, loss: 0.2746313810348511 2023-01-24 01:16:39.948786: step: 588/466, loss: 0.8503986597061157 2023-01-24 01:16:40.568836: step: 590/466, loss: 0.412700355052948 2023-01-24 01:16:41.184212: step: 592/466, loss: 0.24228881299495697 2023-01-24 01:16:41.784263: step: 594/466, loss: 0.06156359612941742 2023-01-24 01:16:42.433611: step: 596/466, loss: 1.6006569862365723 2023-01-24 01:16:43.016673: step: 598/466, loss: 0.21712467074394226 2023-01-24 01:16:43.608702: step: 600/466, loss: 0.21318912506103516 2023-01-24 01:16:44.215939: step: 602/466, loss: 0.50748211145401 2023-01-24 01:16:44.894762: step: 604/466, loss: 0.3261891007423401 2023-01-24 01:16:45.441324: step: 606/466, loss: 0.12974101305007935 2023-01-24 01:16:46.057741: step: 608/466, loss: 0.4472119212150574 2023-01-24 01:16:46.633126: step: 610/466, loss: 1.4981715679168701 2023-01-24 01:16:47.240064: step: 612/466, loss: 0.7696586847305298 2023-01-24 01:16:47.847625: step: 614/466, loss: 0.4564979374408722 2023-01-24 01:16:48.454577: step: 616/466, loss: 0.29963427782058716 2023-01-24 01:16:49.040086: step: 618/466, loss: 1.1432433128356934 2023-01-24 01:16:49.685625: step: 620/466, loss: 0.8962889313697815 2023-01-24 01:16:50.280468: step: 622/466, loss: 0.5036012530326843 2023-01-24 01:16:51.024159: step: 624/466, loss: 0.28956055641174316 2023-01-24 01:16:51.716603: step: 626/466, loss: 0.8795130848884583 2023-01-24 01:16:52.349936: step: 628/466, loss: 0.6439096927642822 2023-01-24 01:16:52.908586: step: 630/466, loss: 2.8661391735076904 2023-01-24 01:16:53.507529: step: 632/466, loss: 0.1871575117111206 2023-01-24 01:16:54.135430: step: 634/466, loss: 0.41019201278686523 2023-01-24 01:16:54.756573: step: 636/466, loss: 0.5545302033424377 2023-01-24 01:16:55.363678: step: 638/466, loss: 0.25796765089035034 2023-01-24 01:16:55.961891: step: 640/466, loss: 0.6741047501564026 2023-01-24 01:16:56.567208: step: 642/466, loss: 0.7461636066436768 2023-01-24 01:16:57.134834: step: 644/466, loss: 0.10852614790201187 2023-01-24 01:16:57.823461: step: 646/466, loss: 0.44105851650238037 2023-01-24 01:16:58.428053: step: 648/466, loss: 0.3716807961463928 2023-01-24 01:16:59.036325: step: 650/466, loss: 0.6248000264167786 2023-01-24 01:16:59.637047: step: 652/466, loss: 0.5479502081871033 2023-01-24 01:17:00.260875: step: 654/466, loss: 0.22235040366649628 2023-01-24 01:17:00.884081: step: 656/466, loss: 0.24069158732891083 2023-01-24 01:17:01.573920: step: 658/466, loss: 0.6740921139717102 2023-01-24 01:17:02.166510: step: 660/466, loss: 0.7209137082099915 2023-01-24 01:17:02.749219: step: 662/466, loss: 0.18780159950256348 2023-01-24 01:17:03.376164: step: 664/466, loss: 1.1777057647705078 2023-01-24 01:17:04.004534: step: 666/466, loss: 0.40443477034568787 2023-01-24 01:17:04.677023: step: 668/466, loss: 0.31447118520736694 2023-01-24 01:17:05.286989: step: 670/466, loss: 0.18810229003429413 2023-01-24 01:17:05.915018: step: 672/466, loss: 0.37618061900138855 2023-01-24 01:17:06.539048: step: 674/466, loss: 0.3444058895111084 2023-01-24 01:17:07.186385: step: 676/466, loss: 0.34029021859169006 2023-01-24 01:17:07.869855: step: 678/466, loss: 0.35844942927360535 2023-01-24 01:17:08.555049: step: 680/466, loss: 0.5731791257858276 2023-01-24 01:17:09.173797: step: 682/466, loss: 0.3525617718696594 2023-01-24 01:17:09.779199: step: 684/466, loss: 0.49626442790031433 2023-01-24 01:17:10.419633: step: 686/466, loss: 1.9497008323669434 2023-01-24 01:17:11.004914: step: 688/466, loss: 0.54268479347229 2023-01-24 01:17:11.573819: step: 690/466, loss: 2.6503868103027344 2023-01-24 01:17:12.208165: step: 692/466, loss: 0.7313562631607056 2023-01-24 01:17:12.945857: step: 694/466, loss: 2.6455135345458984 2023-01-24 01:17:13.588885: step: 696/466, loss: 0.7552454471588135 2023-01-24 01:17:14.223227: step: 698/466, loss: 0.5448524355888367 2023-01-24 01:17:14.844632: step: 700/466, loss: 0.2509639263153076 2023-01-24 01:17:15.446337: step: 702/466, loss: 0.7717275619506836 2023-01-24 01:17:16.083550: step: 704/466, loss: 0.6373904943466187 2023-01-24 01:17:16.766691: step: 706/466, loss: 0.2916221618652344 2023-01-24 01:17:17.392678: step: 708/466, loss: 0.09957034140825272 2023-01-24 01:17:18.056136: step: 710/466, loss: 0.16980797052383423 2023-01-24 01:17:18.701366: step: 712/466, loss: 0.1834866851568222 2023-01-24 01:17:19.290451: step: 714/466, loss: 0.1340818852186203 2023-01-24 01:17:19.886024: step: 716/466, loss: 0.20902466773986816 2023-01-24 01:17:20.539905: step: 718/466, loss: 1.2532103061676025 2023-01-24 01:17:21.322090: step: 720/466, loss: 0.7625625133514404 2023-01-24 01:17:21.981454: step: 722/466, loss: 0.23378467559814453 2023-01-24 01:17:22.643304: step: 724/466, loss: 0.763206422328949 2023-01-24 01:17:23.305308: step: 726/466, loss: 0.4498950242996216 2023-01-24 01:17:23.969065: step: 728/466, loss: 0.19985279440879822 2023-01-24 01:17:24.573533: step: 730/466, loss: 1.2688860893249512 2023-01-24 01:17:25.187995: step: 732/466, loss: 0.16228799521923065 2023-01-24 01:17:25.812152: step: 734/466, loss: 0.5916755795478821 2023-01-24 01:17:26.376560: step: 736/466, loss: 0.8936856389045715 2023-01-24 01:17:27.086615: step: 738/466, loss: 0.6819954514503479 2023-01-24 01:17:27.731400: step: 740/466, loss: 0.5362803339958191 2023-01-24 01:17:28.345321: step: 742/466, loss: 0.342614084482193 2023-01-24 01:17:28.961837: step: 744/466, loss: 0.5168187618255615 2023-01-24 01:17:29.579651: step: 746/466, loss: 0.5107553601264954 2023-01-24 01:17:30.208582: step: 748/466, loss: 0.3864627182483673 2023-01-24 01:17:30.791259: step: 750/466, loss: 1.1903786659240723 2023-01-24 01:17:31.394364: step: 752/466, loss: 1.3825924396514893 2023-01-24 01:17:32.037336: step: 754/466, loss: 0.36727002263069153 2023-01-24 01:17:32.598970: step: 756/466, loss: 0.13973617553710938 2023-01-24 01:17:33.234258: step: 758/466, loss: 0.3887569308280945 2023-01-24 01:17:33.868814: step: 760/466, loss: 0.6890345215797424 2023-01-24 01:17:34.519238: step: 762/466, loss: 0.4852111339569092 2023-01-24 01:17:35.181924: step: 764/466, loss: 1.3857756853103638 2023-01-24 01:17:35.766859: step: 766/466, loss: 0.3762704133987427 2023-01-24 01:17:36.370236: step: 768/466, loss: 0.28308290243148804 2023-01-24 01:17:37.026366: step: 770/466, loss: 0.5062119960784912 2023-01-24 01:17:37.634011: step: 772/466, loss: 1.144497036933899 2023-01-24 01:17:38.202098: step: 774/466, loss: 0.2258695363998413 2023-01-24 01:17:38.834645: step: 776/466, loss: 0.3459767699241638 2023-01-24 01:17:39.467941: step: 778/466, loss: 0.8844873309135437 2023-01-24 01:17:40.086307: step: 780/466, loss: 0.14429156482219696 2023-01-24 01:17:40.737376: step: 782/466, loss: 0.18639802932739258 2023-01-24 01:17:41.343930: step: 784/466, loss: 0.5045554041862488 2023-01-24 01:17:41.991393: step: 786/466, loss: 0.26245996356010437 2023-01-24 01:17:42.557741: step: 788/466, loss: 0.9515361189842224 2023-01-24 01:17:43.257016: step: 790/466, loss: 0.7297141551971436 2023-01-24 01:17:43.846647: step: 792/466, loss: 0.07746868580579758 2023-01-24 01:17:44.485710: step: 794/466, loss: 0.25915664434432983 2023-01-24 01:17:45.108697: step: 796/466, loss: 0.2650740444660187 2023-01-24 01:17:45.704120: step: 798/466, loss: 0.45773547887802124 2023-01-24 01:17:46.313889: step: 800/466, loss: 1.342380166053772 2023-01-24 01:17:46.928998: step: 802/466, loss: 1.4285269975662231 2023-01-24 01:17:47.585527: step: 804/466, loss: 0.2560678720474243 2023-01-24 01:17:48.241220: step: 806/466, loss: 0.4064926505088806 2023-01-24 01:17:48.839710: step: 808/466, loss: 0.2849564254283905 2023-01-24 01:17:49.408117: step: 810/466, loss: 1.2182557582855225 2023-01-24 01:17:49.999925: step: 812/466, loss: 0.44663289189338684 2023-01-24 01:17:50.619707: step: 814/466, loss: 0.3266800045967102 2023-01-24 01:17:51.209810: step: 816/466, loss: 0.5361456871032715 2023-01-24 01:17:51.810312: step: 818/466, loss: 0.3417607545852661 2023-01-24 01:17:52.396287: step: 820/466, loss: 0.2348371148109436 2023-01-24 01:17:53.043771: step: 822/466, loss: 0.6924700140953064 2023-01-24 01:17:53.756019: step: 824/466, loss: 0.39939966797828674 2023-01-24 01:17:54.421317: step: 826/466, loss: 0.2644041180610657 2023-01-24 01:17:55.018255: step: 828/466, loss: 0.8015566468238831 2023-01-24 01:17:55.602765: step: 830/466, loss: 0.14491812884807587 2023-01-24 01:17:56.191403: step: 832/466, loss: 0.19254310429096222 2023-01-24 01:17:56.883945: step: 834/466, loss: 0.7438780069351196 2023-01-24 01:17:57.512270: step: 836/466, loss: 0.6236226558685303 2023-01-24 01:17:58.234423: step: 838/466, loss: 0.3768724203109741 2023-01-24 01:17:58.813204: step: 840/466, loss: 0.4085683822631836 2023-01-24 01:17:59.441274: step: 842/466, loss: 0.6542133688926697 2023-01-24 01:18:00.070477: step: 844/466, loss: 0.1571868658065796 2023-01-24 01:18:00.686059: step: 846/466, loss: 0.2461111694574356 2023-01-24 01:18:01.280754: step: 848/466, loss: 0.9098088145256042 2023-01-24 01:18:01.937009: step: 850/466, loss: 0.5644944310188293 2023-01-24 01:18:02.505713: step: 852/466, loss: 1.5331993103027344 2023-01-24 01:18:03.123565: step: 854/466, loss: 0.19291947782039642 2023-01-24 01:18:03.698911: step: 856/466, loss: 0.2534935772418976 2023-01-24 01:18:04.240881: step: 858/466, loss: 0.2990705072879791 2023-01-24 01:18:04.908928: step: 860/466, loss: 0.18518830835819244 2023-01-24 01:18:05.515441: step: 862/466, loss: 0.6862562894821167 2023-01-24 01:18:06.105281: step: 864/466, loss: 0.5459586381912231 2023-01-24 01:18:06.671708: step: 866/466, loss: 0.7927795648574829 2023-01-24 01:18:07.281884: step: 868/466, loss: 0.7867419123649597 2023-01-24 01:18:07.856333: step: 870/466, loss: 0.20071671903133392 2023-01-24 01:18:08.532608: step: 872/466, loss: 0.23669061064720154 2023-01-24 01:18:09.208795: step: 874/466, loss: 1.3111863136291504 2023-01-24 01:18:09.814873: step: 876/466, loss: 3.68401837348938 2023-01-24 01:18:10.497005: step: 878/466, loss: 0.4205979108810425 2023-01-24 01:18:11.110754: step: 880/466, loss: 0.5019314885139465 2023-01-24 01:18:11.678998: step: 882/466, loss: 0.253624826669693 2023-01-24 01:18:12.333119: step: 884/466, loss: 3.2044146060943604 2023-01-24 01:18:12.952428: step: 886/466, loss: 0.7262096405029297 2023-01-24 01:18:13.575182: step: 888/466, loss: 0.42531633377075195 2023-01-24 01:18:14.170025: step: 890/466, loss: 0.5560821294784546 2023-01-24 01:18:14.736360: step: 892/466, loss: 0.6997929811477661 2023-01-24 01:18:15.341801: step: 894/466, loss: 0.34848010540008545 2023-01-24 01:18:16.010001: step: 896/466, loss: 1.874853253364563 2023-01-24 01:18:16.652916: step: 898/466, loss: 0.22323575615882874 2023-01-24 01:18:17.223935: step: 900/466, loss: 0.30327314138412476 2023-01-24 01:18:17.829171: step: 902/466, loss: 0.1081712618470192 2023-01-24 01:18:18.497286: step: 904/466, loss: 0.5367346405982971 2023-01-24 01:18:19.138036: step: 906/466, loss: 0.18195709586143494 2023-01-24 01:18:19.763121: step: 908/466, loss: 2.1218409538269043 2023-01-24 01:18:20.474356: step: 910/466, loss: 0.5148961544036865 2023-01-24 01:18:21.050881: step: 912/466, loss: 0.1135493814945221 2023-01-24 01:18:21.804931: step: 914/466, loss: 0.31819668412208557 2023-01-24 01:18:22.453175: step: 916/466, loss: 0.255973756313324 2023-01-24 01:18:23.003074: step: 918/466, loss: 0.6888198852539062 2023-01-24 01:18:23.729299: step: 920/466, loss: 0.6373869180679321 2023-01-24 01:18:24.346955: step: 922/466, loss: 0.15597975254058838 2023-01-24 01:18:24.974412: step: 924/466, loss: 0.43550437688827515 2023-01-24 01:18:25.601782: step: 926/466, loss: 0.2740226686000824 2023-01-24 01:18:26.199255: step: 928/466, loss: 0.3159542679786682 2023-01-24 01:18:26.854936: step: 930/466, loss: 0.24795958399772644 2023-01-24 01:18:27.477336: step: 932/466, loss: 0.5158659219741821 ================================================== Loss: 0.600 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3334560666389954, 'r': 0.331557834760595, 'f1': 0.33250424152014}, 'combined': 0.24500312533062948, 'epoch': 7} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3375465233285987, 'r': 0.25194131298984757, 'f1': 0.288528139320673}, 'combined': 0.1806671526587392, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30627168248020137, 'r': 0.3277746279294755, 'f1': 0.3166585314735721}, 'combined': 0.2333273389805268, 'epoch': 7} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3488091284089221, 'r': 0.2653092015540311, 'f1': 0.3013825409136095}, 'combined': 0.18677227887603967, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2861140711449159, 'r': 0.3257465705634716, 'f1': 0.3046467483353142}, 'combined': 0.22447655140496833, 'epoch': 7} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.375188599553625, 'r': 0.2626659120632876, 'f1': 0.30900230037625864}, 'combined': 0.20498172401197357, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23469387755102042, 'r': 0.32857142857142857, 'f1': 0.27380952380952384}, 'combined': 0.18253968253968256, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3557692307692308, 'r': 0.40217391304347827, 'f1': 0.37755102040816324}, 'combined': 0.18877551020408162, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20833333333333334, 'r': 0.1724137931034483, 'f1': 0.18867924528301888}, 'combined': 0.12578616352201258, 'epoch': 7} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35266479985310323, 'r': 0.2603161425860667, 'f1': 0.2995340767311292}, 'combined': 0.2207093196966215, 'epoch': 4} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35061171058748686, 'r': 0.22688501488377985, 'f1': 0.27549435231915403}, 'combined': 0.17250580939610582, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40865384615384615, 'r': 0.30357142857142855, 'f1': 0.3483606557377049}, 'combined': 0.23224043715846993, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30627168248020137, 'r': 0.3277746279294755, 'f1': 0.3166585314735721}, 'combined': 0.2333273389805268, 'epoch': 7} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3488091284089221, 'r': 0.2653092015540311, 'f1': 0.3013825409136095}, 'combined': 0.18677227887603967, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3557692307692308, 'r': 0.40217391304347827, 'f1': 0.37755102040816324}, 'combined': 0.18877551020408162, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31668397009966776, 'r': 0.2583948902141502, 'f1': 0.28458538587848936}, 'combined': 0.20969449485783426, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.39165853086307634, 'r': 0.23329840286067724, 'f1': 0.2924147402371384}, 'combined': 0.19397809500879482, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.1896551724137931, 'f1': 0.2619047619047619}, 'combined': 0.1746031746031746, 'epoch': 4} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:21:06.971244: step: 2/466, loss: 0.4891449809074402 2023-01-24 01:21:07.609732: step: 4/466, loss: 0.16528034210205078 2023-01-24 01:21:08.222473: step: 6/466, loss: 0.5206984877586365 2023-01-24 01:21:08.927275: step: 8/466, loss: 0.8224088549613953 2023-01-24 01:21:09.551227: step: 10/466, loss: 0.1467466652393341 2023-01-24 01:21:10.138046: step: 12/466, loss: 0.389367938041687 2023-01-24 01:21:10.809137: step: 14/466, loss: 0.41784942150115967 2023-01-24 01:21:11.383591: step: 16/466, loss: 0.5343703031539917 2023-01-24 01:21:11.979531: step: 18/466, loss: 0.2499110996723175 2023-01-24 01:21:12.625656: step: 20/466, loss: 0.11215971410274506 2023-01-24 01:21:13.262094: step: 22/466, loss: 0.4292660057544708 2023-01-24 01:21:13.883826: step: 24/466, loss: 0.306545615196228 2023-01-24 01:21:14.500447: step: 26/466, loss: 0.336021363735199 2023-01-24 01:21:15.141192: step: 28/466, loss: 0.4028060734272003 2023-01-24 01:21:15.743581: step: 30/466, loss: 0.11552461236715317 2023-01-24 01:21:16.318655: step: 32/466, loss: 0.43966126441955566 2023-01-24 01:21:16.931847: step: 34/466, loss: 0.15228354930877686 2023-01-24 01:21:17.549060: step: 36/466, loss: 0.3842215836048126 2023-01-24 01:21:18.139198: step: 38/466, loss: 0.11852533370256424 2023-01-24 01:21:18.926791: step: 40/466, loss: 0.32897087931632996 2023-01-24 01:21:19.511241: step: 42/466, loss: 0.25963473320007324 2023-01-24 01:21:20.091518: step: 44/466, loss: 0.4247356355190277 2023-01-24 01:21:20.677107: step: 46/466, loss: 0.2964792549610138 2023-01-24 01:21:21.322635: step: 48/466, loss: 0.2749635577201843 2023-01-24 01:21:21.985500: step: 50/466, loss: 0.33326396346092224 2023-01-24 01:21:22.687592: step: 52/466, loss: 0.3246749937534332 2023-01-24 01:21:23.250774: step: 54/466, loss: 0.36350858211517334 2023-01-24 01:21:23.880053: step: 56/466, loss: 0.423279345035553 2023-01-24 01:21:24.498158: step: 58/466, loss: 0.45801088213920593 2023-01-24 01:21:25.055297: step: 60/466, loss: 0.46881794929504395 2023-01-24 01:21:25.682394: step: 62/466, loss: 0.30722299218177795 2023-01-24 01:21:26.378690: step: 64/466, loss: 0.9380748271942139 2023-01-24 01:21:26.983000: step: 66/466, loss: 0.3640623390674591 2023-01-24 01:21:27.566524: step: 68/466, loss: 0.10745082050561905 2023-01-24 01:21:28.140461: step: 70/466, loss: 0.0724589079618454 2023-01-24 01:21:28.784008: step: 72/466, loss: 1.0328303575515747 2023-01-24 01:21:29.402489: step: 74/466, loss: 0.1602552980184555 2023-01-24 01:21:30.032533: step: 76/466, loss: 0.2987484037876129 2023-01-24 01:21:30.627251: step: 78/466, loss: 0.3314462900161743 2023-01-24 01:21:31.237312: step: 80/466, loss: 0.5131017565727234 2023-01-24 01:21:31.879758: step: 82/466, loss: 1.0332460403442383 2023-01-24 01:21:32.418256: step: 84/466, loss: 0.21270959079265594 2023-01-24 01:21:33.070957: step: 86/466, loss: 0.21382087469100952 2023-01-24 01:21:33.670180: step: 88/466, loss: 0.17594799399375916 2023-01-24 01:21:34.283781: step: 90/466, loss: 0.7188621163368225 2023-01-24 01:21:34.905312: step: 92/466, loss: 0.21991394460201263 2023-01-24 01:21:35.543117: step: 94/466, loss: 0.08778136968612671 2023-01-24 01:21:36.223939: step: 96/466, loss: 0.2640227675437927 2023-01-24 01:21:36.825262: step: 98/466, loss: 0.36777612566947937 2023-01-24 01:21:37.405347: step: 100/466, loss: 0.8869547247886658 2023-01-24 01:21:38.026705: step: 102/466, loss: 1.0804152488708496 2023-01-24 01:21:38.588915: step: 104/466, loss: 0.14686386287212372 2023-01-24 01:21:39.253941: step: 106/466, loss: 0.17105038464069366 2023-01-24 01:21:39.878832: step: 108/466, loss: 0.11347710341215134 2023-01-24 01:21:40.501292: step: 110/466, loss: 1.2311139106750488 2023-01-24 01:21:41.174066: step: 112/466, loss: 0.12046322226524353 2023-01-24 01:21:41.778528: step: 114/466, loss: 0.08653685450553894 2023-01-24 01:21:42.464764: step: 116/466, loss: 0.7125675678253174 2023-01-24 01:21:43.062189: step: 118/466, loss: 1.1520308256149292 2023-01-24 01:21:43.667596: step: 120/466, loss: 3.4308528900146484 2023-01-24 01:21:44.233550: step: 122/466, loss: 0.5015811920166016 2023-01-24 01:21:44.829745: step: 124/466, loss: 0.19502291083335876 2023-01-24 01:21:45.506487: step: 126/466, loss: 0.44669079780578613 2023-01-24 01:21:46.177190: step: 128/466, loss: 0.21128225326538086 2023-01-24 01:21:46.755271: step: 130/466, loss: 0.143118754029274 2023-01-24 01:21:47.353315: step: 132/466, loss: 0.44315147399902344 2023-01-24 01:21:47.967334: step: 134/466, loss: 0.24999311566352844 2023-01-24 01:21:48.521858: step: 136/466, loss: 0.30038580298423767 2023-01-24 01:21:49.115684: step: 138/466, loss: 0.33321985602378845 2023-01-24 01:21:49.717682: step: 140/466, loss: 0.6168915629386902 2023-01-24 01:21:50.400993: step: 142/466, loss: 0.30142584443092346 2023-01-24 01:21:51.050680: step: 144/466, loss: 0.1558297872543335 2023-01-24 01:21:51.622064: step: 146/466, loss: 0.9224661588668823 2023-01-24 01:21:52.120645: step: 148/466, loss: 0.5734288692474365 2023-01-24 01:21:52.724306: step: 150/466, loss: 0.3471072316169739 2023-01-24 01:21:53.344424: step: 152/466, loss: 0.4813990890979767 2023-01-24 01:21:53.978042: step: 154/466, loss: 0.6883899569511414 2023-01-24 01:21:54.608907: step: 156/466, loss: 0.761605441570282 2023-01-24 01:21:55.359289: step: 158/466, loss: 0.3350845277309418 2023-01-24 01:21:56.005072: step: 160/466, loss: 0.49126744270324707 2023-01-24 01:21:56.580474: step: 162/466, loss: 0.2792118191719055 2023-01-24 01:21:57.229534: step: 164/466, loss: 0.30940383672714233 2023-01-24 01:21:57.799024: step: 166/466, loss: 0.39825183153152466 2023-01-24 01:21:58.398834: step: 168/466, loss: 0.2737186849117279 2023-01-24 01:21:59.048889: step: 170/466, loss: 0.4189709424972534 2023-01-24 01:21:59.660926: step: 172/466, loss: 0.22759076952934265 2023-01-24 01:22:00.293688: step: 174/466, loss: 0.3200244903564453 2023-01-24 01:22:00.946606: step: 176/466, loss: 0.408761203289032 2023-01-24 01:22:01.636013: step: 178/466, loss: 0.7359369397163391 2023-01-24 01:22:02.199345: step: 180/466, loss: 1.7133041620254517 2023-01-24 01:22:02.825469: step: 182/466, loss: 1.0704987049102783 2023-01-24 01:22:03.430911: step: 184/466, loss: 1.2679871320724487 2023-01-24 01:22:04.129065: step: 186/466, loss: 0.19554297626018524 2023-01-24 01:22:04.728489: step: 188/466, loss: 1.5121451616287231 2023-01-24 01:22:05.384439: step: 190/466, loss: 0.14149893820285797 2023-01-24 01:22:06.015083: step: 192/466, loss: 0.5469195246696472 2023-01-24 01:22:06.674618: step: 194/466, loss: 0.19274388253688812 2023-01-24 01:22:07.311846: step: 196/466, loss: 0.10487526655197144 2023-01-24 01:22:07.892036: step: 198/466, loss: 0.20737946033477783 2023-01-24 01:22:08.479852: step: 200/466, loss: 0.22559292614459991 2023-01-24 01:22:09.072628: step: 202/466, loss: 0.28806859254837036 2023-01-24 01:22:09.675789: step: 204/466, loss: 0.23927956819534302 2023-01-24 01:22:10.327132: step: 206/466, loss: 0.6100448966026306 2023-01-24 01:22:10.997380: step: 208/466, loss: 0.7341493368148804 2023-01-24 01:22:11.612315: step: 210/466, loss: 0.15287290513515472 2023-01-24 01:22:12.260747: step: 212/466, loss: 0.45582741498947144 2023-01-24 01:22:12.909595: step: 214/466, loss: 0.19518420100212097 2023-01-24 01:22:13.640350: step: 216/466, loss: 0.3610004186630249 2023-01-24 01:22:14.273100: step: 218/466, loss: 0.5222340822219849 2023-01-24 01:22:14.881386: step: 220/466, loss: 0.4963740110397339 2023-01-24 01:22:15.572689: step: 222/466, loss: 0.4004770517349243 2023-01-24 01:22:16.111424: step: 224/466, loss: 0.27218079566955566 2023-01-24 01:22:16.762710: step: 226/466, loss: 1.1614692211151123 2023-01-24 01:22:17.452509: step: 228/466, loss: 0.4531356394290924 2023-01-24 01:22:18.013473: step: 230/466, loss: 0.4567834734916687 2023-01-24 01:22:18.665385: step: 232/466, loss: 0.3453659415245056 2023-01-24 01:22:19.321032: step: 234/466, loss: 0.18262067437171936 2023-01-24 01:22:19.986160: step: 236/466, loss: 0.3032654821872711 2023-01-24 01:22:20.517847: step: 238/466, loss: 0.21114803850650787 2023-01-24 01:22:21.148061: step: 240/466, loss: 0.28366371989250183 2023-01-24 01:22:21.707877: step: 242/466, loss: 0.20999203622341156 2023-01-24 01:22:22.316670: step: 244/466, loss: 0.10796754062175751 2023-01-24 01:22:22.905955: step: 246/466, loss: 1.5038633346557617 2023-01-24 01:22:23.475223: step: 248/466, loss: 1.139866590499878 2023-01-24 01:22:24.029995: step: 250/466, loss: 0.08542253077030182 2023-01-24 01:22:24.592568: step: 252/466, loss: 0.8522475361824036 2023-01-24 01:22:25.195479: step: 254/466, loss: 0.3592441976070404 2023-01-24 01:22:25.819551: step: 256/466, loss: 0.32976898550987244 2023-01-24 01:22:26.409253: step: 258/466, loss: 0.8676184415817261 2023-01-24 01:22:27.004207: step: 260/466, loss: 0.30186426639556885 2023-01-24 01:22:27.598434: step: 262/466, loss: 0.7166699767112732 2023-01-24 01:22:28.146160: step: 264/466, loss: 0.38969892263412476 2023-01-24 01:22:28.706540: step: 266/466, loss: 1.7875447273254395 2023-01-24 01:22:29.400845: step: 268/466, loss: 1.1755938529968262 2023-01-24 01:22:29.956451: step: 270/466, loss: 0.20601579546928406 2023-01-24 01:22:30.609877: step: 272/466, loss: 0.18585112690925598 2023-01-24 01:22:31.227842: step: 274/466, loss: 0.25609394907951355 2023-01-24 01:22:31.841153: step: 276/466, loss: 0.27535563707351685 2023-01-24 01:22:32.495508: step: 278/466, loss: 0.21241937577724457 2023-01-24 01:22:33.198920: step: 280/466, loss: 0.6060916781425476 2023-01-24 01:22:33.773328: step: 282/466, loss: 0.9486252665519714 2023-01-24 01:22:34.522644: step: 284/466, loss: 0.41912248730659485 2023-01-24 01:22:35.152846: step: 286/466, loss: 1.7066893577575684 2023-01-24 01:22:35.811702: step: 288/466, loss: 0.5481307506561279 2023-01-24 01:22:36.450783: step: 290/466, loss: 0.4919094741344452 2023-01-24 01:22:37.148409: step: 292/466, loss: 0.6574281454086304 2023-01-24 01:22:37.774010: step: 294/466, loss: 0.22694242000579834 2023-01-24 01:22:38.401746: step: 296/466, loss: 2.1146581172943115 2023-01-24 01:22:38.998769: step: 298/466, loss: 0.5112307667732239 2023-01-24 01:22:39.652916: step: 300/466, loss: 0.895716667175293 2023-01-24 01:22:40.258666: step: 302/466, loss: 0.2852054834365845 2023-01-24 01:22:40.879501: step: 304/466, loss: 0.29013773798942566 2023-01-24 01:22:41.518470: step: 306/466, loss: 0.31209948658943176 2023-01-24 01:22:42.202287: step: 308/466, loss: 0.0986483246088028 2023-01-24 01:22:42.825932: step: 310/466, loss: 0.22533275187015533 2023-01-24 01:22:43.429427: step: 312/466, loss: 0.20643262565135956 2023-01-24 01:22:44.045872: step: 314/466, loss: 0.7615416646003723 2023-01-24 01:22:44.613043: step: 316/466, loss: 0.15414905548095703 2023-01-24 01:22:45.330775: step: 318/466, loss: 1.1039615869522095 2023-01-24 01:22:45.988180: step: 320/466, loss: 0.265773206949234 2023-01-24 01:22:46.653243: step: 322/466, loss: 0.40013816952705383 2023-01-24 01:22:47.317944: step: 324/466, loss: 0.667767345905304 2023-01-24 01:22:47.927138: step: 326/466, loss: 0.4804272949695587 2023-01-24 01:22:48.619215: step: 328/466, loss: 0.6763488054275513 2023-01-24 01:22:49.314297: step: 330/466, loss: 2.750519037246704 2023-01-24 01:22:49.910354: step: 332/466, loss: 1.282062292098999 2023-01-24 01:22:50.492490: step: 334/466, loss: 1.68393075466156 2023-01-24 01:22:51.123853: step: 336/466, loss: 0.23293834924697876 2023-01-24 01:22:51.756892: step: 338/466, loss: 0.7747480869293213 2023-01-24 01:22:52.393423: step: 340/466, loss: 0.11038610339164734 2023-01-24 01:22:53.001222: step: 342/466, loss: 0.08814762532711029 2023-01-24 01:22:53.671091: step: 344/466, loss: 0.4685029983520508 2023-01-24 01:22:54.246697: step: 346/466, loss: 0.25662773847579956 2023-01-24 01:22:54.816922: step: 348/466, loss: 0.3834894895553589 2023-01-24 01:22:55.443606: step: 350/466, loss: 0.3033915162086487 2023-01-24 01:22:56.096328: step: 352/466, loss: 0.6060888171195984 2023-01-24 01:22:56.738092: step: 354/466, loss: 0.3031148612499237 2023-01-24 01:22:57.357327: step: 356/466, loss: 0.26334863901138306 2023-01-24 01:22:57.949737: step: 358/466, loss: 0.4853774905204773 2023-01-24 01:22:58.619185: step: 360/466, loss: 0.6246654391288757 2023-01-24 01:22:59.300692: step: 362/466, loss: 0.10203198343515396 2023-01-24 01:22:59.910215: step: 364/466, loss: 0.6044864058494568 2023-01-24 01:23:00.578575: step: 366/466, loss: 0.3602357506752014 2023-01-24 01:23:01.185775: step: 368/466, loss: 0.25677040219306946 2023-01-24 01:23:01.808317: step: 370/466, loss: 0.10781390219926834 2023-01-24 01:23:02.376346: step: 372/466, loss: 0.35638684034347534 2023-01-24 01:23:02.990371: step: 374/466, loss: 1.1047513484954834 2023-01-24 01:23:03.525363: step: 376/466, loss: 0.28093698620796204 2023-01-24 01:23:04.120273: step: 378/466, loss: 0.7687437534332275 2023-01-24 01:23:04.751889: step: 380/466, loss: 1.9897117614746094 2023-01-24 01:23:05.357219: step: 382/466, loss: 0.5927366018295288 2023-01-24 01:23:05.960230: step: 384/466, loss: 0.3256917893886566 2023-01-24 01:23:06.555639: step: 386/466, loss: 0.09695328027009964 2023-01-24 01:23:07.217919: step: 388/466, loss: 0.2785665988922119 2023-01-24 01:23:07.815495: step: 390/466, loss: 0.2793259620666504 2023-01-24 01:23:08.439226: step: 392/466, loss: 0.3837505578994751 2023-01-24 01:23:09.125581: step: 394/466, loss: 1.2248982191085815 2023-01-24 01:23:09.781313: step: 396/466, loss: 0.18187536299228668 2023-01-24 01:23:10.367457: step: 398/466, loss: 1.0493046045303345 2023-01-24 01:23:11.014962: step: 400/466, loss: 0.19331248104572296 2023-01-24 01:23:11.706500: step: 402/466, loss: 3.6122488975524902 2023-01-24 01:23:12.402710: step: 404/466, loss: 2.8651459217071533 2023-01-24 01:23:12.980611: step: 406/466, loss: 0.35814833641052246 2023-01-24 01:23:13.574888: step: 408/466, loss: 0.13510677218437195 2023-01-24 01:23:14.279295: step: 410/466, loss: 0.4739450216293335 2023-01-24 01:23:14.873028: step: 412/466, loss: 0.8541678190231323 2023-01-24 01:23:15.466678: step: 414/466, loss: 0.8350094556808472 2023-01-24 01:23:16.089980: step: 416/466, loss: 0.2985285818576813 2023-01-24 01:23:16.656770: step: 418/466, loss: 1.1642861366271973 2023-01-24 01:23:17.289937: step: 420/466, loss: 0.674666702747345 2023-01-24 01:23:17.931043: step: 422/466, loss: 0.48510220646858215 2023-01-24 01:23:18.523377: step: 424/466, loss: 0.5639714002609253 2023-01-24 01:23:19.088749: step: 426/466, loss: 0.37887322902679443 2023-01-24 01:23:19.639344: step: 428/466, loss: 0.43298012018203735 2023-01-24 01:23:20.255816: step: 430/466, loss: 0.07726405560970306 2023-01-24 01:23:20.818984: step: 432/466, loss: 1.3216429948806763 2023-01-24 01:23:21.419040: step: 434/466, loss: 0.33925119042396545 2023-01-24 01:23:22.051168: step: 436/466, loss: 0.29555174708366394 2023-01-24 01:23:22.678643: step: 438/466, loss: 0.15159834921360016 2023-01-24 01:23:23.274059: step: 440/466, loss: 0.3884194791316986 2023-01-24 01:23:23.957620: step: 442/466, loss: 0.19900605082511902 2023-01-24 01:23:24.662597: step: 444/466, loss: 0.2201690375804901 2023-01-24 01:23:25.277923: step: 446/466, loss: 0.46234437823295593 2023-01-24 01:23:25.914633: step: 448/466, loss: 0.26391351222991943 2023-01-24 01:23:26.524232: step: 450/466, loss: 0.16837245225906372 2023-01-24 01:23:27.177572: step: 452/466, loss: 0.11911796033382416 2023-01-24 01:23:27.863821: step: 454/466, loss: 0.7436661720275879 2023-01-24 01:23:28.451401: step: 456/466, loss: 0.8642342686653137 2023-01-24 01:23:29.033173: step: 458/466, loss: 0.12915383279323578 2023-01-24 01:23:29.675754: step: 460/466, loss: 0.8493901491165161 2023-01-24 01:23:30.306006: step: 462/466, loss: 0.13043589890003204 2023-01-24 01:23:30.903923: step: 464/466, loss: 0.17544306814670563 2023-01-24 01:23:31.564345: step: 466/466, loss: 0.20262432098388672 2023-01-24 01:23:32.236311: step: 468/466, loss: 0.3896419107913971 2023-01-24 01:23:32.862523: step: 470/466, loss: 0.28438907861709595 2023-01-24 01:23:33.528367: step: 472/466, loss: 0.5023604035377502 2023-01-24 01:23:34.216448: step: 474/466, loss: 0.57744961977005 2023-01-24 01:23:34.830526: step: 476/466, loss: 0.9624512195587158 2023-01-24 01:23:35.404800: step: 478/466, loss: 0.7388322949409485 2023-01-24 01:23:36.045195: step: 480/466, loss: 0.4975886046886444 2023-01-24 01:23:36.655573: step: 482/466, loss: 0.05164724215865135 2023-01-24 01:23:37.291148: step: 484/466, loss: 0.4877254366874695 2023-01-24 01:23:37.951769: step: 486/466, loss: 0.28059661388397217 2023-01-24 01:23:38.599079: step: 488/466, loss: 0.8249738216400146 2023-01-24 01:23:39.230633: step: 490/466, loss: 0.5365325212478638 2023-01-24 01:23:39.837417: step: 492/466, loss: 1.1868059635162354 2023-01-24 01:23:40.484896: step: 494/466, loss: 0.7013980150222778 2023-01-24 01:23:41.106199: step: 496/466, loss: 0.29911187291145325 2023-01-24 01:23:41.748863: step: 498/466, loss: 0.34993094205856323 2023-01-24 01:23:42.323331: step: 500/466, loss: 0.37614795565605164 2023-01-24 01:23:42.909474: step: 502/466, loss: 0.3541680872440338 2023-01-24 01:23:43.534413: step: 504/466, loss: 0.12377132475376129 2023-01-24 01:23:44.131306: step: 506/466, loss: 0.2723594009876251 2023-01-24 01:23:44.777237: step: 508/466, loss: 1.4284451007843018 2023-01-24 01:23:45.411669: step: 510/466, loss: 0.11622864753007889 2023-01-24 01:23:46.093346: step: 512/466, loss: 0.22645901143550873 2023-01-24 01:23:46.709373: step: 514/466, loss: 0.37532246112823486 2023-01-24 01:23:47.392835: step: 516/466, loss: 2.2145767211914062 2023-01-24 01:23:47.963592: step: 518/466, loss: 0.613787055015564 2023-01-24 01:23:48.547206: step: 520/466, loss: 0.2860957980155945 2023-01-24 01:23:49.123830: step: 522/466, loss: 1.8711495399475098 2023-01-24 01:23:49.726936: step: 524/466, loss: 0.2798992395401001 2023-01-24 01:23:50.333787: step: 526/466, loss: 0.7050901651382446 2023-01-24 01:23:50.961003: step: 528/466, loss: 0.17970451712608337 2023-01-24 01:23:51.634465: step: 530/466, loss: 0.7807099223136902 2023-01-24 01:23:52.242935: step: 532/466, loss: 0.24164973199367523 2023-01-24 01:23:52.946116: step: 534/466, loss: 0.15025509893894196 2023-01-24 01:23:53.568359: step: 536/466, loss: 1.8316205739974976 2023-01-24 01:23:54.165854: step: 538/466, loss: 0.12155395746231079 2023-01-24 01:23:54.804497: step: 540/466, loss: 1.446241855621338 2023-01-24 01:23:55.466569: step: 542/466, loss: 0.3058852553367615 2023-01-24 01:23:56.183394: step: 544/466, loss: 0.6193525791168213 2023-01-24 01:23:56.850692: step: 546/466, loss: 0.21508213877677917 2023-01-24 01:23:57.433549: step: 548/466, loss: 0.21789303421974182 2023-01-24 01:23:58.035078: step: 550/466, loss: 0.4308699071407318 2023-01-24 01:23:58.726170: step: 552/466, loss: 0.3660205900669098 2023-01-24 01:23:59.399219: step: 554/466, loss: 1.035229206085205 2023-01-24 01:23:59.962461: step: 556/466, loss: 0.6700537204742432 2023-01-24 01:24:00.586767: step: 558/466, loss: 0.7066946625709534 2023-01-24 01:24:01.136601: step: 560/466, loss: 0.3775577247142792 2023-01-24 01:24:01.753004: step: 562/466, loss: 0.6352196931838989 2023-01-24 01:24:02.387535: step: 564/466, loss: 0.2720373868942261 2023-01-24 01:24:03.033565: step: 566/466, loss: 0.21507731080055237 2023-01-24 01:24:03.713137: step: 568/466, loss: 0.6397743821144104 2023-01-24 01:24:04.466386: step: 570/466, loss: 0.33709046244621277 2023-01-24 01:24:05.104782: step: 572/466, loss: 0.662662923336029 2023-01-24 01:24:05.704290: step: 574/466, loss: 0.5373324751853943 2023-01-24 01:24:06.287245: step: 576/466, loss: 0.4190295934677124 2023-01-24 01:24:06.961116: step: 578/466, loss: 0.27939939498901367 2023-01-24 01:24:07.528877: step: 580/466, loss: 0.3925917148590088 2023-01-24 01:24:08.126221: step: 582/466, loss: 0.7767550945281982 2023-01-24 01:24:08.770605: step: 584/466, loss: 0.29361432790756226 2023-01-24 01:24:09.355560: step: 586/466, loss: 0.5022650957107544 2023-01-24 01:24:09.967846: step: 588/466, loss: 0.2584410011768341 2023-01-24 01:24:10.563369: step: 590/466, loss: 0.16977599263191223 2023-01-24 01:24:11.192963: step: 592/466, loss: 0.7773684859275818 2023-01-24 01:24:11.757089: step: 594/466, loss: 1.2744741439819336 2023-01-24 01:24:12.390159: step: 596/466, loss: 0.3293832838535309 2023-01-24 01:24:13.005840: step: 598/466, loss: 0.4391189515590668 2023-01-24 01:24:13.600301: step: 600/466, loss: 0.33168160915374756 2023-01-24 01:24:14.182287: step: 602/466, loss: 0.5100934505462646 2023-01-24 01:24:14.773932: step: 604/466, loss: 1.8785651922225952 2023-01-24 01:24:15.321365: step: 606/466, loss: 0.6358599662780762 2023-01-24 01:24:15.912601: step: 608/466, loss: 0.1605401188135147 2023-01-24 01:24:16.584479: step: 610/466, loss: 0.21751931309700012 2023-01-24 01:24:17.176559: step: 612/466, loss: 0.5162280797958374 2023-01-24 01:24:17.760617: step: 614/466, loss: 0.2392381727695465 2023-01-24 01:24:18.373638: step: 616/466, loss: 0.5406078100204468 2023-01-24 01:24:18.986371: step: 618/466, loss: 0.4269423484802246 2023-01-24 01:24:19.578084: step: 620/466, loss: 0.4323086142539978 2023-01-24 01:24:20.215598: step: 622/466, loss: 0.7112070918083191 2023-01-24 01:24:20.816770: step: 624/466, loss: 0.7384576797485352 2023-01-24 01:24:21.464551: step: 626/466, loss: 0.19302891194820404 2023-01-24 01:24:22.085863: step: 628/466, loss: 0.8037995100021362 2023-01-24 01:24:22.665733: step: 630/466, loss: 0.28480249643325806 2023-01-24 01:24:23.321441: step: 632/466, loss: 0.14106625318527222 2023-01-24 01:24:24.013086: step: 634/466, loss: 0.43928807973861694 2023-01-24 01:24:24.614807: step: 636/466, loss: 0.2419721484184265 2023-01-24 01:24:25.223057: step: 638/466, loss: 0.3035977780818939 2023-01-24 01:24:25.867008: step: 640/466, loss: 0.22203347086906433 2023-01-24 01:24:26.481444: step: 642/466, loss: 0.22260044515132904 2023-01-24 01:24:27.153399: step: 644/466, loss: 0.46736451983451843 2023-01-24 01:24:27.746549: step: 646/466, loss: 0.40349987149238586 2023-01-24 01:24:28.301614: step: 648/466, loss: 0.5523925423622131 2023-01-24 01:24:28.981634: step: 650/466, loss: 2.0567588806152344 2023-01-24 01:24:29.615158: step: 652/466, loss: 0.1411169320344925 2023-01-24 01:24:30.226870: step: 654/466, loss: 0.30137670040130615 2023-01-24 01:24:30.942199: step: 656/466, loss: 0.1508619785308838 2023-01-24 01:24:31.614683: step: 658/466, loss: 0.3482684791088104 2023-01-24 01:24:32.276327: step: 660/466, loss: 0.12911884486675262 2023-01-24 01:24:32.907014: step: 662/466, loss: 0.17649178206920624 2023-01-24 01:24:33.548683: step: 664/466, loss: 0.1580641120672226 2023-01-24 01:24:34.176768: step: 666/466, loss: 0.6881174445152283 2023-01-24 01:24:34.740778: step: 668/466, loss: 0.5445083379745483 2023-01-24 01:24:35.330033: step: 670/466, loss: 0.18814463913440704 2023-01-24 01:24:35.960858: step: 672/466, loss: 0.4814763069152832 2023-01-24 01:24:36.563278: step: 674/466, loss: 0.6384454965591431 2023-01-24 01:24:37.165664: step: 676/466, loss: 0.5807408094406128 2023-01-24 01:24:37.798326: step: 678/466, loss: 0.7347681522369385 2023-01-24 01:24:38.364196: step: 680/466, loss: 0.3069644570350647 2023-01-24 01:24:38.994257: step: 682/466, loss: 9.15442180633545 2023-01-24 01:24:39.701084: step: 684/466, loss: 1.127607822418213 2023-01-24 01:24:40.302145: step: 686/466, loss: 0.92728191614151 2023-01-24 01:24:40.915268: step: 688/466, loss: 0.3164721429347992 2023-01-24 01:24:41.479819: step: 690/466, loss: 0.5347660183906555 2023-01-24 01:24:42.128102: step: 692/466, loss: 0.9528379440307617 2023-01-24 01:24:42.760050: step: 694/466, loss: 0.12908470630645752 2023-01-24 01:24:43.423570: step: 696/466, loss: 0.49494537711143494 2023-01-24 01:24:44.114647: step: 698/466, loss: 0.49588873982429504 2023-01-24 01:24:44.712407: step: 700/466, loss: 0.44561535120010376 2023-01-24 01:24:45.303612: step: 702/466, loss: 0.27392247319221497 2023-01-24 01:24:45.904172: step: 704/466, loss: 0.1783570647239685 2023-01-24 01:24:46.515662: step: 706/466, loss: 0.1547602117061615 2023-01-24 01:24:47.084023: step: 708/466, loss: 0.5803582668304443 2023-01-24 01:24:47.702896: step: 710/466, loss: 0.11984024196863174 2023-01-24 01:24:48.344924: step: 712/466, loss: 0.6738526225090027 2023-01-24 01:24:48.993705: step: 714/466, loss: 0.1570020318031311 2023-01-24 01:24:49.611344: step: 716/466, loss: 0.5142013430595398 2023-01-24 01:24:50.154761: step: 718/466, loss: 0.1734398752450943 2023-01-24 01:24:50.802743: step: 720/466, loss: 0.5289613008499146 2023-01-24 01:24:51.433168: step: 722/466, loss: 0.14029181003570557 2023-01-24 01:24:52.159801: step: 724/466, loss: 0.38573265075683594 2023-01-24 01:24:52.818991: step: 726/466, loss: 0.5696289539337158 2023-01-24 01:24:53.471456: step: 728/466, loss: 0.4644445776939392 2023-01-24 01:24:54.051027: step: 730/466, loss: 0.34148305654525757 2023-01-24 01:24:54.624795: step: 732/466, loss: 1.0923960208892822 2023-01-24 01:24:55.174623: step: 734/466, loss: 0.1537511795759201 2023-01-24 01:24:55.790904: step: 736/466, loss: 0.16307362914085388 2023-01-24 01:24:56.425648: step: 738/466, loss: 0.20459650456905365 2023-01-24 01:24:57.077195: step: 740/466, loss: 0.2831628620624542 2023-01-24 01:24:57.724198: step: 742/466, loss: 0.6424025297164917 2023-01-24 01:24:58.287685: step: 744/466, loss: 0.25834977626800537 2023-01-24 01:24:58.924199: step: 746/466, loss: 0.5080797672271729 2023-01-24 01:24:59.526025: step: 748/466, loss: 0.9643440842628479 2023-01-24 01:25:00.186841: step: 750/466, loss: 0.1867329627275467 2023-01-24 01:25:00.819394: step: 752/466, loss: 1.104346513748169 2023-01-24 01:25:01.469636: step: 754/466, loss: 0.20777837932109833 2023-01-24 01:25:02.120695: step: 756/466, loss: 0.40053248405456543 2023-01-24 01:25:02.786971: step: 758/466, loss: 0.7012250423431396 2023-01-24 01:25:03.325028: step: 760/466, loss: 0.24934914708137512 2023-01-24 01:25:03.872863: step: 762/466, loss: 0.08132177591323853 2023-01-24 01:25:04.436520: step: 764/466, loss: 0.7300129532814026 2023-01-24 01:25:05.078890: step: 766/466, loss: 0.2983568012714386 2023-01-24 01:25:05.665243: step: 768/466, loss: 0.4181157052516937 2023-01-24 01:25:06.334189: step: 770/466, loss: 0.1665697544813156 2023-01-24 01:25:06.966684: step: 772/466, loss: 0.3983364403247833 2023-01-24 01:25:07.607407: step: 774/466, loss: 0.9016590714454651 2023-01-24 01:25:08.250769: step: 776/466, loss: 0.2623269557952881 2023-01-24 01:25:08.882869: step: 778/466, loss: 0.30989834666252136 2023-01-24 01:25:09.528418: step: 780/466, loss: 0.30029258131980896 2023-01-24 01:25:10.217039: step: 782/466, loss: 0.8610550165176392 2023-01-24 01:25:10.810656: step: 784/466, loss: 0.18214188516139984 2023-01-24 01:25:11.451865: step: 786/466, loss: 1.3146240711212158 2023-01-24 01:25:12.078494: step: 788/466, loss: 0.17744801938533783 2023-01-24 01:25:12.709035: step: 790/466, loss: 0.3240196108818054 2023-01-24 01:25:13.403820: step: 792/466, loss: 0.15075667202472687 2023-01-24 01:25:14.066241: step: 794/466, loss: 0.1881697028875351 2023-01-24 01:25:14.740806: step: 796/466, loss: 1.692413568496704 2023-01-24 01:25:15.335297: step: 798/466, loss: 0.1842980831861496 2023-01-24 01:25:15.909989: step: 800/466, loss: 0.9132423400878906 2023-01-24 01:25:16.614133: step: 802/466, loss: 0.25958237051963806 2023-01-24 01:25:17.207163: step: 804/466, loss: 0.46160873770713806 2023-01-24 01:25:17.785955: step: 806/466, loss: 0.5344682931900024 2023-01-24 01:25:18.375664: step: 808/466, loss: 0.3357923924922943 2023-01-24 01:25:18.966568: step: 810/466, loss: 0.16021166741847992 2023-01-24 01:25:19.558853: step: 812/466, loss: 0.15776556730270386 2023-01-24 01:25:20.148525: step: 814/466, loss: 3.033827304840088 2023-01-24 01:25:20.765873: step: 816/466, loss: 0.7120888233184814 2023-01-24 01:25:21.410143: step: 818/466, loss: 0.3273853361606598 2023-01-24 01:25:21.991328: step: 820/466, loss: 0.7432978749275208 2023-01-24 01:25:22.627045: step: 822/466, loss: 0.1920372098684311 2023-01-24 01:25:23.269974: step: 824/466, loss: 1.0010572671890259 2023-01-24 01:25:23.895298: step: 826/466, loss: 1.0172005891799927 2023-01-24 01:25:24.573910: step: 828/466, loss: 0.19087344408035278 2023-01-24 01:25:25.202349: step: 830/466, loss: 0.5420885682106018 2023-01-24 01:25:25.812470: step: 832/466, loss: 0.22804272174835205 2023-01-24 01:25:26.524557: step: 834/466, loss: 0.2788725793361664 2023-01-24 01:25:27.201324: step: 836/466, loss: 0.16794948279857635 2023-01-24 01:25:27.811059: step: 838/466, loss: 0.5333663821220398 2023-01-24 01:25:28.376591: step: 840/466, loss: 0.1299857646226883 2023-01-24 01:25:28.976227: step: 842/466, loss: 0.3256866931915283 2023-01-24 01:25:29.582637: step: 844/466, loss: 0.5658522248268127 2023-01-24 01:25:30.235488: step: 846/466, loss: 0.2746819853782654 2023-01-24 01:25:30.834654: step: 848/466, loss: 0.2980114221572876 2023-01-24 01:25:31.459725: step: 850/466, loss: 0.2512083947658539 2023-01-24 01:25:32.063593: step: 852/466, loss: 0.42742490768432617 2023-01-24 01:25:32.689545: step: 854/466, loss: 0.7600464820861816 2023-01-24 01:25:33.297795: step: 856/466, loss: 0.14330822229385376 2023-01-24 01:25:33.867898: step: 858/466, loss: 1.0403125286102295 2023-01-24 01:25:34.527773: step: 860/466, loss: 0.2647596001625061 2023-01-24 01:25:35.107070: step: 862/466, loss: 0.11951369047164917 2023-01-24 01:25:35.784031: step: 864/466, loss: 0.4448555111885071 2023-01-24 01:25:36.441662: step: 866/466, loss: 0.2565997540950775 2023-01-24 01:25:37.097887: step: 868/466, loss: 0.21580347418785095 2023-01-24 01:25:37.770237: step: 870/466, loss: 1.1926987171173096 2023-01-24 01:25:38.332748: step: 872/466, loss: 0.2284402698278427 2023-01-24 01:25:38.916132: step: 874/466, loss: 0.2942436933517456 2023-01-24 01:25:39.591817: step: 876/466, loss: 0.2567604184150696 2023-01-24 01:25:40.242109: step: 878/466, loss: 0.26719653606414795 2023-01-24 01:25:40.862106: step: 880/466, loss: 0.16044160723686218 2023-01-24 01:25:41.461437: step: 882/466, loss: 0.19051779806613922 2023-01-24 01:25:42.162287: step: 884/466, loss: 5.118203163146973 2023-01-24 01:25:42.825156: step: 886/466, loss: 0.16387850046157837 2023-01-24 01:25:43.498073: step: 888/466, loss: 0.46310433745384216 2023-01-24 01:25:44.102163: step: 890/466, loss: 0.07903813570737839 2023-01-24 01:25:44.674809: step: 892/466, loss: 0.2074670046567917 2023-01-24 01:25:45.266259: step: 894/466, loss: 0.49393320083618164 2023-01-24 01:25:45.895833: step: 896/466, loss: 1.1565308570861816 2023-01-24 01:25:46.522673: step: 898/466, loss: 0.5069413185119629 2023-01-24 01:25:47.152365: step: 900/466, loss: 0.47783806920051575 2023-01-24 01:25:47.771705: step: 902/466, loss: 1.0002384185791016 2023-01-24 01:25:48.333242: step: 904/466, loss: 0.6225457787513733 2023-01-24 01:25:48.899215: step: 906/466, loss: 0.29943346977233887 2023-01-24 01:25:49.505194: step: 908/466, loss: 0.1338597536087036 2023-01-24 01:25:50.089405: step: 910/466, loss: 0.36804789304733276 2023-01-24 01:25:50.667768: step: 912/466, loss: 0.7149313688278198 2023-01-24 01:25:51.413423: step: 914/466, loss: 0.3055904805660248 2023-01-24 01:25:52.058199: step: 916/466, loss: 0.17568480968475342 2023-01-24 01:25:52.665116: step: 918/466, loss: 0.7913815975189209 2023-01-24 01:25:53.297924: step: 920/466, loss: 0.25138628482818604 2023-01-24 01:25:53.903271: step: 922/466, loss: 0.40639394521713257 2023-01-24 01:25:54.512850: step: 924/466, loss: 0.3113460838794708 2023-01-24 01:25:55.075848: step: 926/466, loss: 1.047174334526062 2023-01-24 01:25:55.783175: step: 928/466, loss: 1.0717891454696655 2023-01-24 01:25:56.435434: step: 930/466, loss: 0.12569622695446014 2023-01-24 01:25:57.072095: step: 932/466, loss: 0.19045127928256989 ================================================== Loss: 0.536 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3560864378078818, 'r': 0.31292444534632036, 'f1': 0.3331131192396314}, 'combined': 0.24545177207130733, 'epoch': 8} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3231893889149893, 'r': 0.27039401040089805, 'f1': 0.294443797075376}, 'combined': 0.18437134957056256, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34311008886313143, 'r': 0.32036604888167386, 'f1': 0.33134823469054614}, 'combined': 0.24415133082461293, 'epoch': 8} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.33236024526776287, 'r': 0.282566201301654, 'f1': 0.30544717174851965}, 'combined': 0.1892912050272516, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31801932532095056, 'r': 0.3150077786796537, 'f1': 0.3165063884735626}, 'combined': 0.23321523361209875, 'epoch': 8} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3518542716628563, 'r': 0.2739822783860724, 'f1': 0.3080735217606726}, 'combined': 0.20436560354420857, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2903225806451613, 'r': 0.2571428571428571, 'f1': 0.27272727272727276}, 'combined': 0.18181818181818182, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2608695652173913, 'r': 0.2608695652173913, 'f1': 0.2608695652173913}, 'combined': 0.13043478260869565, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36538461538461536, 'r': 0.16379310344827586, 'f1': 0.2261904761904762}, 'combined': 0.15079365079365079, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35266479985310323, 'r': 0.2603161425860667, 'f1': 0.2995340767311292}, 'combined': 0.2207093196966215, 'epoch': 4} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35061171058748686, 'r': 0.22688501488377985, 'f1': 0.27549435231915403}, 'combined': 0.17250580939610582, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40865384615384615, 'r': 0.30357142857142855, 'f1': 0.3483606557377049}, 'combined': 0.23224043715846993, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30627168248020137, 'r': 0.3277746279294755, 'f1': 0.3166585314735721}, 'combined': 0.2333273389805268, 'epoch': 7} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3488091284089221, 'r': 0.2653092015540311, 'f1': 0.3013825409136095}, 'combined': 0.18677227887603967, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3557692307692308, 'r': 0.40217391304347827, 'f1': 0.37755102040816324}, 'combined': 0.18877551020408162, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31668397009966776, 'r': 0.2583948902141502, 'f1': 0.28458538587848936}, 'combined': 0.20969449485783426, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.39165853086307634, 'r': 0.23329840286067724, 'f1': 0.2924147402371384}, 'combined': 0.19397809500879482, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.1896551724137931, 'f1': 0.2619047619047619}, 'combined': 0.1746031746031746, 'epoch': 4} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:28:31.895803: step: 2/466, loss: 0.14333821833133698 2023-01-24 01:28:32.529135: step: 4/466, loss: 0.3721154034137726 2023-01-24 01:28:33.140286: step: 6/466, loss: 0.1154833510518074 2023-01-24 01:28:33.766173: step: 8/466, loss: 0.31080323457717896 2023-01-24 01:28:34.364965: step: 10/466, loss: 0.22003662586212158 2023-01-24 01:28:34.976722: step: 12/466, loss: 0.13437055051326752 2023-01-24 01:28:35.589638: step: 14/466, loss: 0.17498359084129333 2023-01-24 01:28:36.153798: step: 16/466, loss: 0.28144538402557373 2023-01-24 01:28:36.788652: step: 18/466, loss: 0.0995631068944931 2023-01-24 01:28:37.405022: step: 20/466, loss: 0.49647435545921326 2023-01-24 01:28:37.928299: step: 22/466, loss: 0.2559816539287567 2023-01-24 01:28:38.511763: step: 24/466, loss: 0.2428935170173645 2023-01-24 01:28:39.217070: step: 26/466, loss: 0.15105891227722168 2023-01-24 01:28:39.834781: step: 28/466, loss: 0.18047349154949188 2023-01-24 01:28:40.502477: step: 30/466, loss: 1.209218144416809 2023-01-24 01:28:41.189796: step: 32/466, loss: 0.3093844950199127 2023-01-24 01:28:41.829805: step: 34/466, loss: 0.25497299432754517 2023-01-24 01:28:42.418121: step: 36/466, loss: 0.2641991376876831 2023-01-24 01:28:43.031454: step: 38/466, loss: 0.11457935720682144 2023-01-24 01:28:43.667842: step: 40/466, loss: 0.4195786714553833 2023-01-24 01:28:44.227384: step: 42/466, loss: 0.1561284363269806 2023-01-24 01:28:44.850646: step: 44/466, loss: 0.3661919832229614 2023-01-24 01:28:45.502896: step: 46/466, loss: 0.24955451488494873 2023-01-24 01:28:46.081859: step: 48/466, loss: 0.11433481425046921 2023-01-24 01:28:46.793885: step: 50/466, loss: 0.07350247353315353 2023-01-24 01:28:47.387903: step: 52/466, loss: 0.12642313539981842 2023-01-24 01:28:48.001906: step: 54/466, loss: 0.6436976790428162 2023-01-24 01:28:48.580865: step: 56/466, loss: 1.2792249917984009 2023-01-24 01:28:49.236576: step: 58/466, loss: 0.11254329234361649 2023-01-24 01:28:49.825657: step: 60/466, loss: 0.17455169558525085 2023-01-24 01:28:50.360387: step: 62/466, loss: 0.2526421546936035 2023-01-24 01:28:50.977828: step: 64/466, loss: 0.7558532953262329 2023-01-24 01:28:51.586313: step: 66/466, loss: 0.4842546582221985 2023-01-24 01:28:52.251852: step: 68/466, loss: 0.15906848013401031 2023-01-24 01:28:52.865298: step: 70/466, loss: 0.2760641276836395 2023-01-24 01:28:53.448464: step: 72/466, loss: 0.6627677083015442 2023-01-24 01:28:54.097066: step: 74/466, loss: 0.7741183638572693 2023-01-24 01:28:54.757747: step: 76/466, loss: 0.1407208889722824 2023-01-24 01:28:55.347497: step: 78/466, loss: 0.49924731254577637 2023-01-24 01:28:55.978358: step: 80/466, loss: 0.9123730659484863 2023-01-24 01:28:56.672295: step: 82/466, loss: 0.11611166596412659 2023-01-24 01:28:57.266437: step: 84/466, loss: 0.6890730261802673 2023-01-24 01:28:57.891505: step: 86/466, loss: 0.3308718502521515 2023-01-24 01:28:58.494671: step: 88/466, loss: 0.10638472437858582 2023-01-24 01:28:59.175768: step: 90/466, loss: 0.14739586412906647 2023-01-24 01:29:00.030359: step: 92/466, loss: 0.3096799850463867 2023-01-24 01:29:00.709810: step: 94/466, loss: 0.25767630338668823 2023-01-24 01:29:01.328794: step: 96/466, loss: 0.7320065498352051 2023-01-24 01:29:02.047279: step: 98/466, loss: 0.524272084236145 2023-01-24 01:29:02.672616: step: 100/466, loss: 0.12355181574821472 2023-01-24 01:29:03.344942: step: 102/466, loss: 0.35262879729270935 2023-01-24 01:29:03.973987: step: 104/466, loss: 0.43407341837882996 2023-01-24 01:29:04.595058: step: 106/466, loss: 0.24438145756721497 2023-01-24 01:29:05.187194: step: 108/466, loss: 0.19344185292720795 2023-01-24 01:29:05.860192: step: 110/466, loss: 0.22223243117332458 2023-01-24 01:29:06.461247: step: 112/466, loss: 0.22712765634059906 2023-01-24 01:29:07.098024: step: 114/466, loss: 0.5355889201164246 2023-01-24 01:29:07.721117: step: 116/466, loss: 0.20177462697029114 2023-01-24 01:29:08.319099: step: 118/466, loss: 3.8853647708892822 2023-01-24 01:29:09.013563: step: 120/466, loss: 0.6942616701126099 2023-01-24 01:29:09.589584: step: 122/466, loss: 0.1367759108543396 2023-01-24 01:29:10.178768: step: 124/466, loss: 0.28530353307724 2023-01-24 01:29:10.777555: step: 126/466, loss: 0.40220534801483154 2023-01-24 01:29:11.425090: step: 128/466, loss: 0.25116458535194397 2023-01-24 01:29:12.058842: step: 130/466, loss: 0.33628004789352417 2023-01-24 01:29:12.705933: step: 132/466, loss: 0.16188682615756989 2023-01-24 01:29:13.291759: step: 134/466, loss: 0.11521150171756744 2023-01-24 01:29:13.881598: step: 136/466, loss: 0.21888132393360138 2023-01-24 01:29:14.476240: step: 138/466, loss: 0.16899710893630981 2023-01-24 01:29:15.137887: step: 140/466, loss: 0.2219388335943222 2023-01-24 01:29:15.799080: step: 142/466, loss: 0.1316850483417511 2023-01-24 01:29:16.476033: step: 144/466, loss: 0.44268444180488586 2023-01-24 01:29:17.097507: step: 146/466, loss: 0.17376552522182465 2023-01-24 01:29:17.710894: step: 148/466, loss: 0.2718490660190582 2023-01-24 01:29:18.263668: step: 150/466, loss: 0.18473376333713531 2023-01-24 01:29:18.868770: step: 152/466, loss: 0.3350732624530792 2023-01-24 01:29:19.476565: step: 154/466, loss: 0.18617266416549683 2023-01-24 01:29:20.061376: step: 156/466, loss: 0.5854041576385498 2023-01-24 01:29:20.771678: step: 158/466, loss: 0.20790636539459229 2023-01-24 01:29:21.381652: step: 160/466, loss: 0.1810241937637329 2023-01-24 01:29:22.057827: step: 162/466, loss: 0.5088878273963928 2023-01-24 01:29:22.763069: step: 164/466, loss: 1.458961009979248 2023-01-24 01:29:23.435593: step: 166/466, loss: 3.8906965255737305 2023-01-24 01:29:24.021088: step: 168/466, loss: 0.3065788745880127 2023-01-24 01:29:24.573657: step: 170/466, loss: 0.30616405606269836 2023-01-24 01:29:25.142983: step: 172/466, loss: 0.3278638422489166 2023-01-24 01:29:25.774607: step: 174/466, loss: 0.458742618560791 2023-01-24 01:29:26.411087: step: 176/466, loss: 0.507235586643219 2023-01-24 01:29:26.998382: step: 178/466, loss: 0.16787923872470856 2023-01-24 01:29:27.696714: step: 180/466, loss: 0.89646315574646 2023-01-24 01:29:28.389909: step: 182/466, loss: 0.11676928400993347 2023-01-24 01:29:28.957652: step: 184/466, loss: 0.517906665802002 2023-01-24 01:29:29.562991: step: 186/466, loss: 1.5923292636871338 2023-01-24 01:29:30.191361: step: 188/466, loss: 0.17046838998794556 2023-01-24 01:29:30.773046: step: 190/466, loss: 0.20602427423000336 2023-01-24 01:29:31.411738: step: 192/466, loss: 0.5573226809501648 2023-01-24 01:29:32.016208: step: 194/466, loss: 0.4264920949935913 2023-01-24 01:29:32.675086: step: 196/466, loss: 0.18450847268104553 2023-01-24 01:29:33.253968: step: 198/466, loss: 0.07594388723373413 2023-01-24 01:29:33.926083: step: 200/466, loss: 0.7025171518325806 2023-01-24 01:29:34.435972: step: 202/466, loss: 0.48026180267333984 2023-01-24 01:29:35.053109: step: 204/466, loss: 0.13154345750808716 2023-01-24 01:29:35.712717: step: 206/466, loss: 0.6287126541137695 2023-01-24 01:29:36.315686: step: 208/466, loss: 0.960460364818573 2023-01-24 01:29:36.960118: step: 210/466, loss: 1.5425586700439453 2023-01-24 01:29:37.625171: step: 212/466, loss: 0.3823030889034271 2023-01-24 01:29:38.192213: step: 214/466, loss: 0.25595736503601074 2023-01-24 01:29:38.845507: step: 216/466, loss: 0.20159533619880676 2023-01-24 01:29:39.478835: step: 218/466, loss: 0.23626333475112915 2023-01-24 01:29:40.151581: step: 220/466, loss: 0.4423496425151825 2023-01-24 01:29:40.707264: step: 222/466, loss: 0.0924079641699791 2023-01-24 01:29:41.378629: step: 224/466, loss: 0.3597632944583893 2023-01-24 01:29:41.997474: step: 226/466, loss: 0.15774992108345032 2023-01-24 01:29:42.601921: step: 228/466, loss: 0.21371318399906158 2023-01-24 01:29:43.272177: step: 230/466, loss: 0.5776114463806152 2023-01-24 01:29:43.955947: step: 232/466, loss: 0.13860873878002167 2023-01-24 01:29:44.546347: step: 234/466, loss: 0.6534222364425659 2023-01-24 01:29:45.167833: step: 236/466, loss: 0.31477588415145874 2023-01-24 01:29:45.731259: step: 238/466, loss: 1.035041332244873 2023-01-24 01:29:46.301809: step: 240/466, loss: 0.7310987114906311 2023-01-24 01:29:46.903274: step: 242/466, loss: 0.48828408122062683 2023-01-24 01:29:47.550745: step: 244/466, loss: 0.4517148435115814 2023-01-24 01:29:48.141885: step: 246/466, loss: 0.07291319221258163 2023-01-24 01:29:48.690940: step: 248/466, loss: 0.2562633454799652 2023-01-24 01:29:49.296335: step: 250/466, loss: 0.23484401404857635 2023-01-24 01:29:49.965337: step: 252/466, loss: 0.19312064349651337 2023-01-24 01:29:50.630765: step: 254/466, loss: 0.17013314366340637 2023-01-24 01:29:51.287184: step: 256/466, loss: 0.17698858678340912 2023-01-24 01:29:51.916343: step: 258/466, loss: 0.2043430656194687 2023-01-24 01:29:52.567931: step: 260/466, loss: 0.24182303249835968 2023-01-24 01:29:53.136866: step: 262/466, loss: 0.4848344624042511 2023-01-24 01:29:53.793546: step: 264/466, loss: 0.7192230224609375 2023-01-24 01:29:54.481304: step: 266/466, loss: 0.4204176366329193 2023-01-24 01:29:55.109518: step: 268/466, loss: 0.2875136435031891 2023-01-24 01:29:55.724706: step: 270/466, loss: 0.0769433081150055 2023-01-24 01:29:56.307124: step: 272/466, loss: 0.15030288696289062 2023-01-24 01:29:56.926974: step: 274/466, loss: 0.8517444133758545 2023-01-24 01:29:57.624533: step: 276/466, loss: 0.4224884510040283 2023-01-24 01:29:58.219470: step: 278/466, loss: 0.8873869180679321 2023-01-24 01:29:58.858569: step: 280/466, loss: 2.515134572982788 2023-01-24 01:29:59.439003: step: 282/466, loss: 0.2280302494764328 2023-01-24 01:29:59.992916: step: 284/466, loss: 0.08815629780292511 2023-01-24 01:30:00.710426: step: 286/466, loss: 0.4746396541595459 2023-01-24 01:30:01.369588: step: 288/466, loss: 0.2827693819999695 2023-01-24 01:30:02.014526: step: 290/466, loss: 0.16482758522033691 2023-01-24 01:30:02.621581: step: 292/466, loss: 0.7850125432014465 2023-01-24 01:30:03.248332: step: 294/466, loss: 0.20151492953300476 2023-01-24 01:30:03.917606: step: 296/466, loss: 0.6452239155769348 2023-01-24 01:30:04.506028: step: 298/466, loss: 1.8399653434753418 2023-01-24 01:30:05.274322: step: 300/466, loss: 1.0284277200698853 2023-01-24 01:30:05.895438: step: 302/466, loss: 0.14042891561985016 2023-01-24 01:30:06.462056: step: 304/466, loss: 0.17981228232383728 2023-01-24 01:30:07.134946: step: 306/466, loss: 0.8614758849143982 2023-01-24 01:30:07.709673: step: 308/466, loss: 0.17790968716144562 2023-01-24 01:30:08.354942: step: 310/466, loss: 0.22819513082504272 2023-01-24 01:30:08.936105: step: 312/466, loss: 0.18512138724327087 2023-01-24 01:30:09.573615: step: 314/466, loss: 0.46361806988716125 2023-01-24 01:30:10.208332: step: 316/466, loss: 0.5142015218734741 2023-01-24 01:30:10.807758: step: 318/466, loss: 0.20874568819999695 2023-01-24 01:30:11.404633: step: 320/466, loss: 0.4852907061576843 2023-01-24 01:30:11.994219: step: 322/466, loss: 0.39413660764694214 2023-01-24 01:30:12.579871: step: 324/466, loss: 0.29739612340927124 2023-01-24 01:30:13.177101: step: 326/466, loss: 0.7066906690597534 2023-01-24 01:30:13.742121: step: 328/466, loss: 0.2640800476074219 2023-01-24 01:30:14.368579: step: 330/466, loss: 0.39870014786720276 2023-01-24 01:30:14.977952: step: 332/466, loss: 0.991432249546051 2023-01-24 01:30:15.636884: step: 334/466, loss: 0.14665810763835907 2023-01-24 01:30:16.232502: step: 336/466, loss: 0.09133289754390717 2023-01-24 01:30:16.849470: step: 338/466, loss: 0.11483022570610046 2023-01-24 01:30:17.546023: step: 340/466, loss: 1.3138666152954102 2023-01-24 01:30:18.185003: step: 342/466, loss: 0.320402055978775 2023-01-24 01:30:18.854001: step: 344/466, loss: 0.7380730509757996 2023-01-24 01:30:19.474668: step: 346/466, loss: 1.4596487283706665 2023-01-24 01:30:20.085003: step: 348/466, loss: 0.773550271987915 2023-01-24 01:30:20.752613: step: 350/466, loss: 0.1945510059595108 2023-01-24 01:30:21.437651: step: 352/466, loss: 0.33311906456947327 2023-01-24 01:30:22.037975: step: 354/466, loss: 0.08922120928764343 2023-01-24 01:30:22.653461: step: 356/466, loss: 0.35805872082710266 2023-01-24 01:30:23.302122: step: 358/466, loss: 0.42160797119140625 2023-01-24 01:30:23.944963: step: 360/466, loss: 0.2711777985095978 2023-01-24 01:30:24.557382: step: 362/466, loss: 0.5504414439201355 2023-01-24 01:30:25.183363: step: 364/466, loss: 0.11443190276622772 2023-01-24 01:30:25.829353: step: 366/466, loss: 0.10705424100160599 2023-01-24 01:30:26.441707: step: 368/466, loss: 0.422481894493103 2023-01-24 01:30:27.053558: step: 370/466, loss: 0.257695734500885 2023-01-24 01:30:27.679765: step: 372/466, loss: 1.4501545429229736 2023-01-24 01:30:28.252501: step: 374/466, loss: 0.10965612530708313 2023-01-24 01:30:28.859939: step: 376/466, loss: 0.2261856198310852 2023-01-24 01:30:29.522374: step: 378/466, loss: 0.14318789541721344 2023-01-24 01:30:30.124383: step: 380/466, loss: 0.2959175407886505 2023-01-24 01:30:30.706333: step: 382/466, loss: 0.16589011251926422 2023-01-24 01:30:31.301641: step: 384/466, loss: 0.2049659788608551 2023-01-24 01:30:31.929196: step: 386/466, loss: 0.4398001730442047 2023-01-24 01:30:32.596550: step: 388/466, loss: 0.8791870474815369 2023-01-24 01:30:33.201588: step: 390/466, loss: 0.38660451769828796 2023-01-24 01:30:33.891276: step: 392/466, loss: 0.8298895359039307 2023-01-24 01:30:34.459364: step: 394/466, loss: 0.27300766110420227 2023-01-24 01:30:35.089040: step: 396/466, loss: 0.30740171670913696 2023-01-24 01:30:35.770786: step: 398/466, loss: 0.5869360566139221 2023-01-24 01:30:36.370107: step: 400/466, loss: 0.1795710325241089 2023-01-24 01:30:37.020294: step: 402/466, loss: 0.34334051609039307 2023-01-24 01:30:37.607759: step: 404/466, loss: 0.7431521415710449 2023-01-24 01:30:38.227430: step: 406/466, loss: 0.3503372073173523 2023-01-24 01:30:38.809317: step: 408/466, loss: 0.47325900197029114 2023-01-24 01:30:39.403317: step: 410/466, loss: 0.2354981154203415 2023-01-24 01:30:40.015202: step: 412/466, loss: 0.8883786797523499 2023-01-24 01:30:40.653484: step: 414/466, loss: 0.3884502053260803 2023-01-24 01:30:41.336028: step: 416/466, loss: 0.42652392387390137 2023-01-24 01:30:41.992691: step: 418/466, loss: 0.4728102385997772 2023-01-24 01:30:42.625780: step: 420/466, loss: 0.16548635065555573 2023-01-24 01:30:43.300491: step: 422/466, loss: 0.23618537187576294 2023-01-24 01:30:43.983551: step: 424/466, loss: 0.7445458173751831 2023-01-24 01:30:44.625653: step: 426/466, loss: 0.24472343921661377 2023-01-24 01:30:45.268665: step: 428/466, loss: 0.12914451956748962 2023-01-24 01:30:45.906600: step: 430/466, loss: 0.28074443340301514 2023-01-24 01:30:46.448943: step: 432/466, loss: 0.4233020544052124 2023-01-24 01:30:47.091430: step: 434/466, loss: 0.1644560694694519 2023-01-24 01:30:47.732067: step: 436/466, loss: 0.23518234491348267 2023-01-24 01:30:48.326268: step: 438/466, loss: 0.40295037627220154 2023-01-24 01:30:48.971869: step: 440/466, loss: 0.5894485712051392 2023-01-24 01:30:49.583808: step: 442/466, loss: 0.17456242442131042 2023-01-24 01:30:50.232392: step: 444/466, loss: 0.1628921926021576 2023-01-24 01:30:50.861885: step: 446/466, loss: 0.28210774064064026 2023-01-24 01:30:51.463223: step: 448/466, loss: 0.4841403365135193 2023-01-24 01:30:52.083342: step: 450/466, loss: 0.23069562017917633 2023-01-24 01:30:52.708808: step: 452/466, loss: 0.14583279192447662 2023-01-24 01:30:53.326769: step: 454/466, loss: 0.1791859269142151 2023-01-24 01:30:53.930807: step: 456/466, loss: 1.2605204582214355 2023-01-24 01:30:54.618439: step: 458/466, loss: 0.3231509327888489 2023-01-24 01:30:55.216938: step: 460/466, loss: 0.14519108831882477 2023-01-24 01:30:55.812836: step: 462/466, loss: 0.4772692918777466 2023-01-24 01:30:56.452819: step: 464/466, loss: 0.20196957886219025 2023-01-24 01:30:57.041034: step: 466/466, loss: 0.42681217193603516 2023-01-24 01:30:57.651100: step: 468/466, loss: 0.2679383158683777 2023-01-24 01:30:58.272996: step: 470/466, loss: 0.17321833968162537 2023-01-24 01:30:58.902877: step: 472/466, loss: 0.7088302969932556 2023-01-24 01:30:59.621383: step: 474/466, loss: 0.5940250754356384 2023-01-24 01:31:00.228936: step: 476/466, loss: 0.24723488092422485 2023-01-24 01:31:00.929265: step: 478/466, loss: 0.5523827075958252 2023-01-24 01:31:01.533453: step: 480/466, loss: 0.1399628221988678 2023-01-24 01:31:02.153219: step: 482/466, loss: 0.20225651562213898 2023-01-24 01:31:02.721159: step: 484/466, loss: 0.41005074977874756 2023-01-24 01:31:03.379945: step: 486/466, loss: 0.4968833923339844 2023-01-24 01:31:03.949730: step: 488/466, loss: 0.172898069024086 2023-01-24 01:31:04.596766: step: 490/466, loss: 0.44241058826446533 2023-01-24 01:31:05.175132: step: 492/466, loss: 0.24638186395168304 2023-01-24 01:31:05.779010: step: 494/466, loss: 0.1952744871377945 2023-01-24 01:31:06.461046: step: 496/466, loss: 0.3134210407733917 2023-01-24 01:31:07.100812: step: 498/466, loss: 0.2878206968307495 2023-01-24 01:31:07.697532: step: 500/466, loss: 0.16420000791549683 2023-01-24 01:31:08.309999: step: 502/466, loss: 0.05525343492627144 2023-01-24 01:31:08.880974: step: 504/466, loss: 0.4072072505950928 2023-01-24 01:31:09.439722: step: 506/466, loss: 0.26593515276908875 2023-01-24 01:31:10.109504: step: 508/466, loss: 0.1372692883014679 2023-01-24 01:31:10.681667: step: 510/466, loss: 0.10550988465547562 2023-01-24 01:31:11.266023: step: 512/466, loss: 0.43171632289886475 2023-01-24 01:31:11.916097: step: 514/466, loss: 0.3661409914493561 2023-01-24 01:31:12.555968: step: 516/466, loss: 0.26294615864753723 2023-01-24 01:31:13.227749: step: 518/466, loss: 0.25712138414382935 2023-01-24 01:31:13.832581: step: 520/466, loss: 0.27735304832458496 2023-01-24 01:31:14.399131: step: 522/466, loss: 1.425169825553894 2023-01-24 01:31:14.949870: step: 524/466, loss: 0.34775903820991516 2023-01-24 01:31:15.665522: step: 526/466, loss: 0.06502726674079895 2023-01-24 01:31:16.352881: step: 528/466, loss: 0.29903191328048706 2023-01-24 01:31:16.994716: step: 530/466, loss: 0.2911469638347626 2023-01-24 01:31:17.614945: step: 532/466, loss: 0.19249558448791504 2023-01-24 01:31:18.196101: step: 534/466, loss: 0.5120958685874939 2023-01-24 01:31:18.726464: step: 536/466, loss: 0.25753968954086304 2023-01-24 01:31:19.434765: step: 538/466, loss: 0.3769363462924957 2023-01-24 01:31:20.027680: step: 540/466, loss: 0.2791467308998108 2023-01-24 01:31:20.587825: step: 542/466, loss: 0.5322665572166443 2023-01-24 01:31:21.207923: step: 544/466, loss: 0.5498647689819336 2023-01-24 01:31:21.813171: step: 546/466, loss: 0.1426161825656891 2023-01-24 01:31:22.406873: step: 548/466, loss: 0.4128901958465576 2023-01-24 01:31:23.017253: step: 550/466, loss: 0.17598459124565125 2023-01-24 01:31:23.651743: step: 552/466, loss: 0.21260590851306915 2023-01-24 01:31:24.247261: step: 554/466, loss: 0.5410464406013489 2023-01-24 01:31:24.868482: step: 556/466, loss: 0.2087046205997467 2023-01-24 01:31:25.472027: step: 558/466, loss: 0.29485809803009033 2023-01-24 01:31:26.108012: step: 560/466, loss: 0.3445322513580322 2023-01-24 01:31:26.673494: step: 562/466, loss: 0.1379055231809616 2023-01-24 01:31:27.333201: step: 564/466, loss: 0.19649401307106018 2023-01-24 01:31:27.914342: step: 566/466, loss: 0.24501477181911469 2023-01-24 01:31:28.541371: step: 568/466, loss: 0.3747404217720032 2023-01-24 01:31:29.250050: step: 570/466, loss: 0.1150267943739891 2023-01-24 01:31:29.898554: step: 572/466, loss: 0.2907281816005707 2023-01-24 01:31:30.528550: step: 574/466, loss: 0.2371668964624405 2023-01-24 01:31:31.101269: step: 576/466, loss: 0.37155136466026306 2023-01-24 01:31:31.696833: step: 578/466, loss: 0.51060551404953 2023-01-24 01:31:32.314592: step: 580/466, loss: 0.35388556122779846 2023-01-24 01:31:32.894803: step: 582/466, loss: 0.386762797832489 2023-01-24 01:31:33.562108: step: 584/466, loss: 0.43013936281204224 2023-01-24 01:31:34.238678: step: 586/466, loss: 0.10582101345062256 2023-01-24 01:31:34.816057: step: 588/466, loss: 0.09551119059324265 2023-01-24 01:31:35.450841: step: 590/466, loss: 1.1651842594146729 2023-01-24 01:31:36.105761: step: 592/466, loss: 0.4927811026573181 2023-01-24 01:31:36.678182: step: 594/466, loss: 0.3293505609035492 2023-01-24 01:31:37.293844: step: 596/466, loss: 0.11302569508552551 2023-01-24 01:31:37.951579: step: 598/466, loss: 0.1843276470899582 2023-01-24 01:31:38.588737: step: 600/466, loss: 0.2377513200044632 2023-01-24 01:31:39.246628: step: 602/466, loss: 0.1325540989637375 2023-01-24 01:31:39.823279: step: 604/466, loss: 2.2765705585479736 2023-01-24 01:31:40.451602: step: 606/466, loss: 0.35985463857650757 2023-01-24 01:31:41.109560: step: 608/466, loss: 0.4295981228351593 2023-01-24 01:31:41.683378: step: 610/466, loss: 0.19231435656547546 2023-01-24 01:31:42.427788: step: 612/466, loss: 0.13297294080257416 2023-01-24 01:31:42.956191: step: 614/466, loss: 0.21880677342414856 2023-01-24 01:31:43.572730: step: 616/466, loss: 0.09613621234893799 2023-01-24 01:31:44.285509: step: 618/466, loss: 0.16539478302001953 2023-01-24 01:31:44.895434: step: 620/466, loss: 0.3807227313518524 2023-01-24 01:31:45.472428: step: 622/466, loss: 0.2249719202518463 2023-01-24 01:31:45.995241: step: 624/466, loss: 0.09430894255638123 2023-01-24 01:31:46.577461: step: 626/466, loss: 0.3039550483226776 2023-01-24 01:31:47.220406: step: 628/466, loss: 0.17318031191825867 2023-01-24 01:31:47.794323: step: 630/466, loss: 0.3045390844345093 2023-01-24 01:31:48.409546: step: 632/466, loss: 0.08908650279045105 2023-01-24 01:31:49.023535: step: 634/466, loss: 0.045281678438186646 2023-01-24 01:31:49.645415: step: 636/466, loss: 0.28397464752197266 2023-01-24 01:31:50.246543: step: 638/466, loss: 0.2249760925769806 2023-01-24 01:31:50.885554: step: 640/466, loss: 0.16043658554553986 2023-01-24 01:31:51.493035: step: 642/466, loss: 0.12697969377040863 2023-01-24 01:31:52.104649: step: 644/466, loss: 0.5979228615760803 2023-01-24 01:31:52.725538: step: 646/466, loss: 1.1969491243362427 2023-01-24 01:31:53.391167: step: 648/466, loss: 0.30741339921951294 2023-01-24 01:31:54.000929: step: 650/466, loss: 0.17364108562469482 2023-01-24 01:31:54.626846: step: 652/466, loss: 0.9397881627082825 2023-01-24 01:31:55.181191: step: 654/466, loss: 0.26951509714126587 2023-01-24 01:31:55.833670: step: 656/466, loss: 0.13325363397598267 2023-01-24 01:31:56.427142: step: 658/466, loss: 0.868958592414856 2023-01-24 01:31:57.121350: step: 660/466, loss: 0.4372906982898712 2023-01-24 01:31:57.739782: step: 662/466, loss: 0.24490392208099365 2023-01-24 01:31:58.324964: step: 664/466, loss: 0.07556468993425369 2023-01-24 01:31:58.921285: step: 666/466, loss: 0.3491494655609131 2023-01-24 01:31:59.558196: step: 668/466, loss: 0.34895455837249756 2023-01-24 01:32:00.201242: step: 670/466, loss: 0.12369483709335327 2023-01-24 01:32:00.820154: step: 672/466, loss: 0.36864712834358215 2023-01-24 01:32:01.507158: step: 674/466, loss: 0.2754892110824585 2023-01-24 01:32:02.151141: step: 676/466, loss: 0.5482978224754333 2023-01-24 01:32:02.777356: step: 678/466, loss: 0.31320154666900635 2023-01-24 01:32:03.408107: step: 680/466, loss: 1.316051721572876 2023-01-24 01:32:04.014104: step: 682/466, loss: 0.3966720402240753 2023-01-24 01:32:04.715225: step: 684/466, loss: 0.277188777923584 2023-01-24 01:32:05.352835: step: 686/466, loss: 0.23997512459754944 2023-01-24 01:32:06.084784: step: 688/466, loss: 0.5385978817939758 2023-01-24 01:32:06.687763: step: 690/466, loss: 0.19652266800403595 2023-01-24 01:32:07.310034: step: 692/466, loss: 0.25740042328834534 2023-01-24 01:32:07.947188: step: 694/466, loss: 0.2064402997493744 2023-01-24 01:32:08.563338: step: 696/466, loss: 0.18065351247787476 2023-01-24 01:32:09.174314: step: 698/466, loss: 0.12909433245658875 2023-01-24 01:32:09.777252: step: 700/466, loss: 1.1202539205551147 2023-01-24 01:32:10.471129: step: 702/466, loss: 0.17530035972595215 2023-01-24 01:32:11.068956: step: 704/466, loss: 0.21027502417564392 2023-01-24 01:32:11.666882: step: 706/466, loss: 0.17757992446422577 2023-01-24 01:32:12.322779: step: 708/466, loss: 0.17123140394687653 2023-01-24 01:32:12.937678: step: 710/466, loss: 0.2377919703722 2023-01-24 01:32:13.521329: step: 712/466, loss: 0.37611502408981323 2023-01-24 01:32:14.058925: step: 714/466, loss: 0.3173653483390808 2023-01-24 01:32:14.704305: step: 716/466, loss: 0.3790663182735443 2023-01-24 01:32:15.328603: step: 718/466, loss: 2.566300392150879 2023-01-24 01:32:15.952587: step: 720/466, loss: 0.17286144196987152 2023-01-24 01:32:16.524899: step: 722/466, loss: 0.24581412971019745 2023-01-24 01:32:17.186454: step: 724/466, loss: 0.4705178439617157 2023-01-24 01:32:17.835957: step: 726/466, loss: 0.15359728038311005 2023-01-24 01:32:18.494531: step: 728/466, loss: 0.463126003742218 2023-01-24 01:32:19.135581: step: 730/466, loss: 0.29831749200820923 2023-01-24 01:32:19.724245: step: 732/466, loss: 0.3101665675640106 2023-01-24 01:32:20.353242: step: 734/466, loss: 0.31220731139183044 2023-01-24 01:32:20.961159: step: 736/466, loss: 0.20585808157920837 2023-01-24 01:32:21.583679: step: 738/466, loss: 0.1852637231349945 2023-01-24 01:32:22.240207: step: 740/466, loss: 0.3673744797706604 2023-01-24 01:32:22.904126: step: 742/466, loss: 0.5200921893119812 2023-01-24 01:32:23.524720: step: 744/466, loss: 0.3681810200214386 2023-01-24 01:32:24.088315: step: 746/466, loss: 0.446247935295105 2023-01-24 01:32:24.763375: step: 748/466, loss: 0.3060239553451538 2023-01-24 01:32:25.375532: step: 750/466, loss: 0.2827172875404358 2023-01-24 01:32:26.052610: step: 752/466, loss: 0.5558502674102783 2023-01-24 01:32:26.684056: step: 754/466, loss: 0.13139766454696655 2023-01-24 01:32:27.370835: step: 756/466, loss: 0.3317015767097473 2023-01-24 01:32:28.017448: step: 758/466, loss: 0.2862865924835205 2023-01-24 01:32:28.647367: step: 760/466, loss: 0.15090171992778778 2023-01-24 01:32:29.311265: step: 762/466, loss: 0.2872665822505951 2023-01-24 01:32:29.913032: step: 764/466, loss: 1.0078598260879517 2023-01-24 01:32:30.520632: step: 766/466, loss: 0.3062320053577423 2023-01-24 01:32:31.124665: step: 768/466, loss: 0.22913332283496857 2023-01-24 01:32:31.724696: step: 770/466, loss: 0.9309630990028381 2023-01-24 01:32:32.258651: step: 772/466, loss: 0.2182549238204956 2023-01-24 01:32:32.866390: step: 774/466, loss: 0.5010969042778015 2023-01-24 01:32:33.509054: step: 776/466, loss: 0.23311811685562134 2023-01-24 01:32:34.181032: step: 778/466, loss: 0.24458865821361542 2023-01-24 01:32:34.741841: step: 780/466, loss: 0.5709508061408997 2023-01-24 01:32:35.453791: step: 782/466, loss: 0.4269219636917114 2023-01-24 01:32:36.065900: step: 784/466, loss: 0.1943705976009369 2023-01-24 01:32:36.672608: step: 786/466, loss: 0.20909816026687622 2023-01-24 01:32:37.326516: step: 788/466, loss: 0.15215927362442017 2023-01-24 01:32:37.918947: step: 790/466, loss: 0.3103340268135071 2023-01-24 01:32:38.509284: step: 792/466, loss: 0.28564512729644775 2023-01-24 01:32:39.173100: step: 794/466, loss: 0.9070299863815308 2023-01-24 01:32:39.846540: step: 796/466, loss: 0.29510730504989624 2023-01-24 01:32:40.408650: step: 798/466, loss: 0.22085902094841003 2023-01-24 01:32:40.988012: step: 800/466, loss: 2.387493371963501 2023-01-24 01:32:41.622122: step: 802/466, loss: 0.11658139526844025 2023-01-24 01:32:42.192960: step: 804/466, loss: 0.1715186983346939 2023-01-24 01:32:42.793762: step: 806/466, loss: 0.35206925868988037 2023-01-24 01:32:43.327674: step: 808/466, loss: 0.0895785465836525 2023-01-24 01:32:43.911219: step: 810/466, loss: 0.4298768937587738 2023-01-24 01:32:44.592766: step: 812/466, loss: 0.5055814385414124 2023-01-24 01:32:45.199241: step: 814/466, loss: 0.4311542510986328 2023-01-24 01:32:45.828706: step: 816/466, loss: 0.20049983263015747 2023-01-24 01:32:46.433783: step: 818/466, loss: 0.15987136960029602 2023-01-24 01:32:47.163893: step: 820/466, loss: 0.15413245558738708 2023-01-24 01:32:47.781720: step: 822/466, loss: 0.18975743651390076 2023-01-24 01:32:48.434439: step: 824/466, loss: 0.2768426239490509 2023-01-24 01:32:49.050722: step: 826/466, loss: 0.14185315370559692 2023-01-24 01:32:49.721891: step: 828/466, loss: 0.9774134159088135 2023-01-24 01:32:50.320672: step: 830/466, loss: 0.09398987889289856 2023-01-24 01:32:50.972867: step: 832/466, loss: 0.20575417578220367 2023-01-24 01:32:51.596789: step: 834/466, loss: 1.6657462120056152 2023-01-24 01:32:52.224266: step: 836/466, loss: 0.1069575697183609 2023-01-24 01:32:52.962565: step: 838/466, loss: 0.7195698618888855 2023-01-24 01:32:53.538437: step: 840/466, loss: 0.6453332304954529 2023-01-24 01:32:54.172436: step: 842/466, loss: 0.12283100187778473 2023-01-24 01:32:54.838190: step: 844/466, loss: 0.2211642861366272 2023-01-24 01:32:55.425695: step: 846/466, loss: 0.5271596908569336 2023-01-24 01:32:56.094324: step: 848/466, loss: 0.6781986951828003 2023-01-24 01:32:56.773624: step: 850/466, loss: 0.09970416128635406 2023-01-24 01:32:57.373516: step: 852/466, loss: 0.434560626745224 2023-01-24 01:32:57.966085: step: 854/466, loss: 0.2963170111179352 2023-01-24 01:32:58.578459: step: 856/466, loss: 0.5884841680526733 2023-01-24 01:32:59.208711: step: 858/466, loss: 0.8253400325775146 2023-01-24 01:32:59.795857: step: 860/466, loss: 0.5129435062408447 2023-01-24 01:33:00.367263: step: 862/466, loss: 0.2506413161754608 2023-01-24 01:33:01.008987: step: 864/466, loss: 0.31779375672340393 2023-01-24 01:33:01.647683: step: 866/466, loss: 0.44427239894866943 2023-01-24 01:33:02.215513: step: 868/466, loss: 0.14660854637622833 2023-01-24 01:33:02.782356: step: 870/466, loss: 0.13683336973190308 2023-01-24 01:33:03.462375: step: 872/466, loss: 0.607457160949707 2023-01-24 01:33:04.076255: step: 874/466, loss: 0.12032435834407806 2023-01-24 01:33:04.689530: step: 876/466, loss: 0.1489037722349167 2023-01-24 01:33:05.310039: step: 878/466, loss: 0.539090096950531 2023-01-24 01:33:05.890894: step: 880/466, loss: 0.3116208612918854 2023-01-24 01:33:06.533581: step: 882/466, loss: 0.2158992737531662 2023-01-24 01:33:07.163275: step: 884/466, loss: 0.2425546646118164 2023-01-24 01:33:07.834144: step: 886/466, loss: 0.5707557797431946 2023-01-24 01:33:08.433171: step: 888/466, loss: 0.7293208837509155 2023-01-24 01:33:09.050532: step: 890/466, loss: 0.7477041482925415 2023-01-24 01:33:09.717145: step: 892/466, loss: 0.6065076589584351 2023-01-24 01:33:10.468924: step: 894/466, loss: 0.17689277231693268 2023-01-24 01:33:11.202331: step: 896/466, loss: 1.6025630235671997 2023-01-24 01:33:11.910081: step: 898/466, loss: 0.08351809531450272 2023-01-24 01:33:12.496521: step: 900/466, loss: 0.541731059551239 2023-01-24 01:33:13.106257: step: 902/466, loss: 0.6780999302864075 2023-01-24 01:33:13.683667: step: 904/466, loss: 0.20099695026874542 2023-01-24 01:33:14.367446: step: 906/466, loss: 2.1794590950012207 2023-01-24 01:33:14.898619: step: 908/466, loss: 0.11681900918483734 2023-01-24 01:33:15.522942: step: 910/466, loss: 2.078395366668701 2023-01-24 01:33:16.134642: step: 912/466, loss: 0.3107766807079315 2023-01-24 01:33:16.750439: step: 914/466, loss: 0.09672574698925018 2023-01-24 01:33:17.371783: step: 916/466, loss: 0.1826476752758026 2023-01-24 01:33:17.989073: step: 918/466, loss: 0.2181442677974701 2023-01-24 01:33:18.575315: step: 920/466, loss: 0.14747542142868042 2023-01-24 01:33:19.187048: step: 922/466, loss: 0.3388816714286804 2023-01-24 01:33:19.825424: step: 924/466, loss: 0.15716803073883057 2023-01-24 01:33:20.436145: step: 926/466, loss: 0.2093413770198822 2023-01-24 01:33:21.031131: step: 928/466, loss: 0.19133101403713226 2023-01-24 01:33:21.698982: step: 930/466, loss: 0.1611374169588089 2023-01-24 01:33:22.266090: step: 932/466, loss: 0.272554874420166 ================================================== Loss: 0.409 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3538175366300366, 'r': 0.3136109983766234, 'f1': 0.33250322719449227}, 'combined': 0.24500237793278376, 'epoch': 9} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36292739519235573, 'r': 0.2797292378107146, 'f1': 0.3159429107994616}, 'combined': 0.19579560669262405, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3183877071955543, 'r': 0.3002631697840427, 'f1': 0.3090599423363095}, 'combined': 0.22772837856359646, 'epoch': 9} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36496705152888226, 'r': 0.26762050523501224, 'f1': 0.30880362941045747}, 'combined': 0.20484993238119456, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32894736842105265, 'r': 0.2717391304347826, 'f1': 0.2976190476190476}, 'combined': 0.1488095238095238, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 9} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30627168248020137, 'r': 0.3277746279294755, 'f1': 0.3166585314735721}, 'combined': 0.2333273389805268, 'epoch': 7} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3488091284089221, 'r': 0.2653092015540311, 'f1': 0.3013825409136095}, 'combined': 0.18677227887603967, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3557692307692308, 'r': 0.40217391304347827, 'f1': 0.37755102040816324}, 'combined': 0.18877551020408162, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31668397009966776, 'r': 0.2583948902141502, 'f1': 0.28458538587848936}, 'combined': 0.20969449485783426, 'epoch': 4} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.39165853086307634, 'r': 0.23329840286067724, 'f1': 0.2924147402371384}, 'combined': 0.19397809500879482, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.1896551724137931, 'f1': 0.2619047619047619}, 'combined': 0.1746031746031746, 'epoch': 4} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:36:04.770477: step: 2/466, loss: 0.06481149047613144 2023-01-24 01:36:05.354078: step: 4/466, loss: 0.1914258748292923 2023-01-24 01:36:05.955668: step: 6/466, loss: 0.3393650949001312 2023-01-24 01:36:06.596423: step: 8/466, loss: 0.13467082381248474 2023-01-24 01:36:07.239867: step: 10/466, loss: 0.3454969823360443 2023-01-24 01:36:07.855057: step: 12/466, loss: 0.8786474466323853 2023-01-24 01:36:08.438786: step: 14/466, loss: 0.5851201415061951 2023-01-24 01:36:09.088418: step: 16/466, loss: 0.07797753065824509 2023-01-24 01:36:09.680313: step: 18/466, loss: 0.08148655295372009 2023-01-24 01:36:10.269074: step: 20/466, loss: 0.23666100203990936 2023-01-24 01:36:10.861277: step: 22/466, loss: 0.0680367574095726 2023-01-24 01:36:11.598792: step: 24/466, loss: 0.521649956703186 2023-01-24 01:36:12.249909: step: 26/466, loss: 0.5934398174285889 2023-01-24 01:36:12.859788: step: 28/466, loss: 0.5916581153869629 2023-01-24 01:36:13.493643: step: 30/466, loss: 0.2847846746444702 2023-01-24 01:36:14.050457: step: 32/466, loss: 0.1682792603969574 2023-01-24 01:36:14.763686: step: 34/466, loss: 0.5811177492141724 2023-01-24 01:36:15.391562: step: 36/466, loss: 0.37301358580589294 2023-01-24 01:36:16.029537: step: 38/466, loss: 0.6371402740478516 2023-01-24 01:36:16.642204: step: 40/466, loss: 0.29740265011787415 2023-01-24 01:36:17.273893: step: 42/466, loss: 0.23849110305309296 2023-01-24 01:36:17.871832: step: 44/466, loss: 0.20933814346790314 2023-01-24 01:36:18.518851: step: 46/466, loss: 1.008119821548462 2023-01-24 01:36:19.108731: step: 48/466, loss: 0.12082512676715851 2023-01-24 01:36:19.746892: step: 50/466, loss: 0.17111796140670776 2023-01-24 01:36:20.429761: step: 52/466, loss: 0.5508844256401062 2023-01-24 01:36:21.064415: step: 54/466, loss: 0.07896469533443451 2023-01-24 01:36:21.634964: step: 56/466, loss: 0.16774111986160278 2023-01-24 01:36:22.269786: step: 58/466, loss: 0.13596868515014648 2023-01-24 01:36:22.860454: step: 60/466, loss: 0.29006361961364746 2023-01-24 01:36:23.416747: step: 62/466, loss: 0.13309992849826813 2023-01-24 01:36:23.982368: step: 64/466, loss: 0.14383406937122345 2023-01-24 01:36:24.750596: step: 66/466, loss: 0.29724830389022827 2023-01-24 01:36:25.396023: step: 68/466, loss: 0.1715736985206604 2023-01-24 01:36:26.080119: step: 70/466, loss: 0.3546826243400574 2023-01-24 01:36:26.674265: step: 72/466, loss: 0.10893898457288742 2023-01-24 01:36:27.329075: step: 74/466, loss: 0.8169422149658203 2023-01-24 01:36:27.929519: step: 76/466, loss: 0.36334866285324097 2023-01-24 01:36:28.477168: step: 78/466, loss: 0.10342225432395935 2023-01-24 01:36:29.062313: step: 80/466, loss: 0.37415358424186707 2023-01-24 01:36:29.664106: step: 82/466, loss: 0.1697869598865509 2023-01-24 01:36:30.265527: step: 84/466, loss: 0.17617714405059814 2023-01-24 01:36:30.954776: step: 86/466, loss: 0.385753870010376 2023-01-24 01:36:31.570615: step: 88/466, loss: 0.11032872647047043 2023-01-24 01:36:32.235800: step: 90/466, loss: 0.3091268241405487 2023-01-24 01:36:32.913820: step: 92/466, loss: 0.21702732145786285 2023-01-24 01:36:33.539796: step: 94/466, loss: 0.16088707745075226 2023-01-24 01:36:34.216588: step: 96/466, loss: 0.09631867706775665 2023-01-24 01:36:34.888123: step: 98/466, loss: 0.23908157646656036 2023-01-24 01:36:35.475582: step: 100/466, loss: 0.35277605056762695 2023-01-24 01:36:36.127322: step: 102/466, loss: 0.5009201765060425 2023-01-24 01:36:36.817698: step: 104/466, loss: 0.16452857851982117 2023-01-24 01:36:37.392421: step: 106/466, loss: 0.5607878565788269 2023-01-24 01:36:38.144550: step: 108/466, loss: 0.25911426544189453 2023-01-24 01:36:38.742970: step: 110/466, loss: 0.11729790270328522 2023-01-24 01:36:39.380905: step: 112/466, loss: 0.2607351839542389 2023-01-24 01:36:39.996618: step: 114/466, loss: 0.21392768621444702 2023-01-24 01:36:40.621185: step: 116/466, loss: 0.04871556907892227 2023-01-24 01:36:41.246444: step: 118/466, loss: 0.1251087337732315 2023-01-24 01:36:41.915745: step: 120/466, loss: 2.139417886734009 2023-01-24 01:36:42.569397: step: 122/466, loss: 0.9409970641136169 2023-01-24 01:36:43.164179: step: 124/466, loss: 0.5758810043334961 2023-01-24 01:36:43.783461: step: 126/466, loss: 0.20359812676906586 2023-01-24 01:36:44.433179: step: 128/466, loss: 0.1587243527173996 2023-01-24 01:36:45.073647: step: 130/466, loss: 0.17990712821483612 2023-01-24 01:36:45.689153: step: 132/466, loss: 0.17337213456630707 2023-01-24 01:36:46.274109: step: 134/466, loss: 0.0818430483341217 2023-01-24 01:36:46.887943: step: 136/466, loss: 0.1378822922706604 2023-01-24 01:36:47.523668: step: 138/466, loss: 0.5450152158737183 2023-01-24 01:36:48.096520: step: 140/466, loss: 0.07567490637302399 2023-01-24 01:36:48.683400: step: 142/466, loss: 1.2197580337524414 2023-01-24 01:36:49.336120: step: 144/466, loss: 0.38101068139076233 2023-01-24 01:36:49.935279: step: 146/466, loss: 0.1391480714082718 2023-01-24 01:36:50.645273: step: 148/466, loss: 0.19454778730869293 2023-01-24 01:36:51.321283: step: 150/466, loss: 0.3941478729248047 2023-01-24 01:36:51.843598: step: 152/466, loss: 0.15801694989204407 2023-01-24 01:36:52.541419: step: 154/466, loss: 0.41450387239456177 2023-01-24 01:36:53.153827: step: 156/466, loss: 0.2930014431476593 2023-01-24 01:36:53.784119: step: 158/466, loss: 0.19108444452285767 2023-01-24 01:36:54.413178: step: 160/466, loss: 0.22323651611804962 2023-01-24 01:36:55.061606: step: 162/466, loss: 0.2740851044654846 2023-01-24 01:36:55.656745: step: 164/466, loss: 0.07235020399093628 2023-01-24 01:36:56.289961: step: 166/466, loss: 0.18892675638198853 2023-01-24 01:36:56.939537: step: 168/466, loss: 0.37091153860092163 2023-01-24 01:36:57.575156: step: 170/466, loss: 0.944072961807251 2023-01-24 01:36:58.185158: step: 172/466, loss: 0.16395629942417145 2023-01-24 01:36:58.781674: step: 174/466, loss: 0.05845191329717636 2023-01-24 01:36:59.455486: step: 176/466, loss: 0.05888865143060684 2023-01-24 01:37:00.109669: step: 178/466, loss: 0.31981462240219116 2023-01-24 01:37:00.730024: step: 180/466, loss: 0.23472468554973602 2023-01-24 01:37:01.330040: step: 182/466, loss: 0.14848054945468903 2023-01-24 01:37:01.915245: step: 184/466, loss: 0.6054875254631042 2023-01-24 01:37:02.635433: step: 186/466, loss: 0.9153898358345032 2023-01-24 01:37:03.233872: step: 188/466, loss: 0.2103879302740097 2023-01-24 01:37:03.763530: step: 190/466, loss: 0.26340317726135254 2023-01-24 01:37:04.405504: step: 192/466, loss: 0.18384523689746857 2023-01-24 01:37:05.017223: step: 194/466, loss: 0.38214704394340515 2023-01-24 01:37:05.726379: step: 196/466, loss: 0.21841195225715637 2023-01-24 01:37:06.399377: step: 198/466, loss: 0.42027539014816284 2023-01-24 01:37:07.002612: step: 200/466, loss: 0.5119179487228394 2023-01-24 01:37:07.641757: step: 202/466, loss: 0.11029555648565292 2023-01-24 01:37:08.287211: step: 204/466, loss: 0.13725778460502625 2023-01-24 01:37:08.878111: step: 206/466, loss: 0.23730728030204773 2023-01-24 01:37:09.486717: step: 208/466, loss: 0.10681381076574326 2023-01-24 01:37:10.087882: step: 210/466, loss: 0.4054355025291443 2023-01-24 01:37:10.746872: step: 212/466, loss: 0.17219959199428558 2023-01-24 01:37:11.387888: step: 214/466, loss: 0.15734705328941345 2023-01-24 01:37:11.988453: step: 216/466, loss: 0.11057482659816742 2023-01-24 01:37:12.591626: step: 218/466, loss: 0.10751251131296158 2023-01-24 01:37:13.191884: step: 220/466, loss: 0.29336410760879517 2023-01-24 01:37:13.789555: step: 222/466, loss: 0.3931858539581299 2023-01-24 01:37:14.391732: step: 224/466, loss: 0.32212305068969727 2023-01-24 01:37:15.005064: step: 226/466, loss: 0.2841503918170929 2023-01-24 01:37:15.688991: step: 228/466, loss: 0.4519486427307129 2023-01-24 01:37:16.306714: step: 230/466, loss: 0.7676974534988403 2023-01-24 01:37:16.968562: step: 232/466, loss: 0.3085453510284424 2023-01-24 01:37:17.614197: step: 234/466, loss: 0.20741544663906097 2023-01-24 01:37:18.212736: step: 236/466, loss: 0.4583464562892914 2023-01-24 01:37:18.805881: step: 238/466, loss: 0.1378641277551651 2023-01-24 01:37:19.400098: step: 240/466, loss: 0.7796522378921509 2023-01-24 01:37:20.022505: step: 242/466, loss: 0.3027142286300659 2023-01-24 01:37:20.676674: step: 244/466, loss: 0.08265858143568039 2023-01-24 01:37:21.338687: step: 246/466, loss: 0.3559706509113312 2023-01-24 01:37:21.970020: step: 248/466, loss: 0.6588718295097351 2023-01-24 01:37:22.523199: step: 250/466, loss: 0.3847041726112366 2023-01-24 01:37:23.143538: step: 252/466, loss: 0.35952022671699524 2023-01-24 01:37:23.770110: step: 254/466, loss: 0.10576501488685608 2023-01-24 01:37:24.409262: step: 256/466, loss: 0.4464896321296692 2023-01-24 01:37:25.040560: step: 258/466, loss: 0.2264501303434372 2023-01-24 01:37:25.713239: step: 260/466, loss: 0.24269479513168335 2023-01-24 01:37:26.364049: step: 262/466, loss: 0.17529931664466858 2023-01-24 01:37:26.983407: step: 264/466, loss: 0.14880551397800446 2023-01-24 01:37:27.550111: step: 266/466, loss: 0.12219415605068207 2023-01-24 01:37:28.181569: step: 268/466, loss: 0.8103137612342834 2023-01-24 01:37:28.803460: step: 270/466, loss: 0.18102988600730896 2023-01-24 01:37:29.456564: step: 272/466, loss: 0.4099413752555847 2023-01-24 01:37:30.034422: step: 274/466, loss: 0.4709075391292572 2023-01-24 01:37:30.645086: step: 276/466, loss: 0.1665007621049881 2023-01-24 01:37:31.285136: step: 278/466, loss: 0.21260009706020355 2023-01-24 01:37:31.918036: step: 280/466, loss: 0.2750088572502136 2023-01-24 01:37:32.578575: step: 282/466, loss: 0.18898963928222656 2023-01-24 01:37:33.203243: step: 284/466, loss: 0.11391142010688782 2023-01-24 01:37:33.817176: step: 286/466, loss: 0.25304052233695984 2023-01-24 01:37:34.439551: step: 288/466, loss: 0.6753566265106201 2023-01-24 01:37:35.075377: step: 290/466, loss: 0.255403995513916 2023-01-24 01:37:35.749650: step: 292/466, loss: 0.06397488713264465 2023-01-24 01:37:36.360863: step: 294/466, loss: 0.17752189934253693 2023-01-24 01:37:36.992184: step: 296/466, loss: 0.46928924322128296 2023-01-24 01:37:37.584057: step: 298/466, loss: 2.6196093559265137 2023-01-24 01:37:38.190508: step: 300/466, loss: 0.46346068382263184 2023-01-24 01:37:38.785786: step: 302/466, loss: 0.3404328227043152 2023-01-24 01:37:39.451537: step: 304/466, loss: 0.21604882180690765 2023-01-24 01:37:40.119041: step: 306/466, loss: 0.218265101313591 2023-01-24 01:37:40.760888: step: 308/466, loss: 0.46633002161979675 2023-01-24 01:37:41.449285: step: 310/466, loss: 0.17038500308990479 2023-01-24 01:37:42.123737: step: 312/466, loss: 0.26837801933288574 2023-01-24 01:37:42.752225: step: 314/466, loss: 0.29201966524124146 2023-01-24 01:37:43.432946: step: 316/466, loss: 0.6387988924980164 2023-01-24 01:37:44.122921: step: 318/466, loss: 0.18127477169036865 2023-01-24 01:37:44.745101: step: 320/466, loss: 0.5306467413902283 2023-01-24 01:37:45.371739: step: 322/466, loss: 0.12120350450277328 2023-01-24 01:37:45.973103: step: 324/466, loss: 0.18139812350273132 2023-01-24 01:37:46.581310: step: 326/466, loss: 0.11129505932331085 2023-01-24 01:37:47.184844: step: 328/466, loss: 0.18971511721611023 2023-01-24 01:37:47.922893: step: 330/466, loss: 0.17079608142375946 2023-01-24 01:37:48.573253: step: 332/466, loss: 0.3552361726760864 2023-01-24 01:37:49.267429: step: 334/466, loss: 0.18197661638259888 2023-01-24 01:37:49.881689: step: 336/466, loss: 0.36013326048851013 2023-01-24 01:37:50.492760: step: 338/466, loss: 0.478237509727478 2023-01-24 01:37:51.093620: step: 340/466, loss: 0.18918289244174957 2023-01-24 01:37:51.685253: step: 342/466, loss: 0.13420641422271729 2023-01-24 01:37:52.326825: step: 344/466, loss: 0.14395514130592346 2023-01-24 01:37:52.988519: step: 346/466, loss: 0.361555278301239 2023-01-24 01:37:53.592249: step: 348/466, loss: 0.14946097135543823 2023-01-24 01:37:54.177149: step: 350/466, loss: 0.2907811403274536 2023-01-24 01:37:54.798941: step: 352/466, loss: 0.14296364784240723 2023-01-24 01:37:55.343703: step: 354/466, loss: 0.3844510614871979 2023-01-24 01:37:55.957894: step: 356/466, loss: 0.1620994210243225 2023-01-24 01:37:56.632864: step: 358/466, loss: 0.31386253237724304 2023-01-24 01:37:57.259704: step: 360/466, loss: 0.33979666233062744 2023-01-24 01:37:57.927623: step: 362/466, loss: 0.22030147910118103 2023-01-24 01:37:58.553937: step: 364/466, loss: 0.3009497821331024 2023-01-24 01:37:59.193587: step: 366/466, loss: 0.2967192530632019 2023-01-24 01:37:59.859420: step: 368/466, loss: 0.14870119094848633 2023-01-24 01:38:00.471841: step: 370/466, loss: 0.21280643343925476 2023-01-24 01:38:01.029056: step: 372/466, loss: 0.1196550503373146 2023-01-24 01:38:01.627343: step: 374/466, loss: 0.19388926029205322 2023-01-24 01:38:02.264640: step: 376/466, loss: 0.17999613285064697 2023-01-24 01:38:02.878507: step: 378/466, loss: 0.13130156695842743 2023-01-24 01:38:03.442818: step: 380/466, loss: 0.22691947221755981 2023-01-24 01:38:04.019490: step: 382/466, loss: 0.14949214458465576 2023-01-24 01:38:04.651266: step: 384/466, loss: 0.2136528193950653 2023-01-24 01:38:05.248826: step: 386/466, loss: 0.5096291899681091 2023-01-24 01:38:05.891109: step: 388/466, loss: 0.14101539552211761 2023-01-24 01:38:06.514855: step: 390/466, loss: 0.18433600664138794 2023-01-24 01:38:07.074113: step: 392/466, loss: 0.17363117635250092 2023-01-24 01:38:07.740594: step: 394/466, loss: 0.32679206132888794 2023-01-24 01:38:08.408317: step: 396/466, loss: 0.9661167860031128 2023-01-24 01:38:09.043263: step: 398/466, loss: 0.46869587898254395 2023-01-24 01:38:09.667571: step: 400/466, loss: 0.14851966500282288 2023-01-24 01:38:10.283353: step: 402/466, loss: 0.19928830862045288 2023-01-24 01:38:10.890421: step: 404/466, loss: 0.3361433148384094 2023-01-24 01:38:11.486025: step: 406/466, loss: 0.10857192426919937 2023-01-24 01:38:12.224925: step: 408/466, loss: 0.18808101117610931 2023-01-24 01:38:12.858229: step: 410/466, loss: 0.2283339947462082 2023-01-24 01:38:13.560316: step: 412/466, loss: 0.44825655221939087 2023-01-24 01:38:14.217083: step: 414/466, loss: 0.7821473479270935 2023-01-24 01:38:14.940600: step: 416/466, loss: 0.4337299168109894 2023-01-24 01:38:15.592125: step: 418/466, loss: 0.3108523190021515 2023-01-24 01:38:16.284616: step: 420/466, loss: 0.14360187947750092 2023-01-24 01:38:16.898079: step: 422/466, loss: 0.2493402510881424 2023-01-24 01:38:17.496392: step: 424/466, loss: 0.1904027760028839 2023-01-24 01:38:18.106053: step: 426/466, loss: 0.4023641347885132 2023-01-24 01:38:18.735748: step: 428/466, loss: 0.09789680689573288 2023-01-24 01:38:19.323839: step: 430/466, loss: 0.3129696846008301 2023-01-24 01:38:19.993699: step: 432/466, loss: 0.2356068640947342 2023-01-24 01:38:20.625404: step: 434/466, loss: 0.1248709037899971 2023-01-24 01:38:21.232806: step: 436/466, loss: 0.5273644328117371 2023-01-24 01:38:21.823039: step: 438/466, loss: 0.2423192709684372 2023-01-24 01:38:22.458552: step: 440/466, loss: 0.1080382913351059 2023-01-24 01:38:23.067788: step: 442/466, loss: 0.18122127652168274 2023-01-24 01:38:23.700376: step: 444/466, loss: 0.5400142073631287 2023-01-24 01:38:24.331509: step: 446/466, loss: 0.263302743434906 2023-01-24 01:38:24.920496: step: 448/466, loss: 0.2619061768054962 2023-01-24 01:38:25.584717: step: 450/466, loss: 0.42697620391845703 2023-01-24 01:38:26.181669: step: 452/466, loss: 0.5079036951065063 2023-01-24 01:38:26.801731: step: 454/466, loss: 0.8966825008392334 2023-01-24 01:38:27.431192: step: 456/466, loss: 1.4609519243240356 2023-01-24 01:38:28.093978: step: 458/466, loss: 1.9890491962432861 2023-01-24 01:38:28.698070: step: 460/466, loss: 0.3308573067188263 2023-01-24 01:38:29.369814: step: 462/466, loss: 0.20168599486351013 2023-01-24 01:38:29.957956: step: 464/466, loss: 0.15842114388942719 2023-01-24 01:38:30.540606: step: 466/466, loss: 0.12318496406078339 2023-01-24 01:38:31.153892: step: 468/466, loss: 0.10131128877401352 2023-01-24 01:38:31.790078: step: 470/466, loss: 0.316278338432312 2023-01-24 01:38:32.377868: step: 472/466, loss: 0.15732251107692719 2023-01-24 01:38:33.004209: step: 474/466, loss: 0.22823511064052582 2023-01-24 01:38:33.671061: step: 476/466, loss: 0.14556622505187988 2023-01-24 01:38:34.321786: step: 478/466, loss: 0.49621596932411194 2023-01-24 01:38:34.970095: step: 480/466, loss: 0.5935064554214478 2023-01-24 01:38:35.532089: step: 482/466, loss: 0.1994830071926117 2023-01-24 01:38:36.343166: step: 484/466, loss: 0.23406997323036194 2023-01-24 01:38:36.870184: step: 486/466, loss: 0.14036954939365387 2023-01-24 01:38:37.445338: step: 488/466, loss: 0.16422095894813538 2023-01-24 01:38:38.190134: step: 490/466, loss: 0.10193836688995361 2023-01-24 01:38:38.804408: step: 492/466, loss: 0.3593794107437134 2023-01-24 01:38:39.437471: step: 494/466, loss: 0.12931369245052338 2023-01-24 01:38:40.044358: step: 496/466, loss: 0.24577723443508148 2023-01-24 01:38:40.682605: step: 498/466, loss: 0.42191627621650696 2023-01-24 01:38:41.293742: step: 500/466, loss: 0.08254116773605347 2023-01-24 01:38:41.936806: step: 502/466, loss: 1.0767302513122559 2023-01-24 01:38:42.600497: step: 504/466, loss: 0.18729932606220245 2023-01-24 01:38:43.189694: step: 506/466, loss: 0.34757569432258606 2023-01-24 01:38:43.826212: step: 508/466, loss: 0.19753587245941162 2023-01-24 01:38:44.418705: step: 510/466, loss: 0.6562075018882751 2023-01-24 01:38:45.080700: step: 512/466, loss: 0.45441025495529175 2023-01-24 01:38:45.656851: step: 514/466, loss: 0.11676815897226334 2023-01-24 01:38:46.266205: step: 516/466, loss: 0.5185372829437256 2023-01-24 01:38:46.857533: step: 518/466, loss: 0.3092823028564453 2023-01-24 01:38:47.507349: step: 520/466, loss: 0.11145910620689392 2023-01-24 01:38:48.097090: step: 522/466, loss: 0.3906674087047577 2023-01-24 01:38:48.715943: step: 524/466, loss: 0.28080275654792786 2023-01-24 01:38:49.335804: step: 526/466, loss: 0.1907173991203308 2023-01-24 01:38:49.927811: step: 528/466, loss: 0.28857186436653137 2023-01-24 01:38:50.498653: step: 530/466, loss: 0.33306193351745605 2023-01-24 01:38:51.132742: step: 532/466, loss: 0.24578644335269928 2023-01-24 01:38:51.734001: step: 534/466, loss: 0.23515483736991882 2023-01-24 01:38:52.359212: step: 536/466, loss: 1.1538259983062744 2023-01-24 01:38:52.982464: step: 538/466, loss: 0.6076947450637817 2023-01-24 01:38:53.607780: step: 540/466, loss: 1.0239328145980835 2023-01-24 01:38:54.250274: step: 542/466, loss: 0.25282955169677734 2023-01-24 01:38:54.905155: step: 544/466, loss: 0.40280333161354065 2023-01-24 01:38:55.512563: step: 546/466, loss: 0.4870428144931793 2023-01-24 01:38:56.149452: step: 548/466, loss: 0.0939960926771164 2023-01-24 01:38:56.749934: step: 550/466, loss: 0.12107057124376297 2023-01-24 01:38:57.438994: step: 552/466, loss: 0.41152551770210266 2023-01-24 01:38:58.061747: step: 554/466, loss: 0.29801052808761597 2023-01-24 01:38:58.685529: step: 556/466, loss: 0.2560926079750061 2023-01-24 01:38:59.300242: step: 558/466, loss: 0.1539243757724762 2023-01-24 01:38:59.905724: step: 560/466, loss: 1.2889466285705566 2023-01-24 01:39:00.490288: step: 562/466, loss: 0.348409503698349 2023-01-24 01:39:01.067931: step: 564/466, loss: 0.2598280906677246 2023-01-24 01:39:01.669314: step: 566/466, loss: 0.20993900299072266 2023-01-24 01:39:02.295699: step: 568/466, loss: 0.22750478982925415 2023-01-24 01:39:02.846954: step: 570/466, loss: 0.4462200701236725 2023-01-24 01:39:03.539104: step: 572/466, loss: 0.9216004610061646 2023-01-24 01:39:04.141574: step: 574/466, loss: 0.5238385796546936 2023-01-24 01:39:04.779633: step: 576/466, loss: 0.13033205270767212 2023-01-24 01:39:05.462896: step: 578/466, loss: 0.24837486445903778 2023-01-24 01:39:06.055414: step: 580/466, loss: 0.045542627573013306 2023-01-24 01:39:06.695726: step: 582/466, loss: 0.18392203748226166 2023-01-24 01:39:07.336914: step: 584/466, loss: 0.1877361536026001 2023-01-24 01:39:07.999170: step: 586/466, loss: 0.2505837082862854 2023-01-24 01:39:08.618557: step: 588/466, loss: 0.2759116291999817 2023-01-24 01:39:09.203628: step: 590/466, loss: 0.3421742022037506 2023-01-24 01:39:09.885328: step: 592/466, loss: 0.7619215846061707 2023-01-24 01:39:10.466344: step: 594/466, loss: 0.11908534914255142 2023-01-24 01:39:11.082943: step: 596/466, loss: 0.34376809000968933 2023-01-24 01:39:11.693878: step: 598/466, loss: 0.164188414812088 2023-01-24 01:39:12.324291: step: 600/466, loss: 0.4696170687675476 2023-01-24 01:39:12.961362: step: 602/466, loss: 0.33465299010276794 2023-01-24 01:39:13.536208: step: 604/466, loss: 0.16777755320072174 2023-01-24 01:39:14.150304: step: 606/466, loss: 0.2437925487756729 2023-01-24 01:39:14.798157: step: 608/466, loss: 0.2520957291126251 2023-01-24 01:39:15.344283: step: 610/466, loss: 0.0658729299902916 2023-01-24 01:39:15.945098: step: 612/466, loss: 0.13700823485851288 2023-01-24 01:39:16.509874: step: 614/466, loss: 0.7343111038208008 2023-01-24 01:39:17.113333: step: 616/466, loss: 0.21706841886043549 2023-01-24 01:39:17.836624: step: 618/466, loss: 0.11549585312604904 2023-01-24 01:39:18.433313: step: 620/466, loss: 0.4640030860900879 2023-01-24 01:39:18.994194: step: 622/466, loss: 0.12104494124650955 2023-01-24 01:39:19.646598: step: 624/466, loss: 0.6957822442054749 2023-01-24 01:39:20.251456: step: 626/466, loss: 0.12641991674900055 2023-01-24 01:39:20.866662: step: 628/466, loss: 0.18275563418865204 2023-01-24 01:39:21.621633: step: 630/466, loss: 0.1254248172044754 2023-01-24 01:39:22.297404: step: 632/466, loss: 0.2987591028213501 2023-01-24 01:39:22.909803: step: 634/466, loss: 0.17989489436149597 2023-01-24 01:39:23.524483: step: 636/466, loss: 0.26884424686431885 2023-01-24 01:39:24.273919: step: 638/466, loss: 0.447631299495697 2023-01-24 01:39:24.891647: step: 640/466, loss: 0.22676263749599457 2023-01-24 01:39:25.478389: step: 642/466, loss: 0.16808676719665527 2023-01-24 01:39:26.094143: step: 644/466, loss: 0.11691843718290329 2023-01-24 01:39:26.802814: step: 646/466, loss: 0.3022649586200714 2023-01-24 01:39:27.453120: step: 648/466, loss: 0.13623934984207153 2023-01-24 01:39:28.111750: step: 650/466, loss: 1.3627370595932007 2023-01-24 01:39:28.708286: step: 652/466, loss: 0.09851698577404022 2023-01-24 01:39:29.324243: step: 654/466, loss: 0.46624961495399475 2023-01-24 01:39:29.925657: step: 656/466, loss: 0.08882220834493637 2023-01-24 01:39:30.505382: step: 658/466, loss: 1.265326738357544 2023-01-24 01:39:31.065148: step: 660/466, loss: 0.22405727207660675 2023-01-24 01:39:31.660049: step: 662/466, loss: 0.6508872509002686 2023-01-24 01:39:32.268375: step: 664/466, loss: 0.15667793154716492 2023-01-24 01:39:32.837379: step: 666/466, loss: 0.22925753891468048 2023-01-24 01:39:33.490001: step: 668/466, loss: 0.23180414736270905 2023-01-24 01:39:34.066892: step: 670/466, loss: 0.051641300320625305 2023-01-24 01:39:34.662971: step: 672/466, loss: 0.9895915985107422 2023-01-24 01:39:35.309108: step: 674/466, loss: 0.0929332822561264 2023-01-24 01:39:35.904116: step: 676/466, loss: 0.13406313955783844 2023-01-24 01:39:36.443999: step: 678/466, loss: 0.06268256902694702 2023-01-24 01:39:37.061082: step: 680/466, loss: 0.17089411616325378 2023-01-24 01:39:37.690366: step: 682/466, loss: 0.14011450111865997 2023-01-24 01:39:38.398426: step: 684/466, loss: 0.22866925597190857 2023-01-24 01:39:38.985453: step: 686/466, loss: 0.5124398469924927 2023-01-24 01:39:39.589198: step: 688/466, loss: 0.1384267508983612 2023-01-24 01:39:40.231051: step: 690/466, loss: 9.537487983703613 2023-01-24 01:39:40.803184: step: 692/466, loss: 0.15773287415504456 2023-01-24 01:39:41.424322: step: 694/466, loss: 0.21435973048210144 2023-01-24 01:39:42.090629: step: 696/466, loss: 0.2601606249809265 2023-01-24 01:39:42.706026: step: 698/466, loss: 0.2897873520851135 2023-01-24 01:39:43.347663: step: 700/466, loss: 0.12837408483028412 2023-01-24 01:39:43.990720: step: 702/466, loss: 0.094563327729702 2023-01-24 01:39:44.653348: step: 704/466, loss: 0.17177322506904602 2023-01-24 01:39:45.263238: step: 706/466, loss: 0.1358441412448883 2023-01-24 01:39:45.854319: step: 708/466, loss: 0.10824206471443176 2023-01-24 01:39:46.441837: step: 710/466, loss: 2.6558961868286133 2023-01-24 01:39:46.995335: step: 712/466, loss: 0.20807532966136932 2023-01-24 01:39:47.657097: step: 714/466, loss: 0.2447029948234558 2023-01-24 01:39:48.313562: step: 716/466, loss: 0.1848250776529312 2023-01-24 01:39:48.908172: step: 718/466, loss: 0.08883415907621384 2023-01-24 01:39:49.522614: step: 720/466, loss: 1.2126619815826416 2023-01-24 01:39:50.145880: step: 722/466, loss: 0.9208321571350098 2023-01-24 01:39:50.800241: step: 724/466, loss: 0.43263712525367737 2023-01-24 01:39:51.390716: step: 726/466, loss: 0.4163546562194824 2023-01-24 01:39:52.085443: step: 728/466, loss: 0.24820135533809662 2023-01-24 01:39:52.712965: step: 730/466, loss: 0.5538040399551392 2023-01-24 01:39:53.317915: step: 732/466, loss: 0.08325211703777313 2023-01-24 01:39:53.996254: step: 734/466, loss: 0.5551610589027405 2023-01-24 01:39:54.643613: step: 736/466, loss: 0.14608952403068542 2023-01-24 01:39:55.246055: step: 738/466, loss: 0.1840585619211197 2023-01-24 01:39:55.825208: step: 740/466, loss: 0.29782766103744507 2023-01-24 01:39:56.351230: step: 742/466, loss: 0.14386539161205292 2023-01-24 01:39:56.969476: step: 744/466, loss: 1.7157152891159058 2023-01-24 01:39:57.657541: step: 746/466, loss: 0.10851907730102539 2023-01-24 01:39:58.283062: step: 748/466, loss: 0.19914494454860687 2023-01-24 01:39:58.916470: step: 750/466, loss: 0.22274081408977509 2023-01-24 01:39:59.559654: step: 752/466, loss: 0.08526704460382462 2023-01-24 01:40:00.156543: step: 754/466, loss: 0.23813314735889435 2023-01-24 01:40:00.778694: step: 756/466, loss: 0.1329123079776764 2023-01-24 01:40:01.386608: step: 758/466, loss: 0.2598244845867157 2023-01-24 01:40:02.042591: step: 760/466, loss: 0.3998894691467285 2023-01-24 01:40:02.713702: step: 762/466, loss: 0.2382477968931198 2023-01-24 01:40:03.302936: step: 764/466, loss: 0.1373913437128067 2023-01-24 01:40:03.963527: step: 766/466, loss: 0.47792330384254456 2023-01-24 01:40:04.545276: step: 768/466, loss: 0.3679344654083252 2023-01-24 01:40:05.238329: step: 770/466, loss: 0.15216253697872162 2023-01-24 01:40:05.876349: step: 772/466, loss: 0.15031273663043976 2023-01-24 01:40:06.582716: step: 774/466, loss: 2.023613929748535 2023-01-24 01:40:07.247222: step: 776/466, loss: 0.7451434135437012 2023-01-24 01:40:07.866707: step: 778/466, loss: 0.09389051049947739 2023-01-24 01:40:08.465185: step: 780/466, loss: 0.14813999831676483 2023-01-24 01:40:09.005701: step: 782/466, loss: 0.7446677684783936 2023-01-24 01:40:09.630310: step: 784/466, loss: 0.28307709097862244 2023-01-24 01:40:10.272592: step: 786/466, loss: 0.2559066414833069 2023-01-24 01:40:10.888742: step: 788/466, loss: 0.9395307302474976 2023-01-24 01:40:11.536334: step: 790/466, loss: 0.12403196096420288 2023-01-24 01:40:12.193762: step: 792/466, loss: 0.11562259495258331 2023-01-24 01:40:12.886839: step: 794/466, loss: 0.1862974762916565 2023-01-24 01:40:13.551483: step: 796/466, loss: 0.34427696466445923 2023-01-24 01:40:14.217469: step: 798/466, loss: 0.09961500763893127 2023-01-24 01:40:14.849644: step: 800/466, loss: 0.8094140291213989 2023-01-24 01:40:15.433260: step: 802/466, loss: 0.2891375422477722 2023-01-24 01:40:16.042074: step: 804/466, loss: 0.6342332363128662 2023-01-24 01:40:16.677386: step: 806/466, loss: 0.2321213334798813 2023-01-24 01:40:17.331444: step: 808/466, loss: 0.19736243784427643 2023-01-24 01:40:17.959914: step: 810/466, loss: 0.19904033839702606 2023-01-24 01:40:18.578323: step: 812/466, loss: 0.11806154251098633 2023-01-24 01:40:19.186636: step: 814/466, loss: 0.40048617124557495 2023-01-24 01:40:19.795792: step: 816/466, loss: 0.5157013535499573 2023-01-24 01:40:20.401226: step: 818/466, loss: 0.16514591872692108 2023-01-24 01:40:21.035439: step: 820/466, loss: 0.45448026061058044 2023-01-24 01:40:21.634827: step: 822/466, loss: 0.15857598185539246 2023-01-24 01:40:22.259172: step: 824/466, loss: 0.3477775454521179 2023-01-24 01:40:22.893723: step: 826/466, loss: 0.20452511310577393 2023-01-24 01:40:23.496095: step: 828/466, loss: 0.46907299757003784 2023-01-24 01:40:24.164227: step: 830/466, loss: 0.13698351383209229 2023-01-24 01:40:24.806021: step: 832/466, loss: 0.17828044295310974 2023-01-24 01:40:25.419091: step: 834/466, loss: 0.11355534940958023 2023-01-24 01:40:26.001753: step: 836/466, loss: 0.48741504549980164 2023-01-24 01:40:26.576303: step: 838/466, loss: 0.19051556289196014 2023-01-24 01:40:27.176363: step: 840/466, loss: 0.13757193088531494 2023-01-24 01:40:27.743993: step: 842/466, loss: 0.17317397892475128 2023-01-24 01:40:28.354086: step: 844/466, loss: 0.9766011834144592 2023-01-24 01:40:28.936545: step: 846/466, loss: 0.27026239037513733 2023-01-24 01:40:29.588063: step: 848/466, loss: 0.4328172504901886 2023-01-24 01:40:30.273199: step: 850/466, loss: 0.06835843622684479 2023-01-24 01:40:30.886407: step: 852/466, loss: 0.4180905222892761 2023-01-24 01:40:31.485716: step: 854/466, loss: 0.27884113788604736 2023-01-24 01:40:32.122845: step: 856/466, loss: 0.37110257148742676 2023-01-24 01:40:32.701039: step: 858/466, loss: 0.11132179200649261 2023-01-24 01:40:33.320996: step: 860/466, loss: 0.17947500944137573 2023-01-24 01:40:33.994070: step: 862/466, loss: 0.14948973059654236 2023-01-24 01:40:34.653420: step: 864/466, loss: 0.7457356452941895 2023-01-24 01:40:35.276724: step: 866/466, loss: 0.20709970593452454 2023-01-24 01:40:35.850773: step: 868/466, loss: 0.1610090434551239 2023-01-24 01:40:36.490451: step: 870/466, loss: 0.1790013164281845 2023-01-24 01:40:37.100069: step: 872/466, loss: 0.8546355366706848 2023-01-24 01:40:37.647924: step: 874/466, loss: 0.09819687902927399 2023-01-24 01:40:38.192096: step: 876/466, loss: 0.2067101001739502 2023-01-24 01:40:38.836862: step: 878/466, loss: 0.27337244153022766 2023-01-24 01:40:39.439211: step: 880/466, loss: 0.6295063495635986 2023-01-24 01:40:40.069591: step: 882/466, loss: 0.3326685428619385 2023-01-24 01:40:40.688228: step: 884/466, loss: 0.2988430857658386 2023-01-24 01:40:41.269189: step: 886/466, loss: 0.5852946639060974 2023-01-24 01:40:41.918348: step: 888/466, loss: 0.18134286999702454 2023-01-24 01:40:42.532960: step: 890/466, loss: 0.22485585510730743 2023-01-24 01:40:43.188033: step: 892/466, loss: 0.3484098017215729 2023-01-24 01:40:43.837680: step: 894/466, loss: 0.48258063197135925 2023-01-24 01:40:44.512712: step: 896/466, loss: 0.37149667739868164 2023-01-24 01:40:45.206553: step: 898/466, loss: 0.1821063756942749 2023-01-24 01:40:45.855152: step: 900/466, loss: 0.16822296380996704 2023-01-24 01:40:46.460394: step: 902/466, loss: 0.43086278438568115 2023-01-24 01:40:47.084790: step: 904/466, loss: 0.13566407561302185 2023-01-24 01:40:47.702562: step: 906/466, loss: 0.32909202575683594 2023-01-24 01:40:48.317151: step: 908/466, loss: 0.21370433270931244 2023-01-24 01:40:48.880505: step: 910/466, loss: 0.41602638363838196 2023-01-24 01:40:49.508028: step: 912/466, loss: 0.36425134539604187 2023-01-24 01:40:50.136814: step: 914/466, loss: 0.06356432288885117 2023-01-24 01:40:50.810138: step: 916/466, loss: 0.17971070110797882 2023-01-24 01:40:51.415328: step: 918/466, loss: 0.2898575961589813 2023-01-24 01:40:52.016684: step: 920/466, loss: 0.8324838876724243 2023-01-24 01:40:52.683601: step: 922/466, loss: 0.26693013310432434 2023-01-24 01:40:53.283192: step: 924/466, loss: 2.2129180431365967 2023-01-24 01:40:53.884387: step: 926/466, loss: 0.1820612996816635 2023-01-24 01:40:54.505030: step: 928/466, loss: 0.14786766469478607 2023-01-24 01:40:55.172252: step: 930/466, loss: 0.26998090744018555 2023-01-24 01:40:55.869365: step: 932/466, loss: 0.18968722224235535 ================================================== Loss: 0.357 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3344017750568686, 'r': 0.3369399289472433, 'f1': 0.3356660539795789}, 'combined': 0.2473328818796897, 'epoch': 10} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32421500798559244, 'r': 0.28617534098367275, 'f1': 0.30400985408428516}, 'combined': 0.19036131050137484, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3228507485494478, 'r': 0.3510312692956994, 'f1': 0.33635177985242465}, 'combined': 0.2478381535754708, 'epoch': 10} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3262515949477071, 'r': 0.2929786434774085, 'f1': 0.30872119540938525}, 'combined': 0.1913201774368021, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2914422563345237, 'r': 0.32849468740551624, 'f1': 0.30886119582998584}, 'combined': 0.22758193376946323, 'epoch': 10} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3417894272625764, 'r': 0.2958267696252805, 'f1': 0.3171514859649838}, 'combined': 0.21038761940251402, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2785714285714286, 'r': 0.2785714285714286, 'f1': 0.2785714285714286}, 'combined': 0.18571428571428572, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.20689655172413793, 'f1': 0.24489795918367346}, 'combined': 0.16326530612244897, 'epoch': 10} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3228507485494478, 'r': 0.3510312692956994, 'f1': 0.33635177985242465}, 'combined': 0.2478381535754708, 'epoch': 10} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3262515949477071, 'r': 0.2929786434774085, 'f1': 0.30872119540938525}, 'combined': 0.1913201774368021, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2914422563345237, 'r': 0.32849468740551624, 'f1': 0.30886119582998584}, 'combined': 0.22758193376946323, 'epoch': 10} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3417894272625764, 'r': 0.2958267696252805, 'f1': 0.3171514859649838}, 'combined': 0.21038761940251402, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.20689655172413793, 'f1': 0.24489795918367346}, 'combined': 0.16326530612244897, 'epoch': 10} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:43:49.126382: step: 2/466, loss: 0.056166116148233414 2023-01-24 01:43:49.717114: step: 4/466, loss: 0.2557902932167053 2023-01-24 01:43:50.439880: step: 6/466, loss: 0.1701134741306305 2023-01-24 01:43:51.057770: step: 8/466, loss: 0.14746788144111633 2023-01-24 01:43:51.742453: step: 10/466, loss: 0.10391730070114136 2023-01-24 01:43:52.347180: step: 12/466, loss: 0.13643328845500946 2023-01-24 01:43:52.993506: step: 14/466, loss: 0.14409023523330688 2023-01-24 01:43:53.661138: step: 16/466, loss: 0.4911487400531769 2023-01-24 01:43:54.244045: step: 18/466, loss: 0.3150400221347809 2023-01-24 01:43:54.869113: step: 20/466, loss: 0.15729796886444092 2023-01-24 01:43:55.520712: step: 22/466, loss: 0.6850324273109436 2023-01-24 01:43:56.150378: step: 24/466, loss: 1.054762840270996 2023-01-24 01:43:56.803213: step: 26/466, loss: 0.21192385256290436 2023-01-24 01:43:57.373668: step: 28/466, loss: 0.40402689576148987 2023-01-24 01:43:58.023675: step: 30/466, loss: 0.14002251625061035 2023-01-24 01:43:58.641346: step: 32/466, loss: 0.3916529715061188 2023-01-24 01:43:59.255835: step: 34/466, loss: 0.8134997487068176 2023-01-24 01:43:59.834006: step: 36/466, loss: 0.11228177696466446 2023-01-24 01:44:00.432116: step: 38/466, loss: 0.10730807483196259 2023-01-24 01:44:01.070516: step: 40/466, loss: 0.23734834790229797 2023-01-24 01:44:01.636803: step: 42/466, loss: 0.18028199672698975 2023-01-24 01:44:02.222093: step: 44/466, loss: 0.09118640422821045 2023-01-24 01:44:02.820379: step: 46/466, loss: 0.1470775306224823 2023-01-24 01:44:03.402077: step: 48/466, loss: 0.09362707287073135 2023-01-24 01:44:04.103775: step: 50/466, loss: 0.03722560405731201 2023-01-24 01:44:04.744940: step: 52/466, loss: 0.28206494450569153 2023-01-24 01:44:05.363704: step: 54/466, loss: 0.322757750749588 2023-01-24 01:44:06.007155: step: 56/466, loss: 0.20481041073799133 2023-01-24 01:44:06.618392: step: 58/466, loss: 0.16629400849342346 2023-01-24 01:44:07.243886: step: 60/466, loss: 0.16229598224163055 2023-01-24 01:44:07.901055: step: 62/466, loss: 0.1146460473537445 2023-01-24 01:44:08.624187: step: 64/466, loss: 0.11553932726383209 2023-01-24 01:44:09.290151: step: 66/466, loss: 0.3799624443054199 2023-01-24 01:44:09.922590: step: 68/466, loss: 0.04066538065671921 2023-01-24 01:44:10.525064: step: 70/466, loss: 0.14782850444316864 2023-01-24 01:44:11.129113: step: 72/466, loss: 0.12409090250730515 2023-01-24 01:44:11.756566: step: 74/466, loss: 0.5242952108383179 2023-01-24 01:44:12.404963: step: 76/466, loss: 0.014922366477549076 2023-01-24 01:44:13.074593: step: 78/466, loss: 0.4112562835216522 2023-01-24 01:44:13.730738: step: 80/466, loss: 0.26756608486175537 2023-01-24 01:44:14.395700: step: 82/466, loss: 0.3237709105014801 2023-01-24 01:44:15.075788: step: 84/466, loss: 0.27106547355651855 2023-01-24 01:44:15.668143: step: 86/466, loss: 0.11905548721551895 2023-01-24 01:44:16.293263: step: 88/466, loss: 1.0723085403442383 2023-01-24 01:44:16.919703: step: 90/466, loss: 0.1311020404100418 2023-01-24 01:44:17.555989: step: 92/466, loss: 0.07831726968288422 2023-01-24 01:44:18.147676: step: 94/466, loss: 0.20921853184700012 2023-01-24 01:44:18.759074: step: 96/466, loss: 0.098052978515625 2023-01-24 01:44:19.392026: step: 98/466, loss: 0.23252728581428528 2023-01-24 01:44:20.088695: step: 100/466, loss: 0.09003081172704697 2023-01-24 01:44:20.738932: step: 102/466, loss: 0.5116091966629028 2023-01-24 01:44:21.305618: step: 104/466, loss: 0.6654913425445557 2023-01-24 01:44:21.915905: step: 106/466, loss: 0.267116904258728 2023-01-24 01:44:22.536790: step: 108/466, loss: 0.3134758472442627 2023-01-24 01:44:23.251617: step: 110/466, loss: 0.46777868270874023 2023-01-24 01:44:23.814689: step: 112/466, loss: 4.004735946655273 2023-01-24 01:44:24.416189: step: 114/466, loss: 0.17657536268234253 2023-01-24 01:44:25.023678: step: 116/466, loss: 0.03620237484574318 2023-01-24 01:44:25.620637: step: 118/466, loss: 0.0979924350976944 2023-01-24 01:44:26.257034: step: 120/466, loss: 0.15746234357357025 2023-01-24 01:44:26.879603: step: 122/466, loss: 0.3161891996860504 2023-01-24 01:44:27.504771: step: 124/466, loss: 0.17940855026245117 2023-01-24 01:44:28.172424: step: 126/466, loss: 0.6414418816566467 2023-01-24 01:44:28.826972: step: 128/466, loss: 0.09979026764631271 2023-01-24 01:44:29.392239: step: 130/466, loss: 0.15462499856948853 2023-01-24 01:44:29.990586: step: 132/466, loss: 0.04857669025659561 2023-01-24 01:44:30.622663: step: 134/466, loss: 0.10343555361032486 2023-01-24 01:44:31.303831: step: 136/466, loss: 0.19048278033733368 2023-01-24 01:44:31.850632: step: 138/466, loss: 0.20670101046562195 2023-01-24 01:44:32.424386: step: 140/466, loss: 0.6163320541381836 2023-01-24 01:44:33.070568: step: 142/466, loss: 0.44770777225494385 2023-01-24 01:44:33.626421: step: 144/466, loss: 0.20391231775283813 2023-01-24 01:44:34.251454: step: 146/466, loss: 0.08158297091722488 2023-01-24 01:44:34.860210: step: 148/466, loss: 0.22164416313171387 2023-01-24 01:44:35.433636: step: 150/466, loss: 0.16902580857276917 2023-01-24 01:44:36.062638: step: 152/466, loss: 0.19119475781917572 2023-01-24 01:44:36.696257: step: 154/466, loss: 0.1406470388174057 2023-01-24 01:44:37.342317: step: 156/466, loss: 0.2205415517091751 2023-01-24 01:44:37.919143: step: 158/466, loss: 0.12865635752677917 2023-01-24 01:44:38.555618: step: 160/466, loss: 0.35933148860931396 2023-01-24 01:44:39.211550: step: 162/466, loss: 0.45000115036964417 2023-01-24 01:44:39.790259: step: 164/466, loss: 0.17844568192958832 2023-01-24 01:44:40.475464: step: 166/466, loss: 0.6599502563476562 2023-01-24 01:44:41.086689: step: 168/466, loss: 0.18270272016525269 2023-01-24 01:44:41.708350: step: 170/466, loss: 0.12775404751300812 2023-01-24 01:44:42.448416: step: 172/466, loss: 0.6985673308372498 2023-01-24 01:44:43.070124: step: 174/466, loss: 0.3083813190460205 2023-01-24 01:44:43.665017: step: 176/466, loss: 0.05631847307085991 2023-01-24 01:44:44.301442: step: 178/466, loss: 0.20867310464382172 2023-01-24 01:44:44.894240: step: 180/466, loss: 0.1922890990972519 2023-01-24 01:44:45.512185: step: 182/466, loss: 0.1106002926826477 2023-01-24 01:44:46.127888: step: 184/466, loss: 0.0857967808842659 2023-01-24 01:44:46.724009: step: 186/466, loss: 0.2109062820672989 2023-01-24 01:44:47.243501: step: 188/466, loss: 0.17689363658428192 2023-01-24 01:44:47.867930: step: 190/466, loss: 0.4798988401889801 2023-01-24 01:44:48.545156: step: 192/466, loss: 0.2285255342721939 2023-01-24 01:44:49.188746: step: 194/466, loss: 0.202779620885849 2023-01-24 01:44:49.883271: step: 196/466, loss: 0.0576382577419281 2023-01-24 01:44:50.546247: step: 198/466, loss: 0.1753125935792923 2023-01-24 01:44:51.142543: step: 200/466, loss: 0.10562325268983841 2023-01-24 01:44:51.775811: step: 202/466, loss: 0.22770178318023682 2023-01-24 01:44:52.442550: step: 204/466, loss: 0.09853328764438629 2023-01-24 01:44:53.116399: step: 206/466, loss: 0.12197496742010117 2023-01-24 01:44:53.771196: step: 208/466, loss: 0.3578759729862213 2023-01-24 01:44:54.363237: step: 210/466, loss: 0.09613992273807526 2023-01-24 01:44:54.945503: step: 212/466, loss: 0.1323639452457428 2023-01-24 01:44:55.576314: step: 214/466, loss: 0.15989448130130768 2023-01-24 01:44:56.229061: step: 216/466, loss: 0.622464656829834 2023-01-24 01:44:56.830811: step: 218/466, loss: 0.09623079746961594 2023-01-24 01:44:57.389781: step: 220/466, loss: 0.22858555614948273 2023-01-24 01:44:58.001902: step: 222/466, loss: 0.10492166876792908 2023-01-24 01:44:58.641499: step: 224/466, loss: 1.284364938735962 2023-01-24 01:44:59.261582: step: 226/466, loss: 0.17451579868793488 2023-01-24 01:44:59.882432: step: 228/466, loss: 0.11049184203147888 2023-01-24 01:45:00.540078: step: 230/466, loss: 0.07170509546995163 2023-01-24 01:45:01.158088: step: 232/466, loss: 0.21166926622390747 2023-01-24 01:45:01.814927: step: 234/466, loss: 0.14977402985095978 2023-01-24 01:45:02.485865: step: 236/466, loss: 0.19428275525569916 2023-01-24 01:45:03.121880: step: 238/466, loss: 0.2535887360572815 2023-01-24 01:45:03.776912: step: 240/466, loss: 0.2047453373670578 2023-01-24 01:45:04.389372: step: 242/466, loss: 0.3012225031852722 2023-01-24 01:45:04.933747: step: 244/466, loss: 0.19248755276203156 2023-01-24 01:45:05.513059: step: 246/466, loss: 0.05941566452383995 2023-01-24 01:45:06.125216: step: 248/466, loss: 0.07200485467910767 2023-01-24 01:45:06.695447: step: 250/466, loss: 0.22358103096485138 2023-01-24 01:45:07.312560: step: 252/466, loss: 0.2905464470386505 2023-01-24 01:45:07.936083: step: 254/466, loss: 0.20560117065906525 2023-01-24 01:45:08.552455: step: 256/466, loss: 0.10541833192110062 2023-01-24 01:45:09.160895: step: 258/466, loss: 0.11137007176876068 2023-01-24 01:45:09.781111: step: 260/466, loss: 0.04343457892537117 2023-01-24 01:45:10.395304: step: 262/466, loss: 0.13195733726024628 2023-01-24 01:45:11.017506: step: 264/466, loss: 0.16842803359031677 2023-01-24 01:45:11.690368: step: 266/466, loss: 0.1032499447464943 2023-01-24 01:45:12.295397: step: 268/466, loss: 0.18273863196372986 2023-01-24 01:45:12.925655: step: 270/466, loss: 0.1662614494562149 2023-01-24 01:45:13.543139: step: 272/466, loss: 0.1534671187400818 2023-01-24 01:45:14.187610: step: 274/466, loss: 0.20650523900985718 2023-01-24 01:45:14.774774: step: 276/466, loss: 0.372755229473114 2023-01-24 01:45:15.357491: step: 278/466, loss: 0.06759672611951828 2023-01-24 01:45:15.945664: step: 280/466, loss: 0.11779394000768661 2023-01-24 01:45:16.543332: step: 282/466, loss: 0.7636472582817078 2023-01-24 01:45:17.156023: step: 284/466, loss: 0.11199310421943665 2023-01-24 01:45:17.789470: step: 286/466, loss: 0.12100084125995636 2023-01-24 01:45:18.355250: step: 288/466, loss: 0.12693998217582703 2023-01-24 01:45:18.998076: step: 290/466, loss: 0.1179727166891098 2023-01-24 01:45:19.645806: step: 292/466, loss: 0.14116007089614868 2023-01-24 01:45:20.321158: step: 294/466, loss: 0.22696992754936218 2023-01-24 01:45:20.946264: step: 296/466, loss: 0.30198702216148376 2023-01-24 01:45:21.642698: step: 298/466, loss: 0.6689245700836182 2023-01-24 01:45:22.299939: step: 300/466, loss: 0.9285719394683838 2023-01-24 01:45:22.871133: step: 302/466, loss: 0.13377000391483307 2023-01-24 01:45:23.482337: step: 304/466, loss: 0.2543970048427582 2023-01-24 01:45:24.160548: step: 306/466, loss: 0.10936364531517029 2023-01-24 01:45:24.801100: step: 308/466, loss: 1.0188202857971191 2023-01-24 01:45:25.480655: step: 310/466, loss: 0.12935645878314972 2023-01-24 01:45:26.095620: step: 312/466, loss: 0.19918908178806305 2023-01-24 01:45:26.702456: step: 314/466, loss: 0.16760675609111786 2023-01-24 01:45:27.291861: step: 316/466, loss: 0.1381971687078476 2023-01-24 01:45:27.912252: step: 318/466, loss: 0.09110873937606812 2023-01-24 01:45:28.468035: step: 320/466, loss: 0.5863840579986572 2023-01-24 01:45:29.121187: step: 322/466, loss: 0.0976371094584465 2023-01-24 01:45:29.795767: step: 324/466, loss: 0.13793091475963593 2023-01-24 01:45:30.388506: step: 326/466, loss: 1.1786155700683594 2023-01-24 01:45:30.996670: step: 328/466, loss: 0.40811020135879517 2023-01-24 01:45:31.562643: step: 330/466, loss: 0.10378309339284897 2023-01-24 01:45:32.147675: step: 332/466, loss: 0.8036404848098755 2023-01-24 01:45:32.814078: step: 334/466, loss: 0.08031360059976578 2023-01-24 01:45:33.476121: step: 336/466, loss: 0.2537367343902588 2023-01-24 01:45:34.064862: step: 338/466, loss: 0.5860034823417664 2023-01-24 01:45:34.737280: step: 340/466, loss: 0.22907838225364685 2023-01-24 01:45:35.414092: step: 342/466, loss: 0.0988997146487236 2023-01-24 01:45:36.093441: step: 344/466, loss: 0.2374100387096405 2023-01-24 01:45:36.670712: step: 346/466, loss: 0.35751140117645264 2023-01-24 01:45:37.259946: step: 348/466, loss: 0.1537267416715622 2023-01-24 01:45:37.830343: step: 350/466, loss: 0.3873507082462311 2023-01-24 01:45:38.406112: step: 352/466, loss: 0.13627862930297852 2023-01-24 01:45:39.031137: step: 354/466, loss: 0.2671084403991699 2023-01-24 01:45:39.646508: step: 356/466, loss: 0.21971730887889862 2023-01-24 01:45:40.349191: step: 358/466, loss: 0.13752120733261108 2023-01-24 01:45:41.032748: step: 360/466, loss: 0.47819826006889343 2023-01-24 01:45:41.679488: step: 362/466, loss: 0.11981973797082901 2023-01-24 01:45:42.275868: step: 364/466, loss: 0.8910827040672302 2023-01-24 01:45:42.880197: step: 366/466, loss: 0.124082051217556 2023-01-24 01:45:43.437464: step: 368/466, loss: 0.09532630443572998 2023-01-24 01:45:44.057765: step: 370/466, loss: 0.7827644944190979 2023-01-24 01:45:44.681094: step: 372/466, loss: 0.15190982818603516 2023-01-24 01:45:45.305778: step: 374/466, loss: 0.4592849612236023 2023-01-24 01:45:45.870070: step: 376/466, loss: 0.1304216831922531 2023-01-24 01:45:46.477530: step: 378/466, loss: 1.2518675327301025 2023-01-24 01:45:47.092004: step: 380/466, loss: 0.5958972573280334 2023-01-24 01:45:47.742613: step: 382/466, loss: 0.8851840496063232 2023-01-24 01:45:48.399486: step: 384/466, loss: 0.16735360026359558 2023-01-24 01:45:49.011433: step: 386/466, loss: 0.10196258127689362 2023-01-24 01:45:49.560466: step: 388/466, loss: 0.042919568717479706 2023-01-24 01:45:50.193979: step: 390/466, loss: 0.07912568002939224 2023-01-24 01:45:50.809617: step: 392/466, loss: 0.25435715913772583 2023-01-24 01:45:51.383301: step: 394/466, loss: 0.20457041263580322 2023-01-24 01:45:52.085270: step: 396/466, loss: 1.0136419534683228 2023-01-24 01:45:52.694165: step: 398/466, loss: 0.32960301637649536 2023-01-24 01:45:53.294809: step: 400/466, loss: 0.1917905956506729 2023-01-24 01:45:53.925374: step: 402/466, loss: 0.3560708165168762 2023-01-24 01:45:54.549680: step: 404/466, loss: 0.4032041132450104 2023-01-24 01:45:55.116581: step: 406/466, loss: 0.17448638379573822 2023-01-24 01:45:55.788739: step: 408/466, loss: 1.0081785917282104 2023-01-24 01:45:56.361077: step: 410/466, loss: 0.1689838171005249 2023-01-24 01:45:56.876562: step: 412/466, loss: 0.8321733474731445 2023-01-24 01:45:57.513036: step: 414/466, loss: 0.4371199309825897 2023-01-24 01:45:58.105458: step: 416/466, loss: 0.20262926816940308 2023-01-24 01:45:58.648253: step: 418/466, loss: 0.053672414273023605 2023-01-24 01:45:59.250044: step: 420/466, loss: 0.14170688390731812 2023-01-24 01:45:59.850320: step: 422/466, loss: 0.2766505479812622 2023-01-24 01:46:00.489311: step: 424/466, loss: 0.2543244957923889 2023-01-24 01:46:01.091537: step: 426/466, loss: 0.08526773750782013 2023-01-24 01:46:01.699040: step: 428/466, loss: 0.1939818561077118 2023-01-24 01:46:02.295708: step: 430/466, loss: 0.38968127965927124 2023-01-24 01:46:02.912819: step: 432/466, loss: 0.08138088881969452 2023-01-24 01:46:03.516291: step: 434/466, loss: 0.17517045140266418 2023-01-24 01:46:04.127080: step: 436/466, loss: 0.148594930768013 2023-01-24 01:46:04.722776: step: 438/466, loss: 0.22264939546585083 2023-01-24 01:46:05.357058: step: 440/466, loss: 0.1607075184583664 2023-01-24 01:46:05.942254: step: 442/466, loss: 1.6625304222106934 2023-01-24 01:46:06.493961: step: 444/466, loss: 0.46067410707473755 2023-01-24 01:46:07.081274: step: 446/466, loss: 0.27573996782302856 2023-01-24 01:46:07.651172: step: 448/466, loss: 0.08637235313653946 2023-01-24 01:46:08.335818: step: 450/466, loss: 0.40373581647872925 2023-01-24 01:46:08.961604: step: 452/466, loss: 0.12875215709209442 2023-01-24 01:46:09.510986: step: 454/466, loss: 0.16318896412849426 2023-01-24 01:46:10.095246: step: 456/466, loss: 0.3789152204990387 2023-01-24 01:46:10.779757: step: 458/466, loss: 0.41057494282722473 2023-01-24 01:46:11.440336: step: 460/466, loss: 0.09855090826749802 2023-01-24 01:46:12.086276: step: 462/466, loss: 0.11188815534114838 2023-01-24 01:46:12.748102: step: 464/466, loss: 0.09850119054317474 2023-01-24 01:46:13.380312: step: 466/466, loss: 0.5544841289520264 2023-01-24 01:46:13.975871: step: 468/466, loss: 0.10233950614929199 2023-01-24 01:46:14.665255: step: 470/466, loss: 0.12407094240188599 2023-01-24 01:46:15.308889: step: 472/466, loss: 0.06933730840682983 2023-01-24 01:46:15.899397: step: 474/466, loss: 1.7711869478225708 2023-01-24 01:46:16.588126: step: 476/466, loss: 0.26224398612976074 2023-01-24 01:46:17.214225: step: 478/466, loss: 0.17908237874507904 2023-01-24 01:46:17.821389: step: 480/466, loss: 0.236375093460083 2023-01-24 01:46:18.500417: step: 482/466, loss: 0.19716686010360718 2023-01-24 01:46:19.148058: step: 484/466, loss: 0.12723639607429504 2023-01-24 01:46:19.736370: step: 486/466, loss: 0.17864809930324554 2023-01-24 01:46:20.308198: step: 488/466, loss: 0.5733076333999634 2023-01-24 01:46:20.896089: step: 490/466, loss: 0.5182340741157532 2023-01-24 01:46:21.521711: step: 492/466, loss: 0.06551437079906464 2023-01-24 01:46:22.126008: step: 494/466, loss: 0.046929892152547836 2023-01-24 01:46:22.742011: step: 496/466, loss: 0.1886240541934967 2023-01-24 01:46:23.300776: step: 498/466, loss: 0.2766363322734833 2023-01-24 01:46:23.959427: step: 500/466, loss: 0.34598588943481445 2023-01-24 01:46:24.579607: step: 502/466, loss: 0.5055862665176392 2023-01-24 01:46:25.168550: step: 504/466, loss: 0.1396089792251587 2023-01-24 01:46:25.784699: step: 506/466, loss: 0.4359109401702881 2023-01-24 01:46:26.364601: step: 508/466, loss: 0.2639307677745819 2023-01-24 01:46:27.041449: step: 510/466, loss: 0.28115010261535645 2023-01-24 01:46:27.646749: step: 512/466, loss: 0.2753887474536896 2023-01-24 01:46:28.419860: step: 514/466, loss: 0.16874361038208008 2023-01-24 01:46:29.043304: step: 516/466, loss: 0.21374468505382538 2023-01-24 01:46:29.718430: step: 518/466, loss: 1.2864924669265747 2023-01-24 01:46:30.346189: step: 520/466, loss: 0.13360130786895752 2023-01-24 01:46:30.969190: step: 522/466, loss: 0.6753610372543335 2023-01-24 01:46:31.514475: step: 524/466, loss: 0.2466708868741989 2023-01-24 01:46:32.114738: step: 526/466, loss: 0.15917545557022095 2023-01-24 01:46:32.718589: step: 528/466, loss: 0.21142955124378204 2023-01-24 01:46:33.343951: step: 530/466, loss: 0.30307796597480774 2023-01-24 01:46:34.039507: step: 532/466, loss: 0.2757112383842468 2023-01-24 01:46:34.689719: step: 534/466, loss: 0.21693839132785797 2023-01-24 01:46:35.372633: step: 536/466, loss: 0.07317715138196945 2023-01-24 01:46:36.004132: step: 538/466, loss: 0.06557666510343552 2023-01-24 01:46:36.628092: step: 540/466, loss: 0.11224383115768433 2023-01-24 01:46:37.241134: step: 542/466, loss: 0.10584692656993866 2023-01-24 01:46:37.898657: step: 544/466, loss: 0.13893507421016693 2023-01-24 01:46:38.509229: step: 546/466, loss: 0.14835600554943085 2023-01-24 01:46:39.121016: step: 548/466, loss: 0.1030556708574295 2023-01-24 01:46:39.733616: step: 550/466, loss: 0.23301927745342255 2023-01-24 01:46:40.343670: step: 552/466, loss: 0.1005689799785614 2023-01-24 01:46:40.963994: step: 554/466, loss: 0.7783551216125488 2023-01-24 01:46:41.566276: step: 556/466, loss: 0.1749415099620819 2023-01-24 01:46:42.242837: step: 558/466, loss: 0.12470461428165436 2023-01-24 01:46:42.857136: step: 560/466, loss: 0.10696142911911011 2023-01-24 01:46:43.531687: step: 562/466, loss: 0.1840374767780304 2023-01-24 01:46:44.148357: step: 564/466, loss: 0.1637786328792572 2023-01-24 01:46:44.739169: step: 566/466, loss: 0.2765638530254364 2023-01-24 01:46:45.331592: step: 568/466, loss: 0.1757660061120987 2023-01-24 01:46:45.947429: step: 570/466, loss: 0.3000456988811493 2023-01-24 01:46:46.608597: step: 572/466, loss: 0.253776490688324 2023-01-24 01:46:47.221546: step: 574/466, loss: 0.17246007919311523 2023-01-24 01:46:47.875375: step: 576/466, loss: 0.2563311755657196 2023-01-24 01:46:48.544038: step: 578/466, loss: 0.3139677047729492 2023-01-24 01:46:49.205003: step: 580/466, loss: 0.5516467690467834 2023-01-24 01:46:49.828210: step: 582/466, loss: 0.12537474930286407 2023-01-24 01:46:50.573402: step: 584/466, loss: 0.25660762190818787 2023-01-24 01:46:51.257110: step: 586/466, loss: 0.10562071949243546 2023-01-24 01:46:51.866237: step: 588/466, loss: 0.29111579060554504 2023-01-24 01:46:52.506033: step: 590/466, loss: 0.186752051115036 2023-01-24 01:46:53.139746: step: 592/466, loss: 0.22669342160224915 2023-01-24 01:46:53.790867: step: 594/466, loss: 0.17457406222820282 2023-01-24 01:46:54.516517: step: 596/466, loss: 0.13138017058372498 2023-01-24 01:46:55.074680: step: 598/466, loss: 0.18545128405094147 2023-01-24 01:46:55.699168: step: 600/466, loss: 1.5592176914215088 2023-01-24 01:46:56.265570: step: 602/466, loss: 0.1451473832130432 2023-01-24 01:46:56.882926: step: 604/466, loss: 0.19037167727947235 2023-01-24 01:46:57.511123: step: 606/466, loss: 0.6539323329925537 2023-01-24 01:46:58.112175: step: 608/466, loss: 0.37702956795692444 2023-01-24 01:46:58.713834: step: 610/466, loss: 0.261125385761261 2023-01-24 01:46:59.274472: step: 612/466, loss: 0.5847034454345703 2023-01-24 01:46:59.907157: step: 614/466, loss: 1.1425877809524536 2023-01-24 01:47:00.478489: step: 616/466, loss: 0.5076453685760498 2023-01-24 01:47:01.112939: step: 618/466, loss: 0.2025333195924759 2023-01-24 01:47:01.733300: step: 620/466, loss: 0.2334977239370346 2023-01-24 01:47:02.416946: step: 622/466, loss: 0.21920332312583923 2023-01-24 01:47:03.070036: step: 624/466, loss: 0.1656721979379654 2023-01-24 01:47:03.662304: step: 626/466, loss: 0.28424978256225586 2023-01-24 01:47:04.293457: step: 628/466, loss: 0.1002599224448204 2023-01-24 01:47:04.879120: step: 630/466, loss: 0.27894553542137146 2023-01-24 01:47:05.443595: step: 632/466, loss: 0.6983669400215149 2023-01-24 01:47:06.070457: step: 634/466, loss: 0.08150078356266022 2023-01-24 01:47:06.667899: step: 636/466, loss: 1.2374701499938965 2023-01-24 01:47:07.315717: step: 638/466, loss: 0.12696145474910736 2023-01-24 01:47:07.914338: step: 640/466, loss: 0.18900883197784424 2023-01-24 01:47:08.535993: step: 642/466, loss: 0.12979234755039215 2023-01-24 01:47:09.113013: step: 644/466, loss: 0.09342486411333084 2023-01-24 01:47:09.699335: step: 646/466, loss: 0.2928432822227478 2023-01-24 01:47:10.379815: step: 648/466, loss: 0.3717673122882843 2023-01-24 01:47:10.995569: step: 650/466, loss: 0.2137729972600937 2023-01-24 01:47:11.607608: step: 652/466, loss: 0.11146210134029388 2023-01-24 01:47:12.236789: step: 654/466, loss: 0.1910243183374405 2023-01-24 01:47:12.940493: step: 656/466, loss: 0.6716959476470947 2023-01-24 01:47:13.594628: step: 658/466, loss: 0.3120448887348175 2023-01-24 01:47:14.250493: step: 660/466, loss: 0.1816355288028717 2023-01-24 01:47:14.880466: step: 662/466, loss: 0.14433424174785614 2023-01-24 01:47:15.473829: step: 664/466, loss: 0.2028488665819168 2023-01-24 01:47:16.204550: step: 666/466, loss: 0.6138753890991211 2023-01-24 01:47:16.818744: step: 668/466, loss: 0.8248133659362793 2023-01-24 01:47:17.401927: step: 670/466, loss: 0.30739057064056396 2023-01-24 01:47:17.988472: step: 672/466, loss: 0.1564578264951706 2023-01-24 01:47:18.612262: step: 674/466, loss: 0.2032361924648285 2023-01-24 01:47:19.237256: step: 676/466, loss: 0.1419278085231781 2023-01-24 01:47:19.864891: step: 678/466, loss: 0.16310647130012512 2023-01-24 01:47:20.542272: step: 680/466, loss: 0.2876623272895813 2023-01-24 01:47:21.303018: step: 682/466, loss: 0.08155625313520432 2023-01-24 01:47:21.920890: step: 684/466, loss: 0.5003127455711365 2023-01-24 01:47:22.511087: step: 686/466, loss: 0.10688678920269012 2023-01-24 01:47:23.108688: step: 688/466, loss: 0.11806152760982513 2023-01-24 01:47:23.712570: step: 690/466, loss: 0.19526918232440948 2023-01-24 01:47:24.350164: step: 692/466, loss: 0.15610742568969727 2023-01-24 01:47:25.030235: step: 694/466, loss: 0.2325829118490219 2023-01-24 01:47:25.666638: step: 696/466, loss: 0.067191481590271 2023-01-24 01:47:26.257368: step: 698/466, loss: 0.2806580662727356 2023-01-24 01:47:26.842200: step: 700/466, loss: 0.33890679478645325 2023-01-24 01:47:27.488270: step: 702/466, loss: 0.2745884656906128 2023-01-24 01:47:28.182402: step: 704/466, loss: 0.08699221163988113 2023-01-24 01:47:28.764120: step: 706/466, loss: 0.2990705072879791 2023-01-24 01:47:29.369210: step: 708/466, loss: 0.22613131999969482 2023-01-24 01:47:29.988566: step: 710/466, loss: 0.17546585202217102 2023-01-24 01:47:30.569601: step: 712/466, loss: 0.2502884566783905 2023-01-24 01:47:31.186144: step: 714/466, loss: 0.18384194374084473 2023-01-24 01:47:31.781623: step: 716/466, loss: 0.3344501852989197 2023-01-24 01:47:32.398617: step: 718/466, loss: 0.2311343103647232 2023-01-24 01:47:33.032007: step: 720/466, loss: 0.4437202215194702 2023-01-24 01:47:33.679050: step: 722/466, loss: 0.23476120829582214 2023-01-24 01:47:34.356032: step: 724/466, loss: 0.10877900570631027 2023-01-24 01:47:35.029916: step: 726/466, loss: 0.1343332678079605 2023-01-24 01:47:35.612581: step: 728/466, loss: 0.13292235136032104 2023-01-24 01:47:36.212637: step: 730/466, loss: 0.08492002636194229 2023-01-24 01:47:36.868323: step: 732/466, loss: 0.32326027750968933 2023-01-24 01:47:37.409280: step: 734/466, loss: 0.2701374590396881 2023-01-24 01:47:38.058563: step: 736/466, loss: 0.32011130452156067 2023-01-24 01:47:38.672257: step: 738/466, loss: 0.15664590895175934 2023-01-24 01:47:39.362815: step: 740/466, loss: 0.8938038349151611 2023-01-24 01:47:39.995436: step: 742/466, loss: 0.11967580020427704 2023-01-24 01:47:40.645390: step: 744/466, loss: 0.3725675046443939 2023-01-24 01:47:41.327195: step: 746/466, loss: 0.3549603521823883 2023-01-24 01:47:42.084555: step: 748/466, loss: 0.22611792385578156 2023-01-24 01:47:42.741848: step: 750/466, loss: 0.13594792783260345 2023-01-24 01:47:43.402845: step: 752/466, loss: 0.13058385252952576 2023-01-24 01:47:43.981200: step: 754/466, loss: 0.12555429339408875 2023-01-24 01:47:44.517739: step: 756/466, loss: 0.08366088569164276 2023-01-24 01:47:45.160367: step: 758/466, loss: 0.15027771890163422 2023-01-24 01:47:45.746191: step: 760/466, loss: 0.5595515966415405 2023-01-24 01:47:46.302914: step: 762/466, loss: 0.09424523264169693 2023-01-24 01:47:46.957586: step: 764/466, loss: 0.15257276594638824 2023-01-24 01:47:47.536335: step: 766/466, loss: 0.08885790407657623 2023-01-24 01:47:48.138950: step: 768/466, loss: 0.378495454788208 2023-01-24 01:47:48.740084: step: 770/466, loss: 0.07986627519130707 2023-01-24 01:47:49.355656: step: 772/466, loss: 0.1039273589849472 2023-01-24 01:47:50.017128: step: 774/466, loss: 0.09044419974088669 2023-01-24 01:47:50.631972: step: 776/466, loss: 0.2003486156463623 2023-01-24 01:47:51.232215: step: 778/466, loss: 0.17398804426193237 2023-01-24 01:47:51.858547: step: 780/466, loss: 0.30610060691833496 2023-01-24 01:47:52.467041: step: 782/466, loss: 0.14237099885940552 2023-01-24 01:47:53.104604: step: 784/466, loss: 0.2621976435184479 2023-01-24 01:47:53.665508: step: 786/466, loss: 0.35762977600097656 2023-01-24 01:47:54.266967: step: 788/466, loss: 0.38972553610801697 2023-01-24 01:47:54.902447: step: 790/466, loss: 0.13141439855098724 2023-01-24 01:47:55.510352: step: 792/466, loss: 0.11431451886892319 2023-01-24 01:47:56.046851: step: 794/466, loss: 0.25144729018211365 2023-01-24 01:47:56.649532: step: 796/466, loss: 0.35012686252593994 2023-01-24 01:47:57.287993: step: 798/466, loss: 0.11493077129125595 2023-01-24 01:47:57.991104: step: 800/466, loss: 0.5581721067428589 2023-01-24 01:47:58.622610: step: 802/466, loss: 0.1472097933292389 2023-01-24 01:47:59.182984: step: 804/466, loss: 0.3674423098564148 2023-01-24 01:47:59.861418: step: 806/466, loss: 0.2731553614139557 2023-01-24 01:48:00.531982: step: 808/466, loss: 0.20992299914360046 2023-01-24 01:48:01.272868: step: 810/466, loss: 0.5103529095649719 2023-01-24 01:48:01.986339: step: 812/466, loss: 0.06470693647861481 2023-01-24 01:48:02.637522: step: 814/466, loss: 0.08526197820901871 2023-01-24 01:48:03.304516: step: 816/466, loss: 0.5159960985183716 2023-01-24 01:48:03.928993: step: 818/466, loss: 0.7602057456970215 2023-01-24 01:48:04.542027: step: 820/466, loss: 0.44809937477111816 2023-01-24 01:48:05.103578: step: 822/466, loss: 0.0920209214091301 2023-01-24 01:48:05.673310: step: 824/466, loss: 0.10172657668590546 2023-01-24 01:48:06.297302: step: 826/466, loss: 0.1861218512058258 2023-01-24 01:48:06.903670: step: 828/466, loss: 0.11565698683261871 2023-01-24 01:48:07.469942: step: 830/466, loss: 0.11650048196315765 2023-01-24 01:48:08.091939: step: 832/466, loss: 0.22282303869724274 2023-01-24 01:48:08.733411: step: 834/466, loss: 0.45914947986602783 2023-01-24 01:48:09.362227: step: 836/466, loss: 0.20269319415092468 2023-01-24 01:48:09.994533: step: 838/466, loss: 0.4981473684310913 2023-01-24 01:48:10.614237: step: 840/466, loss: 0.12210121005773544 2023-01-24 01:48:11.208867: step: 842/466, loss: 0.27366921305656433 2023-01-24 01:48:11.831212: step: 844/466, loss: 0.24226875603199005 2023-01-24 01:48:12.526536: step: 846/466, loss: 0.32749420404434204 2023-01-24 01:48:13.182097: step: 848/466, loss: 0.2046627700328827 2023-01-24 01:48:13.806841: step: 850/466, loss: 11.532801628112793 2023-01-24 01:48:14.424472: step: 852/466, loss: 1.3917676210403442 2023-01-24 01:48:15.025285: step: 854/466, loss: 0.41023728251457214 2023-01-24 01:48:15.779744: step: 856/466, loss: 0.4746791124343872 2023-01-24 01:48:16.382816: step: 858/466, loss: 0.2983412742614746 2023-01-24 01:48:16.979637: step: 860/466, loss: 0.5580613613128662 2023-01-24 01:48:17.560232: step: 862/466, loss: 0.6077788472175598 2023-01-24 01:48:18.208828: step: 864/466, loss: 0.1894834190607071 2023-01-24 01:48:18.778617: step: 866/466, loss: 0.14195139706134796 2023-01-24 01:48:19.358945: step: 868/466, loss: 1.7875826358795166 2023-01-24 01:48:19.934050: step: 870/466, loss: 0.4175891578197479 2023-01-24 01:48:20.477064: step: 872/466, loss: 0.1943206787109375 2023-01-24 01:48:21.046480: step: 874/466, loss: 0.20352262258529663 2023-01-24 01:48:21.732206: step: 876/466, loss: 1.3924146890640259 2023-01-24 01:48:22.315815: step: 878/466, loss: 0.7312995791435242 2023-01-24 01:48:22.928997: step: 880/466, loss: 0.1208275556564331 2023-01-24 01:48:23.569805: step: 882/466, loss: 0.44542407989501953 2023-01-24 01:48:24.215661: step: 884/466, loss: 0.20071272552013397 2023-01-24 01:48:24.827099: step: 886/466, loss: 0.2863750755786896 2023-01-24 01:48:25.465850: step: 888/466, loss: 0.3443882465362549 2023-01-24 01:48:26.055022: step: 890/466, loss: 0.2728160321712494 2023-01-24 01:48:26.694865: step: 892/466, loss: 0.2089729905128479 2023-01-24 01:48:27.331574: step: 894/466, loss: 0.4772091209888458 2023-01-24 01:48:27.970074: step: 896/466, loss: 0.5877247452735901 2023-01-24 01:48:28.626010: step: 898/466, loss: 0.12831562757492065 2023-01-24 01:48:29.235279: step: 900/466, loss: 0.08909977227449417 2023-01-24 01:48:29.869806: step: 902/466, loss: 0.12847910821437836 2023-01-24 01:48:30.590318: step: 904/466, loss: 0.16102434694766998 2023-01-24 01:48:31.170166: step: 906/466, loss: 0.11746357381343842 2023-01-24 01:48:31.771132: step: 908/466, loss: 0.13781356811523438 2023-01-24 01:48:32.371172: step: 910/466, loss: 0.2222241312265396 2023-01-24 01:48:33.045171: step: 912/466, loss: 0.5814196467399597 2023-01-24 01:48:33.696922: step: 914/466, loss: 0.6439260840415955 2023-01-24 01:48:34.265026: step: 916/466, loss: 0.13478106260299683 2023-01-24 01:48:34.906585: step: 918/466, loss: 0.18502192199230194 2023-01-24 01:48:35.591685: step: 920/466, loss: 0.27424484491348267 2023-01-24 01:48:36.368419: step: 922/466, loss: 0.20019055902957916 2023-01-24 01:48:37.011427: step: 924/466, loss: 0.11570438742637634 2023-01-24 01:48:37.653804: step: 926/466, loss: 0.2209179401397705 2023-01-24 01:48:38.278746: step: 928/466, loss: 0.4133010804653168 2023-01-24 01:48:38.869629: step: 930/466, loss: 0.3311747908592224 2023-01-24 01:48:39.480203: step: 932/466, loss: 0.052754368633031845 ================================================== Loss: 0.319 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36244202226345085, 'r': 0.30329588580300154, 'f1': 0.3302415946656649}, 'combined': 0.24333591185891096, 'epoch': 11} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3692313638370953, 'r': 0.28092241851504635, 'f1': 0.31907948714984247}, 'combined': 0.19979743587887333, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3091608471850273, 'r': 0.30564098934231354, 'f1': 0.3073908423347313}, 'combined': 0.22649851540453883, 'epoch': 11} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.37346264183973005, 'r': 0.28296717347350253, 'f1': 0.32197705129803883}, 'combined': 0.21358873699968914, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3482142857142857, 'r': 0.2785714285714286, 'f1': 0.30952380952380953}, 'combined': 0.20634920634920634, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.1724137931034483, 'f1': 0.22727272727272724}, 'combined': 0.1515151515151515, 'epoch': 11} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2914422563345237, 'r': 0.32849468740551624, 'f1': 0.30886119582998584}, 'combined': 0.22758193376946323, 'epoch': 10} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3417894272625764, 'r': 0.2958267696252805, 'f1': 0.3171514859649838}, 'combined': 0.21038761940251402, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.20689655172413793, 'f1': 0.24489795918367346}, 'combined': 0.16326530612244897, 'epoch': 10} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:51:21.219244: step: 2/466, loss: 0.06511319428682327 2023-01-24 01:51:21.816074: step: 4/466, loss: 0.13714738190174103 2023-01-24 01:51:22.458851: step: 6/466, loss: 0.12040044367313385 2023-01-24 01:51:23.083376: step: 8/466, loss: 0.1934211552143097 2023-01-24 01:51:23.708509: step: 10/466, loss: 0.14183814823627472 2023-01-24 01:51:24.337825: step: 12/466, loss: 0.2772085964679718 2023-01-24 01:51:24.971659: step: 14/466, loss: 1.1655776500701904 2023-01-24 01:51:25.569667: step: 16/466, loss: 0.13058745861053467 2023-01-24 01:51:26.152535: step: 18/466, loss: 0.18193387985229492 2023-01-24 01:51:26.776463: step: 20/466, loss: 0.4208381772041321 2023-01-24 01:51:27.347604: step: 22/466, loss: 0.15971587598323822 2023-01-24 01:51:27.951471: step: 24/466, loss: 0.04996322840452194 2023-01-24 01:51:28.607173: step: 26/466, loss: 0.6347571611404419 2023-01-24 01:51:29.253256: step: 28/466, loss: 0.06279143691062927 2023-01-24 01:51:29.832966: step: 30/466, loss: 0.09358839690685272 2023-01-24 01:51:30.469811: step: 32/466, loss: 4.3400750160217285 2023-01-24 01:51:31.132884: step: 34/466, loss: 0.21792274713516235 2023-01-24 01:51:31.739911: step: 36/466, loss: 0.15213552117347717 2023-01-24 01:51:32.398533: step: 38/466, loss: 0.13988621532917023 2023-01-24 01:51:33.032419: step: 40/466, loss: 0.19849446415901184 2023-01-24 01:51:33.605559: step: 42/466, loss: 0.03976305201649666 2023-01-24 01:51:34.226524: step: 44/466, loss: 0.258556991815567 2023-01-24 01:51:34.790003: step: 46/466, loss: 0.5114126205444336 2023-01-24 01:51:35.350811: step: 48/466, loss: 0.074154332280159 2023-01-24 01:51:35.930936: step: 50/466, loss: 0.061618831008672714 2023-01-24 01:51:36.611710: step: 52/466, loss: 0.1468667984008789 2023-01-24 01:51:37.202305: step: 54/466, loss: 0.30897611379623413 2023-01-24 01:51:37.796699: step: 56/466, loss: 0.10922352969646454 2023-01-24 01:51:38.453591: step: 58/466, loss: 0.15912429988384247 2023-01-24 01:51:39.043100: step: 60/466, loss: 0.1769641935825348 2023-01-24 01:51:39.638254: step: 62/466, loss: 0.3742664158344269 2023-01-24 01:51:40.207475: step: 64/466, loss: 0.07299426943063736 2023-01-24 01:51:40.824031: step: 66/466, loss: 0.17261630296707153 2023-01-24 01:51:41.427756: step: 68/466, loss: 0.14107061922550201 2023-01-24 01:51:42.131433: step: 70/466, loss: 0.054710984230041504 2023-01-24 01:51:42.730098: step: 72/466, loss: 0.06515087932348251 2023-01-24 01:51:43.303571: step: 74/466, loss: 0.12145428359508514 2023-01-24 01:51:43.890228: step: 76/466, loss: 0.19312424957752228 2023-01-24 01:51:44.580705: step: 78/466, loss: 0.27080410718917847 2023-01-24 01:51:45.200775: step: 80/466, loss: 0.20949777960777283 2023-01-24 01:51:45.823663: step: 82/466, loss: 0.18421269953250885 2023-01-24 01:51:46.406128: step: 84/466, loss: 0.21335816383361816 2023-01-24 01:51:47.021592: step: 86/466, loss: 0.1356712132692337 2023-01-24 01:51:47.672047: step: 88/466, loss: 0.18476635217666626 2023-01-24 01:51:48.307183: step: 90/466, loss: 0.7203394174575806 2023-01-24 01:51:48.905913: step: 92/466, loss: 0.32669126987457275 2023-01-24 01:51:49.520542: step: 94/466, loss: 0.834490954875946 2023-01-24 01:51:50.154579: step: 96/466, loss: 0.12480907887220383 2023-01-24 01:51:50.772423: step: 98/466, loss: 0.21006691455841064 2023-01-24 01:51:51.370307: step: 100/466, loss: 0.10662776231765747 2023-01-24 01:51:51.992789: step: 102/466, loss: 0.3675839900970459 2023-01-24 01:51:52.574493: step: 104/466, loss: 0.12077916413545609 2023-01-24 01:51:53.216563: step: 106/466, loss: 0.2399625927209854 2023-01-24 01:51:53.808527: step: 108/466, loss: 0.10004761070013046 2023-01-24 01:51:54.508656: step: 110/466, loss: 0.12662649154663086 2023-01-24 01:51:55.156825: step: 112/466, loss: 0.3942157030105591 2023-01-24 01:51:55.801045: step: 114/466, loss: 0.4537506103515625 2023-01-24 01:51:56.485935: step: 116/466, loss: 0.18500998616218567 2023-01-24 01:51:57.061570: step: 118/466, loss: 0.1711919903755188 2023-01-24 01:51:57.663429: step: 120/466, loss: 0.33424004912376404 2023-01-24 01:51:58.261993: step: 122/466, loss: 0.22498555481433868 2023-01-24 01:51:58.891971: step: 124/466, loss: 0.19132056832313538 2023-01-24 01:51:59.536166: step: 126/466, loss: 0.6551756858825684 2023-01-24 01:52:00.167833: step: 128/466, loss: 0.37431344389915466 2023-01-24 01:52:00.757396: step: 130/466, loss: 0.2734319269657135 2023-01-24 01:52:01.360086: step: 132/466, loss: 0.10243318974971771 2023-01-24 01:52:01.956150: step: 134/466, loss: 0.17675721645355225 2023-01-24 01:52:02.575899: step: 136/466, loss: 0.09176446497440338 2023-01-24 01:52:03.229833: step: 138/466, loss: 0.1962217390537262 2023-01-24 01:52:03.836116: step: 140/466, loss: 0.3212512135505676 2023-01-24 01:52:04.436625: step: 142/466, loss: 0.11028824746608734 2023-01-24 01:52:05.066975: step: 144/466, loss: 0.45328283309936523 2023-01-24 01:52:05.755927: step: 146/466, loss: 0.07855706661939621 2023-01-24 01:52:06.402370: step: 148/466, loss: 0.07228368520736694 2023-01-24 01:52:06.972258: step: 150/466, loss: 0.1302642971277237 2023-01-24 01:52:07.558215: step: 152/466, loss: 0.06668470799922943 2023-01-24 01:52:08.165714: step: 154/466, loss: 0.23040235042572021 2023-01-24 01:52:08.832244: step: 156/466, loss: 0.28832942247390747 2023-01-24 01:52:09.449367: step: 158/466, loss: 0.09825500845909119 2023-01-24 01:52:10.021834: step: 160/466, loss: 0.8808926939964294 2023-01-24 01:52:10.601820: step: 162/466, loss: 0.10054733604192734 2023-01-24 01:52:11.255246: step: 164/466, loss: 0.13233955204486847 2023-01-24 01:52:11.933518: step: 166/466, loss: 0.13742248713970184 2023-01-24 01:52:12.516572: step: 168/466, loss: 0.1636868417263031 2023-01-24 01:52:13.141205: step: 170/466, loss: 0.09520162642002106 2023-01-24 01:52:13.803677: step: 172/466, loss: 0.1735386997461319 2023-01-24 01:52:14.418974: step: 174/466, loss: 0.07352687418460846 2023-01-24 01:52:15.069690: step: 176/466, loss: 0.11939296871423721 2023-01-24 01:52:15.703235: step: 178/466, loss: 0.06646954268217087 2023-01-24 01:52:16.315232: step: 180/466, loss: 0.2287725806236267 2023-01-24 01:52:16.951268: step: 182/466, loss: 0.13557642698287964 2023-01-24 01:52:17.598974: step: 184/466, loss: 0.12363395094871521 2023-01-24 01:52:18.181767: step: 186/466, loss: 0.04114263132214546 2023-01-24 01:52:18.796909: step: 188/466, loss: 0.6626765727996826 2023-01-24 01:52:19.364734: step: 190/466, loss: 0.021727893501520157 2023-01-24 01:52:19.941260: step: 192/466, loss: 0.528025209903717 2023-01-24 01:52:20.561401: step: 194/466, loss: 0.04923209920525551 2023-01-24 01:52:21.253810: step: 196/466, loss: 0.15736114978790283 2023-01-24 01:52:21.841952: step: 198/466, loss: 0.1750304102897644 2023-01-24 01:52:22.423232: step: 200/466, loss: 0.06680495291948318 2023-01-24 01:52:23.002314: step: 202/466, loss: 0.09692071378231049 2023-01-24 01:52:23.634105: step: 204/466, loss: 0.07838789373636246 2023-01-24 01:52:24.284034: step: 206/466, loss: 0.3004598021507263 2023-01-24 01:52:24.895362: step: 208/466, loss: 0.1860407292842865 2023-01-24 01:52:25.492874: step: 210/466, loss: 0.35397303104400635 2023-01-24 01:52:26.092598: step: 212/466, loss: 0.18014740943908691 2023-01-24 01:52:26.727161: step: 214/466, loss: 0.24358399212360382 2023-01-24 01:52:27.397803: step: 216/466, loss: 0.21685518324375153 2023-01-24 01:52:27.989107: step: 218/466, loss: 0.08633943647146225 2023-01-24 01:52:28.572607: step: 220/466, loss: 0.08359657973051071 2023-01-24 01:52:29.289077: step: 222/466, loss: 0.1390642523765564 2023-01-24 01:52:29.869484: step: 224/466, loss: 0.5406646728515625 2023-01-24 01:52:30.510451: step: 226/466, loss: 0.20375865697860718 2023-01-24 01:52:31.073110: step: 228/466, loss: 0.2889938950538635 2023-01-24 01:52:31.737322: step: 230/466, loss: 0.37442490458488464 2023-01-24 01:52:32.324862: step: 232/466, loss: 0.06598281115293503 2023-01-24 01:52:32.949934: step: 234/466, loss: 0.24299190938472748 2023-01-24 01:52:33.547625: step: 236/466, loss: 0.12016425281763077 2023-01-24 01:52:34.142783: step: 238/466, loss: 0.09447656571865082 2023-01-24 01:52:34.759397: step: 240/466, loss: 0.23242297768592834 2023-01-24 01:52:35.356134: step: 242/466, loss: 0.14975641667842865 2023-01-24 01:52:36.009872: step: 244/466, loss: 0.26126527786254883 2023-01-24 01:52:36.551821: step: 246/466, loss: 0.26530560851097107 2023-01-24 01:52:37.184448: step: 248/466, loss: 0.3171543478965759 2023-01-24 01:52:37.893923: step: 250/466, loss: 0.19703979790210724 2023-01-24 01:52:38.486643: step: 252/466, loss: 0.04827583208680153 2023-01-24 01:52:39.120053: step: 254/466, loss: 0.299142450094223 2023-01-24 01:52:39.761614: step: 256/466, loss: 0.20214809477329254 2023-01-24 01:52:40.382236: step: 258/466, loss: 0.7955482006072998 2023-01-24 01:52:41.023658: step: 260/466, loss: 0.40130114555358887 2023-01-24 01:52:41.675497: step: 262/466, loss: 0.16113027930259705 2023-01-24 01:52:42.408650: step: 264/466, loss: 0.07945267111063004 2023-01-24 01:52:42.986863: step: 266/466, loss: 0.11397974193096161 2023-01-24 01:52:43.562802: step: 268/466, loss: 0.1477915346622467 2023-01-24 01:52:44.182861: step: 270/466, loss: 0.42245355248451233 2023-01-24 01:52:44.799309: step: 272/466, loss: 0.06648483872413635 2023-01-24 01:52:45.503904: step: 274/466, loss: 0.30151882767677307 2023-01-24 01:52:46.133991: step: 276/466, loss: 0.18119044601917267 2023-01-24 01:52:46.805367: step: 278/466, loss: 0.7626487016677856 2023-01-24 01:52:47.441482: step: 280/466, loss: 0.4290889799594879 2023-01-24 01:52:48.030021: step: 282/466, loss: 0.07369454205036163 2023-01-24 01:52:48.708241: step: 284/466, loss: 0.32976651191711426 2023-01-24 01:52:49.372626: step: 286/466, loss: 0.0721362978219986 2023-01-24 01:52:50.044955: step: 288/466, loss: 0.2435348480939865 2023-01-24 01:52:50.700871: step: 290/466, loss: 0.26690465211868286 2023-01-24 01:52:51.336224: step: 292/466, loss: 0.04773420840501785 2023-01-24 01:52:52.023267: step: 294/466, loss: 0.15354648232460022 2023-01-24 01:52:52.647874: step: 296/466, loss: 0.07595565915107727 2023-01-24 01:52:53.253260: step: 298/466, loss: 0.12475527822971344 2023-01-24 01:52:53.814172: step: 300/466, loss: 0.18679562211036682 2023-01-24 01:52:54.429268: step: 302/466, loss: 0.09634778648614883 2023-01-24 01:52:55.037823: step: 304/466, loss: 0.17946763336658478 2023-01-24 01:52:55.612037: step: 306/466, loss: 0.08628623932600021 2023-01-24 01:52:56.231593: step: 308/466, loss: 0.09216740727424622 2023-01-24 01:52:56.802676: step: 310/466, loss: 0.12150314450263977 2023-01-24 01:52:57.436028: step: 312/466, loss: 0.23423610627651215 2023-01-24 01:52:58.021124: step: 314/466, loss: 0.1008576825261116 2023-01-24 01:52:58.640097: step: 316/466, loss: 0.15783874690532684 2023-01-24 01:52:59.256906: step: 318/466, loss: 0.06963887065649033 2023-01-24 01:52:59.854024: step: 320/466, loss: 0.06577898561954498 2023-01-24 01:53:00.505611: step: 322/466, loss: 1.1378934383392334 2023-01-24 01:53:01.115786: step: 324/466, loss: 0.364601194858551 2023-01-24 01:53:01.749531: step: 326/466, loss: 0.28841596841812134 2023-01-24 01:53:02.385052: step: 328/466, loss: 0.11138347536325455 2023-01-24 01:53:02.957126: step: 330/466, loss: 0.4491463601589203 2023-01-24 01:53:03.577589: step: 332/466, loss: 0.10850333422422409 2023-01-24 01:53:04.174056: step: 334/466, loss: 0.11454664170742035 2023-01-24 01:53:04.775852: step: 336/466, loss: 0.46762022376060486 2023-01-24 01:53:05.435041: step: 338/466, loss: 0.1430659145116806 2023-01-24 01:53:06.049674: step: 340/466, loss: 1.066911220550537 2023-01-24 01:53:06.690650: step: 342/466, loss: 0.25913503766059875 2023-01-24 01:53:07.330829: step: 344/466, loss: 0.16815349459648132 2023-01-24 01:53:08.018981: step: 346/466, loss: 0.13444244861602783 2023-01-24 01:53:08.681006: step: 348/466, loss: 0.13549885153770447 2023-01-24 01:53:09.341495: step: 350/466, loss: 0.23235894739627838 2023-01-24 01:53:09.949471: step: 352/466, loss: 0.39627596735954285 2023-01-24 01:53:10.491024: step: 354/466, loss: 0.12371134012937546 2023-01-24 01:53:11.121510: step: 356/466, loss: 0.11389538645744324 2023-01-24 01:53:11.680170: step: 358/466, loss: 0.09537847340106964 2023-01-24 01:53:12.356395: step: 360/466, loss: 0.4542959928512573 2023-01-24 01:53:12.929813: step: 362/466, loss: 0.057292964309453964 2023-01-24 01:53:13.697221: step: 364/466, loss: 0.21706074476242065 2023-01-24 01:53:14.336689: step: 366/466, loss: 0.802412211894989 2023-01-24 01:53:15.007692: step: 368/466, loss: 0.2602801024913788 2023-01-24 01:53:15.602095: step: 370/466, loss: 0.0633874163031578 2023-01-24 01:53:16.211119: step: 372/466, loss: 0.16423580050468445 2023-01-24 01:53:16.829170: step: 374/466, loss: 0.23285065591335297 2023-01-24 01:53:17.475999: step: 376/466, loss: 0.10875140875577927 2023-01-24 01:53:18.076840: step: 378/466, loss: 0.25117120146751404 2023-01-24 01:53:18.712813: step: 380/466, loss: 0.133839413523674 2023-01-24 01:53:19.373897: step: 382/466, loss: 0.13380390405654907 2023-01-24 01:53:19.903839: step: 384/466, loss: 0.14601582288742065 2023-01-24 01:53:20.622486: step: 386/466, loss: 0.1574646681547165 2023-01-24 01:53:21.198532: step: 388/466, loss: 0.07392328977584839 2023-01-24 01:53:21.806147: step: 390/466, loss: 0.6617650389671326 2023-01-24 01:53:22.389005: step: 392/466, loss: 0.09536303579807281 2023-01-24 01:53:22.982164: step: 394/466, loss: 0.31436464190483093 2023-01-24 01:53:23.587919: step: 396/466, loss: 0.4017498791217804 2023-01-24 01:53:24.159869: step: 398/466, loss: 0.08853510767221451 2023-01-24 01:53:24.832433: step: 400/466, loss: 0.11967477202415466 2023-01-24 01:53:25.403881: step: 402/466, loss: 4.735015869140625 2023-01-24 01:53:26.037300: step: 404/466, loss: 0.6043958067893982 2023-01-24 01:53:26.589490: step: 406/466, loss: 0.3705092966556549 2023-01-24 01:53:27.275202: step: 408/466, loss: 0.10290555655956268 2023-01-24 01:53:27.876023: step: 410/466, loss: 0.06083455681800842 2023-01-24 01:53:28.453507: step: 412/466, loss: 0.05793687701225281 2023-01-24 01:53:29.079005: step: 414/466, loss: 0.29581812024116516 2023-01-24 01:53:29.750825: step: 416/466, loss: 0.09313644468784332 2023-01-24 01:53:30.343980: step: 418/466, loss: 0.08619770407676697 2023-01-24 01:53:30.894153: step: 420/466, loss: 0.05409029871225357 2023-01-24 01:53:31.496879: step: 422/466, loss: 0.19690914452075958 2023-01-24 01:53:32.112617: step: 424/466, loss: 0.14666756987571716 2023-01-24 01:53:32.720993: step: 426/466, loss: 0.060391172766685486 2023-01-24 01:53:33.291343: step: 428/466, loss: 0.1270732432603836 2023-01-24 01:53:33.930359: step: 430/466, loss: 0.304675430059433 2023-01-24 01:53:34.519532: step: 432/466, loss: 0.37709683179855347 2023-01-24 01:53:35.165369: step: 434/466, loss: 0.06947958469390869 2023-01-24 01:53:35.833873: step: 436/466, loss: 0.12346003204584122 2023-01-24 01:53:36.434623: step: 438/466, loss: 0.1719336062669754 2023-01-24 01:53:36.994051: step: 440/466, loss: 0.27502453327178955 2023-01-24 01:53:37.659151: step: 442/466, loss: 0.49445199966430664 2023-01-24 01:53:38.268482: step: 444/466, loss: 0.07066982239484787 2023-01-24 01:53:38.910656: step: 446/466, loss: 0.7173477411270142 2023-01-24 01:53:39.494242: step: 448/466, loss: 0.28133848309516907 2023-01-24 01:53:40.114413: step: 450/466, loss: 0.17305824160575867 2023-01-24 01:53:40.786941: step: 452/466, loss: 0.1007755696773529 2023-01-24 01:53:41.455758: step: 454/466, loss: 0.07673931121826172 2023-01-24 01:53:42.118633: step: 456/466, loss: 0.26113641262054443 2023-01-24 01:53:42.725022: step: 458/466, loss: 0.09634999930858612 2023-01-24 01:53:43.415689: step: 460/466, loss: 0.3144139349460602 2023-01-24 01:53:44.041164: step: 462/466, loss: 0.1348186433315277 2023-01-24 01:53:44.741483: step: 464/466, loss: 0.6207475662231445 2023-01-24 01:53:45.402516: step: 466/466, loss: 0.6087349057197571 2023-01-24 01:53:46.030905: step: 468/466, loss: 1.3309130668640137 2023-01-24 01:53:46.665446: step: 470/466, loss: 0.11831938475370407 2023-01-24 01:53:47.270630: step: 472/466, loss: 0.35188978910446167 2023-01-24 01:53:47.985905: step: 474/466, loss: 0.07000864297151566 2023-01-24 01:53:48.648695: step: 476/466, loss: 0.15208332240581512 2023-01-24 01:53:49.217793: step: 478/466, loss: 0.2524581849575043 2023-01-24 01:53:49.847464: step: 480/466, loss: 0.1295643150806427 2023-01-24 01:53:50.453873: step: 482/466, loss: 0.13918237388134003 2023-01-24 01:53:51.054166: step: 484/466, loss: 0.06539881229400635 2023-01-24 01:53:51.717021: step: 486/466, loss: 0.7312734127044678 2023-01-24 01:53:52.356794: step: 488/466, loss: 0.3051009178161621 2023-01-24 01:53:52.974197: step: 490/466, loss: 0.29282113909721375 2023-01-24 01:53:53.601632: step: 492/466, loss: 0.06706909090280533 2023-01-24 01:53:54.277213: step: 494/466, loss: 0.19537198543548584 2023-01-24 01:53:54.861212: step: 496/466, loss: 1.2063255310058594 2023-01-24 01:53:55.474673: step: 498/466, loss: 0.24088512361049652 2023-01-24 01:53:56.113274: step: 500/466, loss: 0.08956007659435272 2023-01-24 01:53:56.780432: step: 502/466, loss: 1.016743779182434 2023-01-24 01:53:57.406768: step: 504/466, loss: 0.32006266713142395 2023-01-24 01:53:58.077225: step: 506/466, loss: 0.4554866552352905 2023-01-24 01:53:58.681944: step: 508/466, loss: 0.6734225749969482 2023-01-24 01:53:59.308756: step: 510/466, loss: 0.294268399477005 2023-01-24 01:53:59.957173: step: 512/466, loss: 0.22506310045719147 2023-01-24 01:54:00.630584: step: 514/466, loss: 0.3229205012321472 2023-01-24 01:54:01.240072: step: 516/466, loss: 0.6201850771903992 2023-01-24 01:54:01.822822: step: 518/466, loss: 0.07383572310209274 2023-01-24 01:54:02.462631: step: 520/466, loss: 0.6262032985687256 2023-01-24 01:54:03.111052: step: 522/466, loss: 0.24702614545822144 2023-01-24 01:54:03.745592: step: 524/466, loss: 0.9728817343711853 2023-01-24 01:54:04.441368: step: 526/466, loss: 2.29763126373291 2023-01-24 01:54:05.086261: step: 528/466, loss: 0.4962475001811981 2023-01-24 01:54:05.657540: step: 530/466, loss: 0.143583282828331 2023-01-24 01:54:06.287911: step: 532/466, loss: 0.3662044107913971 2023-01-24 01:54:06.867786: step: 534/466, loss: 3.1325182914733887 2023-01-24 01:54:07.470277: step: 536/466, loss: 0.18455298244953156 2023-01-24 01:54:08.061489: step: 538/466, loss: 0.17454926669597626 2023-01-24 01:54:08.723182: step: 540/466, loss: 0.12500549852848053 2023-01-24 01:54:09.385067: step: 542/466, loss: 0.22460201382637024 2023-01-24 01:54:10.120430: step: 544/466, loss: 0.33004269003868103 2023-01-24 01:54:10.730612: step: 546/466, loss: 0.20828965306282043 2023-01-24 01:54:11.360797: step: 548/466, loss: 0.17520847916603088 2023-01-24 01:54:11.975031: step: 550/466, loss: 0.10480187088251114 2023-01-24 01:54:12.642590: step: 552/466, loss: 0.13264250755310059 2023-01-24 01:54:13.279256: step: 554/466, loss: 0.21460601687431335 2023-01-24 01:54:13.904983: step: 556/466, loss: 0.08982677757740021 2023-01-24 01:54:14.591855: step: 558/466, loss: 0.10906903445720673 2023-01-24 01:54:15.255605: step: 560/466, loss: 0.21557100117206573 2023-01-24 01:54:15.893288: step: 562/466, loss: 0.13042625784873962 2023-01-24 01:54:16.506400: step: 564/466, loss: 0.15146774053573608 2023-01-24 01:54:17.061702: step: 566/466, loss: 0.03116701729595661 2023-01-24 01:54:17.661841: step: 568/466, loss: 0.02243943326175213 2023-01-24 01:54:18.350021: step: 570/466, loss: 0.08668933063745499 2023-01-24 01:54:19.012812: step: 572/466, loss: 0.04543516784906387 2023-01-24 01:54:19.623927: step: 574/466, loss: 0.38458898663520813 2023-01-24 01:54:20.278219: step: 576/466, loss: 0.2776734530925751 2023-01-24 01:54:20.890154: step: 578/466, loss: 0.13305240869522095 2023-01-24 01:54:21.540392: step: 580/466, loss: 0.3373351991176605 2023-01-24 01:54:22.200825: step: 582/466, loss: 0.40106603503227234 2023-01-24 01:54:22.875717: step: 584/466, loss: 0.37612906098365784 2023-01-24 01:54:23.493683: step: 586/466, loss: 0.5300690531730652 2023-01-24 01:54:24.131656: step: 588/466, loss: 0.4692898392677307 2023-01-24 01:54:24.713919: step: 590/466, loss: 0.18448734283447266 2023-01-24 01:54:25.350345: step: 592/466, loss: 0.15303319692611694 2023-01-24 01:54:25.951038: step: 594/466, loss: 0.07411587238311768 2023-01-24 01:54:26.552512: step: 596/466, loss: 0.08063060790300369 2023-01-24 01:54:27.158326: step: 598/466, loss: 0.19190742075443268 2023-01-24 01:54:27.761411: step: 600/466, loss: 0.17303743958473206 2023-01-24 01:54:28.368320: step: 602/466, loss: 0.7020127773284912 2023-01-24 01:54:28.993899: step: 604/466, loss: 0.21286289393901825 2023-01-24 01:54:29.632910: step: 606/466, loss: 0.12226472795009613 2023-01-24 01:54:30.240592: step: 608/466, loss: 0.17794010043144226 2023-01-24 01:54:30.837583: step: 610/466, loss: 0.05442595109343529 2023-01-24 01:54:31.460266: step: 612/466, loss: 0.7128135561943054 2023-01-24 01:54:32.078866: step: 614/466, loss: 0.13611501455307007 2023-01-24 01:54:32.720923: step: 616/466, loss: 0.41149094700813293 2023-01-24 01:54:33.271725: step: 618/466, loss: 0.3342394530773163 2023-01-24 01:54:33.883685: step: 620/466, loss: 0.2459389865398407 2023-01-24 01:54:34.474497: step: 622/466, loss: 0.10671906918287277 2023-01-24 01:54:35.077442: step: 624/466, loss: 0.08950351923704147 2023-01-24 01:54:35.678698: step: 626/466, loss: 0.11727484315633774 2023-01-24 01:54:36.332223: step: 628/466, loss: 0.06244143843650818 2023-01-24 01:54:36.923963: step: 630/466, loss: 0.12855567038059235 2023-01-24 01:54:37.515801: step: 632/466, loss: 0.0703582912683487 2023-01-24 01:54:38.114499: step: 634/466, loss: 0.05989354848861694 2023-01-24 01:54:38.698439: step: 636/466, loss: 0.19541144371032715 2023-01-24 01:54:39.319411: step: 638/466, loss: 0.077695332467556 2023-01-24 01:54:39.900873: step: 640/466, loss: 0.3223434388637543 2023-01-24 01:54:40.487864: step: 642/466, loss: 0.08753734081983566 2023-01-24 01:54:41.054543: step: 644/466, loss: 0.1359861195087433 2023-01-24 01:54:41.671688: step: 646/466, loss: 0.44570258259773254 2023-01-24 01:54:42.321240: step: 648/466, loss: 0.23596426844596863 2023-01-24 01:54:42.989424: step: 650/466, loss: 0.3924348056316376 2023-01-24 01:54:43.602932: step: 652/466, loss: 0.4193865656852722 2023-01-24 01:54:44.239276: step: 654/466, loss: 0.10640563070774078 2023-01-24 01:54:44.824549: step: 656/466, loss: 0.06833884119987488 2023-01-24 01:54:45.481261: step: 658/466, loss: 0.17900003492832184 2023-01-24 01:54:46.104563: step: 660/466, loss: 0.17604024708271027 2023-01-24 01:54:46.745220: step: 662/466, loss: 0.327255517244339 2023-01-24 01:54:47.350027: step: 664/466, loss: 0.18427890539169312 2023-01-24 01:54:47.925628: step: 666/466, loss: 0.2898465394973755 2023-01-24 01:54:48.590874: step: 668/466, loss: 0.1335490345954895 2023-01-24 01:54:49.255918: step: 670/466, loss: 0.12968330085277557 2023-01-24 01:54:49.872054: step: 672/466, loss: 0.39474010467529297 2023-01-24 01:54:50.558841: step: 674/466, loss: 0.1463014930486679 2023-01-24 01:54:51.146689: step: 676/466, loss: 1.5141112804412842 2023-01-24 01:54:51.801002: step: 678/466, loss: 0.5454038977622986 2023-01-24 01:54:52.404159: step: 680/466, loss: 0.2692568004131317 2023-01-24 01:54:52.991251: step: 682/466, loss: 0.5969122648239136 2023-01-24 01:54:53.580536: step: 684/466, loss: 0.05869481712579727 2023-01-24 01:54:54.145957: step: 686/466, loss: 0.9477701187133789 2023-01-24 01:54:54.731931: step: 688/466, loss: 0.4314132034778595 2023-01-24 01:54:55.399485: step: 690/466, loss: 0.13118954002857208 2023-01-24 01:54:56.044184: step: 692/466, loss: 0.12757283449172974 2023-01-24 01:54:56.654396: step: 694/466, loss: 0.16748552024364471 2023-01-24 01:54:57.285120: step: 696/466, loss: 0.4196185767650604 2023-01-24 01:54:57.858704: step: 698/466, loss: 0.1329767256975174 2023-01-24 01:54:58.510322: step: 700/466, loss: 0.32858502864837646 2023-01-24 01:54:59.186680: step: 702/466, loss: 0.08262277394533157 2023-01-24 01:54:59.812790: step: 704/466, loss: 0.1173609271645546 2023-01-24 01:55:00.440111: step: 706/466, loss: 0.2870000898838043 2023-01-24 01:55:01.091085: step: 708/466, loss: 0.5597653388977051 2023-01-24 01:55:01.768360: step: 710/466, loss: 0.14444905519485474 2023-01-24 01:55:02.632429: step: 712/466, loss: 0.35095858573913574 2023-01-24 01:55:03.245891: step: 714/466, loss: 0.1954328566789627 2023-01-24 01:55:03.828643: step: 716/466, loss: 0.19545452296733856 2023-01-24 01:55:04.420026: step: 718/466, loss: 0.7782852649688721 2023-01-24 01:55:05.078834: step: 720/466, loss: 0.5309181213378906 2023-01-24 01:55:05.765915: step: 722/466, loss: 0.36571553349494934 2023-01-24 01:55:06.342585: step: 724/466, loss: 0.7747761011123657 2023-01-24 01:55:06.941200: step: 726/466, loss: 0.20885266363620758 2023-01-24 01:55:07.614328: step: 728/466, loss: 0.17795908451080322 2023-01-24 01:55:08.247528: step: 730/466, loss: 0.09812970459461212 2023-01-24 01:55:08.943496: step: 732/466, loss: 0.6455363631248474 2023-01-24 01:55:09.548262: step: 734/466, loss: 0.4045051336288452 2023-01-24 01:55:10.144182: step: 736/466, loss: 0.10788479447364807 2023-01-24 01:55:10.853082: step: 738/466, loss: 0.14161811769008636 2023-01-24 01:55:11.429338: step: 740/466, loss: 0.11607873439788818 2023-01-24 01:55:12.111529: step: 742/466, loss: 0.055919233709573746 2023-01-24 01:55:12.689061: step: 744/466, loss: 0.1498071700334549 2023-01-24 01:55:13.277096: step: 746/466, loss: 0.03807078301906586 2023-01-24 01:55:13.890942: step: 748/466, loss: 1.082453966140747 2023-01-24 01:55:14.534511: step: 750/466, loss: 0.1055598184466362 2023-01-24 01:55:15.111030: step: 752/466, loss: 0.19270923733711243 2023-01-24 01:55:15.726101: step: 754/466, loss: 0.08269581198692322 2023-01-24 01:55:16.349166: step: 756/466, loss: 0.12768109142780304 2023-01-24 01:55:16.955065: step: 758/466, loss: 0.20174935460090637 2023-01-24 01:55:17.556193: step: 760/466, loss: 0.026714444160461426 2023-01-24 01:55:18.157736: step: 762/466, loss: 0.25208786129951477 2023-01-24 01:55:18.791016: step: 764/466, loss: 0.20811015367507935 2023-01-24 01:55:19.456229: step: 766/466, loss: 0.18629762530326843 2023-01-24 01:55:20.127219: step: 768/466, loss: 0.07611539959907532 2023-01-24 01:55:20.740607: step: 770/466, loss: 0.16778625547885895 2023-01-24 01:55:21.342382: step: 772/466, loss: 0.8209713101387024 2023-01-24 01:55:22.004762: step: 774/466, loss: 0.06022028997540474 2023-01-24 01:55:22.616830: step: 776/466, loss: 0.05615582689642906 2023-01-24 01:55:23.285633: step: 778/466, loss: 0.08323463052511215 2023-01-24 01:55:23.952441: step: 780/466, loss: 0.28745153546333313 2023-01-24 01:55:24.659395: step: 782/466, loss: 0.15258856117725372 2023-01-24 01:55:25.288485: step: 784/466, loss: 0.11546079814434052 2023-01-24 01:55:25.918251: step: 786/466, loss: 0.15049335360527039 2023-01-24 01:55:26.576977: step: 788/466, loss: 0.1887330263853073 2023-01-24 01:55:27.187303: step: 790/466, loss: 0.0862591415643692 2023-01-24 01:55:27.911837: step: 792/466, loss: 0.8746164441108704 2023-01-24 01:55:28.503522: step: 794/466, loss: 0.20723503828048706 2023-01-24 01:55:29.138288: step: 796/466, loss: 0.4907574951648712 2023-01-24 01:55:29.775211: step: 798/466, loss: 1.3156168460845947 2023-01-24 01:55:30.376082: step: 800/466, loss: 0.21074840426445007 2023-01-24 01:55:31.116260: step: 802/466, loss: 0.3152695596218109 2023-01-24 01:55:31.694901: step: 804/466, loss: 0.06980019062757492 2023-01-24 01:55:32.296287: step: 806/466, loss: 1.8368053436279297 2023-01-24 01:55:32.964954: step: 808/466, loss: 1.4271897077560425 2023-01-24 01:55:33.605669: step: 810/466, loss: 0.19687141478061676 2023-01-24 01:55:34.226437: step: 812/466, loss: 0.3990250825881958 2023-01-24 01:55:34.826742: step: 814/466, loss: 0.15776890516281128 2023-01-24 01:55:35.531725: step: 816/466, loss: 0.11926817893981934 2023-01-24 01:55:36.172366: step: 818/466, loss: 0.07674985378980637 2023-01-24 01:55:36.802540: step: 820/466, loss: 0.1839275360107422 2023-01-24 01:55:37.399929: step: 822/466, loss: 0.035189174115657806 2023-01-24 01:55:38.089300: step: 824/466, loss: 0.2586238384246826 2023-01-24 01:55:38.670066: step: 826/466, loss: 0.11197404563426971 2023-01-24 01:55:39.308277: step: 828/466, loss: 0.41328197717666626 2023-01-24 01:55:39.931482: step: 830/466, loss: 0.6752942204475403 2023-01-24 01:55:40.577427: step: 832/466, loss: 0.23095834255218506 2023-01-24 01:55:41.197992: step: 834/466, loss: 0.19791486859321594 2023-01-24 01:55:41.933594: step: 836/466, loss: 0.4155274033546448 2023-01-24 01:55:42.543473: step: 838/466, loss: 0.11686521023511887 2023-01-24 01:55:43.181078: step: 840/466, loss: 0.10920114070177078 2023-01-24 01:55:43.874109: step: 842/466, loss: 0.1957877278327942 2023-01-24 01:55:44.495912: step: 844/466, loss: 0.19180512428283691 2023-01-24 01:55:45.105775: step: 846/466, loss: 0.8602334260940552 2023-01-24 01:55:45.933985: step: 848/466, loss: 1.637770175933838 2023-01-24 01:55:46.572915: step: 850/466, loss: 0.08310823887586594 2023-01-24 01:55:47.294222: step: 852/466, loss: 0.3732314109802246 2023-01-24 01:55:47.914017: step: 854/466, loss: 0.2023201435804367 2023-01-24 01:55:48.563359: step: 856/466, loss: 0.3385159969329834 2023-01-24 01:55:49.190125: step: 858/466, loss: 0.10411600768566132 2023-01-24 01:55:49.783327: step: 860/466, loss: 0.18611595034599304 2023-01-24 01:55:50.375827: step: 862/466, loss: 0.24844716489315033 2023-01-24 01:55:50.938116: step: 864/466, loss: 0.12318140268325806 2023-01-24 01:55:51.542483: step: 866/466, loss: 0.20975176990032196 2023-01-24 01:55:52.149893: step: 868/466, loss: 0.1696348488330841 2023-01-24 01:55:52.788905: step: 870/466, loss: 0.23161551356315613 2023-01-24 01:55:53.330419: step: 872/466, loss: 0.19035020470619202 2023-01-24 01:55:53.943748: step: 874/466, loss: 0.3820752203464508 2023-01-24 01:55:54.603057: step: 876/466, loss: 0.1305333822965622 2023-01-24 01:55:55.254960: step: 878/466, loss: 0.1538640856742859 2023-01-24 01:55:55.853701: step: 880/466, loss: 0.3288590908050537 2023-01-24 01:55:56.404695: step: 882/466, loss: 0.18580347299575806 2023-01-24 01:55:57.009043: step: 884/466, loss: 0.1356642097234726 2023-01-24 01:55:57.643093: step: 886/466, loss: 0.49856212735176086 2023-01-24 01:55:58.340404: step: 888/466, loss: 0.23073936998844147 2023-01-24 01:55:58.930431: step: 890/466, loss: 0.2459000051021576 2023-01-24 01:55:59.590129: step: 892/466, loss: 0.18568305671215057 2023-01-24 01:56:00.216800: step: 894/466, loss: 0.15195661783218384 2023-01-24 01:56:00.865881: step: 896/466, loss: 0.11524511873722076 2023-01-24 01:56:01.464220: step: 898/466, loss: 0.11277007311582565 2023-01-24 01:56:02.084135: step: 900/466, loss: 0.04688365012407303 2023-01-24 01:56:02.676652: step: 902/466, loss: 0.08872295916080475 2023-01-24 01:56:03.274531: step: 904/466, loss: 0.14066576957702637 2023-01-24 01:56:03.847650: step: 906/466, loss: 0.2034319043159485 2023-01-24 01:56:04.484888: step: 908/466, loss: 0.09343183040618896 2023-01-24 01:56:05.086620: step: 910/466, loss: 0.36260998249053955 2023-01-24 01:56:05.657129: step: 912/466, loss: 0.15928703546524048 2023-01-24 01:56:06.236646: step: 914/466, loss: 1.7589353322982788 2023-01-24 01:56:06.806313: step: 916/466, loss: 0.3109453618526459 2023-01-24 01:56:07.441612: step: 918/466, loss: 0.08942896127700806 2023-01-24 01:56:08.065646: step: 920/466, loss: 0.12767262756824493 2023-01-24 01:56:08.729982: step: 922/466, loss: 0.1355731040239334 2023-01-24 01:56:09.411472: step: 924/466, loss: 0.20094452798366547 2023-01-24 01:56:10.024037: step: 926/466, loss: 0.2024645060300827 2023-01-24 01:56:10.632973: step: 928/466, loss: 0.08893533796072006 2023-01-24 01:56:11.258601: step: 930/466, loss: 0.24943366646766663 2023-01-24 01:56:11.806163: step: 932/466, loss: 0.11482098698616028 ================================================== Loss: 0.289 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34702878474830956, 'r': 0.3187133431085044, 'f1': 0.33226890567395023}, 'combined': 0.2448297199702791, 'epoch': 12} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3374244945662739, 'r': 0.26531484153452733, 'f1': 0.2970561930962048}, 'combined': 0.18600714894809087, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33613684110007636, 'r': 0.3252937171936223, 'f1': 0.33062640108204233}, 'combined': 0.24361945342887328, 'epoch': 12} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3443748959580015, 'r': 0.2753755936992684, 'f1': 0.30603426060059113}, 'combined': 0.18965503473839448, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3101533457249071, 'r': 0.3166271347248577, 'f1': 0.31335680751173717}, 'combined': 0.23089448974549054, 'epoch': 12} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.34558112654998463, 'r': 0.27065838908657336, 'f1': 0.30356518411229655}, 'combined': 0.20137492411409771, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3161764705882353, 'r': 0.30714285714285716, 'f1': 0.3115942028985507}, 'combined': 0.2077294685990338, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34375, 'r': 0.358695652173913, 'f1': 0.351063829787234}, 'combined': 0.175531914893617, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.19827586206896552, 'f1': 0.2555555555555556}, 'combined': 0.1703703703703704, 'epoch': 12} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3101533457249071, 'r': 0.3166271347248577, 'f1': 0.31335680751173717}, 'combined': 0.23089448974549054, 'epoch': 12} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.34558112654998463, 'r': 0.27065838908657336, 'f1': 0.30356518411229655}, 'combined': 0.20137492411409771, 'epoch': 12} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.19827586206896552, 'f1': 0.2555555555555556}, 'combined': 0.1703703703703704, 'epoch': 12} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:58:53.742655: step: 2/466, loss: 0.1640540212392807 2023-01-24 01:58:54.375489: step: 4/466, loss: 0.3309607207775116 2023-01-24 01:58:55.038057: step: 6/466, loss: 0.2043866664171219 2023-01-24 01:58:55.574813: step: 8/466, loss: 0.08511985093355179 2023-01-24 01:58:56.191072: step: 10/466, loss: 0.03777071461081505 2023-01-24 01:58:56.838573: step: 12/466, loss: 0.4136914014816284 2023-01-24 01:58:57.469894: step: 14/466, loss: 0.03706258535385132 2023-01-24 01:58:58.132110: step: 16/466, loss: 0.2376665621995926 2023-01-24 01:58:58.812374: step: 18/466, loss: 0.5675952434539795 2023-01-24 01:58:59.405072: step: 20/466, loss: 0.20553147792816162 2023-01-24 01:59:00.028647: step: 22/466, loss: 0.25619378685951233 2023-01-24 01:59:00.675723: step: 24/466, loss: 0.07210592180490494 2023-01-24 01:59:01.276588: step: 26/466, loss: 0.11914689838886261 2023-01-24 01:59:01.877136: step: 28/466, loss: 0.5526707768440247 2023-01-24 01:59:02.538051: step: 30/466, loss: 0.22043925523757935 2023-01-24 01:59:03.129527: step: 32/466, loss: 0.07043454796075821 2023-01-24 01:59:03.815216: step: 34/466, loss: 0.17447586357593536 2023-01-24 01:59:04.482542: step: 36/466, loss: 0.14629477262496948 2023-01-24 01:59:05.081139: step: 38/466, loss: 0.04400822892785072 2023-01-24 01:59:05.681221: step: 40/466, loss: 0.27579110860824585 2023-01-24 01:59:06.258475: step: 42/466, loss: 0.11529107391834259 2023-01-24 01:59:06.811652: step: 44/466, loss: 0.060026880353689194 2023-01-24 01:59:07.486253: step: 46/466, loss: 0.08529799431562424 2023-01-24 01:59:08.114474: step: 48/466, loss: 0.13222745060920715 2023-01-24 01:59:08.807285: step: 50/466, loss: 0.0990067720413208 2023-01-24 01:59:09.432045: step: 52/466, loss: 0.13855914771556854 2023-01-24 01:59:10.097524: step: 54/466, loss: 0.13198557496070862 2023-01-24 01:59:10.786954: step: 56/466, loss: 0.11340803653001785 2023-01-24 01:59:11.399817: step: 58/466, loss: 0.5908045768737793 2023-01-24 01:59:12.064050: step: 60/466, loss: 0.14172838628292084 2023-01-24 01:59:12.642737: step: 62/466, loss: 0.14429081976413727 2023-01-24 01:59:13.312732: step: 64/466, loss: 0.36701783537864685 2023-01-24 01:59:13.997442: step: 66/466, loss: 0.197360098361969 2023-01-24 01:59:14.549459: step: 68/466, loss: 0.08826547116041183 2023-01-24 01:59:15.135669: step: 70/466, loss: 0.3188633918762207 2023-01-24 01:59:15.716004: step: 72/466, loss: 0.061413321644067764 2023-01-24 01:59:16.302665: step: 74/466, loss: 0.10852718353271484 2023-01-24 01:59:16.932358: step: 76/466, loss: 0.09878215193748474 2023-01-24 01:59:17.563575: step: 78/466, loss: 0.069364532828331 2023-01-24 01:59:18.175771: step: 80/466, loss: 0.1032869890332222 2023-01-24 01:59:18.734923: step: 82/466, loss: 0.08465754985809326 2023-01-24 01:59:19.293235: step: 84/466, loss: 0.22014209628105164 2023-01-24 01:59:19.935350: step: 86/466, loss: 0.12644293904304504 2023-01-24 01:59:20.491810: step: 88/466, loss: 0.14519761502742767 2023-01-24 01:59:21.090777: step: 90/466, loss: 0.706619143486023 2023-01-24 01:59:21.702154: step: 92/466, loss: 0.20220623910427094 2023-01-24 01:59:22.325468: step: 94/466, loss: 0.08715146034955978 2023-01-24 01:59:23.014417: step: 96/466, loss: 0.25675857067108154 2023-01-24 01:59:23.621626: step: 98/466, loss: 0.17394381761550903 2023-01-24 01:59:24.214948: step: 100/466, loss: 0.18044376373291016 2023-01-24 01:59:24.826807: step: 102/466, loss: 0.10915549844503403 2023-01-24 01:59:25.355896: step: 104/466, loss: 0.06167028471827507 2023-01-24 01:59:25.985201: step: 106/466, loss: 0.06519010663032532 2023-01-24 01:59:26.584678: step: 108/466, loss: 0.12466680258512497 2023-01-24 01:59:27.172423: step: 110/466, loss: 0.13578496873378754 2023-01-24 01:59:27.866041: step: 112/466, loss: 0.5819948315620422 2023-01-24 01:59:28.490867: step: 114/466, loss: 0.1312570869922638 2023-01-24 01:59:29.062103: step: 116/466, loss: 0.1735519915819168 2023-01-24 01:59:29.738781: step: 118/466, loss: 1.240576982498169 2023-01-24 01:59:30.358639: step: 120/466, loss: 0.1172071099281311 2023-01-24 01:59:31.009914: step: 122/466, loss: 0.10495390743017197 2023-01-24 01:59:31.579606: step: 124/466, loss: 0.11870117485523224 2023-01-24 01:59:32.171017: step: 126/466, loss: 0.13622358441352844 2023-01-24 01:59:32.759490: step: 128/466, loss: 0.11489726603031158 2023-01-24 01:59:33.349414: step: 130/466, loss: 0.22453820705413818 2023-01-24 01:59:34.007859: step: 132/466, loss: 0.14519764482975006 2023-01-24 01:59:34.656136: step: 134/466, loss: 0.1792580485343933 2023-01-24 01:59:35.298225: step: 136/466, loss: 1.21463143825531 2023-01-24 01:59:35.883361: step: 138/466, loss: 0.06776592135429382 2023-01-24 01:59:36.481963: step: 140/466, loss: 0.36141058802604675 2023-01-24 01:59:37.118067: step: 142/466, loss: 0.41635945439338684 2023-01-24 01:59:37.698061: step: 144/466, loss: 0.42210379242897034 2023-01-24 01:59:38.265338: step: 146/466, loss: 0.1286270171403885 2023-01-24 01:59:38.872112: step: 148/466, loss: 0.152645543217659 2023-01-24 01:59:39.562580: step: 150/466, loss: 0.129474475979805 2023-01-24 01:59:40.158170: step: 152/466, loss: 0.31282925605773926 2023-01-24 01:59:40.792668: step: 154/466, loss: 0.27455687522888184 2023-01-24 01:59:41.406868: step: 156/466, loss: 0.08866291493177414 2023-01-24 01:59:42.016485: step: 158/466, loss: 0.16239285469055176 2023-01-24 01:59:42.573966: step: 160/466, loss: 0.048395611345767975 2023-01-24 01:59:43.169488: step: 162/466, loss: 0.19695651531219482 2023-01-24 01:59:43.788175: step: 164/466, loss: 0.24603235721588135 2023-01-24 01:59:44.474579: step: 166/466, loss: 0.09938321262598038 2023-01-24 01:59:45.138191: step: 168/466, loss: 0.1550007462501526 2023-01-24 01:59:45.870145: step: 170/466, loss: 0.29955270886421204 2023-01-24 01:59:46.513743: step: 172/466, loss: 0.09472016245126724 2023-01-24 01:59:47.155139: step: 174/466, loss: 0.4685268998146057 2023-01-24 01:59:47.780208: step: 176/466, loss: 0.19812646508216858 2023-01-24 01:59:48.380587: step: 178/466, loss: 0.15283237397670746 2023-01-24 01:59:49.077006: step: 180/466, loss: 0.04112152382731438 2023-01-24 01:59:49.711776: step: 182/466, loss: 0.3206070363521576 2023-01-24 01:59:50.477941: step: 184/466, loss: 0.08839256316423416 2023-01-24 01:59:51.089782: step: 186/466, loss: 0.02866632677614689 2023-01-24 01:59:51.734901: step: 188/466, loss: 0.4633135497570038 2023-01-24 01:59:52.295091: step: 190/466, loss: 0.0650775283575058 2023-01-24 01:59:52.878778: step: 192/466, loss: 0.04255685210227966 2023-01-24 01:59:53.522275: step: 194/466, loss: 0.15480415523052216 2023-01-24 01:59:54.175143: step: 196/466, loss: 0.09784644097089767 2023-01-24 01:59:54.772569: step: 198/466, loss: 0.4938371479511261 2023-01-24 01:59:55.376874: step: 200/466, loss: 0.21996623277664185 2023-01-24 01:59:55.961808: step: 202/466, loss: 0.5384853482246399 2023-01-24 01:59:56.601131: step: 204/466, loss: 0.7514946460723877 2023-01-24 01:59:57.285092: step: 206/466, loss: 0.4952646791934967 2023-01-24 01:59:57.916484: step: 208/466, loss: 0.05475543439388275 2023-01-24 01:59:58.587256: step: 210/466, loss: 0.0820336639881134 2023-01-24 01:59:59.231304: step: 212/466, loss: 0.17772632837295532 2023-01-24 01:59:59.902004: step: 214/466, loss: 0.07154051959514618 2023-01-24 02:00:00.596154: step: 216/466, loss: 0.39577537775039673 2023-01-24 02:00:01.214235: step: 218/466, loss: 0.10776152461767197 2023-01-24 02:00:01.800372: step: 220/466, loss: 0.23313681781291962 2023-01-24 02:00:02.427636: step: 222/466, loss: 0.2835061252117157 2023-01-24 02:00:03.006479: step: 224/466, loss: 0.15277494490146637 2023-01-24 02:00:03.634696: step: 226/466, loss: 0.08761324733495712 2023-01-24 02:00:04.290622: step: 228/466, loss: 0.1521938443183899 2023-01-24 02:00:04.858199: step: 230/466, loss: 1.1920808553695679 2023-01-24 02:00:05.481271: step: 232/466, loss: 0.2710120677947998 2023-01-24 02:00:06.094350: step: 234/466, loss: 0.1194315254688263 2023-01-24 02:00:06.702644: step: 236/466, loss: 0.5656651258468628 2023-01-24 02:00:07.364251: step: 238/466, loss: 0.4444570541381836 2023-01-24 02:00:07.993150: step: 240/466, loss: 0.15339183807373047 2023-01-24 02:00:08.616673: step: 242/466, loss: 0.22125717997550964 2023-01-24 02:00:09.363068: step: 244/466, loss: 0.252727210521698 2023-01-24 02:00:09.971527: step: 246/466, loss: 0.04293074458837509 2023-01-24 02:00:10.598890: step: 248/466, loss: 0.39329782128334045 2023-01-24 02:00:11.234638: step: 250/466, loss: 0.11064071953296661 2023-01-24 02:00:11.831274: step: 252/466, loss: 0.03417646139860153 2023-01-24 02:00:12.485117: step: 254/466, loss: 0.7463078498840332 2023-01-24 02:00:13.087885: step: 256/466, loss: 0.09719155728816986 2023-01-24 02:00:13.672870: step: 258/466, loss: 0.4411686956882477 2023-01-24 02:00:14.306338: step: 260/466, loss: 0.22540371119976044 2023-01-24 02:00:14.907148: step: 262/466, loss: 0.07298116385936737 2023-01-24 02:00:15.567949: step: 264/466, loss: 0.3341713547706604 2023-01-24 02:00:16.222174: step: 266/466, loss: 0.19830569624900818 2023-01-24 02:00:16.857862: step: 268/466, loss: 0.06072846055030823 2023-01-24 02:00:17.553611: step: 270/466, loss: 0.20249119400978088 2023-01-24 02:00:18.122594: step: 272/466, loss: 0.38819921016693115 2023-01-24 02:00:18.802490: step: 274/466, loss: 0.09318225085735321 2023-01-24 02:00:19.422110: step: 276/466, loss: 0.12391915917396545 2023-01-24 02:00:20.062646: step: 278/466, loss: 0.11748528480529785 2023-01-24 02:00:20.748598: step: 280/466, loss: 0.0606972798705101 2023-01-24 02:00:21.364279: step: 282/466, loss: 0.07778766751289368 2023-01-24 02:00:22.024066: step: 284/466, loss: 0.08217433840036392 2023-01-24 02:00:22.771978: step: 286/466, loss: 0.22422903776168823 2023-01-24 02:00:23.362639: step: 288/466, loss: 0.1362757831811905 2023-01-24 02:00:23.938013: step: 290/466, loss: 0.4838602542877197 2023-01-24 02:00:24.522254: step: 292/466, loss: 0.48333677649497986 2023-01-24 02:00:25.101907: step: 294/466, loss: 0.08443082123994827 2023-01-24 02:00:25.740755: step: 296/466, loss: 0.07937867194414139 2023-01-24 02:00:26.379227: step: 298/466, loss: 0.19950351119041443 2023-01-24 02:00:27.014218: step: 300/466, loss: 0.07143021374940872 2023-01-24 02:00:27.620603: step: 302/466, loss: 0.1029263511300087 2023-01-24 02:00:28.234010: step: 304/466, loss: 0.08823765814304352 2023-01-24 02:00:28.843929: step: 306/466, loss: 0.07364961504936218 2023-01-24 02:00:29.438945: step: 308/466, loss: 0.16864685714244843 2023-01-24 02:00:30.069704: step: 310/466, loss: 0.45850521326065063 2023-01-24 02:00:30.742458: step: 312/466, loss: 0.10307233780622482 2023-01-24 02:00:31.361294: step: 314/466, loss: 0.18089449405670166 2023-01-24 02:00:31.932763: step: 316/466, loss: 0.0594630092382431 2023-01-24 02:00:32.568281: step: 318/466, loss: 0.3256591558456421 2023-01-24 02:00:33.217931: step: 320/466, loss: 0.16105329990386963 2023-01-24 02:00:33.897716: step: 322/466, loss: 0.10364712029695511 2023-01-24 02:00:34.500122: step: 324/466, loss: 0.04758321866393089 2023-01-24 02:00:35.122944: step: 326/466, loss: 0.22980478405952454 2023-01-24 02:00:35.739978: step: 328/466, loss: 0.08890247344970703 2023-01-24 02:00:36.319987: step: 330/466, loss: 0.23473994433879852 2023-01-24 02:00:36.918119: step: 332/466, loss: 0.21703583002090454 2023-01-24 02:00:37.523959: step: 334/466, loss: 0.02021816000342369 2023-01-24 02:00:38.161353: step: 336/466, loss: 0.04711470007896423 2023-01-24 02:00:38.776517: step: 338/466, loss: 0.1616368442773819 2023-01-24 02:00:39.437291: step: 340/466, loss: 0.2109578549861908 2023-01-24 02:00:40.060292: step: 342/466, loss: 0.1256185621023178 2023-01-24 02:00:40.650428: step: 344/466, loss: 0.17266616225242615 2023-01-24 02:00:41.232984: step: 346/466, loss: 0.09382988512516022 2023-01-24 02:00:41.920736: step: 348/466, loss: 0.1868274211883545 2023-01-24 02:00:42.561719: step: 350/466, loss: 0.09630129486322403 2023-01-24 02:00:43.210606: step: 352/466, loss: 0.1375264823436737 2023-01-24 02:00:43.853507: step: 354/466, loss: 0.18844608962535858 2023-01-24 02:00:44.456636: step: 356/466, loss: 0.08993268013000488 2023-01-24 02:00:45.053825: step: 358/466, loss: 0.07612888514995575 2023-01-24 02:00:45.655476: step: 360/466, loss: 0.5700476169586182 2023-01-24 02:00:46.273898: step: 362/466, loss: 0.5032669305801392 2023-01-24 02:00:46.847636: step: 364/466, loss: 0.2480846643447876 2023-01-24 02:00:47.480400: step: 366/466, loss: 0.12014836817979813 2023-01-24 02:00:48.088843: step: 368/466, loss: 0.14348430931568146 2023-01-24 02:00:48.774427: step: 370/466, loss: 0.06538870930671692 2023-01-24 02:00:49.427419: step: 372/466, loss: 0.6643422842025757 2023-01-24 02:00:50.112637: step: 374/466, loss: 0.05286271125078201 2023-01-24 02:00:50.761169: step: 376/466, loss: 0.105209119617939 2023-01-24 02:00:51.376459: step: 378/466, loss: 0.26690319180488586 2023-01-24 02:00:51.923738: step: 380/466, loss: 0.15930518507957458 2023-01-24 02:00:52.537297: step: 382/466, loss: 0.15475893020629883 2023-01-24 02:00:53.108323: step: 384/466, loss: 0.2482644021511078 2023-01-24 02:00:53.750651: step: 386/466, loss: 0.07550220936536789 2023-01-24 02:00:54.366657: step: 388/466, loss: 0.18894460797309875 2023-01-24 02:00:54.974652: step: 390/466, loss: 0.091074638068676 2023-01-24 02:00:55.648082: step: 392/466, loss: 0.04081743583083153 2023-01-24 02:00:56.311966: step: 394/466, loss: 0.1577485054731369 2023-01-24 02:00:56.952282: step: 396/466, loss: 0.18509070575237274 2023-01-24 02:00:57.556334: step: 398/466, loss: 0.15012842416763306 2023-01-24 02:00:58.101029: step: 400/466, loss: 0.16608832776546478 2023-01-24 02:00:58.677512: step: 402/466, loss: 0.22238484025001526 2023-01-24 02:00:59.319563: step: 404/466, loss: 0.2626784145832062 2023-01-24 02:00:59.972304: step: 406/466, loss: 0.12193439155817032 2023-01-24 02:01:00.653204: step: 408/466, loss: 0.12389352172613144 2023-01-24 02:01:01.280610: step: 410/466, loss: 0.3696343004703522 2023-01-24 02:01:01.913702: step: 412/466, loss: 0.1802680492401123 2023-01-24 02:01:02.532426: step: 414/466, loss: 0.1357947587966919 2023-01-24 02:01:03.167371: step: 416/466, loss: 0.16391897201538086 2023-01-24 02:01:03.801374: step: 418/466, loss: 0.7947637438774109 2023-01-24 02:01:04.433250: step: 420/466, loss: 0.19358088076114655 2023-01-24 02:01:05.079395: step: 422/466, loss: 0.028136981651186943 2023-01-24 02:01:05.721168: step: 424/466, loss: 0.129495769739151 2023-01-24 02:01:06.281848: step: 426/466, loss: 0.06203593313694 2023-01-24 02:01:06.881816: step: 428/466, loss: 0.27690795063972473 2023-01-24 02:01:07.534381: step: 430/466, loss: 0.08294371515512466 2023-01-24 02:01:08.192019: step: 432/466, loss: 0.0887903943657875 2023-01-24 02:01:08.841561: step: 434/466, loss: 0.40193527936935425 2023-01-24 02:01:09.443543: step: 436/466, loss: 0.18932312726974487 2023-01-24 02:01:10.017610: step: 438/466, loss: 0.20605716109275818 2023-01-24 02:01:10.659313: step: 440/466, loss: 0.19271188974380493 2023-01-24 02:01:11.275161: step: 442/466, loss: 0.22124242782592773 2023-01-24 02:01:11.905432: step: 444/466, loss: 0.08189988136291504 2023-01-24 02:01:12.524430: step: 446/466, loss: 0.3875589072704315 2023-01-24 02:01:13.123607: step: 448/466, loss: 0.7076061964035034 2023-01-24 02:01:13.684559: step: 450/466, loss: 0.08960109204053879 2023-01-24 02:01:14.351685: step: 452/466, loss: 0.4185073971748352 2023-01-24 02:01:15.000275: step: 454/466, loss: 0.11291522532701492 2023-01-24 02:01:15.663034: step: 456/466, loss: 0.4014487564563751 2023-01-24 02:01:16.314104: step: 458/466, loss: 0.14478829503059387 2023-01-24 02:01:16.979855: step: 460/466, loss: 0.2312946915626526 2023-01-24 02:01:17.656246: step: 462/466, loss: 0.11276478320360184 2023-01-24 02:01:18.366245: step: 464/466, loss: 0.1325899064540863 2023-01-24 02:01:18.994613: step: 466/466, loss: 0.08391700685024261 2023-01-24 02:01:19.634583: step: 468/466, loss: 0.2524271607398987 2023-01-24 02:01:20.283592: step: 470/466, loss: 0.14266198873519897 2023-01-24 02:01:20.863669: step: 472/466, loss: 0.19147278368473053 2023-01-24 02:01:21.537803: step: 474/466, loss: 0.16023756563663483 2023-01-24 02:01:22.247166: step: 476/466, loss: 0.34925153851509094 2023-01-24 02:01:22.834975: step: 478/466, loss: 0.1936853975057602 2023-01-24 02:01:23.439070: step: 480/466, loss: 0.06167168170213699 2023-01-24 02:01:24.037556: step: 482/466, loss: 0.06931286305189133 2023-01-24 02:01:24.592741: step: 484/466, loss: 0.24022479355335236 2023-01-24 02:01:25.270716: step: 486/466, loss: 0.12189552187919617 2023-01-24 02:01:25.826547: step: 488/466, loss: 0.277190238237381 2023-01-24 02:01:26.473317: step: 490/466, loss: 0.10748836398124695 2023-01-24 02:01:27.064818: step: 492/466, loss: 0.03430560231208801 2023-01-24 02:01:27.678386: step: 494/466, loss: 0.2065049707889557 2023-01-24 02:01:28.312964: step: 496/466, loss: 0.06921470910310745 2023-01-24 02:01:29.012951: step: 498/466, loss: 0.12291540205478668 2023-01-24 02:01:29.716696: step: 500/466, loss: 0.040339380502700806 2023-01-24 02:01:30.323981: step: 502/466, loss: 0.26626771688461304 2023-01-24 02:01:31.019850: step: 504/466, loss: 0.2785085439682007 2023-01-24 02:01:31.762338: step: 506/466, loss: 0.12924779951572418 2023-01-24 02:01:32.425864: step: 508/466, loss: 0.13007335364818573 2023-01-24 02:01:33.013502: step: 510/466, loss: 0.08024508506059647 2023-01-24 02:01:33.712210: step: 512/466, loss: 0.6170725226402283 2023-01-24 02:01:34.318539: step: 514/466, loss: 0.5800790190696716 2023-01-24 02:01:34.871732: step: 516/466, loss: 0.11268080025911331 2023-01-24 02:01:35.521575: step: 518/466, loss: 0.22556880116462708 2023-01-24 02:01:36.130088: step: 520/466, loss: 0.14080668985843658 2023-01-24 02:01:36.717502: step: 522/466, loss: 0.2947949767112732 2023-01-24 02:01:37.292476: step: 524/466, loss: 0.15909208357334137 2023-01-24 02:01:37.942205: step: 526/466, loss: 0.10946908593177795 2023-01-24 02:01:38.576949: step: 528/466, loss: 0.15271134674549103 2023-01-24 02:01:39.152503: step: 530/466, loss: 0.42776089906692505 2023-01-24 02:01:39.817631: step: 532/466, loss: 0.02391742914915085 2023-01-24 02:01:40.422888: step: 534/466, loss: 0.36857134103775024 2023-01-24 02:01:41.037835: step: 536/466, loss: 0.25262266397476196 2023-01-24 02:01:41.706223: step: 538/466, loss: 0.10111632943153381 2023-01-24 02:01:42.399430: step: 540/466, loss: 0.20518727600574493 2023-01-24 02:01:42.966032: step: 542/466, loss: 0.08978749811649323 2023-01-24 02:01:43.633168: step: 544/466, loss: 0.196076899766922 2023-01-24 02:01:44.225317: step: 546/466, loss: 0.16637547314167023 2023-01-24 02:01:44.890312: step: 548/466, loss: 0.1056482344865799 2023-01-24 02:01:45.488719: step: 550/466, loss: 0.20052708685398102 2023-01-24 02:01:46.075435: step: 552/466, loss: 0.18453501164913177 2023-01-24 02:01:46.659028: step: 554/466, loss: 0.21498796343803406 2023-01-24 02:01:47.299487: step: 556/466, loss: 0.08253206312656403 2023-01-24 02:01:47.886882: step: 558/466, loss: 0.5168558955192566 2023-01-24 02:01:48.464320: step: 560/466, loss: 0.14647255837917328 2023-01-24 02:01:49.057770: step: 562/466, loss: 0.0941804051399231 2023-01-24 02:01:49.685194: step: 564/466, loss: 0.07615640014410019 2023-01-24 02:01:50.298702: step: 566/466, loss: 0.11760447174310684 2023-01-24 02:01:50.925662: step: 568/466, loss: 0.14441268146038055 2023-01-24 02:01:51.501979: step: 570/466, loss: 0.14566536247730255 2023-01-24 02:01:52.152990: step: 572/466, loss: 0.015807218849658966 2023-01-24 02:01:52.740904: step: 574/466, loss: 0.19025352597236633 2023-01-24 02:01:53.333409: step: 576/466, loss: 0.2444162666797638 2023-01-24 02:01:53.858774: step: 578/466, loss: 0.10195492208003998 2023-01-24 02:01:54.474847: step: 580/466, loss: 0.2547774016857147 2023-01-24 02:01:55.092179: step: 582/466, loss: 0.11530125141143799 2023-01-24 02:01:55.725731: step: 584/466, loss: 0.14400054514408112 2023-01-24 02:01:56.394059: step: 586/466, loss: 0.17188303172588348 2023-01-24 02:01:57.010012: step: 588/466, loss: 0.2476584017276764 2023-01-24 02:01:57.697247: step: 590/466, loss: 0.617317259311676 2023-01-24 02:01:58.376705: step: 592/466, loss: 0.15053248405456543 2023-01-24 02:01:59.019934: step: 594/466, loss: 0.1030726283788681 2023-01-24 02:01:59.684723: step: 596/466, loss: 0.16715604066848755 2023-01-24 02:02:00.280189: step: 598/466, loss: 0.039887625724077225 2023-01-24 02:02:00.905940: step: 600/466, loss: 0.08842974901199341 2023-01-24 02:02:01.521957: step: 602/466, loss: 2.2897515296936035 2023-01-24 02:02:02.111310: step: 604/466, loss: 0.10700254887342453 2023-01-24 02:02:02.720541: step: 606/466, loss: 0.09269782900810242 2023-01-24 02:02:03.362470: step: 608/466, loss: 0.15501828491687775 2023-01-24 02:02:03.967696: step: 610/466, loss: 0.2086525410413742 2023-01-24 02:02:04.542670: step: 612/466, loss: 0.20226308703422546 2023-01-24 02:02:05.131367: step: 614/466, loss: 0.20458634197711945 2023-01-24 02:02:05.710417: step: 616/466, loss: 0.570742130279541 2023-01-24 02:02:06.328656: step: 618/466, loss: 0.5643695592880249 2023-01-24 02:02:06.921815: step: 620/466, loss: 0.04040331020951271 2023-01-24 02:02:07.565701: step: 622/466, loss: 0.17890098690986633 2023-01-24 02:02:08.164757: step: 624/466, loss: 0.08190061151981354 2023-01-24 02:02:08.749990: step: 626/466, loss: 1.0058467388153076 2023-01-24 02:02:09.347495: step: 628/466, loss: 0.11896853893995285 2023-01-24 02:02:09.953160: step: 630/466, loss: 0.12671291828155518 2023-01-24 02:02:10.488329: step: 632/466, loss: 0.15029926598072052 2023-01-24 02:02:11.018199: step: 634/466, loss: 0.16588181257247925 2023-01-24 02:02:11.648738: step: 636/466, loss: 0.631856381893158 2023-01-24 02:02:12.308967: step: 638/466, loss: 0.18267585337162018 2023-01-24 02:02:12.929135: step: 640/466, loss: 0.21784941852092743 2023-01-24 02:02:13.526631: step: 642/466, loss: 0.06392834335565567 2023-01-24 02:02:14.119561: step: 644/466, loss: 0.13560988008975983 2023-01-24 02:02:14.834779: step: 646/466, loss: 0.41749024391174316 2023-01-24 02:02:15.431631: step: 648/466, loss: 0.12861019372940063 2023-01-24 02:02:16.014083: step: 650/466, loss: 0.0982961505651474 2023-01-24 02:02:16.665389: step: 652/466, loss: 0.18329547345638275 2023-01-24 02:02:17.324381: step: 654/466, loss: 0.11583784222602844 2023-01-24 02:02:18.038076: step: 656/466, loss: 0.20851461589336395 2023-01-24 02:02:18.675739: step: 658/466, loss: 0.10331014543771744 2023-01-24 02:02:19.373246: step: 660/466, loss: 0.0695447102189064 2023-01-24 02:02:20.019853: step: 662/466, loss: 0.22022593021392822 2023-01-24 02:02:20.654022: step: 664/466, loss: 0.2137749046087265 2023-01-24 02:02:21.318152: step: 666/466, loss: 0.09532894194126129 2023-01-24 02:02:21.948023: step: 668/466, loss: 0.11481019109487534 2023-01-24 02:02:22.544358: step: 670/466, loss: 0.034565288573503494 2023-01-24 02:02:23.107273: step: 672/466, loss: 0.14431558549404144 2023-01-24 02:02:23.754258: step: 674/466, loss: 0.2387639433145523 2023-01-24 02:02:24.353777: step: 676/466, loss: 0.13092225790023804 2023-01-24 02:02:24.989969: step: 678/466, loss: 0.3727954030036926 2023-01-24 02:02:25.616544: step: 680/466, loss: 0.22211085259914398 2023-01-24 02:02:26.270732: step: 682/466, loss: 0.12298870831727982 2023-01-24 02:02:26.849023: step: 684/466, loss: 0.15994293987751007 2023-01-24 02:02:27.424680: step: 686/466, loss: 0.32660427689552307 2023-01-24 02:02:28.070806: step: 688/466, loss: 0.12016677856445312 2023-01-24 02:02:28.733828: step: 690/466, loss: 0.12251818180084229 2023-01-24 02:02:29.378057: step: 692/466, loss: 0.28071627020835876 2023-01-24 02:02:29.997163: step: 694/466, loss: 0.1375160813331604 2023-01-24 02:02:30.559355: step: 696/466, loss: 0.11511513590812683 2023-01-24 02:02:31.125439: step: 698/466, loss: 0.9334464073181152 2023-01-24 02:02:31.727155: step: 700/466, loss: 0.11350943893194199 2023-01-24 02:02:32.332216: step: 702/466, loss: 0.08908645063638687 2023-01-24 02:02:33.113973: step: 704/466, loss: 0.07174257189035416 2023-01-24 02:02:33.692238: step: 706/466, loss: 0.07029732316732407 2023-01-24 02:02:34.383016: step: 708/466, loss: 0.14587418735027313 2023-01-24 02:02:35.044687: step: 710/466, loss: 0.17657369375228882 2023-01-24 02:02:35.688224: step: 712/466, loss: 0.12829914689064026 2023-01-24 02:02:36.270405: step: 714/466, loss: 0.3506952226161957 2023-01-24 02:02:36.885188: step: 716/466, loss: 0.09446898102760315 2023-01-24 02:02:37.493609: step: 718/466, loss: 0.17156566679477692 2023-01-24 02:02:38.104868: step: 720/466, loss: 0.1899915337562561 2023-01-24 02:02:38.721136: step: 722/466, loss: 0.2282724231481552 2023-01-24 02:02:39.288090: step: 724/466, loss: 0.07543264329433441 2023-01-24 02:02:39.892722: step: 726/466, loss: 0.24867254495620728 2023-01-24 02:02:40.515103: step: 728/466, loss: 0.4091861844062805 2023-01-24 02:02:41.190321: step: 730/466, loss: 0.08514761179685593 2023-01-24 02:02:41.813615: step: 732/466, loss: 0.2007623314857483 2023-01-24 02:02:42.439197: step: 734/466, loss: 0.045061640441417694 2023-01-24 02:02:43.046655: step: 736/466, loss: 0.08761599659919739 2023-01-24 02:02:43.764592: step: 738/466, loss: 0.21438206732273102 2023-01-24 02:02:44.422316: step: 740/466, loss: 0.14008253812789917 2023-01-24 02:02:45.177275: step: 742/466, loss: 0.14892008900642395 2023-01-24 02:02:45.809723: step: 744/466, loss: 0.1852099746465683 2023-01-24 02:02:46.467986: step: 746/466, loss: 0.12888222932815552 2023-01-24 02:02:47.084076: step: 748/466, loss: 0.20296093821525574 2023-01-24 02:02:47.703766: step: 750/466, loss: 0.04399174451828003 2023-01-24 02:02:48.313279: step: 752/466, loss: 0.10388326644897461 2023-01-24 02:02:48.958144: step: 754/466, loss: 0.09159988164901733 2023-01-24 02:02:49.564990: step: 756/466, loss: 0.05983365327119827 2023-01-24 02:02:50.205579: step: 758/466, loss: 0.42964819073677063 2023-01-24 02:02:50.818978: step: 760/466, loss: 0.10957743227481842 2023-01-24 02:02:51.405140: step: 762/466, loss: 0.06509745121002197 2023-01-24 02:02:52.042453: step: 764/466, loss: 0.06311726570129395 2023-01-24 02:02:52.799349: step: 766/466, loss: 0.3187008798122406 2023-01-24 02:02:53.417858: step: 768/466, loss: 0.07424849271774292 2023-01-24 02:02:53.967970: step: 770/466, loss: 0.06358221173286438 2023-01-24 02:02:54.601986: step: 772/466, loss: 0.22837388515472412 2023-01-24 02:02:55.225859: step: 774/466, loss: 0.06742311269044876 2023-01-24 02:02:55.858865: step: 776/466, loss: 0.5217674970626831 2023-01-24 02:02:56.550630: step: 778/466, loss: 0.4974120855331421 2023-01-24 02:02:57.201476: step: 780/466, loss: 0.1293429136276245 2023-01-24 02:02:57.825941: step: 782/466, loss: 0.13213907182216644 2023-01-24 02:02:58.478075: step: 784/466, loss: 0.06015023961663246 2023-01-24 02:02:59.117773: step: 786/466, loss: 0.14418943226337433 2023-01-24 02:02:59.725334: step: 788/466, loss: 0.20209401845932007 2023-01-24 02:03:00.294853: step: 790/466, loss: 0.08692203462123871 2023-01-24 02:03:01.001870: step: 792/466, loss: 0.11871273815631866 2023-01-24 02:03:01.590300: step: 794/466, loss: 0.13457347452640533 2023-01-24 02:03:02.184642: step: 796/466, loss: 0.5627838373184204 2023-01-24 02:03:02.799740: step: 798/466, loss: 0.06300508230924606 2023-01-24 02:03:03.438895: step: 800/466, loss: 0.12110118567943573 2023-01-24 02:03:04.130257: step: 802/466, loss: 1.1750342845916748 2023-01-24 02:03:04.750195: step: 804/466, loss: 0.08435027301311493 2023-01-24 02:03:05.423976: step: 806/466, loss: 0.17642229795455933 2023-01-24 02:03:05.992645: step: 808/466, loss: 0.08418819308280945 2023-01-24 02:03:06.633301: step: 810/466, loss: 0.06966585665941238 2023-01-24 02:03:07.262031: step: 812/466, loss: 0.25622549653053284 2023-01-24 02:03:07.879930: step: 814/466, loss: 0.18494339287281036 2023-01-24 02:03:08.535630: step: 816/466, loss: 0.22128863632678986 2023-01-24 02:03:09.207839: step: 818/466, loss: 0.19796879589557648 2023-01-24 02:03:09.818021: step: 820/466, loss: 0.17516906559467316 2023-01-24 02:03:10.405456: step: 822/466, loss: 0.028141312301158905 2023-01-24 02:03:11.038661: step: 824/466, loss: 0.25652971863746643 2023-01-24 02:03:11.632808: step: 826/466, loss: 0.1690840721130371 2023-01-24 02:03:12.320451: step: 828/466, loss: 0.04922156035900116 2023-01-24 02:03:12.922543: step: 830/466, loss: 0.1817835122346878 2023-01-24 02:03:13.510123: step: 832/466, loss: 0.1686851680278778 2023-01-24 02:03:14.113566: step: 834/466, loss: 0.16623874008655548 2023-01-24 02:03:14.782209: step: 836/466, loss: 0.18103083968162537 2023-01-24 02:03:15.371454: step: 838/466, loss: 0.15125532448291779 2023-01-24 02:03:16.010492: step: 840/466, loss: 0.1299140602350235 2023-01-24 02:03:16.636369: step: 842/466, loss: 0.08949729055166245 2023-01-24 02:03:17.161948: step: 844/466, loss: 0.09528897702693939 2023-01-24 02:03:17.746243: step: 846/466, loss: 0.26403525471687317 2023-01-24 02:03:18.372056: step: 848/466, loss: 0.11076653003692627 2023-01-24 02:03:18.992281: step: 850/466, loss: 0.11650221049785614 2023-01-24 02:03:19.659098: step: 852/466, loss: 0.02086993306875229 2023-01-24 02:03:20.201690: step: 854/466, loss: 0.03652055189013481 2023-01-24 02:03:20.807909: step: 856/466, loss: 0.22360306978225708 2023-01-24 02:03:21.409120: step: 858/466, loss: 0.05483764782547951 2023-01-24 02:03:21.973078: step: 860/466, loss: 0.11446105688810349 2023-01-24 02:03:22.599272: step: 862/466, loss: 0.11421798169612885 2023-01-24 02:03:23.242499: step: 864/466, loss: 0.2829704284667969 2023-01-24 02:03:23.889107: step: 866/466, loss: 0.13539977371692657 2023-01-24 02:03:24.500723: step: 868/466, loss: 0.1167941689491272 2023-01-24 02:03:25.169468: step: 870/466, loss: 0.1312185376882553 2023-01-24 02:03:25.809922: step: 872/466, loss: 0.7470857501029968 2023-01-24 02:03:26.425612: step: 874/466, loss: 0.4336667060852051 2023-01-24 02:03:27.043548: step: 876/466, loss: 1.348360300064087 2023-01-24 02:03:27.743661: step: 878/466, loss: 0.13151061534881592 2023-01-24 02:03:28.404098: step: 880/466, loss: 0.6406599283218384 2023-01-24 02:03:29.061553: step: 882/466, loss: 0.2077856957912445 2023-01-24 02:03:29.668554: step: 884/466, loss: 0.14853604137897491 2023-01-24 02:03:30.259556: step: 886/466, loss: 0.2359350323677063 2023-01-24 02:03:30.814180: step: 888/466, loss: 0.0797957330942154 2023-01-24 02:03:31.462111: step: 890/466, loss: 0.15271800756454468 2023-01-24 02:03:32.133202: step: 892/466, loss: 0.0849347859621048 2023-01-24 02:03:32.737355: step: 894/466, loss: 0.29333633184432983 2023-01-24 02:03:33.387530: step: 896/466, loss: 1.1319775581359863 2023-01-24 02:03:33.998738: step: 898/466, loss: 0.08828303962945938 2023-01-24 02:03:34.626260: step: 900/466, loss: 0.13764886558055878 2023-01-24 02:03:35.261670: step: 902/466, loss: 0.3116423189640045 2023-01-24 02:03:35.993851: step: 904/466, loss: 0.1575583517551422 2023-01-24 02:03:36.664060: step: 906/466, loss: 0.15604592859745026 2023-01-24 02:03:37.357929: step: 908/466, loss: 0.19787052273750305 2023-01-24 02:03:38.038940: step: 910/466, loss: 0.14253224432468414 2023-01-24 02:03:38.745608: step: 912/466, loss: 0.18719810247421265 2023-01-24 02:03:39.370179: step: 914/466, loss: 1.0880357027053833 2023-01-24 02:03:40.023523: step: 916/466, loss: 0.22350358963012695 2023-01-24 02:03:40.589018: step: 918/466, loss: 0.14658449590206146 2023-01-24 02:03:41.171444: step: 920/466, loss: 0.14227831363677979 2023-01-24 02:03:41.736245: step: 922/466, loss: 0.18480584025382996 2023-01-24 02:03:42.404844: step: 924/466, loss: 0.25192248821258545 2023-01-24 02:03:42.967294: step: 926/466, loss: 0.25395455956459045 2023-01-24 02:03:43.622228: step: 928/466, loss: 0.4100404977798462 2023-01-24 02:03:44.282932: step: 930/466, loss: 0.04821727052330971 2023-01-24 02:03:44.924922: step: 932/466, loss: 0.20392407476902008 ================================================== Loss: 0.213 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3423631822248733, 'r': 0.31118019788560586, 'f1': 0.32602776199943206}, 'combined': 0.2402309825258973, 'epoch': 13} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3296686088799001, 'r': 0.28146796389926493, 'f1': 0.3036674819867434}, 'combined': 0.19014692797300758, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3390094427098903, 'r': 0.3255005275354924, 'f1': 0.33211767281936977}, 'combined': 0.24471828523532507, 'epoch': 13} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.33680812818161, 'r': 0.2963789936616153, 'f1': 0.3153028564350166}, 'combined': 0.19539895328367224, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3127269139700079, 'r': 0.32222146923285444, 'f1': 0.3174032042723631}, 'combined': 0.23387604525332018, 'epoch': 13} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3419027033599297, 'r': 0.29063274061580296, 'f1': 0.3141898865836854}, 'combined': 0.20842299407036557, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35, 'r': 0.3, 'f1': 0.3230769230769231}, 'combined': 0.2153846153846154, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34, 'r': 0.3695652173913043, 'f1': 0.3541666666666667}, 'combined': 0.17708333333333334, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.20689655172413793, 'f1': 0.24489795918367346}, 'combined': 0.16326530612244897, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3101533457249071, 'r': 0.3166271347248577, 'f1': 0.31335680751173717}, 'combined': 0.23089448974549054, 'epoch': 12} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.34558112654998463, 'r': 0.27065838908657336, 'f1': 0.30356518411229655}, 'combined': 0.20137492411409771, 'epoch': 12} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.19827586206896552, 'f1': 0.2555555555555556}, 'combined': 0.1703703703703704, 'epoch': 12} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:06:18.091233: step: 2/466, loss: 0.21664494276046753 2023-01-24 02:06:18.780402: step: 4/466, loss: 0.2588813006877899 2023-01-24 02:06:19.374185: step: 6/466, loss: 0.23247914016246796 2023-01-24 02:06:19.994098: step: 8/466, loss: 0.10080494731664658 2023-01-24 02:06:20.588367: step: 10/466, loss: 0.07782622426748276 2023-01-24 02:06:21.174958: step: 12/466, loss: 0.27462607622146606 2023-01-24 02:06:21.698702: step: 14/466, loss: 0.2357061505317688 2023-01-24 02:06:22.288427: step: 16/466, loss: 0.06383047252893448 2023-01-24 02:06:22.936231: step: 18/466, loss: 0.09361665695905685 2023-01-24 02:06:23.532570: step: 20/466, loss: 0.7580082416534424 2023-01-24 02:06:24.159720: step: 22/466, loss: 0.06528540700674057 2023-01-24 02:06:24.721421: step: 24/466, loss: 0.05765198543667793 2023-01-24 02:06:25.364233: step: 26/466, loss: 0.0284025426954031 2023-01-24 02:06:25.971921: step: 28/466, loss: 0.12420958280563354 2023-01-24 02:06:26.595772: step: 30/466, loss: 0.07885674387216568 2023-01-24 02:06:27.251096: step: 32/466, loss: 0.04992187023162842 2023-01-24 02:06:27.879612: step: 34/466, loss: 0.0710175409913063 2023-01-24 02:06:28.422783: step: 36/466, loss: 0.0854632556438446 2023-01-24 02:06:29.085627: step: 38/466, loss: 0.24539466202259064 2023-01-24 02:06:29.654284: step: 40/466, loss: 0.08943222463130951 2023-01-24 02:06:30.395813: step: 42/466, loss: 0.417314738035202 2023-01-24 02:06:30.958102: step: 44/466, loss: 0.0895300880074501 2023-01-24 02:06:31.551096: step: 46/466, loss: 0.09953930974006653 2023-01-24 02:06:32.141005: step: 48/466, loss: 0.008541363291442394 2023-01-24 02:06:32.819958: step: 50/466, loss: 0.1880522072315216 2023-01-24 02:06:33.428515: step: 52/466, loss: 0.6083518862724304 2023-01-24 02:06:34.116008: step: 54/466, loss: 0.22280308604240417 2023-01-24 02:06:34.739689: step: 56/466, loss: 0.1025019958615303 2023-01-24 02:06:35.465989: step: 58/466, loss: 0.5052645802497864 2023-01-24 02:06:36.159570: step: 60/466, loss: 0.13890738785266876 2023-01-24 02:06:36.751034: step: 62/466, loss: 0.024933665990829468 2023-01-24 02:06:37.416646: step: 64/466, loss: 0.0999549999833107 2023-01-24 02:06:38.102906: step: 66/466, loss: 0.09658706188201904 2023-01-24 02:06:38.664146: step: 68/466, loss: 0.39057597517967224 2023-01-24 02:06:39.308002: step: 70/466, loss: 0.07361017912626266 2023-01-24 02:06:39.915555: step: 72/466, loss: 0.16718822717666626 2023-01-24 02:06:40.544493: step: 74/466, loss: 0.03773102909326553 2023-01-24 02:06:41.172917: step: 76/466, loss: 0.31345033645629883 2023-01-24 02:06:41.836329: step: 78/466, loss: 0.07871197164058685 2023-01-24 02:06:42.452506: step: 80/466, loss: 0.10839194804430008 2023-01-24 02:06:43.111854: step: 82/466, loss: 0.05946098640561104 2023-01-24 02:06:43.789788: step: 84/466, loss: 0.10564349591732025 2023-01-24 02:06:44.445138: step: 86/466, loss: 0.06663645058870316 2023-01-24 02:06:45.072278: step: 88/466, loss: 0.23888219892978668 2023-01-24 02:06:45.719462: step: 90/466, loss: 0.1846161186695099 2023-01-24 02:06:46.393664: step: 92/466, loss: 0.10354091972112656 2023-01-24 02:06:47.010539: step: 94/466, loss: 0.11738651990890503 2023-01-24 02:06:47.630129: step: 96/466, loss: 0.39901578426361084 2023-01-24 02:06:48.263750: step: 98/466, loss: 0.16923528909683228 2023-01-24 02:06:48.829028: step: 100/466, loss: 0.03673376888036728 2023-01-24 02:06:49.431578: step: 102/466, loss: 0.05416484177112579 2023-01-24 02:06:50.079790: step: 104/466, loss: 0.5004127025604248 2023-01-24 02:06:50.704277: step: 106/466, loss: 0.07831823825836182 2023-01-24 02:06:51.348510: step: 108/466, loss: 0.21493087708950043 2023-01-24 02:06:52.008893: step: 110/466, loss: 0.11279312521219254 2023-01-24 02:06:52.626654: step: 112/466, loss: 0.1883641928434372 2023-01-24 02:06:53.196699: step: 114/466, loss: 0.13482354581356049 2023-01-24 02:06:53.817458: step: 116/466, loss: 0.41808295249938965 2023-01-24 02:06:54.317832: step: 118/466, loss: 0.17674140632152557 2023-01-24 02:06:54.916009: step: 120/466, loss: 0.7946280837059021 2023-01-24 02:06:55.511180: step: 122/466, loss: 0.06088021770119667 2023-01-24 02:06:56.173813: step: 124/466, loss: 0.0929827019572258 2023-01-24 02:06:56.831449: step: 126/466, loss: 0.09999638050794601 2023-01-24 02:06:57.492333: step: 128/466, loss: 0.3582874536514282 2023-01-24 02:06:58.105614: step: 130/466, loss: 0.2149185836315155 2023-01-24 02:06:58.738035: step: 132/466, loss: 0.046425435692071915 2023-01-24 02:06:59.367012: step: 134/466, loss: 0.14573967456817627 2023-01-24 02:06:59.929840: step: 136/466, loss: 0.06734217703342438 2023-01-24 02:07:00.555713: step: 138/466, loss: 0.8096088171005249 2023-01-24 02:07:01.210858: step: 140/466, loss: 1.5389599800109863 2023-01-24 02:07:01.827904: step: 142/466, loss: 0.17086271941661835 2023-01-24 02:07:02.389699: step: 144/466, loss: 1.1946203708648682 2023-01-24 02:07:03.075543: step: 146/466, loss: 0.1435568928718567 2023-01-24 02:07:03.656887: step: 148/466, loss: 0.3198951780796051 2023-01-24 02:07:04.193834: step: 150/466, loss: 0.056569430977106094 2023-01-24 02:07:04.714026: step: 152/466, loss: 0.06694243848323822 2023-01-24 02:07:05.330974: step: 154/466, loss: 0.08120336383581161 2023-01-24 02:07:05.958130: step: 156/466, loss: 0.04717717319726944 2023-01-24 02:07:06.546100: step: 158/466, loss: 0.10398121923208237 2023-01-24 02:07:07.173184: step: 160/466, loss: 0.07995104044675827 2023-01-24 02:07:07.758871: step: 162/466, loss: 0.09553412348031998 2023-01-24 02:07:08.461220: step: 164/466, loss: 0.04434293508529663 2023-01-24 02:07:09.051848: step: 166/466, loss: 0.36308401823043823 2023-01-24 02:07:09.627164: step: 168/466, loss: 0.007103645242750645 2023-01-24 02:07:10.254374: step: 170/466, loss: 0.19649642705917358 2023-01-24 02:07:10.906549: step: 172/466, loss: 0.17209577560424805 2023-01-24 02:07:11.605632: step: 174/466, loss: 0.09232991188764572 2023-01-24 02:07:12.257636: step: 176/466, loss: 1.4124647378921509 2023-01-24 02:07:12.819046: step: 178/466, loss: 0.20708917081356049 2023-01-24 02:07:13.439649: step: 180/466, loss: 0.05523412674665451 2023-01-24 02:07:14.054831: step: 182/466, loss: 0.0384976901113987 2023-01-24 02:07:14.616562: step: 184/466, loss: 0.19864994287490845 2023-01-24 02:07:15.265192: step: 186/466, loss: 0.09787686169147491 2023-01-24 02:07:15.855853: step: 188/466, loss: 0.2546052038669586 2023-01-24 02:07:16.530613: step: 190/466, loss: 0.3419496417045593 2023-01-24 02:07:17.167214: step: 192/466, loss: 0.07933840900659561 2023-01-24 02:07:17.860171: step: 194/466, loss: 0.2112976312637329 2023-01-24 02:07:18.472812: step: 196/466, loss: 0.054273150861263275 2023-01-24 02:07:19.121211: step: 198/466, loss: 0.07770832628011703 2023-01-24 02:07:19.735311: step: 200/466, loss: 0.1153278797864914 2023-01-24 02:07:20.379669: step: 202/466, loss: 0.1545044481754303 2023-01-24 02:07:20.997461: step: 204/466, loss: 0.06076398864388466 2023-01-24 02:07:21.595870: step: 206/466, loss: 0.10364454984664917 2023-01-24 02:07:22.260105: step: 208/466, loss: 0.11774459481239319 2023-01-24 02:07:22.908245: step: 210/466, loss: 0.32848066091537476 2023-01-24 02:07:23.535805: step: 212/466, loss: 0.23459117114543915 2023-01-24 02:07:24.144267: step: 214/466, loss: 0.042070191353559494 2023-01-24 02:07:24.716380: step: 216/466, loss: 0.08094526082277298 2023-01-24 02:07:25.381656: step: 218/466, loss: 0.1356755793094635 2023-01-24 02:07:26.044460: step: 220/466, loss: 0.6302579641342163 2023-01-24 02:07:26.634169: step: 222/466, loss: 0.6548448204994202 2023-01-24 02:07:27.280815: step: 224/466, loss: 0.10057955980300903 2023-01-24 02:07:27.930223: step: 226/466, loss: 0.4888569712638855 2023-01-24 02:07:28.499562: step: 228/466, loss: 0.3553338050842285 2023-01-24 02:07:29.141527: step: 230/466, loss: 0.10962367802858353 2023-01-24 02:07:29.857816: step: 232/466, loss: 0.15608611702919006 2023-01-24 02:07:30.513403: step: 234/466, loss: 0.18678003549575806 2023-01-24 02:07:31.112496: step: 236/466, loss: 0.21261128783226013 2023-01-24 02:07:31.745123: step: 238/466, loss: 0.07746458053588867 2023-01-24 02:07:32.353274: step: 240/466, loss: 0.4363856911659241 2023-01-24 02:07:32.976643: step: 242/466, loss: 0.1325339823961258 2023-01-24 02:07:33.586732: step: 244/466, loss: 0.15851140022277832 2023-01-24 02:07:34.262424: step: 246/466, loss: 0.14201043546199799 2023-01-24 02:07:34.892236: step: 248/466, loss: 0.3462273180484772 2023-01-24 02:07:35.564157: step: 250/466, loss: 0.09096142649650574 2023-01-24 02:07:36.199424: step: 252/466, loss: 0.8484412431716919 2023-01-24 02:07:36.810142: step: 254/466, loss: 0.15960969030857086 2023-01-24 02:07:37.469020: step: 256/466, loss: 0.42807015776634216 2023-01-24 02:07:38.108869: step: 258/466, loss: 0.31719139218330383 2023-01-24 02:07:38.724018: step: 260/466, loss: 0.08133837580680847 2023-01-24 02:07:39.347441: step: 262/466, loss: 0.09300018101930618 2023-01-24 02:07:39.980646: step: 264/466, loss: 0.18810880184173584 2023-01-24 02:07:40.621398: step: 266/466, loss: 0.18063776195049286 2023-01-24 02:07:41.223831: step: 268/466, loss: 0.1170445904135704 2023-01-24 02:07:41.882630: step: 270/466, loss: 0.046691134572029114 2023-01-24 02:07:42.513560: step: 272/466, loss: 0.15165668725967407 2023-01-24 02:07:43.184629: step: 274/466, loss: 0.2840859591960907 2023-01-24 02:07:43.741930: step: 276/466, loss: 0.08033007383346558 2023-01-24 02:07:44.384790: step: 278/466, loss: 0.30552706122398376 2023-01-24 02:07:45.022756: step: 280/466, loss: 0.03319627046585083 2023-01-24 02:07:45.550669: step: 282/466, loss: 0.060096003115177155 2023-01-24 02:07:46.199032: step: 284/466, loss: 0.0664438009262085 2023-01-24 02:07:46.810244: step: 286/466, loss: 0.18188567459583282 2023-01-24 02:07:47.459603: step: 288/466, loss: 0.29840531945228577 2023-01-24 02:07:48.075887: step: 290/466, loss: 0.08392433077096939 2023-01-24 02:07:48.696347: step: 292/466, loss: 0.1635517030954361 2023-01-24 02:07:49.291417: step: 294/466, loss: 0.10469960421323776 2023-01-24 02:07:49.936685: step: 296/466, loss: 1.0305179357528687 2023-01-24 02:07:50.596235: step: 298/466, loss: 0.12768679857254028 2023-01-24 02:07:51.255440: step: 300/466, loss: 0.17497457563877106 2023-01-24 02:07:51.911145: step: 302/466, loss: 0.14417117834091187 2023-01-24 02:07:52.552952: step: 304/466, loss: 0.1257448047399521 2023-01-24 02:07:53.210420: step: 306/466, loss: 0.32153579592704773 2023-01-24 02:07:53.824252: step: 308/466, loss: 0.18655270338058472 2023-01-24 02:07:54.412564: step: 310/466, loss: 0.2795713245868683 2023-01-24 02:07:55.027015: step: 312/466, loss: 0.14642831683158875 2023-01-24 02:07:55.674680: step: 314/466, loss: 0.023899976164102554 2023-01-24 02:07:56.272193: step: 316/466, loss: 0.191590815782547 2023-01-24 02:07:56.845537: step: 318/466, loss: 0.23445306718349457 2023-01-24 02:07:57.463227: step: 320/466, loss: 0.0523383729159832 2023-01-24 02:07:58.061115: step: 322/466, loss: 0.12506917119026184 2023-01-24 02:07:58.668079: step: 324/466, loss: 0.4583035707473755 2023-01-24 02:07:59.219902: step: 326/466, loss: 0.08694583177566528 2023-01-24 02:07:59.806797: step: 328/466, loss: 0.5271000862121582 2023-01-24 02:08:00.386892: step: 330/466, loss: 0.14745131134986877 2023-01-24 02:08:01.003165: step: 332/466, loss: 0.07641014456748962 2023-01-24 02:08:01.586801: step: 334/466, loss: 0.08735503256320953 2023-01-24 02:08:02.230361: step: 336/466, loss: 0.06401679664850235 2023-01-24 02:08:02.876483: step: 338/466, loss: 0.07610401511192322 2023-01-24 02:08:03.438536: step: 340/466, loss: 0.32962048053741455 2023-01-24 02:08:04.140861: step: 342/466, loss: 0.13128696382045746 2023-01-24 02:08:04.729899: step: 344/466, loss: 0.021335458382964134 2023-01-24 02:08:05.354714: step: 346/466, loss: 0.14687997102737427 2023-01-24 02:08:05.978203: step: 348/466, loss: 0.19566234946250916 2023-01-24 02:08:06.670917: step: 350/466, loss: 0.5493302345275879 2023-01-24 02:08:07.311118: step: 352/466, loss: 0.08697886765003204 2023-01-24 02:08:07.954619: step: 354/466, loss: 0.22227326035499573 2023-01-24 02:08:08.551584: step: 356/466, loss: 0.04099361598491669 2023-01-24 02:08:09.161656: step: 358/466, loss: 0.10837606340646744 2023-01-24 02:08:09.791029: step: 360/466, loss: 0.09568150341510773 2023-01-24 02:08:10.394950: step: 362/466, loss: 0.14171753823757172 2023-01-24 02:08:10.977456: step: 364/466, loss: 0.08027523756027222 2023-01-24 02:08:11.548191: step: 366/466, loss: 0.15333522856235504 2023-01-24 02:08:12.189953: step: 368/466, loss: 0.052050329744815826 2023-01-24 02:08:12.838867: step: 370/466, loss: 0.08375398069620132 2023-01-24 02:08:13.468255: step: 372/466, loss: 0.03649580478668213 2023-01-24 02:08:14.082231: step: 374/466, loss: 0.16528141498565674 2023-01-24 02:08:14.714628: step: 376/466, loss: 0.05533326044678688 2023-01-24 02:08:15.356924: step: 378/466, loss: 0.05703647434711456 2023-01-24 02:08:16.048167: step: 380/466, loss: 0.12570974230766296 2023-01-24 02:08:16.622188: step: 382/466, loss: 0.8404884934425354 2023-01-24 02:08:17.207135: step: 384/466, loss: 0.11699473857879639 2023-01-24 02:08:17.810740: step: 386/466, loss: 0.030063355341553688 2023-01-24 02:08:18.416395: step: 388/466, loss: 0.1502676010131836 2023-01-24 02:08:19.099203: step: 390/466, loss: 0.1679421216249466 2023-01-24 02:08:19.696828: step: 392/466, loss: 0.09521698206663132 2023-01-24 02:08:20.319868: step: 394/466, loss: 0.10098012536764145 2023-01-24 02:08:20.973231: step: 396/466, loss: 0.1282902956008911 2023-01-24 02:08:21.597294: step: 398/466, loss: 0.09492602944374084 2023-01-24 02:08:22.295240: step: 400/466, loss: 0.12190713733434677 2023-01-24 02:08:22.900444: step: 402/466, loss: 0.15735898911952972 2023-01-24 02:08:23.499667: step: 404/466, loss: 0.08567700535058975 2023-01-24 02:08:24.105139: step: 406/466, loss: 0.09509891271591187 2023-01-24 02:08:24.698542: step: 408/466, loss: 0.009957714006304741 2023-01-24 02:08:25.323356: step: 410/466, loss: 0.09173969179391861 2023-01-24 02:08:25.918554: step: 412/466, loss: 0.10093410313129425 2023-01-24 02:08:26.571522: step: 414/466, loss: 1.867976188659668 2023-01-24 02:08:27.200635: step: 416/466, loss: 0.1114923283457756 2023-01-24 02:08:27.870377: step: 418/466, loss: 0.6304705142974854 2023-01-24 02:08:28.434934: step: 420/466, loss: 0.04427172616124153 2023-01-24 02:08:29.135403: step: 422/466, loss: 0.06850725412368774 2023-01-24 02:08:29.769710: step: 424/466, loss: 0.20498473942279816 2023-01-24 02:08:30.428289: step: 426/466, loss: 0.10735539346933365 2023-01-24 02:08:31.007013: step: 428/466, loss: 0.07468348741531372 2023-01-24 02:08:31.617790: step: 430/466, loss: 0.20282858610153198 2023-01-24 02:08:32.286270: step: 432/466, loss: 0.1597706526517868 2023-01-24 02:08:32.925202: step: 434/466, loss: 0.34720179438591003 2023-01-24 02:08:33.511154: step: 436/466, loss: 0.2628512382507324 2023-01-24 02:08:34.103785: step: 438/466, loss: 0.06142502278089523 2023-01-24 02:08:34.697274: step: 440/466, loss: 0.10227291285991669 2023-01-24 02:08:35.322058: step: 442/466, loss: 0.0427645742893219 2023-01-24 02:08:35.923223: step: 444/466, loss: 0.22859515249729156 2023-01-24 02:08:36.551698: step: 446/466, loss: 0.095253124833107 2023-01-24 02:08:37.169142: step: 448/466, loss: 0.12162463366985321 2023-01-24 02:08:37.774373: step: 450/466, loss: 0.07784131914377213 2023-01-24 02:08:38.461251: step: 452/466, loss: 0.507390022277832 2023-01-24 02:08:39.122307: step: 454/466, loss: 0.10997535288333893 2023-01-24 02:08:39.756867: step: 456/466, loss: 1.8567255735397339 2023-01-24 02:08:40.497715: step: 458/466, loss: 0.2736664414405823 2023-01-24 02:08:41.100104: step: 460/466, loss: 0.08910427242517471 2023-01-24 02:08:41.763898: step: 462/466, loss: 0.37200602889060974 2023-01-24 02:08:42.415031: step: 464/466, loss: 0.368376761674881 2023-01-24 02:08:43.048901: step: 466/466, loss: 0.06825045496225357 2023-01-24 02:08:43.632560: step: 468/466, loss: 0.08737295120954514 2023-01-24 02:08:44.286976: step: 470/466, loss: 0.7475695013999939 2023-01-24 02:08:44.982292: step: 472/466, loss: 0.17576569318771362 2023-01-24 02:08:45.616625: step: 474/466, loss: 0.09032467007637024 2023-01-24 02:08:46.249901: step: 476/466, loss: 0.23489834368228912 2023-01-24 02:08:46.925883: step: 478/466, loss: 0.12705419957637787 2023-01-24 02:08:47.532836: step: 480/466, loss: 0.26893118023872375 2023-01-24 02:08:48.174383: step: 482/466, loss: 0.13266262412071228 2023-01-24 02:08:48.783171: step: 484/466, loss: 0.11018592119216919 2023-01-24 02:08:49.386419: step: 486/466, loss: 0.13621945679187775 2023-01-24 02:08:50.010168: step: 488/466, loss: 0.14668114483356476 2023-01-24 02:08:50.609722: step: 490/466, loss: 0.1817501336336136 2023-01-24 02:08:51.180094: step: 492/466, loss: 0.06757530570030212 2023-01-24 02:08:51.889407: step: 494/466, loss: 0.13275770843029022 2023-01-24 02:08:52.473772: step: 496/466, loss: 0.1039777398109436 2023-01-24 02:08:53.104018: step: 498/466, loss: 0.07613973319530487 2023-01-24 02:08:53.807739: step: 500/466, loss: 0.06720180064439774 2023-01-24 02:08:54.404371: step: 502/466, loss: 0.14942774176597595 2023-01-24 02:08:54.948542: step: 504/466, loss: 0.15461429953575134 2023-01-24 02:08:55.565514: step: 506/466, loss: 0.14893974363803864 2023-01-24 02:08:56.205928: step: 508/466, loss: 0.07165000587701797 2023-01-24 02:08:56.820164: step: 510/466, loss: 0.23073656857013702 2023-01-24 02:08:57.416333: step: 512/466, loss: 0.14023169875144958 2023-01-24 02:08:58.045035: step: 514/466, loss: 0.5554453730583191 2023-01-24 02:08:58.649642: step: 516/466, loss: 0.39692938327789307 2023-01-24 02:08:59.319513: step: 518/466, loss: 0.20564937591552734 2023-01-24 02:08:59.890623: step: 520/466, loss: 0.09200834482908249 2023-01-24 02:09:00.460356: step: 522/466, loss: 0.1810818910598755 2023-01-24 02:09:01.091345: step: 524/466, loss: 0.08566950261592865 2023-01-24 02:09:01.745746: step: 526/466, loss: 0.07795694470405579 2023-01-24 02:09:02.415018: step: 528/466, loss: 0.04060513526201248 2023-01-24 02:09:03.086308: step: 530/466, loss: 0.1456119865179062 2023-01-24 02:09:03.694216: step: 532/466, loss: 0.46540188789367676 2023-01-24 02:09:04.382161: step: 534/466, loss: 0.04797811806201935 2023-01-24 02:09:05.053313: step: 536/466, loss: 0.1219116598367691 2023-01-24 02:09:05.682603: step: 538/466, loss: 0.13186098635196686 2023-01-24 02:09:06.325061: step: 540/466, loss: 0.1312079131603241 2023-01-24 02:09:06.958816: step: 542/466, loss: 0.13683204352855682 2023-01-24 02:09:07.581530: step: 544/466, loss: 0.20569588243961334 2023-01-24 02:09:08.172972: step: 546/466, loss: 3.6133134365081787 2023-01-24 02:09:08.800145: step: 548/466, loss: 0.1964557021856308 2023-01-24 02:09:09.484462: step: 550/466, loss: 0.032030846923589706 2023-01-24 02:09:10.160166: step: 552/466, loss: 0.23671674728393555 2023-01-24 02:09:10.751468: step: 554/466, loss: 0.06673205643892288 2023-01-24 02:09:11.376521: step: 556/466, loss: 0.205203115940094 2023-01-24 02:09:12.008523: step: 558/466, loss: 0.08683093637228012 2023-01-24 02:09:12.712745: step: 560/466, loss: 0.13626570999622345 2023-01-24 02:09:13.326231: step: 562/466, loss: 0.1898723691701889 2023-01-24 02:09:14.006690: step: 564/466, loss: 0.021120157092809677 2023-01-24 02:09:14.593955: step: 566/466, loss: 0.15750077366828918 2023-01-24 02:09:15.189062: step: 568/466, loss: 0.06532016396522522 2023-01-24 02:09:15.793999: step: 570/466, loss: 0.033128079026937485 2023-01-24 02:09:16.343561: step: 572/466, loss: 0.22112499177455902 2023-01-24 02:09:16.923459: step: 574/466, loss: 0.050089944154024124 2023-01-24 02:09:17.535536: step: 576/466, loss: 0.12589536607265472 2023-01-24 02:09:18.110964: step: 578/466, loss: 0.06084910407662392 2023-01-24 02:09:18.697200: step: 580/466, loss: 0.09911134093999863 2023-01-24 02:09:19.293815: step: 582/466, loss: 0.07547442615032196 2023-01-24 02:09:19.906462: step: 584/466, loss: 0.13044041395187378 2023-01-24 02:09:20.464170: step: 586/466, loss: 0.12678459286689758 2023-01-24 02:09:21.028965: step: 588/466, loss: 0.11398551613092422 2023-01-24 02:09:21.607024: step: 590/466, loss: 0.06138450652360916 2023-01-24 02:09:22.144952: step: 592/466, loss: 0.07820218056440353 2023-01-24 02:09:22.746186: step: 594/466, loss: 0.14266575872898102 2023-01-24 02:09:23.377100: step: 596/466, loss: 0.038015060126781464 2023-01-24 02:09:24.014567: step: 598/466, loss: 0.25807908177375793 2023-01-24 02:09:24.689305: step: 600/466, loss: 0.12017034739255905 2023-01-24 02:09:25.358460: step: 602/466, loss: 0.6884686350822449 2023-01-24 02:09:25.970529: step: 604/466, loss: 0.18772900104522705 2023-01-24 02:09:26.557320: step: 606/466, loss: 0.13137471675872803 2023-01-24 02:09:27.163806: step: 608/466, loss: 0.15554356575012207 2023-01-24 02:09:27.764250: step: 610/466, loss: 0.06444638967514038 2023-01-24 02:09:28.353882: step: 612/466, loss: 0.3176107108592987 2023-01-24 02:09:28.947224: step: 614/466, loss: 0.38138628005981445 2023-01-24 02:09:29.599588: step: 616/466, loss: 0.06246405467391014 2023-01-24 02:09:30.301313: step: 618/466, loss: 0.13891153037548065 2023-01-24 02:09:30.928020: step: 620/466, loss: 0.0745665654540062 2023-01-24 02:09:31.539918: step: 622/466, loss: 0.07133463025093079 2023-01-24 02:09:32.115171: step: 624/466, loss: 1.2177377939224243 2023-01-24 02:09:32.705834: step: 626/466, loss: 0.08016962558031082 2023-01-24 02:09:33.341684: step: 628/466, loss: 0.1497739851474762 2023-01-24 02:09:33.950500: step: 630/466, loss: 0.22510574758052826 2023-01-24 02:09:34.544150: step: 632/466, loss: 0.15465585887432098 2023-01-24 02:09:35.103635: step: 634/466, loss: 0.11404050141572952 2023-01-24 02:09:35.692990: step: 636/466, loss: 0.13050960004329681 2023-01-24 02:09:36.301775: step: 638/466, loss: 0.21021117269992828 2023-01-24 02:09:36.877041: step: 640/466, loss: 0.30213674902915955 2023-01-24 02:09:37.538243: step: 642/466, loss: 0.1877560168504715 2023-01-24 02:09:38.115040: step: 644/466, loss: 0.4449322521686554 2023-01-24 02:09:38.775325: step: 646/466, loss: 0.03945323824882507 2023-01-24 02:09:39.340413: step: 648/466, loss: 0.1734963208436966 2023-01-24 02:09:40.094427: step: 650/466, loss: 0.0660005435347557 2023-01-24 02:09:40.711889: step: 652/466, loss: 0.08190672099590302 2023-01-24 02:09:41.355060: step: 654/466, loss: 0.21880212426185608 2023-01-24 02:09:42.013785: step: 656/466, loss: 0.047313012182712555 2023-01-24 02:09:42.610640: step: 658/466, loss: 0.06312886625528336 2023-01-24 02:09:43.294750: step: 660/466, loss: 0.1220080703496933 2023-01-24 02:09:43.983906: step: 662/466, loss: 0.47863081097602844 2023-01-24 02:09:44.665109: step: 664/466, loss: 0.1615520715713501 2023-01-24 02:09:45.304278: step: 666/466, loss: 0.1046697348356247 2023-01-24 02:09:45.955237: step: 668/466, loss: 0.2509707808494568 2023-01-24 02:09:46.562184: step: 670/466, loss: 0.1696101576089859 2023-01-24 02:09:47.170377: step: 672/466, loss: 0.05695287883281708 2023-01-24 02:09:47.796768: step: 674/466, loss: 0.08349854499101639 2023-01-24 02:09:48.448110: step: 676/466, loss: 0.1294446587562561 2023-01-24 02:09:49.157705: step: 678/466, loss: 0.1571851223707199 2023-01-24 02:09:49.768089: step: 680/466, loss: 0.19614636898040771 2023-01-24 02:09:50.388173: step: 682/466, loss: 0.1105930507183075 2023-01-24 02:09:51.020279: step: 684/466, loss: 0.523503303527832 2023-01-24 02:09:51.678397: step: 686/466, loss: 0.05425763875246048 2023-01-24 02:09:52.292205: step: 688/466, loss: 0.07267965376377106 2023-01-24 02:09:52.929626: step: 690/466, loss: 0.40452754497528076 2023-01-24 02:09:53.542916: step: 692/466, loss: 0.11994507163763046 2023-01-24 02:09:54.083276: step: 694/466, loss: 0.3443394899368286 2023-01-24 02:09:54.711734: step: 696/466, loss: 0.2467920035123825 2023-01-24 02:09:55.278850: step: 698/466, loss: 0.13529010117053986 2023-01-24 02:09:55.873810: step: 700/466, loss: 0.10970811545848846 2023-01-24 02:09:56.505019: step: 702/466, loss: 0.5327137112617493 2023-01-24 02:09:57.141720: step: 704/466, loss: 0.5181942582130432 2023-01-24 02:09:57.793603: step: 706/466, loss: 0.2738834619522095 2023-01-24 02:09:58.381948: step: 708/466, loss: 0.08478600531816483 2023-01-24 02:09:58.970856: step: 710/466, loss: 0.05863865092396736 2023-01-24 02:09:59.592297: step: 712/466, loss: 0.05520498380064964 2023-01-24 02:10:00.243546: step: 714/466, loss: 0.19641073048114777 2023-01-24 02:10:00.854808: step: 716/466, loss: 0.8427484035491943 2023-01-24 02:10:01.498009: step: 718/466, loss: 0.09054213762283325 2023-01-24 02:10:02.153907: step: 720/466, loss: 0.11003617942333221 2023-01-24 02:10:02.821672: step: 722/466, loss: 0.09100736677646637 2023-01-24 02:10:03.430788: step: 724/466, loss: 0.10934733599424362 2023-01-24 02:10:04.119671: step: 726/466, loss: 0.22367994487285614 2023-01-24 02:10:04.740603: step: 728/466, loss: 0.08802761882543564 2023-01-24 02:10:05.402854: step: 730/466, loss: 0.7977679967880249 2023-01-24 02:10:06.002005: step: 732/466, loss: 0.13046298921108246 2023-01-24 02:10:06.609379: step: 734/466, loss: 0.10032875835895538 2023-01-24 02:10:07.218833: step: 736/466, loss: 0.06097870692610741 2023-01-24 02:10:07.830008: step: 738/466, loss: 0.08550991863012314 2023-01-24 02:10:08.474810: step: 740/466, loss: 0.10648110508918762 2023-01-24 02:10:09.098979: step: 742/466, loss: 0.046441011130809784 2023-01-24 02:10:09.743376: step: 744/466, loss: 0.20584163069725037 2023-01-24 02:10:10.391067: step: 746/466, loss: 0.42919909954071045 2023-01-24 02:10:10.999723: step: 748/466, loss: 0.09509290009737015 2023-01-24 02:10:11.534931: step: 750/466, loss: 0.0858200341463089 2023-01-24 02:10:12.214429: step: 752/466, loss: 0.29318660497665405 2023-01-24 02:10:12.867819: step: 754/466, loss: 0.18614895641803741 2023-01-24 02:10:13.501320: step: 756/466, loss: 1.4461302757263184 2023-01-24 02:10:14.110104: step: 758/466, loss: 0.11748125404119492 2023-01-24 02:10:14.744210: step: 760/466, loss: 0.19098018109798431 2023-01-24 02:10:15.366599: step: 762/466, loss: 0.15145912766456604 2023-01-24 02:10:15.975586: step: 764/466, loss: 0.05735521391034126 2023-01-24 02:10:16.587446: step: 766/466, loss: 0.186171293258667 2023-01-24 02:10:17.223516: step: 768/466, loss: 0.19050681591033936 2023-01-24 02:10:17.845431: step: 770/466, loss: 0.19891580939292908 2023-01-24 02:10:18.438210: step: 772/466, loss: 8.71715259552002 2023-01-24 02:10:19.118938: step: 774/466, loss: 0.09013688564300537 2023-01-24 02:10:19.707704: step: 776/466, loss: 0.44694456458091736 2023-01-24 02:10:20.394528: step: 778/466, loss: 0.1340249925851822 2023-01-24 02:10:21.013258: step: 780/466, loss: 0.5292209386825562 2023-01-24 02:10:21.645613: step: 782/466, loss: 0.32261040806770325 2023-01-24 02:10:22.302882: step: 784/466, loss: 0.09790646284818649 2023-01-24 02:10:22.881674: step: 786/466, loss: 0.038367241621017456 2023-01-24 02:10:23.535083: step: 788/466, loss: 0.4195810854434967 2023-01-24 02:10:24.153925: step: 790/466, loss: 0.2766675651073456 2023-01-24 02:10:24.821526: step: 792/466, loss: 0.21886131167411804 2023-01-24 02:10:25.396769: step: 794/466, loss: 0.08811365813016891 2023-01-24 02:10:26.007842: step: 796/466, loss: 0.05479082837700844 2023-01-24 02:10:26.621321: step: 798/466, loss: 0.10200653225183487 2023-01-24 02:10:27.235010: step: 800/466, loss: 0.22190265357494354 2023-01-24 02:10:27.851362: step: 802/466, loss: 0.15918050706386566 2023-01-24 02:10:28.448371: step: 804/466, loss: 0.3613383173942566 2023-01-24 02:10:29.080190: step: 806/466, loss: 0.1719532608985901 2023-01-24 02:10:29.727769: step: 808/466, loss: 0.09152864664793015 2023-01-24 02:10:30.329050: step: 810/466, loss: 0.38782280683517456 2023-01-24 02:10:30.941559: step: 812/466, loss: 0.13327616453170776 2023-01-24 02:10:31.518849: step: 814/466, loss: 0.10806596279144287 2023-01-24 02:10:32.144220: step: 816/466, loss: 0.01866796426475048 2023-01-24 02:10:32.831900: step: 818/466, loss: 0.007938460446894169 2023-01-24 02:10:33.481816: step: 820/466, loss: 0.08328758180141449 2023-01-24 02:10:34.056637: step: 822/466, loss: 0.09915492683649063 2023-01-24 02:10:34.696836: step: 824/466, loss: 0.19329403340816498 2023-01-24 02:10:35.308689: step: 826/466, loss: 0.21803811192512512 2023-01-24 02:10:35.944845: step: 828/466, loss: 0.08334982395172119 2023-01-24 02:10:36.553053: step: 830/466, loss: 1.3822726011276245 2023-01-24 02:10:37.153394: step: 832/466, loss: 0.08953722566366196 2023-01-24 02:10:37.758400: step: 834/466, loss: 0.048605214804410934 2023-01-24 02:10:38.433949: step: 836/466, loss: 0.4795377254486084 2023-01-24 02:10:39.033585: step: 838/466, loss: 0.07054489105939865 2023-01-24 02:10:39.663910: step: 840/466, loss: 0.20257791876792908 2023-01-24 02:10:40.217985: step: 842/466, loss: 0.1157168447971344 2023-01-24 02:10:40.826647: step: 844/466, loss: 0.0845470279455185 2023-01-24 02:10:41.501926: step: 846/466, loss: 0.10139591246843338 2023-01-24 02:10:42.228697: step: 848/466, loss: 0.2176181674003601 2023-01-24 02:10:42.861444: step: 850/466, loss: 0.067507803440094 2023-01-24 02:10:43.490908: step: 852/466, loss: 0.12499594688415527 2023-01-24 02:10:44.153043: step: 854/466, loss: 0.060795966535806656 2023-01-24 02:10:44.737838: step: 856/466, loss: 0.1424483060836792 2023-01-24 02:10:45.355082: step: 858/466, loss: 0.21677431464195251 2023-01-24 02:10:46.041684: step: 860/466, loss: 1.9143980741500854 2023-01-24 02:10:46.609485: step: 862/466, loss: 0.16681039333343506 2023-01-24 02:10:47.288549: step: 864/466, loss: 0.1073463186621666 2023-01-24 02:10:47.858224: step: 866/466, loss: 0.04946222901344299 2023-01-24 02:10:48.509853: step: 868/466, loss: 0.4168989956378937 2023-01-24 02:10:49.171971: step: 870/466, loss: 0.21804341673851013 2023-01-24 02:10:49.863046: step: 872/466, loss: 0.27743685245513916 2023-01-24 02:10:50.453179: step: 874/466, loss: 0.10394605249166489 2023-01-24 02:10:51.118773: step: 876/466, loss: 0.0699191465973854 2023-01-24 02:10:51.699316: step: 878/466, loss: 0.16605454683303833 2023-01-24 02:10:52.291711: step: 880/466, loss: 0.3739076554775238 2023-01-24 02:10:52.891228: step: 882/466, loss: 0.31669163703918457 2023-01-24 02:10:53.498893: step: 884/466, loss: 0.12097109854221344 2023-01-24 02:10:54.088395: step: 886/466, loss: 0.5599779486656189 2023-01-24 02:10:54.759282: step: 888/466, loss: 0.1497298777103424 2023-01-24 02:10:55.413888: step: 890/466, loss: 0.07432295382022858 2023-01-24 02:10:56.128476: step: 892/466, loss: 0.10274899750947952 2023-01-24 02:10:56.805340: step: 894/466, loss: 0.384609192609787 2023-01-24 02:10:57.467057: step: 896/466, loss: 0.4249957203865051 2023-01-24 02:10:58.124220: step: 898/466, loss: 0.04452158883213997 2023-01-24 02:10:58.814688: step: 900/466, loss: 0.015567224472761154 2023-01-24 02:10:59.519160: step: 902/466, loss: 0.19822470843791962 2023-01-24 02:11:00.182240: step: 904/466, loss: 0.4278334379196167 2023-01-24 02:11:00.826109: step: 906/466, loss: 0.11612143367528915 2023-01-24 02:11:01.448649: step: 908/466, loss: 0.2971527874469757 2023-01-24 02:11:02.061267: step: 910/466, loss: 0.16825099289417267 2023-01-24 02:11:02.635854: step: 912/466, loss: 0.05000316724181175 2023-01-24 02:11:03.362065: step: 914/466, loss: 0.18294182419776917 2023-01-24 02:11:04.033662: step: 916/466, loss: 0.3873891234397888 2023-01-24 02:11:04.615147: step: 918/466, loss: 0.11834461241960526 2023-01-24 02:11:05.262207: step: 920/466, loss: 0.18775823712348938 2023-01-24 02:11:05.961812: step: 922/466, loss: 0.19691264629364014 2023-01-24 02:11:06.535672: step: 924/466, loss: 0.1103270873427391 2023-01-24 02:11:07.317541: step: 926/466, loss: 0.1784142553806305 2023-01-24 02:11:07.937162: step: 928/466, loss: 0.3308243155479431 2023-01-24 02:11:08.572283: step: 930/466, loss: 0.38338178396224976 2023-01-24 02:11:09.193250: step: 932/466, loss: 0.12993201613426208 ================================================== Loss: 0.232 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3567945567945568, 'r': 0.3256511988959807, 'f1': 0.34051226551226554}, 'combined': 0.25090377458798513, 'epoch': 14} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3343288142128463, 'r': 0.285100632896953, 'f1': 0.30775855740314667}, 'combined': 0.19270862940197037, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34855844155844157, 'r': 0.34723563912368466, 'f1': 0.34789578292430007}, 'combined': 0.25634426110211583, 'epoch': 14} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3484016174236714, 'r': 0.29997756590449687, 'f1': 0.3223813220389743}, 'combined': 0.19978560802415307, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.314963167587477, 'r': 0.3245256166982922, 'f1': 0.3196728971962617}, 'combined': 0.23554845056566648, 'epoch': 14} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3476073169504833, 'r': 0.2922918668751804, 'f1': 0.31755874731232764}, 'combined': 0.21065778287055398, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.37037037037037035, 'r': 0.2857142857142857, 'f1': 0.3225806451612903}, 'combined': 0.2150537634408602, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3854166666666667, 'r': 0.40217391304347827, 'f1': 0.39361702127659576}, 'combined': 0.19680851063829788, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2631578947368421, 'r': 0.1724137931034483, 'f1': 0.20833333333333334}, 'combined': 0.1388888888888889, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3101533457249071, 'r': 0.3166271347248577, 'f1': 0.31335680751173717}, 'combined': 0.23089448974549054, 'epoch': 12} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.34558112654998463, 'r': 0.27065838908657336, 'f1': 0.30356518411229655}, 'combined': 0.20137492411409771, 'epoch': 12} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.19827586206896552, 'f1': 0.2555555555555556}, 'combined': 0.1703703703703704, 'epoch': 12} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:13:42.765526: step: 2/466, loss: 0.07601815462112427 2023-01-24 02:13:43.406467: step: 4/466, loss: 0.060319118201732635 2023-01-24 02:13:44.026726: step: 6/466, loss: 0.14708635210990906 2023-01-24 02:13:44.587313: step: 8/466, loss: 0.0160928163677454 2023-01-24 02:13:45.251155: step: 10/466, loss: 0.11274747550487518 2023-01-24 02:13:45.910956: step: 12/466, loss: 0.03984593600034714 2023-01-24 02:13:46.526231: step: 14/466, loss: 2.9654884338378906 2023-01-24 02:13:47.094275: step: 16/466, loss: 0.0605609156191349 2023-01-24 02:13:47.685159: step: 18/466, loss: 0.12641334533691406 2023-01-24 02:13:48.278484: step: 20/466, loss: 0.06787180155515671 2023-01-24 02:13:48.864228: step: 22/466, loss: 0.1013396605849266 2023-01-24 02:13:49.525296: step: 24/466, loss: 0.06734860688447952 2023-01-24 02:13:50.156737: step: 26/466, loss: 0.043701812624931335 2023-01-24 02:13:50.797836: step: 28/466, loss: 0.0785311907529831 2023-01-24 02:13:51.405366: step: 30/466, loss: 0.12360072135925293 2023-01-24 02:13:52.049830: step: 32/466, loss: 0.21179643273353577 2023-01-24 02:13:52.663207: step: 34/466, loss: 0.46735018491744995 2023-01-24 02:13:53.257501: step: 36/466, loss: 0.0781048834323883 2023-01-24 02:13:53.825324: step: 38/466, loss: 0.3329731523990631 2023-01-24 02:13:54.458474: step: 40/466, loss: 0.23325176537036896 2023-01-24 02:13:55.081517: step: 42/466, loss: 0.055112116038799286 2023-01-24 02:13:55.691363: step: 44/466, loss: 0.09325244277715683 2023-01-24 02:13:56.307320: step: 46/466, loss: 6.907464981079102 2023-01-24 02:13:57.003812: step: 48/466, loss: 0.1494046449661255 2023-01-24 02:13:57.595462: step: 50/466, loss: 0.1370428204536438 2023-01-24 02:13:58.236812: step: 52/466, loss: 0.09927735477685928 2023-01-24 02:13:58.982114: step: 54/466, loss: 0.5274226069450378 2023-01-24 02:13:59.636194: step: 56/466, loss: 0.11505471915006638 2023-01-24 02:14:00.268709: step: 58/466, loss: 0.13182595372200012 2023-01-24 02:14:00.916066: step: 60/466, loss: 0.18545888364315033 2023-01-24 02:14:01.571764: step: 62/466, loss: 0.36592501401901245 2023-01-24 02:14:02.180410: step: 64/466, loss: 0.122297003865242 2023-01-24 02:14:02.872456: step: 66/466, loss: 0.06750189512968063 2023-01-24 02:14:03.572340: step: 68/466, loss: 0.2126324325799942 2023-01-24 02:14:04.231140: step: 70/466, loss: 0.03938804194331169 2023-01-24 02:14:04.833352: step: 72/466, loss: 0.12230225652456284 2023-01-24 02:14:05.520691: step: 74/466, loss: 0.0553334578871727 2023-01-24 02:14:06.181972: step: 76/466, loss: 0.5174123048782349 2023-01-24 02:14:06.791780: step: 78/466, loss: 0.10611777007579803 2023-01-24 02:14:07.592815: step: 80/466, loss: 0.03924800083041191 2023-01-24 02:14:08.243460: step: 82/466, loss: 0.10162966698408127 2023-01-24 02:14:08.837497: step: 84/466, loss: 0.060497235506772995 2023-01-24 02:14:09.447281: step: 86/466, loss: 0.20899879932403564 2023-01-24 02:14:10.078507: step: 88/466, loss: 0.08463367074728012 2023-01-24 02:14:10.684125: step: 90/466, loss: 0.05687631666660309 2023-01-24 02:14:11.288097: step: 92/466, loss: 0.06662612408399582 2023-01-24 02:14:11.992913: step: 94/466, loss: 0.02623327076435089 2023-01-24 02:14:12.659088: step: 96/466, loss: 0.1103297770023346 2023-01-24 02:14:13.427932: step: 98/466, loss: 0.17696203291416168 2023-01-24 02:14:14.070586: step: 100/466, loss: 0.1444133222103119 2023-01-24 02:14:14.738512: step: 102/466, loss: 0.28258079290390015 2023-01-24 02:14:15.396768: step: 104/466, loss: 0.16617068648338318 2023-01-24 02:14:15.979039: step: 106/466, loss: 0.03628968447446823 2023-01-24 02:14:16.589786: step: 108/466, loss: 0.030992701649665833 2023-01-24 02:14:17.234588: step: 110/466, loss: 0.15708261728286743 2023-01-24 02:14:17.845018: step: 112/466, loss: 0.4777582585811615 2023-01-24 02:14:18.431081: step: 114/466, loss: 0.05228717252612114 2023-01-24 02:14:19.250876: step: 116/466, loss: 0.09536279737949371 2023-01-24 02:14:19.887896: step: 118/466, loss: 0.13929015398025513 2023-01-24 02:14:20.474784: step: 120/466, loss: 0.08980412036180496 2023-01-24 02:14:21.164183: step: 122/466, loss: 0.09776115417480469 2023-01-24 02:14:21.760685: step: 124/466, loss: 0.27029654383659363 2023-01-24 02:14:22.415431: step: 126/466, loss: 0.10421734303236008 2023-01-24 02:14:23.070249: step: 128/466, loss: 0.06566141545772552 2023-01-24 02:14:23.740777: step: 130/466, loss: 0.1898757666349411 2023-01-24 02:14:24.356526: step: 132/466, loss: 0.052807580679655075 2023-01-24 02:14:25.651998: step: 134/466, loss: 0.07725521177053452 2023-01-24 02:14:26.243022: step: 136/466, loss: 0.04953015223145485 2023-01-24 02:14:26.940580: step: 138/466, loss: 0.12636981904506683 2023-01-24 02:14:27.599342: step: 140/466, loss: 0.3295181095600128 2023-01-24 02:14:28.288297: step: 142/466, loss: 0.07382170855998993 2023-01-24 02:14:28.941980: step: 144/466, loss: 0.11951906234025955 2023-01-24 02:14:29.555867: step: 146/466, loss: 0.06142144650220871 2023-01-24 02:14:30.173436: step: 148/466, loss: 0.02745065465569496 2023-01-24 02:14:30.804602: step: 150/466, loss: 0.4685656428337097 2023-01-24 02:14:31.433810: step: 152/466, loss: 0.06786028295755386 2023-01-24 02:14:32.107242: step: 154/466, loss: 0.18896110355854034 2023-01-24 02:14:32.782098: step: 156/466, loss: 0.06052001938223839 2023-01-24 02:14:33.399023: step: 158/466, loss: 0.08299747109413147 2023-01-24 02:14:34.011195: step: 160/466, loss: 0.029955944046378136 2023-01-24 02:14:34.605106: step: 162/466, loss: 0.11253871768712997 2023-01-24 02:14:35.256575: step: 164/466, loss: 0.0854821652173996 2023-01-24 02:14:35.867897: step: 166/466, loss: 0.04195922985672951 2023-01-24 02:14:36.495101: step: 168/466, loss: 0.1804516315460205 2023-01-24 02:14:37.108826: step: 170/466, loss: 0.04488290473818779 2023-01-24 02:14:37.687520: step: 172/466, loss: 0.11197320371866226 2023-01-24 02:14:38.333350: step: 174/466, loss: 0.16332344710826874 2023-01-24 02:14:38.923332: step: 176/466, loss: 0.061043087393045425 2023-01-24 02:14:39.486136: step: 178/466, loss: 0.09745758026838303 2023-01-24 02:14:40.092793: step: 180/466, loss: 0.08789185434579849 2023-01-24 02:14:40.735030: step: 182/466, loss: 0.03371937945485115 2023-01-24 02:14:41.363597: step: 184/466, loss: 0.03100101836025715 2023-01-24 02:14:41.986786: step: 186/466, loss: 0.06895512342453003 2023-01-24 02:14:42.577122: step: 188/466, loss: 0.0844266265630722 2023-01-24 02:14:43.158503: step: 190/466, loss: 0.10798672586679459 2023-01-24 02:14:43.836505: step: 192/466, loss: 0.10067951679229736 2023-01-24 02:14:44.375223: step: 194/466, loss: 0.1494479924440384 2023-01-24 02:14:44.989783: step: 196/466, loss: 0.0792386457324028 2023-01-24 02:14:45.601646: step: 198/466, loss: 0.04849759489297867 2023-01-24 02:14:46.165181: step: 200/466, loss: 0.42589089274406433 2023-01-24 02:14:46.777294: step: 202/466, loss: 0.16432368755340576 2023-01-24 02:14:47.424375: step: 204/466, loss: 0.3137759566307068 2023-01-24 02:14:48.022609: step: 206/466, loss: 0.8784408569335938 2023-01-24 02:14:48.673493: step: 208/466, loss: 0.07093644142150879 2023-01-24 02:14:49.384617: step: 210/466, loss: 0.20242880284786224 2023-01-24 02:14:50.030046: step: 212/466, loss: 0.27905434370040894 2023-01-24 02:14:50.636715: step: 214/466, loss: 0.0937686562538147 2023-01-24 02:14:51.278918: step: 216/466, loss: 0.2798430323600769 2023-01-24 02:14:51.864314: step: 218/466, loss: 0.18162015080451965 2023-01-24 02:14:52.478294: step: 220/466, loss: 0.07361326366662979 2023-01-24 02:14:53.140466: step: 222/466, loss: 0.09918847680091858 2023-01-24 02:14:53.745749: step: 224/466, loss: 0.07619430869817734 2023-01-24 02:14:54.383299: step: 226/466, loss: 0.07150990515947342 2023-01-24 02:14:54.920631: step: 228/466, loss: 0.04885377362370491 2023-01-24 02:14:55.554096: step: 230/466, loss: 0.053341105580329895 2023-01-24 02:14:56.167946: step: 232/466, loss: 0.08202479779720306 2023-01-24 02:14:56.876438: step: 234/466, loss: 0.11043872684240341 2023-01-24 02:14:57.497940: step: 236/466, loss: 0.36070725321769714 2023-01-24 02:14:58.127723: step: 238/466, loss: 0.20646990835666656 2023-01-24 02:14:58.764443: step: 240/466, loss: 0.08996447920799255 2023-01-24 02:14:59.462865: step: 242/466, loss: 0.17766429483890533 2023-01-24 02:15:00.165840: step: 244/466, loss: 0.05220003053545952 2023-01-24 02:15:00.804900: step: 246/466, loss: 0.15690936148166656 2023-01-24 02:15:01.425456: step: 248/466, loss: 0.11198737472295761 2023-01-24 02:15:02.072784: step: 250/466, loss: 0.03883950784802437 2023-01-24 02:15:02.709674: step: 252/466, loss: 0.05150290206074715 2023-01-24 02:15:03.308496: step: 254/466, loss: 0.056819669902324677 2023-01-24 02:15:03.900361: step: 256/466, loss: 0.11226635426282883 2023-01-24 02:15:04.531057: step: 258/466, loss: 0.18033404648303986 2023-01-24 02:15:05.165832: step: 260/466, loss: 0.2898689806461334 2023-01-24 02:15:05.811708: step: 262/466, loss: 0.03009466640651226 2023-01-24 02:15:06.414214: step: 264/466, loss: 0.1417255401611328 2023-01-24 02:15:06.988542: step: 266/466, loss: 0.22924448549747467 2023-01-24 02:15:07.566134: step: 268/466, loss: 0.09219784289598465 2023-01-24 02:15:08.131317: step: 270/466, loss: 0.2014540135860443 2023-01-24 02:15:08.809748: step: 272/466, loss: 0.08747085928916931 2023-01-24 02:15:09.391748: step: 274/466, loss: 0.03199044615030289 2023-01-24 02:15:10.005970: step: 276/466, loss: 0.1765107810497284 2023-01-24 02:15:10.613913: step: 278/466, loss: 0.04221271350979805 2023-01-24 02:15:11.235803: step: 280/466, loss: 0.04762676730751991 2023-01-24 02:15:11.826229: step: 282/466, loss: 0.18033361434936523 2023-01-24 02:15:12.418581: step: 284/466, loss: 0.2735659182071686 2023-01-24 02:15:13.037849: step: 286/466, loss: 0.041960328817367554 2023-01-24 02:15:13.734473: step: 288/466, loss: 0.7181612849235535 2023-01-24 02:15:14.347393: step: 290/466, loss: 0.07804315537214279 2023-01-24 02:15:15.001265: step: 292/466, loss: 0.022585544735193253 2023-01-24 02:15:15.628245: step: 294/466, loss: 0.04021203890442848 2023-01-24 02:15:16.254548: step: 296/466, loss: 0.13588309288024902 2023-01-24 02:15:16.920685: step: 298/466, loss: 0.08786024898290634 2023-01-24 02:15:17.485152: step: 300/466, loss: 0.10801000148057938 2023-01-24 02:15:18.083720: step: 302/466, loss: 0.558252215385437 2023-01-24 02:15:18.696672: step: 304/466, loss: 0.05378791317343712 2023-01-24 02:15:19.301290: step: 306/466, loss: 0.08215916901826859 2023-01-24 02:15:19.888960: step: 308/466, loss: 0.10823218524456024 2023-01-24 02:15:20.554670: step: 310/466, loss: 0.1119224950671196 2023-01-24 02:15:21.188121: step: 312/466, loss: 0.0791698768734932 2023-01-24 02:15:21.763206: step: 314/466, loss: 0.09614011645317078 2023-01-24 02:15:22.452577: step: 316/466, loss: 0.18332503736019135 2023-01-24 02:15:23.132599: step: 318/466, loss: 0.9485373497009277 2023-01-24 02:15:23.712456: step: 320/466, loss: 0.08354584872722626 2023-01-24 02:15:24.297910: step: 322/466, loss: 0.1311836689710617 2023-01-24 02:15:24.867160: step: 324/466, loss: 0.061109501868486404 2023-01-24 02:15:25.460564: step: 326/466, loss: 0.09852179139852524 2023-01-24 02:15:26.068358: step: 328/466, loss: 0.05198030546307564 2023-01-24 02:15:26.707054: step: 330/466, loss: 0.34303319454193115 2023-01-24 02:15:27.268008: step: 332/466, loss: 0.03909998759627342 2023-01-24 02:15:27.848465: step: 334/466, loss: 0.1568313091993332 2023-01-24 02:15:28.470509: step: 336/466, loss: 0.2192315310239792 2023-01-24 02:15:29.122177: step: 338/466, loss: 0.05793094262480736 2023-01-24 02:15:29.819156: step: 340/466, loss: 0.08549658209085464 2023-01-24 02:15:30.437288: step: 342/466, loss: 0.22737926244735718 2023-01-24 02:15:31.043652: step: 344/466, loss: 0.04497341439127922 2023-01-24 02:15:31.632371: step: 346/466, loss: 0.08036022633314133 2023-01-24 02:15:32.224192: step: 348/466, loss: 0.10172585397958755 2023-01-24 02:15:32.779637: step: 350/466, loss: 0.20139440894126892 2023-01-24 02:15:33.409182: step: 352/466, loss: 0.004828613717108965 2023-01-24 02:15:34.006210: step: 354/466, loss: 0.02619241178035736 2023-01-24 02:15:34.566787: step: 356/466, loss: 0.10344935208559036 2023-01-24 02:15:35.163562: step: 358/466, loss: 0.004835547413676977 2023-01-24 02:15:35.768833: step: 360/466, loss: 0.019727077335119247 2023-01-24 02:15:36.401803: step: 362/466, loss: 0.030233588069677353 2023-01-24 02:15:37.034542: step: 364/466, loss: 0.1266247034072876 2023-01-24 02:15:37.672310: step: 366/466, loss: 0.10013896226882935 2023-01-24 02:15:38.268775: step: 368/466, loss: 0.04225357621908188 2023-01-24 02:15:38.864097: step: 370/466, loss: 0.07900624722242355 2023-01-24 02:15:39.463712: step: 372/466, loss: 0.046091485768556595 2023-01-24 02:15:40.166956: step: 374/466, loss: 0.0288006030023098 2023-01-24 02:15:40.735100: step: 376/466, loss: 0.039047833532094955 2023-01-24 02:15:41.299503: step: 378/466, loss: 0.029453450813889503 2023-01-24 02:15:41.913335: step: 380/466, loss: 0.07940339297056198 2023-01-24 02:15:42.609422: step: 382/466, loss: 0.07525602728128433 2023-01-24 02:15:43.210254: step: 384/466, loss: 0.07274336367845535 2023-01-24 02:15:43.797991: step: 386/466, loss: 0.04453727975487709 2023-01-24 02:15:44.379149: step: 388/466, loss: 0.1365232914686203 2023-01-24 02:15:45.032418: step: 390/466, loss: 0.2327868491411209 2023-01-24 02:15:45.619227: step: 392/466, loss: 0.02457980439066887 2023-01-24 02:15:46.217145: step: 394/466, loss: 0.04232895001769066 2023-01-24 02:15:46.829735: step: 396/466, loss: 0.08623890578746796 2023-01-24 02:15:47.467569: step: 398/466, loss: 0.09994079917669296 2023-01-24 02:15:48.148711: step: 400/466, loss: 1.7402442693710327 2023-01-24 02:15:48.822228: step: 402/466, loss: 0.7849536538124084 2023-01-24 02:15:49.443535: step: 404/466, loss: 0.07542157918214798 2023-01-24 02:15:50.046545: step: 406/466, loss: 0.5290749669075012 2023-01-24 02:15:50.617717: step: 408/466, loss: 0.06260983645915985 2023-01-24 02:15:51.289623: step: 410/466, loss: 0.11872803419828415 2023-01-24 02:15:51.853543: step: 412/466, loss: 0.09024270623922348 2023-01-24 02:15:52.493796: step: 414/466, loss: 0.07944059371948242 2023-01-24 02:15:53.089719: step: 416/466, loss: 0.08568576723337173 2023-01-24 02:15:53.753739: step: 418/466, loss: 0.11215931922197342 2023-01-24 02:15:54.324767: step: 420/466, loss: 0.08207669854164124 2023-01-24 02:15:54.861054: step: 422/466, loss: 0.5597177743911743 2023-01-24 02:15:55.477400: step: 424/466, loss: 0.1940152645111084 2023-01-24 02:15:56.116821: step: 426/466, loss: 0.13238102197647095 2023-01-24 02:15:56.684086: step: 428/466, loss: 0.05639738216996193 2023-01-24 02:15:57.409539: step: 430/466, loss: 0.12276247888803482 2023-01-24 02:15:58.063042: step: 432/466, loss: 0.3074175715446472 2023-01-24 02:15:58.619971: step: 434/466, loss: 0.7562771439552307 2023-01-24 02:15:59.262848: step: 436/466, loss: 0.04212067648768425 2023-01-24 02:15:59.862930: step: 438/466, loss: 0.3160960376262665 2023-01-24 02:16:00.512860: step: 440/466, loss: 0.04419974610209465 2023-01-24 02:16:01.186490: step: 442/466, loss: 0.09615902602672577 2023-01-24 02:16:01.781428: step: 444/466, loss: 0.11447758972644806 2023-01-24 02:16:02.408832: step: 446/466, loss: 0.03149205818772316 2023-01-24 02:16:02.978664: step: 448/466, loss: 0.1052766740322113 2023-01-24 02:16:03.576744: step: 450/466, loss: 4.804030895233154 2023-01-24 02:16:04.172238: step: 452/466, loss: 0.13028061389923096 2023-01-24 02:16:04.875468: step: 454/466, loss: 0.1731548011302948 2023-01-24 02:16:05.498983: step: 456/466, loss: 0.10007569938898087 2023-01-24 02:16:06.203054: step: 458/466, loss: 0.0489235520362854 2023-01-24 02:16:06.857919: step: 460/466, loss: 0.0729367583990097 2023-01-24 02:16:07.503223: step: 462/466, loss: 0.06604597717523575 2023-01-24 02:16:08.036776: step: 464/466, loss: 0.09258797019720078 2023-01-24 02:16:08.623191: step: 466/466, loss: 0.1299390345811844 2023-01-24 02:16:09.261336: step: 468/466, loss: 0.44548994302749634 2023-01-24 02:16:09.890383: step: 470/466, loss: 0.047990284860134125 2023-01-24 02:16:10.525603: step: 472/466, loss: 0.07879111170768738 2023-01-24 02:16:11.236909: step: 474/466, loss: 0.8586330413818359 2023-01-24 02:16:11.913764: step: 476/466, loss: 0.09463603794574738 2023-01-24 02:16:12.531205: step: 478/466, loss: 0.07230973988771439 2023-01-24 02:16:13.129905: step: 480/466, loss: 0.07197661697864532 2023-01-24 02:16:13.863132: step: 482/466, loss: 0.21483659744262695 2023-01-24 02:16:14.508190: step: 484/466, loss: 0.194536954164505 2023-01-24 02:16:15.168561: step: 486/466, loss: 0.5943006277084351 2023-01-24 02:16:15.807855: step: 488/466, loss: 0.14670486748218536 2023-01-24 02:16:16.463949: step: 490/466, loss: 0.0978560596704483 2023-01-24 02:16:17.080439: step: 492/466, loss: 0.046716898679733276 2023-01-24 02:16:17.650164: step: 494/466, loss: 0.07400691509246826 2023-01-24 02:16:18.298030: step: 496/466, loss: 0.37783244252204895 2023-01-24 02:16:19.008916: step: 498/466, loss: 0.09189492464065552 2023-01-24 02:16:19.576468: step: 500/466, loss: 0.07810716331005096 2023-01-24 02:16:20.125720: step: 502/466, loss: 1.526832938194275 2023-01-24 02:16:20.793164: step: 504/466, loss: 0.07685349136590958 2023-01-24 02:16:21.439722: step: 506/466, loss: 0.16347895562648773 2023-01-24 02:16:22.027195: step: 508/466, loss: 0.08108357340097427 2023-01-24 02:16:22.631498: step: 510/466, loss: 0.23870421946048737 2023-01-24 02:16:23.309425: step: 512/466, loss: 0.7059723138809204 2023-01-24 02:16:23.970071: step: 514/466, loss: 0.11288327723741531 2023-01-24 02:16:24.627019: step: 516/466, loss: 0.10943610966205597 2023-01-24 02:16:25.247528: step: 518/466, loss: 0.11879923939704895 2023-01-24 02:16:25.901737: step: 520/466, loss: 0.7888107895851135 2023-01-24 02:16:26.514066: step: 522/466, loss: 0.12313000112771988 2023-01-24 02:16:27.096650: step: 524/466, loss: 0.13041269779205322 2023-01-24 02:16:27.740131: step: 526/466, loss: 0.25587916374206543 2023-01-24 02:16:28.352073: step: 528/466, loss: 0.15828891098499298 2023-01-24 02:16:28.956548: step: 530/466, loss: 0.048195432871580124 2023-01-24 02:16:29.609718: step: 532/466, loss: 0.07142874598503113 2023-01-24 02:16:30.206135: step: 534/466, loss: 0.38426005840301514 2023-01-24 02:16:30.843948: step: 536/466, loss: 1.4751911163330078 2023-01-24 02:16:31.425500: step: 538/466, loss: 0.1260657161474228 2023-01-24 02:16:32.058579: step: 540/466, loss: 0.058579664677381516 2023-01-24 02:16:32.632443: step: 542/466, loss: 0.09295614063739777 2023-01-24 02:16:33.258274: step: 544/466, loss: 0.08101192861795425 2023-01-24 02:16:33.829789: step: 546/466, loss: 0.07070556282997131 2023-01-24 02:16:34.398294: step: 548/466, loss: 0.13065387308597565 2023-01-24 02:16:34.976557: step: 550/466, loss: 0.04952845722436905 2023-01-24 02:16:35.701344: step: 552/466, loss: 0.40858757495880127 2023-01-24 02:16:36.285776: step: 554/466, loss: 0.1270357072353363 2023-01-24 02:16:36.979895: step: 556/466, loss: 0.09554903954267502 2023-01-24 02:16:37.545732: step: 558/466, loss: 0.14123162627220154 2023-01-24 02:16:38.227097: step: 560/466, loss: 0.16500283777713776 2023-01-24 02:16:38.898062: step: 562/466, loss: 0.1087973341345787 2023-01-24 02:16:39.486764: step: 564/466, loss: 2.558933734893799 2023-01-24 02:16:40.135744: step: 566/466, loss: 0.06741175800561905 2023-01-24 02:16:40.754010: step: 568/466, loss: 0.31716012954711914 2023-01-24 02:16:41.355959: step: 570/466, loss: 0.21854546666145325 2023-01-24 02:16:41.978330: step: 572/466, loss: 0.07974494993686676 2023-01-24 02:16:42.610141: step: 574/466, loss: 0.20376497507095337 2023-01-24 02:16:43.211462: step: 576/466, loss: 0.1335975080728531 2023-01-24 02:16:43.836167: step: 578/466, loss: 0.6455204486846924 2023-01-24 02:16:44.425848: step: 580/466, loss: 0.1294020712375641 2023-01-24 02:16:45.056244: step: 582/466, loss: 0.059759803116321564 2023-01-24 02:16:45.680404: step: 584/466, loss: 0.09268747270107269 2023-01-24 02:16:46.352720: step: 586/466, loss: 0.0681382343173027 2023-01-24 02:16:46.980976: step: 588/466, loss: 0.37580254673957825 2023-01-24 02:16:47.626066: step: 590/466, loss: 0.04175303503870964 2023-01-24 02:16:48.250449: step: 592/466, loss: 0.06763419508934021 2023-01-24 02:16:48.843320: step: 594/466, loss: 0.9200158715248108 2023-01-24 02:16:49.442804: step: 596/466, loss: 0.5125054717063904 2023-01-24 02:16:50.026698: step: 598/466, loss: 0.05300223082304001 2023-01-24 02:16:50.677382: step: 600/466, loss: 0.15013554692268372 2023-01-24 02:16:51.327556: step: 602/466, loss: 0.0917985662817955 2023-01-24 02:16:51.967932: step: 604/466, loss: 0.05620555952191353 2023-01-24 02:16:52.658609: step: 606/466, loss: 0.0702977180480957 2023-01-24 02:16:53.280116: step: 608/466, loss: 0.10241170972585678 2023-01-24 02:16:53.884899: step: 610/466, loss: 0.1015259325504303 2023-01-24 02:16:54.534364: step: 612/466, loss: 0.2273823469877243 2023-01-24 02:16:55.136644: step: 614/466, loss: 0.07430493086576462 2023-01-24 02:16:55.802679: step: 616/466, loss: 0.05069427192211151 2023-01-24 02:16:56.448519: step: 618/466, loss: 0.48775190114974976 2023-01-24 02:16:57.073003: step: 620/466, loss: 0.11173519492149353 2023-01-24 02:16:57.779630: step: 622/466, loss: 0.09555468708276749 2023-01-24 02:16:58.365897: step: 624/466, loss: 0.07484864443540573 2023-01-24 02:16:58.995270: step: 626/466, loss: 0.31091174483299255 2023-01-24 02:16:59.689807: step: 628/466, loss: 0.10514596104621887 2023-01-24 02:17:00.280744: step: 630/466, loss: 0.08873254805803299 2023-01-24 02:17:00.871550: step: 632/466, loss: 0.10823825001716614 2023-01-24 02:17:01.486955: step: 634/466, loss: 0.128699392080307 2023-01-24 02:17:02.084640: step: 636/466, loss: 0.18408691883087158 2023-01-24 02:17:02.644090: step: 638/466, loss: 0.38384726643562317 2023-01-24 02:17:03.314618: step: 640/466, loss: 0.13084720075130463 2023-01-24 02:17:03.942006: step: 642/466, loss: 0.2408980429172516 2023-01-24 02:17:04.527007: step: 644/466, loss: 0.12450685352087021 2023-01-24 02:17:05.142501: step: 646/466, loss: 0.053344011306762695 2023-01-24 02:17:05.744215: step: 648/466, loss: 0.15100985765457153 2023-01-24 02:17:06.431026: step: 650/466, loss: 0.5366571545600891 2023-01-24 02:17:07.146177: step: 652/466, loss: 0.23490466177463531 2023-01-24 02:17:07.797947: step: 654/466, loss: 0.15787655115127563 2023-01-24 02:17:08.386512: step: 656/466, loss: 0.10110142827033997 2023-01-24 02:17:09.059479: step: 658/466, loss: 0.11273965984582901 2023-01-24 02:17:09.711489: step: 660/466, loss: 0.09545809775590897 2023-01-24 02:17:10.344868: step: 662/466, loss: 0.06254058331251144 2023-01-24 02:17:10.996080: step: 664/466, loss: 0.13021764159202576 2023-01-24 02:17:11.600859: step: 666/466, loss: 0.06415732204914093 2023-01-24 02:17:12.186759: step: 668/466, loss: 0.04331839457154274 2023-01-24 02:17:12.809706: step: 670/466, loss: 0.03789485618472099 2023-01-24 02:17:13.469180: step: 672/466, loss: 0.3357715308666229 2023-01-24 02:17:14.068234: step: 674/466, loss: 0.10420241206884384 2023-01-24 02:17:14.711885: step: 676/466, loss: 0.9282556772232056 2023-01-24 02:17:15.263454: step: 678/466, loss: 0.3590840697288513 2023-01-24 02:17:15.834065: step: 680/466, loss: 0.11960817128419876 2023-01-24 02:17:16.427315: step: 682/466, loss: 0.06062834709882736 2023-01-24 02:17:17.058602: step: 684/466, loss: 0.1513691395521164 2023-01-24 02:17:17.647544: step: 686/466, loss: 0.07220091670751572 2023-01-24 02:17:18.246753: step: 688/466, loss: 0.31088271737098694 2023-01-24 02:17:18.882609: step: 690/466, loss: 0.1168995201587677 2023-01-24 02:17:19.423093: step: 692/466, loss: 0.1224537119269371 2023-01-24 02:17:20.047817: step: 694/466, loss: 0.12207366526126862 2023-01-24 02:17:20.694657: step: 696/466, loss: 0.09792041778564453 2023-01-24 02:17:21.337394: step: 698/466, loss: 0.056785568594932556 2023-01-24 02:17:21.942296: step: 700/466, loss: 0.09063652902841568 2023-01-24 02:17:22.526974: step: 702/466, loss: 0.1667562574148178 2023-01-24 02:17:23.145984: step: 704/466, loss: 0.1611327975988388 2023-01-24 02:17:23.759792: step: 706/466, loss: 0.06084573268890381 2023-01-24 02:17:24.384795: step: 708/466, loss: 0.13086815178394318 2023-01-24 02:17:24.949072: step: 710/466, loss: 0.0684676244854927 2023-01-24 02:17:25.522293: step: 712/466, loss: 0.06935244798660278 2023-01-24 02:17:26.160862: step: 714/466, loss: 0.1311349719762802 2023-01-24 02:17:26.786761: step: 716/466, loss: 0.09169892221689224 2023-01-24 02:17:27.454588: step: 718/466, loss: 0.33384251594543457 2023-01-24 02:17:28.061874: step: 720/466, loss: 0.029739664867520332 2023-01-24 02:17:28.632722: step: 722/466, loss: 0.9668821692466736 2023-01-24 02:17:29.242629: step: 724/466, loss: 0.0668252483010292 2023-01-24 02:17:29.832871: step: 726/466, loss: 0.047192059457302094 2023-01-24 02:17:30.455265: step: 728/466, loss: 0.03709348663687706 2023-01-24 02:17:31.024835: step: 730/466, loss: 0.02418670989573002 2023-01-24 02:17:31.580261: step: 732/466, loss: 0.09607279300689697 2023-01-24 02:17:32.216410: step: 734/466, loss: 0.09355024248361588 2023-01-24 02:17:32.843739: step: 736/466, loss: 0.1411276012659073 2023-01-24 02:17:33.500764: step: 738/466, loss: 0.1141132116317749 2023-01-24 02:17:34.115440: step: 740/466, loss: 0.09725882858037949 2023-01-24 02:17:34.735957: step: 742/466, loss: 0.57298743724823 2023-01-24 02:17:35.320309: step: 744/466, loss: 0.22138182818889618 2023-01-24 02:17:35.941256: step: 746/466, loss: 0.18136608600616455 2023-01-24 02:17:36.506510: step: 748/466, loss: 0.05033509433269501 2023-01-24 02:17:37.162245: step: 750/466, loss: 0.1305447220802307 2023-01-24 02:17:37.799775: step: 752/466, loss: 0.15778738260269165 2023-01-24 02:17:38.437674: step: 754/466, loss: 0.07518231868743896 2023-01-24 02:17:38.980324: step: 756/466, loss: 0.4249395430088043 2023-01-24 02:17:39.644458: step: 758/466, loss: 0.059822119772434235 2023-01-24 02:17:40.249780: step: 760/466, loss: 0.0772186666727066 2023-01-24 02:17:40.903598: step: 762/466, loss: 0.03227313980460167 2023-01-24 02:17:41.594645: step: 764/466, loss: 0.37721946835517883 2023-01-24 02:17:42.204717: step: 766/466, loss: 0.36189213395118713 2023-01-24 02:17:42.823616: step: 768/466, loss: 0.27587422728538513 2023-01-24 02:17:43.533995: step: 770/466, loss: 0.34329304099082947 2023-01-24 02:17:44.227179: step: 772/466, loss: 0.15921442210674286 2023-01-24 02:17:44.779738: step: 774/466, loss: 0.020679069682955742 2023-01-24 02:17:45.402037: step: 776/466, loss: 4.652289867401123 2023-01-24 02:17:45.985335: step: 778/466, loss: 0.18183699250221252 2023-01-24 02:17:46.650244: step: 780/466, loss: 0.126389279961586 2023-01-24 02:17:47.242190: step: 782/466, loss: 0.12384083867073059 2023-01-24 02:17:47.862084: step: 784/466, loss: 0.13537752628326416 2023-01-24 02:17:48.410848: step: 786/466, loss: 0.16725100576877594 2023-01-24 02:17:49.033803: step: 788/466, loss: 0.04753489792346954 2023-01-24 02:17:49.685766: step: 790/466, loss: 0.30294153094291687 2023-01-24 02:17:50.290171: step: 792/466, loss: 0.03696773201227188 2023-01-24 02:17:50.927066: step: 794/466, loss: 0.020780369639396667 2023-01-24 02:17:51.542227: step: 796/466, loss: 0.08103333413600922 2023-01-24 02:17:52.265017: step: 798/466, loss: 0.0900353491306305 2023-01-24 02:17:52.910862: step: 800/466, loss: 0.20286378264427185 2023-01-24 02:17:53.489239: step: 802/466, loss: 0.3709118366241455 2023-01-24 02:17:54.123869: step: 804/466, loss: 0.029136160388588905 2023-01-24 02:17:54.734703: step: 806/466, loss: 0.059885792434215546 2023-01-24 02:17:55.387346: step: 808/466, loss: 0.1759241819381714 2023-01-24 02:17:55.944092: step: 810/466, loss: 0.056672364473342896 2023-01-24 02:17:56.508535: step: 812/466, loss: 0.046470172703266144 2023-01-24 02:17:57.151890: step: 814/466, loss: 0.08381474018096924 2023-01-24 02:17:57.692021: step: 816/466, loss: 0.08822699636220932 2023-01-24 02:17:58.347293: step: 818/466, loss: 0.09209505468606949 2023-01-24 02:17:59.102111: step: 820/466, loss: 4.051313877105713 2023-01-24 02:17:59.728024: step: 822/466, loss: 0.6569058895111084 2023-01-24 02:18:00.331678: step: 824/466, loss: 0.09321137517690659 2023-01-24 02:18:00.978552: step: 826/466, loss: 0.19895921647548676 2023-01-24 02:18:01.541054: step: 828/466, loss: 0.3110239505767822 2023-01-24 02:18:02.171047: step: 830/466, loss: 0.06787966936826706 2023-01-24 02:18:02.801396: step: 832/466, loss: 0.17457108199596405 2023-01-24 02:18:03.473530: step: 834/466, loss: 0.320141077041626 2023-01-24 02:18:04.056160: step: 836/466, loss: 0.11265549063682556 2023-01-24 02:18:04.780723: step: 838/466, loss: 0.08899205178022385 2023-01-24 02:18:05.464107: step: 840/466, loss: 0.33463096618652344 2023-01-24 02:18:06.109886: step: 842/466, loss: 0.027461307123303413 2023-01-24 02:18:06.720217: step: 844/466, loss: 0.20710547268390656 2023-01-24 02:18:07.328291: step: 846/466, loss: 0.10874569416046143 2023-01-24 02:18:07.951494: step: 848/466, loss: 0.09417065978050232 2023-01-24 02:18:08.542073: step: 850/466, loss: 0.09330161660909653 2023-01-24 02:18:09.157653: step: 852/466, loss: 0.03307119756937027 2023-01-24 02:18:09.769305: step: 854/466, loss: 0.42967110872268677 2023-01-24 02:18:10.376431: step: 856/466, loss: 0.17626191675662994 2023-01-24 02:18:10.947780: step: 858/466, loss: 0.07999088615179062 2023-01-24 02:18:11.526981: step: 860/466, loss: 0.40590253472328186 2023-01-24 02:18:12.173786: step: 862/466, loss: 0.1420111209154129 2023-01-24 02:18:12.775246: step: 864/466, loss: 0.06737148016691208 2023-01-24 02:18:13.365535: step: 866/466, loss: 0.086174875497818 2023-01-24 02:18:13.936840: step: 868/466, loss: 0.1882144808769226 2023-01-24 02:18:14.558589: step: 870/466, loss: 0.0016450968105345964 2023-01-24 02:18:15.133219: step: 872/466, loss: 2.2953505516052246 2023-01-24 02:18:15.742213: step: 874/466, loss: 0.33501380681991577 2023-01-24 02:18:16.397771: step: 876/466, loss: 0.06336680799722672 2023-01-24 02:18:17.004968: step: 878/466, loss: 0.14617972075939178 2023-01-24 02:18:17.614482: step: 880/466, loss: 0.10545101761817932 2023-01-24 02:18:18.295545: step: 882/466, loss: 0.5115710496902466 2023-01-24 02:18:18.888333: step: 884/466, loss: 0.044568244367837906 2023-01-24 02:18:19.619704: step: 886/466, loss: 0.09607965499162674 2023-01-24 02:18:20.218626: step: 888/466, loss: 0.30455282330513 2023-01-24 02:18:20.799531: step: 890/466, loss: 0.09439221024513245 2023-01-24 02:18:21.387987: step: 892/466, loss: 0.160738006234169 2023-01-24 02:18:22.015347: step: 894/466, loss: 0.055197641253471375 2023-01-24 02:18:22.629691: step: 896/466, loss: 0.10426119714975357 2023-01-24 02:18:23.264467: step: 898/466, loss: 0.07737888395786285 2023-01-24 02:18:23.831646: step: 900/466, loss: 0.08653061091899872 2023-01-24 02:18:24.501832: step: 902/466, loss: 0.20931552350521088 2023-01-24 02:18:25.174338: step: 904/466, loss: 0.05122126266360283 2023-01-24 02:18:25.862242: step: 906/466, loss: 0.13378262519836426 2023-01-24 02:18:26.407868: step: 908/466, loss: 0.11195065826177597 2023-01-24 02:18:27.061895: step: 910/466, loss: 0.1351906657218933 2023-01-24 02:18:27.654413: step: 912/466, loss: 0.03451892361044884 2023-01-24 02:18:28.314053: step: 914/466, loss: 0.1293785274028778 2023-01-24 02:18:28.941500: step: 916/466, loss: 0.08654919266700745 2023-01-24 02:18:29.548768: step: 918/466, loss: 0.059098485857248306 2023-01-24 02:18:30.140646: step: 920/466, loss: 0.46760809421539307 2023-01-24 02:18:30.775217: step: 922/466, loss: 0.13194270431995392 2023-01-24 02:18:31.379591: step: 924/466, loss: 0.14335869252681732 2023-01-24 02:18:32.021377: step: 926/466, loss: 0.017652394250035286 2023-01-24 02:18:32.642792: step: 928/466, loss: 0.13621695339679718 2023-01-24 02:18:33.313619: step: 930/466, loss: 0.07794871181249619 2023-01-24 02:18:33.910213: step: 932/466, loss: 0.12003447115421295 ================================================== Loss: 0.223 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34075696404873457, 'r': 0.30196110476424864, 'f1': 0.32018813322084316}, 'combined': 0.23592809816272653, 'epoch': 15} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32782164780778184, 'r': 0.2641525834187366, 'f1': 0.29256319144026155}, 'combined': 0.18319377407941614, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33175621837621233, 'r': 0.329238144612446, 'f1': 0.33049238516335055}, 'combined': 0.24352070485720567, 'epoch': 15} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.33833475020577397, 'r': 0.27787420820149306, 'f1': 0.3051383772916297}, 'combined': 0.18909983944833386, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30404738063720815, 'r': 0.31154760065292675, 'f1': 0.3077518004575303}, 'combined': 0.2267644845476539, 'epoch': 15} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.34085085070086374, 'r': 0.2785978020195125, 'f1': 0.3065961880931153}, 'combined': 0.2033855901211755, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2977941176470588, 'r': 0.2892857142857143, 'f1': 0.29347826086956524}, 'combined': 0.1956521739130435, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41, 'r': 0.44565217391304346, 'f1': 0.4270833333333333}, 'combined': 0.21354166666666666, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35714285714285715, 'r': 0.1724137931034483, 'f1': 0.23255813953488377}, 'combined': 0.1550387596899225, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3101533457249071, 'r': 0.3166271347248577, 'f1': 0.31335680751173717}, 'combined': 0.23089448974549054, 'epoch': 12} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.34558112654998463, 'r': 0.27065838908657336, 'f1': 0.30356518411229655}, 'combined': 0.20137492411409771, 'epoch': 12} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.19827586206896552, 'f1': 0.2555555555555556}, 'combined': 0.1703703703703704, 'epoch': 12} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:21:06.644413: step: 2/466, loss: 0.0364297553896904 2023-01-24 02:21:07.259452: step: 4/466, loss: 0.041913751512765884 2023-01-24 02:21:07.947073: step: 6/466, loss: 0.044302765280008316 2023-01-24 02:21:08.587881: step: 8/466, loss: 0.09393715113401413 2023-01-24 02:21:09.258278: step: 10/466, loss: 0.10036303848028183 2023-01-24 02:21:09.846593: step: 12/466, loss: 0.03882470354437828 2023-01-24 02:21:10.437917: step: 14/466, loss: 0.05609731003642082 2023-01-24 02:21:11.085618: step: 16/466, loss: 0.010594441555440426 2023-01-24 02:21:11.730995: step: 18/466, loss: 0.03913673385977745 2023-01-24 02:21:12.370098: step: 20/466, loss: 0.07897093147039413 2023-01-24 02:21:13.007550: step: 22/466, loss: 0.09117831289768219 2023-01-24 02:21:13.643693: step: 24/466, loss: 0.0415981151163578 2023-01-24 02:21:14.249178: step: 26/466, loss: 0.23397918045520782 2023-01-24 02:21:14.965587: step: 28/466, loss: 0.7837872505187988 2023-01-24 02:21:15.595741: step: 30/466, loss: 0.2118677943944931 2023-01-24 02:21:16.199791: step: 32/466, loss: 0.12464660406112671 2023-01-24 02:21:16.890605: step: 34/466, loss: 0.01113061048090458 2023-01-24 02:21:17.510766: step: 36/466, loss: 0.2734963893890381 2023-01-24 02:21:18.147427: step: 38/466, loss: 0.09920921921730042 2023-01-24 02:21:18.775144: step: 40/466, loss: 0.14077113568782806 2023-01-24 02:21:19.370535: step: 42/466, loss: 0.09466637670993805 2023-01-24 02:21:19.964946: step: 44/466, loss: 0.07245796173810959 2023-01-24 02:21:20.651131: step: 46/466, loss: 0.16933250427246094 2023-01-24 02:21:21.205738: step: 48/466, loss: 0.07951069623231888 2023-01-24 02:21:21.855572: step: 50/466, loss: 0.14674483239650726 2023-01-24 02:21:22.512373: step: 52/466, loss: 0.059572115540504456 2023-01-24 02:21:23.098390: step: 54/466, loss: 0.056674208492040634 2023-01-24 02:21:23.716447: step: 56/466, loss: 0.06632649153470993 2023-01-24 02:21:24.345221: step: 58/466, loss: 0.05860430747270584 2023-01-24 02:21:25.012066: step: 60/466, loss: 0.09247143566608429 2023-01-24 02:21:25.632845: step: 62/466, loss: 0.07196611911058426 2023-01-24 02:21:26.286213: step: 64/466, loss: 0.06352918595075607 2023-01-24 02:21:26.972786: step: 66/466, loss: 0.1437981277704239 2023-01-24 02:21:27.579987: step: 68/466, loss: 0.04908664897084236 2023-01-24 02:21:28.195351: step: 70/466, loss: 0.2820562720298767 2023-01-24 02:21:28.848202: step: 72/466, loss: 0.030099721625447273 2023-01-24 02:21:29.485190: step: 74/466, loss: 0.03254220262169838 2023-01-24 02:21:30.135337: step: 76/466, loss: 0.05652083083987236 2023-01-24 02:21:30.788106: step: 78/466, loss: 0.03945573419332504 2023-01-24 02:21:31.498450: step: 80/466, loss: 0.06642769277095795 2023-01-24 02:21:32.121740: step: 82/466, loss: 0.04746884107589722 2023-01-24 02:21:32.790183: step: 84/466, loss: 0.07368164509534836 2023-01-24 02:21:33.413401: step: 86/466, loss: 0.17138804495334625 2023-01-24 02:21:34.033296: step: 88/466, loss: 0.10994307696819305 2023-01-24 02:21:34.698025: step: 90/466, loss: 0.07358071208000183 2023-01-24 02:21:35.357176: step: 92/466, loss: 0.2667846977710724 2023-01-24 02:21:35.975162: step: 94/466, loss: 0.1575712114572525 2023-01-24 02:21:36.571162: step: 96/466, loss: 0.06615050882101059 2023-01-24 02:21:37.157212: step: 98/466, loss: 0.04318195581436157 2023-01-24 02:21:37.768893: step: 100/466, loss: 2.450364112854004 2023-01-24 02:21:38.423115: step: 102/466, loss: 0.16622458398342133 2023-01-24 02:21:39.065432: step: 104/466, loss: 0.1172252893447876 2023-01-24 02:21:39.672713: step: 106/466, loss: 0.15681900084018707 2023-01-24 02:21:40.301732: step: 108/466, loss: 0.11366615444421768 2023-01-24 02:21:40.979732: step: 110/466, loss: 0.13074658811092377 2023-01-24 02:21:41.609741: step: 112/466, loss: 0.9056664705276489 2023-01-24 02:21:42.272622: step: 114/466, loss: 0.07722001522779465 2023-01-24 02:21:42.912481: step: 116/466, loss: 0.07057715952396393 2023-01-24 02:21:43.527768: step: 118/466, loss: 0.06568702310323715 2023-01-24 02:21:44.120098: step: 120/466, loss: 0.06861089169979095 2023-01-24 02:21:44.734402: step: 122/466, loss: 0.27800774574279785 2023-01-24 02:21:45.378100: step: 124/466, loss: 0.08259350806474686 2023-01-24 02:21:46.014587: step: 126/466, loss: 0.16336436569690704 2023-01-24 02:21:46.655667: step: 128/466, loss: 0.07314635068178177 2023-01-24 02:21:47.359816: step: 130/466, loss: 0.13182193040847778 2023-01-24 02:21:47.941457: step: 132/466, loss: 0.09079380333423615 2023-01-24 02:21:48.571172: step: 134/466, loss: 0.06319943070411682 2023-01-24 02:21:49.198777: step: 136/466, loss: 0.08041159063577652 2023-01-24 02:21:49.833598: step: 138/466, loss: 0.09127406030893326 2023-01-24 02:21:50.392467: step: 140/466, loss: 0.07198181003332138 2023-01-24 02:21:51.079976: step: 142/466, loss: 0.4676929712295532 2023-01-24 02:21:51.714675: step: 144/466, loss: 0.0892963632941246 2023-01-24 02:21:52.434642: step: 146/466, loss: 0.07624605298042297 2023-01-24 02:21:53.066140: step: 148/466, loss: 0.21611839532852173 2023-01-24 02:21:53.716479: step: 150/466, loss: 0.06725850701332092 2023-01-24 02:21:54.380613: step: 152/466, loss: 0.13193872570991516 2023-01-24 02:21:54.977257: step: 154/466, loss: 0.13217201828956604 2023-01-24 02:21:55.676992: step: 156/466, loss: 2.136812210083008 2023-01-24 02:21:56.289269: step: 158/466, loss: 0.016867976635694504 2023-01-24 02:21:56.958741: step: 160/466, loss: 0.058701291680336 2023-01-24 02:21:57.622648: step: 162/466, loss: 0.09831482172012329 2023-01-24 02:21:58.273233: step: 164/466, loss: 0.054717060178518295 2023-01-24 02:21:58.892670: step: 166/466, loss: 0.16726908087730408 2023-01-24 02:21:59.474103: step: 168/466, loss: 0.04749428108334541 2023-01-24 02:22:00.035607: step: 170/466, loss: 0.39669886231422424 2023-01-24 02:22:00.629439: step: 172/466, loss: 0.024730466306209564 2023-01-24 02:22:01.289937: step: 174/466, loss: 0.22909945249557495 2023-01-24 02:22:01.857058: step: 176/466, loss: 0.03844194486737251 2023-01-24 02:22:02.431611: step: 178/466, loss: 0.16384950280189514 2023-01-24 02:22:03.108312: step: 180/466, loss: 0.7103508710861206 2023-01-24 02:22:03.685717: step: 182/466, loss: 0.039173197001218796 2023-01-24 02:22:04.339491: step: 184/466, loss: 0.04706037417054176 2023-01-24 02:22:04.986968: step: 186/466, loss: 0.09144410490989685 2023-01-24 02:22:05.609972: step: 188/466, loss: 0.1519036740064621 2023-01-24 02:22:06.203158: step: 190/466, loss: 0.03659029304981232 2023-01-24 02:22:06.817072: step: 192/466, loss: 0.054771166294813156 2023-01-24 02:22:07.481959: step: 194/466, loss: 0.438940167427063 2023-01-24 02:22:08.093187: step: 196/466, loss: 0.25274887681007385 2023-01-24 02:22:08.677374: step: 198/466, loss: 0.012997581623494625 2023-01-24 02:22:09.359840: step: 200/466, loss: 0.13784900307655334 2023-01-24 02:22:10.093330: step: 202/466, loss: 0.17327864468097687 2023-01-24 02:22:10.664985: step: 204/466, loss: 0.06874261796474457 2023-01-24 02:22:11.320603: step: 206/466, loss: 0.1746750921010971 2023-01-24 02:22:12.003739: step: 208/466, loss: 0.0177956260740757 2023-01-24 02:22:12.650048: step: 210/466, loss: 0.05754571035504341 2023-01-24 02:22:13.293260: step: 212/466, loss: 0.10158266127109528 2023-01-24 02:22:13.955927: step: 214/466, loss: 0.031053008511662483 2023-01-24 02:22:14.597823: step: 216/466, loss: 0.029317565262317657 2023-01-24 02:22:15.258317: step: 218/466, loss: 0.15037000179290771 2023-01-24 02:22:15.890872: step: 220/466, loss: 0.1555047184228897 2023-01-24 02:22:16.578158: step: 222/466, loss: 0.9966866970062256 2023-01-24 02:22:17.197356: step: 224/466, loss: 0.05561330169439316 2023-01-24 02:22:17.856546: step: 226/466, loss: 0.27670490741729736 2023-01-24 02:22:18.547980: step: 228/466, loss: 0.07437361031770706 2023-01-24 02:22:19.165535: step: 230/466, loss: 0.059094492346048355 2023-01-24 02:22:19.838808: step: 232/466, loss: 0.08575653284788132 2023-01-24 02:22:20.468352: step: 234/466, loss: 0.14250144362449646 2023-01-24 02:22:21.074962: step: 236/466, loss: 0.12382957339286804 2023-01-24 02:22:21.686749: step: 238/466, loss: 0.11464880406856537 2023-01-24 02:22:22.298824: step: 240/466, loss: 0.07331466674804688 2023-01-24 02:22:22.934814: step: 242/466, loss: 0.0688156932592392 2023-01-24 02:22:23.503847: step: 244/466, loss: 0.03604768216609955 2023-01-24 02:22:24.139647: step: 246/466, loss: 0.07054924219846725 2023-01-24 02:22:24.806444: step: 248/466, loss: 0.1382962018251419 2023-01-24 02:22:25.485692: step: 250/466, loss: 0.1107596680521965 2023-01-24 02:22:26.042383: step: 252/466, loss: 0.02213207073509693 2023-01-24 02:22:26.710710: step: 254/466, loss: 0.08174173533916473 2023-01-24 02:22:27.344493: step: 256/466, loss: 0.2501335144042969 2023-01-24 02:22:28.043186: step: 258/466, loss: 0.053914088755846024 2023-01-24 02:22:28.678881: step: 260/466, loss: 0.04311135411262512 2023-01-24 02:22:29.236528: step: 262/466, loss: 0.25014761090278625 2023-01-24 02:22:29.851616: step: 264/466, loss: 0.1010250523686409 2023-01-24 02:22:30.486210: step: 266/466, loss: 0.09395529329776764 2023-01-24 02:22:31.066806: step: 268/466, loss: 0.02777744270861149 2023-01-24 02:22:31.722787: step: 270/466, loss: 0.053821831941604614 2023-01-24 02:22:32.286051: step: 272/466, loss: 0.12027788907289505 2023-01-24 02:22:32.891828: step: 274/466, loss: 0.07144496589899063 2023-01-24 02:22:33.505623: step: 276/466, loss: 0.07142110913991928 2023-01-24 02:22:34.121270: step: 278/466, loss: 0.03224104270339012 2023-01-24 02:22:34.683418: step: 280/466, loss: 0.1393880844116211 2023-01-24 02:22:35.200641: step: 282/466, loss: 0.09324046969413757 2023-01-24 02:22:35.805466: step: 284/466, loss: 0.09826359152793884 2023-01-24 02:22:36.407273: step: 286/466, loss: 0.06192191690206528 2023-01-24 02:22:37.030046: step: 288/466, loss: 0.059555042535066605 2023-01-24 02:22:37.622989: step: 290/466, loss: 0.32981789112091064 2023-01-24 02:22:38.247334: step: 292/466, loss: 0.14390693604946136 2023-01-24 02:22:38.875436: step: 294/466, loss: 0.1129252165555954 2023-01-24 02:22:39.493874: step: 296/466, loss: 0.08461034297943115 2023-01-24 02:22:40.137524: step: 298/466, loss: 0.09603862464427948 2023-01-24 02:22:40.691612: step: 300/466, loss: 0.0672009214758873 2023-01-24 02:22:41.438819: step: 302/466, loss: 0.09006774425506592 2023-01-24 02:22:42.074917: step: 304/466, loss: 0.0714765414595604 2023-01-24 02:22:42.700984: step: 306/466, loss: 0.3602089583873749 2023-01-24 02:22:43.331972: step: 308/466, loss: 0.045358337461948395 2023-01-24 02:22:43.932400: step: 310/466, loss: 0.17103302478790283 2023-01-24 02:22:44.511904: step: 312/466, loss: 1.0719202756881714 2023-01-24 02:22:45.136649: step: 314/466, loss: 0.14032472670078278 2023-01-24 02:22:45.712234: step: 316/466, loss: 0.13565972447395325 2023-01-24 02:22:46.351220: step: 318/466, loss: 0.6626256108283997 2023-01-24 02:22:46.948211: step: 320/466, loss: 0.9905956387519836 2023-01-24 02:22:47.554772: step: 322/466, loss: 0.09602364897727966 2023-01-24 02:22:48.162941: step: 324/466, loss: 0.07717530429363251 2023-01-24 02:22:48.825784: step: 326/466, loss: 0.30983152985572815 2023-01-24 02:22:49.439868: step: 328/466, loss: 0.1082986518740654 2023-01-24 02:22:49.999008: step: 330/466, loss: 0.11601077020168304 2023-01-24 02:22:50.637392: step: 332/466, loss: 0.11921878904104233 2023-01-24 02:22:51.204722: step: 334/466, loss: 0.0804547518491745 2023-01-24 02:22:51.808721: step: 336/466, loss: 0.09161564707756042 2023-01-24 02:22:52.513162: step: 338/466, loss: 0.40797990560531616 2023-01-24 02:22:53.058194: step: 340/466, loss: 0.13910165429115295 2023-01-24 02:22:53.639025: step: 342/466, loss: 0.07941452413797379 2023-01-24 02:22:54.271192: step: 344/466, loss: 0.055207520723342896 2023-01-24 02:22:54.914368: step: 346/466, loss: 0.0494774654507637 2023-01-24 02:22:55.542953: step: 348/466, loss: 0.19999352097511292 2023-01-24 02:22:56.206467: step: 350/466, loss: 0.12860815227031708 2023-01-24 02:22:56.822461: step: 352/466, loss: 0.20784658193588257 2023-01-24 02:22:57.458087: step: 354/466, loss: 0.04734490066766739 2023-01-24 02:22:58.119238: step: 356/466, loss: 0.048202116042375565 2023-01-24 02:22:58.699134: step: 358/466, loss: 0.37989869713783264 2023-01-24 02:22:59.284855: step: 360/466, loss: 0.040857430547475815 2023-01-24 02:22:59.878672: step: 362/466, loss: 0.19746412336826324 2023-01-24 02:23:00.551892: step: 364/466, loss: 0.15635210275650024 2023-01-24 02:23:01.161060: step: 366/466, loss: 0.1906730830669403 2023-01-24 02:23:01.878420: step: 368/466, loss: 0.13276511430740356 2023-01-24 02:23:02.485488: step: 370/466, loss: 0.07794889807701111 2023-01-24 02:23:03.050815: step: 372/466, loss: 0.056645467877388 2023-01-24 02:23:03.633694: step: 374/466, loss: 0.08633853495121002 2023-01-24 02:23:04.297803: step: 376/466, loss: 0.10069998353719711 2023-01-24 02:23:04.899945: step: 378/466, loss: 0.17765261232852936 2023-01-24 02:23:05.550543: step: 380/466, loss: 0.151209756731987 2023-01-24 02:23:06.182969: step: 382/466, loss: 0.22974668443202972 2023-01-24 02:23:06.817137: step: 384/466, loss: 0.0981052815914154 2023-01-24 02:23:07.431086: step: 386/466, loss: 0.2308218628168106 2023-01-24 02:23:08.051660: step: 388/466, loss: 0.12051533907651901 2023-01-24 02:23:08.690664: step: 390/466, loss: 0.08414598554372787 2023-01-24 02:23:09.268896: step: 392/466, loss: 0.10993286222219467 2023-01-24 02:23:09.840134: step: 394/466, loss: 0.01752668432891369 2023-01-24 02:23:10.439146: step: 396/466, loss: 0.098649762570858 2023-01-24 02:23:11.031854: step: 398/466, loss: 0.16422350704669952 2023-01-24 02:23:11.619136: step: 400/466, loss: 0.0024283877573907375 2023-01-24 02:23:12.223628: step: 402/466, loss: 0.05185849219560623 2023-01-24 02:23:12.801703: step: 404/466, loss: 0.16710449755191803 2023-01-24 02:23:13.405769: step: 406/466, loss: 0.17796562612056732 2023-01-24 02:23:14.078048: step: 408/466, loss: 0.2567916810512543 2023-01-24 02:23:14.713422: step: 410/466, loss: 0.10605175793170929 2023-01-24 02:23:15.319570: step: 412/466, loss: 0.24402956664562225 2023-01-24 02:23:15.898665: step: 414/466, loss: 0.0167254451662302 2023-01-24 02:23:16.465200: step: 416/466, loss: 0.4071357250213623 2023-01-24 02:23:17.066133: step: 418/466, loss: 0.4210357367992401 2023-01-24 02:23:17.661608: step: 420/466, loss: 0.20658299326896667 2023-01-24 02:23:18.259818: step: 422/466, loss: 0.36707136034965515 2023-01-24 02:23:18.865309: step: 424/466, loss: 0.026186460629105568 2023-01-24 02:23:19.484933: step: 426/466, loss: 0.04088641703128815 2023-01-24 02:23:20.110326: step: 428/466, loss: 0.08710642158985138 2023-01-24 02:23:20.681583: step: 430/466, loss: 0.1455727219581604 2023-01-24 02:23:21.313017: step: 432/466, loss: 0.038881801068782806 2023-01-24 02:23:21.917460: step: 434/466, loss: 0.1724172830581665 2023-01-24 02:23:22.529238: step: 436/466, loss: 0.181453675031662 2023-01-24 02:23:23.210868: step: 438/466, loss: 0.2943277955055237 2023-01-24 02:23:23.808688: step: 440/466, loss: 0.1346326470375061 2023-01-24 02:23:24.421056: step: 442/466, loss: 0.09035190939903259 2023-01-24 02:23:25.016872: step: 444/466, loss: 0.044517651200294495 2023-01-24 02:23:25.676683: step: 446/466, loss: 0.1681986153125763 2023-01-24 02:23:26.305528: step: 448/466, loss: 0.37380313873291016 2023-01-24 02:23:26.944962: step: 450/466, loss: 0.3123873174190521 2023-01-24 02:23:27.561118: step: 452/466, loss: 0.056177251040935516 2023-01-24 02:23:28.113111: step: 454/466, loss: 0.1289960741996765 2023-01-24 02:23:28.698177: step: 456/466, loss: 0.047767218202352524 2023-01-24 02:23:29.298596: step: 458/466, loss: 0.22923243045806885 2023-01-24 02:23:29.830412: step: 460/466, loss: 0.06283791363239288 2023-01-24 02:23:30.430530: step: 462/466, loss: 0.24462512135505676 2023-01-24 02:23:31.128705: step: 464/466, loss: 6.311313152313232 2023-01-24 02:23:31.704932: step: 466/466, loss: 1.465904712677002 2023-01-24 02:23:32.328179: step: 468/466, loss: 0.19708696007728577 2023-01-24 02:23:32.987990: step: 470/466, loss: 0.09209314733743668 2023-01-24 02:23:33.584697: step: 472/466, loss: 0.07932702451944351 2023-01-24 02:23:34.233941: step: 474/466, loss: 0.18532010912895203 2023-01-24 02:23:34.882185: step: 476/466, loss: 0.12062534689903259 2023-01-24 02:23:35.508766: step: 478/466, loss: 0.5024365782737732 2023-01-24 02:23:36.115481: step: 480/466, loss: 0.04506577178835869 2023-01-24 02:23:36.698011: step: 482/466, loss: 0.07182347029447556 2023-01-24 02:23:37.371316: step: 484/466, loss: 0.18841418623924255 2023-01-24 02:23:37.929697: step: 486/466, loss: 0.12853263318538666 2023-01-24 02:23:38.654696: step: 488/466, loss: 0.06982048600912094 2023-01-24 02:23:39.242700: step: 490/466, loss: 0.08866838365793228 2023-01-24 02:23:39.854082: step: 492/466, loss: 0.1469661295413971 2023-01-24 02:23:40.450175: step: 494/466, loss: 0.04782625660300255 2023-01-24 02:23:41.067298: step: 496/466, loss: 0.0394095852971077 2023-01-24 02:23:41.691250: step: 498/466, loss: 0.06809506565332413 2023-01-24 02:23:42.340266: step: 500/466, loss: 0.06742847710847855 2023-01-24 02:23:42.995512: step: 502/466, loss: 0.6899810433387756 2023-01-24 02:23:43.587478: step: 504/466, loss: 0.04344059154391289 2023-01-24 02:23:44.222360: step: 506/466, loss: 0.02709837257862091 2023-01-24 02:23:44.819590: step: 508/466, loss: 0.17914627492427826 2023-01-24 02:23:45.472382: step: 510/466, loss: 0.17754724621772766 2023-01-24 02:23:46.062250: step: 512/466, loss: 0.0071636890061199665 2023-01-24 02:23:46.666691: step: 514/466, loss: 0.02729751169681549 2023-01-24 02:23:47.294524: step: 516/466, loss: 0.33817991614341736 2023-01-24 02:23:47.894919: step: 518/466, loss: 0.17708256840705872 2023-01-24 02:23:48.488567: step: 520/466, loss: 0.019362229853868484 2023-01-24 02:23:49.077275: step: 522/466, loss: 0.06572502851486206 2023-01-24 02:23:49.732455: step: 524/466, loss: 0.08744283765554428 2023-01-24 02:23:50.340742: step: 526/466, loss: 0.2761349678039551 2023-01-24 02:23:51.022162: step: 528/466, loss: 0.5027775764465332 2023-01-24 02:23:51.626731: step: 530/466, loss: 0.025422899052500725 2023-01-24 02:23:52.309146: step: 532/466, loss: 0.05502090975642204 2023-01-24 02:23:52.887519: step: 534/466, loss: 0.20646455883979797 2023-01-24 02:23:53.549824: step: 536/466, loss: 0.07636255770921707 2023-01-24 02:23:54.149671: step: 538/466, loss: 0.08909200876951218 2023-01-24 02:23:54.757194: step: 540/466, loss: 0.07184493541717529 2023-01-24 02:23:55.438935: step: 542/466, loss: 0.4698965847492218 2023-01-24 02:23:56.032126: step: 544/466, loss: 0.16242723166942596 2023-01-24 02:23:56.597644: step: 546/466, loss: 0.14060641825199127 2023-01-24 02:23:57.182272: step: 548/466, loss: 0.25153452157974243 2023-01-24 02:23:57.799243: step: 550/466, loss: 0.06144038960337639 2023-01-24 02:23:58.433170: step: 552/466, loss: 0.28175848722457886 2023-01-24 02:23:59.059379: step: 554/466, loss: 0.0455288402736187 2023-01-24 02:23:59.694926: step: 556/466, loss: 0.26265767216682434 2023-01-24 02:24:00.347137: step: 558/466, loss: 0.4596168100833893 2023-01-24 02:24:00.924579: step: 560/466, loss: 0.04697806388139725 2023-01-24 02:24:01.503845: step: 562/466, loss: 0.22388292849063873 2023-01-24 02:24:02.140784: step: 564/466, loss: 0.08523406833410263 2023-01-24 02:24:02.788609: step: 566/466, loss: 0.0574776865541935 2023-01-24 02:24:03.417167: step: 568/466, loss: 0.19718588888645172 2023-01-24 02:24:04.112433: step: 570/466, loss: 0.13567817211151123 2023-01-24 02:24:04.697810: step: 572/466, loss: 0.06724944710731506 2023-01-24 02:24:05.325844: step: 574/466, loss: 0.104249507188797 2023-01-24 02:24:05.953541: step: 576/466, loss: 0.23395521938800812 2023-01-24 02:24:06.579733: step: 578/466, loss: 0.1479276567697525 2023-01-24 02:24:07.214743: step: 580/466, loss: 0.07878604531288147 2023-01-24 02:24:07.792682: step: 582/466, loss: 0.3168390691280365 2023-01-24 02:24:08.488575: step: 584/466, loss: 0.1844732016324997 2023-01-24 02:24:09.110792: step: 586/466, loss: 0.052865419536828995 2023-01-24 02:24:09.897395: step: 588/466, loss: 0.16163483262062073 2023-01-24 02:24:10.438096: step: 590/466, loss: 0.10157793760299683 2023-01-24 02:24:11.113365: step: 592/466, loss: 0.5824289321899414 2023-01-24 02:24:11.722409: step: 594/466, loss: 0.04869154840707779 2023-01-24 02:24:12.336881: step: 596/466, loss: 0.04857758805155754 2023-01-24 02:24:12.960816: step: 598/466, loss: 0.09195052087306976 2023-01-24 02:24:13.565057: step: 600/466, loss: 0.3458186686038971 2023-01-24 02:24:14.151208: step: 602/466, loss: 0.08263885974884033 2023-01-24 02:24:14.821295: step: 604/466, loss: 0.11075650900602341 2023-01-24 02:24:15.460771: step: 606/466, loss: 0.07145028561353683 2023-01-24 02:24:16.079758: step: 608/466, loss: 0.06847894936800003 2023-01-24 02:24:16.698693: step: 610/466, loss: 0.07767244428396225 2023-01-24 02:24:17.307879: step: 612/466, loss: 0.09935230761766434 2023-01-24 02:24:17.844159: step: 614/466, loss: 0.12344237416982651 2023-01-24 02:24:18.454203: step: 616/466, loss: 0.11530716717243195 2023-01-24 02:24:19.261865: step: 618/466, loss: 0.3105453550815582 2023-01-24 02:24:19.824198: step: 620/466, loss: 0.03813612833619118 2023-01-24 02:24:20.420101: step: 622/466, loss: 0.1672123670578003 2023-01-24 02:24:21.009392: step: 624/466, loss: 0.09853332489728928 2023-01-24 02:24:21.590215: step: 626/466, loss: 0.05499133840203285 2023-01-24 02:24:22.265994: step: 628/466, loss: 0.06897318363189697 2023-01-24 02:24:22.862614: step: 630/466, loss: 0.08766872435808182 2023-01-24 02:24:23.459091: step: 632/466, loss: 0.10207361727952957 2023-01-24 02:24:24.105535: step: 634/466, loss: 0.05842089653015137 2023-01-24 02:24:24.682462: step: 636/466, loss: 0.030612140893936157 2023-01-24 02:24:25.368265: step: 638/466, loss: 0.039262767881155014 2023-01-24 02:24:25.974211: step: 640/466, loss: 0.015716716647148132 2023-01-24 02:24:26.578299: step: 642/466, loss: 0.1715877503156662 2023-01-24 02:24:27.251151: step: 644/466, loss: 0.1346689909696579 2023-01-24 02:24:27.913908: step: 646/466, loss: 0.14579948782920837 2023-01-24 02:24:28.474188: step: 648/466, loss: 0.08950228244066238 2023-01-24 02:24:29.056884: step: 650/466, loss: 0.13232272863388062 2023-01-24 02:24:29.637997: step: 652/466, loss: 0.17211388051509857 2023-01-24 02:24:30.254895: step: 654/466, loss: 0.028392614796757698 2023-01-24 02:24:30.831441: step: 656/466, loss: 0.12627607583999634 2023-01-24 02:24:31.432643: step: 658/466, loss: 0.12688058614730835 2023-01-24 02:24:32.058043: step: 660/466, loss: 0.10577981919050217 2023-01-24 02:24:32.722330: step: 662/466, loss: 0.15648826956748962 2023-01-24 02:24:33.299235: step: 664/466, loss: 0.033302515745162964 2023-01-24 02:24:33.954769: step: 666/466, loss: 0.05960897356271744 2023-01-24 02:24:34.540762: step: 668/466, loss: 0.09449905157089233 2023-01-24 02:24:35.167483: step: 670/466, loss: 0.16037170588970184 2023-01-24 02:24:35.784455: step: 672/466, loss: 0.117377370595932 2023-01-24 02:24:36.436467: step: 674/466, loss: 0.13370610773563385 2023-01-24 02:24:37.036435: step: 676/466, loss: 0.0766676589846611 2023-01-24 02:24:37.770450: step: 678/466, loss: 0.059579480439424515 2023-01-24 02:24:38.364346: step: 680/466, loss: 0.054846640676259995 2023-01-24 02:24:38.989043: step: 682/466, loss: 0.009645046666264534 2023-01-24 02:24:39.651950: step: 684/466, loss: 0.23150868713855743 2023-01-24 02:24:40.279530: step: 686/466, loss: 0.21831421554088593 2023-01-24 02:24:40.859455: step: 688/466, loss: 0.0784607082605362 2023-01-24 02:24:41.374676: step: 690/466, loss: 0.061767883598804474 2023-01-24 02:24:42.091522: step: 692/466, loss: 0.05261386185884476 2023-01-24 02:24:42.739920: step: 694/466, loss: 0.2991788387298584 2023-01-24 02:24:43.347723: step: 696/466, loss: 0.03715163469314575 2023-01-24 02:24:43.966124: step: 698/466, loss: 0.1540336012840271 2023-01-24 02:24:44.554057: step: 700/466, loss: 0.027739468961954117 2023-01-24 02:24:45.173503: step: 702/466, loss: 0.06844502687454224 2023-01-24 02:24:45.840694: step: 704/466, loss: 0.1026684120297432 2023-01-24 02:24:46.463673: step: 706/466, loss: 0.18602652847766876 2023-01-24 02:24:47.147675: step: 708/466, loss: 0.08809852600097656 2023-01-24 02:24:47.753937: step: 710/466, loss: 0.2600739896297455 2023-01-24 02:24:48.432929: step: 712/466, loss: 0.6950864195823669 2023-01-24 02:24:49.059346: step: 714/466, loss: 0.06049566715955734 2023-01-24 02:24:49.734755: step: 716/466, loss: 0.07595871388912201 2023-01-24 02:24:50.372406: step: 718/466, loss: 0.15007273852825165 2023-01-24 02:24:50.970241: step: 720/466, loss: 0.019157638773322105 2023-01-24 02:24:51.591000: step: 722/466, loss: 0.03895778954029083 2023-01-24 02:24:52.173715: step: 724/466, loss: 0.048599064350128174 2023-01-24 02:24:52.804532: step: 726/466, loss: 0.0881853774189949 2023-01-24 02:24:53.454024: step: 728/466, loss: 0.05115104094147682 2023-01-24 02:24:54.069817: step: 730/466, loss: 0.12414344400167465 2023-01-24 02:24:54.714614: step: 732/466, loss: 0.0771954357624054 2023-01-24 02:24:55.338293: step: 734/466, loss: 0.07813917845487595 2023-01-24 02:24:56.012081: step: 736/466, loss: 0.09652063250541687 2023-01-24 02:24:56.616757: step: 738/466, loss: 0.041155245155096054 2023-01-24 02:24:57.214066: step: 740/466, loss: 0.10691135376691818 2023-01-24 02:24:57.817959: step: 742/466, loss: 0.10555917769670486 2023-01-24 02:24:58.461562: step: 744/466, loss: 0.04085260257124901 2023-01-24 02:24:59.034485: step: 746/466, loss: 0.04388800263404846 2023-01-24 02:24:59.702078: step: 748/466, loss: 0.068348228931427 2023-01-24 02:25:00.322920: step: 750/466, loss: 0.09691111743450165 2023-01-24 02:25:00.961596: step: 752/466, loss: 0.040126457810401917 2023-01-24 02:25:01.535053: step: 754/466, loss: 0.07099471241235733 2023-01-24 02:25:02.148769: step: 756/466, loss: 0.13388031721115112 2023-01-24 02:25:02.860151: step: 758/466, loss: 0.1699666678905487 2023-01-24 02:25:03.463593: step: 760/466, loss: 0.051609255373477936 2023-01-24 02:25:04.062950: step: 762/466, loss: 0.01984989456832409 2023-01-24 02:25:04.643828: step: 764/466, loss: 0.35947635769844055 2023-01-24 02:25:05.270107: step: 766/466, loss: 0.112865149974823 2023-01-24 02:25:05.875099: step: 768/466, loss: 0.04542209953069687 2023-01-24 02:25:06.519514: step: 770/466, loss: 0.07269290834665298 2023-01-24 02:25:07.110116: step: 772/466, loss: 0.11141180992126465 2023-01-24 02:25:07.735243: step: 774/466, loss: 0.04350070655345917 2023-01-24 02:25:08.321464: step: 776/466, loss: 0.09883411973714828 2023-01-24 02:25:08.921206: step: 778/466, loss: 0.0780629888176918 2023-01-24 02:25:09.568581: step: 780/466, loss: 0.05787418782711029 2023-01-24 02:25:10.257237: step: 782/466, loss: 0.5285390019416809 2023-01-24 02:25:10.849379: step: 784/466, loss: 0.04558119550347328 2023-01-24 02:25:11.507079: step: 786/466, loss: 0.20061707496643066 2023-01-24 02:25:12.165264: step: 788/466, loss: 0.11438582092523575 2023-01-24 02:25:12.871393: step: 790/466, loss: 0.01749095879495144 2023-01-24 02:25:13.503773: step: 792/466, loss: 0.11763517558574677 2023-01-24 02:25:14.170092: step: 794/466, loss: 0.09132517874240875 2023-01-24 02:25:14.738289: step: 796/466, loss: 0.08119666576385498 2023-01-24 02:25:15.406121: step: 798/466, loss: 0.07370875775814056 2023-01-24 02:25:15.962394: step: 800/466, loss: 0.14316020905971527 2023-01-24 02:25:16.630058: step: 802/466, loss: 0.34998998045921326 2023-01-24 02:25:17.280501: step: 804/466, loss: 0.1565779745578766 2023-01-24 02:25:17.875423: step: 806/466, loss: 0.045447055250406265 2023-01-24 02:25:18.516806: step: 808/466, loss: 0.03068552538752556 2023-01-24 02:25:19.092169: step: 810/466, loss: 0.06253843754529953 2023-01-24 02:25:19.716929: step: 812/466, loss: 0.354098379611969 2023-01-24 02:25:20.390565: step: 814/466, loss: 0.046952590346336365 2023-01-24 02:25:21.022539: step: 816/466, loss: 0.12536762654781342 2023-01-24 02:25:21.673079: step: 818/466, loss: 0.24964533746242523 2023-01-24 02:25:22.344705: step: 820/466, loss: 0.14116087555885315 2023-01-24 02:25:22.951509: step: 822/466, loss: 0.10435947775840759 2023-01-24 02:25:23.538902: step: 824/466, loss: 0.03001273237168789 2023-01-24 02:25:24.081975: step: 826/466, loss: 0.04351481422781944 2023-01-24 02:25:24.753923: step: 828/466, loss: 0.1352987289428711 2023-01-24 02:25:25.392081: step: 830/466, loss: 0.07766462117433548 2023-01-24 02:25:26.028479: step: 832/466, loss: 0.06786693632602692 2023-01-24 02:25:26.682483: step: 834/466, loss: 0.2304045706987381 2023-01-24 02:25:27.238839: step: 836/466, loss: 0.07369163632392883 2023-01-24 02:25:27.849770: step: 838/466, loss: 0.1992221474647522 2023-01-24 02:25:28.420272: step: 840/466, loss: 0.04259900376200676 2023-01-24 02:25:29.029983: step: 842/466, loss: 0.14957328140735626 2023-01-24 02:25:29.760781: step: 844/466, loss: 0.19445879757404327 2023-01-24 02:25:30.295385: step: 846/466, loss: 0.02491155080497265 2023-01-24 02:25:30.878390: step: 848/466, loss: 0.09199264645576477 2023-01-24 02:25:31.527191: step: 850/466, loss: 0.058149468153715134 2023-01-24 02:25:32.182119: step: 852/466, loss: 0.4377081096172333 2023-01-24 02:25:32.830495: step: 854/466, loss: 0.2140495330095291 2023-01-24 02:25:33.450370: step: 856/466, loss: 0.1769515573978424 2023-01-24 02:25:34.038067: step: 858/466, loss: 0.576966404914856 2023-01-24 02:25:34.648592: step: 860/466, loss: 0.14778044819831848 2023-01-24 02:25:35.244571: step: 862/466, loss: 0.10003938525915146 2023-01-24 02:25:35.819367: step: 864/466, loss: 0.2692798674106598 2023-01-24 02:25:36.378194: step: 866/466, loss: 0.0770498588681221 2023-01-24 02:25:36.989151: step: 868/466, loss: 0.4713871479034424 2023-01-24 02:25:37.595842: step: 870/466, loss: 2.7093138694763184 2023-01-24 02:25:38.161063: step: 872/466, loss: 0.07809975743293762 2023-01-24 02:25:38.733290: step: 874/466, loss: 0.08426910638809204 2023-01-24 02:25:39.345080: step: 876/466, loss: 0.08476223796606064 2023-01-24 02:25:39.940630: step: 878/466, loss: 0.05442659556865692 2023-01-24 02:25:40.545111: step: 880/466, loss: 0.02732786163687706 2023-01-24 02:25:41.162245: step: 882/466, loss: 0.07974054664373398 2023-01-24 02:25:41.822297: step: 884/466, loss: 0.2001144289970398 2023-01-24 02:25:42.436359: step: 886/466, loss: 0.015165749937295914 2023-01-24 02:25:43.055955: step: 888/466, loss: 0.05666099488735199 2023-01-24 02:25:43.650570: step: 890/466, loss: 0.13954006135463715 2023-01-24 02:25:44.232924: step: 892/466, loss: 0.21236880123615265 2023-01-24 02:25:44.903869: step: 894/466, loss: 0.06382488459348679 2023-01-24 02:25:45.521264: step: 896/466, loss: 0.12354041635990143 2023-01-24 02:25:46.114876: step: 898/466, loss: 0.09248878061771393 2023-01-24 02:25:46.740608: step: 900/466, loss: 0.08557818830013275 2023-01-24 02:25:47.351370: step: 902/466, loss: 0.06944967806339264 2023-01-24 02:25:48.068662: step: 904/466, loss: 0.25239336490631104 2023-01-24 02:25:48.667716: step: 906/466, loss: 0.046425044536590576 2023-01-24 02:25:49.268894: step: 908/466, loss: 0.2908068001270294 2023-01-24 02:25:49.873465: step: 910/466, loss: 0.2102268934249878 2023-01-24 02:25:50.467238: step: 912/466, loss: 0.1882074475288391 2023-01-24 02:25:51.083262: step: 914/466, loss: 0.14248530566692352 2023-01-24 02:25:51.751142: step: 916/466, loss: 0.5766107439994812 2023-01-24 02:25:52.435125: step: 918/466, loss: 0.07786186784505844 2023-01-24 02:25:53.057965: step: 920/466, loss: 0.08814342319965363 2023-01-24 02:25:53.680000: step: 922/466, loss: 0.11284119635820389 2023-01-24 02:25:54.334494: step: 924/466, loss: 0.3373393416404724 2023-01-24 02:25:54.890972: step: 926/466, loss: 0.012794055975973606 2023-01-24 02:25:55.506545: step: 928/466, loss: 0.019589319825172424 2023-01-24 02:25:56.198819: step: 930/466, loss: 0.017791852355003357 2023-01-24 02:25:56.789444: step: 932/466, loss: 0.037928689271211624 ================================================== Loss: 0.170 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3706943874410014, 'r': 0.31160837502156286, 'f1': 0.3385930178069353}, 'combined': 0.2494895920682681, 'epoch': 16} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.341316343165776, 'r': 0.2544470211687103, 'f1': 0.29154839654077663}, 'combined': 0.1825583417591779, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35561438470355317, 'r': 0.3083790774374266, 'f1': 0.3303166134339915}, 'combined': 0.24339118884609898, 'epoch': 16} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.35744325326003573, 'r': 0.2735666775852981, 'f1': 0.30993034638497985}, 'combined': 0.19206951043576215, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35129310344827586, 'r': 0.2910714285714286, 'f1': 0.31835937500000006}, 'combined': 0.21223958333333337, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3425925925925926, 'r': 0.40217391304347827, 'f1': 0.37}, 'combined': 0.185, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:28:35.577519: step: 2/466, loss: 0.0552273727953434 2023-01-24 02:28:36.162013: step: 4/466, loss: 0.03032900206744671 2023-01-24 02:28:36.719775: step: 6/466, loss: 0.0997084379196167 2023-01-24 02:28:37.420975: step: 8/466, loss: 0.0820755660533905 2023-01-24 02:28:38.062153: step: 10/466, loss: 0.24237942695617676 2023-01-24 02:28:38.730410: step: 12/466, loss: 0.6239765882492065 2023-01-24 02:28:39.365268: step: 14/466, loss: 0.10396306961774826 2023-01-24 02:28:39.943659: step: 16/466, loss: 0.05649964511394501 2023-01-24 02:28:40.516354: step: 18/466, loss: 0.037888143211603165 2023-01-24 02:28:41.149847: step: 20/466, loss: 0.0896913930773735 2023-01-24 02:28:41.773388: step: 22/466, loss: 0.13036413490772247 2023-01-24 02:28:42.410860: step: 24/466, loss: 0.08791163563728333 2023-01-24 02:28:43.099253: step: 26/466, loss: 0.03862424194812775 2023-01-24 02:28:43.713658: step: 28/466, loss: 0.024081386625766754 2023-01-24 02:28:44.346731: step: 30/466, loss: 0.050525031983852386 2023-01-24 02:28:44.882553: step: 32/466, loss: 0.4528115689754486 2023-01-24 02:28:45.585463: step: 34/466, loss: 0.14363859593868256 2023-01-24 02:28:46.298981: step: 36/466, loss: 0.3207593858242035 2023-01-24 02:28:46.941894: step: 38/466, loss: 0.08752243220806122 2023-01-24 02:28:47.554093: step: 40/466, loss: 0.03985515609383583 2023-01-24 02:28:48.196865: step: 42/466, loss: 0.011252796277403831 2023-01-24 02:28:48.852461: step: 44/466, loss: 0.06112007051706314 2023-01-24 02:28:49.487503: step: 46/466, loss: 0.13862279057502747 2023-01-24 02:28:50.140826: step: 48/466, loss: 0.053066641092300415 2023-01-24 02:28:50.722504: step: 50/466, loss: 0.43007010221481323 2023-01-24 02:28:51.323495: step: 52/466, loss: 0.13144581019878387 2023-01-24 02:28:51.939769: step: 54/466, loss: 0.07558626681566238 2023-01-24 02:28:52.569262: step: 56/466, loss: 0.23427191376686096 2023-01-24 02:28:53.184078: step: 58/466, loss: 0.08729560673236847 2023-01-24 02:28:53.873022: step: 60/466, loss: 0.05417446047067642 2023-01-24 02:28:54.470663: step: 62/466, loss: 0.13129599392414093 2023-01-24 02:28:55.057688: step: 64/466, loss: 0.07614006847143173 2023-01-24 02:28:55.691056: step: 66/466, loss: 0.16889949142932892 2023-01-24 02:28:56.402100: step: 68/466, loss: 0.24261288344860077 2023-01-24 02:28:56.976064: step: 70/466, loss: 0.038321852684020996 2023-01-24 02:28:57.545416: step: 72/466, loss: 0.044602423906326294 2023-01-24 02:28:58.186960: step: 74/466, loss: 0.01967819780111313 2023-01-24 02:28:58.816403: step: 76/466, loss: 0.02813856489956379 2023-01-24 02:28:59.410208: step: 78/466, loss: 0.04209369421005249 2023-01-24 02:28:59.996306: step: 80/466, loss: 0.024040287360548973 2023-01-24 02:29:00.622641: step: 82/466, loss: 0.09991413354873657 2023-01-24 02:29:01.215243: step: 84/466, loss: 0.07372264564037323 2023-01-24 02:29:01.863533: step: 86/466, loss: 0.029386376962065697 2023-01-24 02:29:02.483047: step: 88/466, loss: 0.4689430594444275 2023-01-24 02:29:03.066031: step: 90/466, loss: 0.053494032472372055 2023-01-24 02:29:03.690248: step: 92/466, loss: 0.013472192920744419 2023-01-24 02:29:04.262419: step: 94/466, loss: 0.0707637220621109 2023-01-24 02:29:05.073076: step: 96/466, loss: 0.061210937798023224 2023-01-24 02:29:05.659907: step: 98/466, loss: 0.04127485305070877 2023-01-24 02:29:06.276327: step: 100/466, loss: 0.07582556456327438 2023-01-24 02:29:06.901097: step: 102/466, loss: 0.029441645368933678 2023-01-24 02:29:07.482976: step: 104/466, loss: 0.06904749572277069 2023-01-24 02:29:08.131312: step: 106/466, loss: 0.20207440853118896 2023-01-24 02:29:08.726195: step: 108/466, loss: 0.09177403151988983 2023-01-24 02:29:09.361965: step: 110/466, loss: 0.06599751859903336 2023-01-24 02:29:10.025963: step: 112/466, loss: 0.05812675133347511 2023-01-24 02:29:10.662422: step: 114/466, loss: 0.024880943819880486 2023-01-24 02:29:11.278456: step: 116/466, loss: 0.06967656314373016 2023-01-24 02:29:11.918925: step: 118/466, loss: 0.090497687458992 2023-01-24 02:29:12.601703: step: 120/466, loss: 0.12288883328437805 2023-01-24 02:29:13.223078: step: 122/466, loss: 0.12109700590372086 2023-01-24 02:29:13.801558: step: 124/466, loss: 0.07176961749792099 2023-01-24 02:29:14.414235: step: 126/466, loss: 0.12157073616981506 2023-01-24 02:29:15.023832: step: 128/466, loss: 0.015516109764575958 2023-01-24 02:29:15.613860: step: 130/466, loss: 0.02540462650358677 2023-01-24 02:29:16.201406: step: 132/466, loss: 0.007711430545896292 2023-01-24 02:29:16.787720: step: 134/466, loss: 0.07219022512435913 2023-01-24 02:29:17.415107: step: 136/466, loss: 0.106453076004982 2023-01-24 02:29:18.054945: step: 138/466, loss: 0.08396579325199127 2023-01-24 02:29:18.702438: step: 140/466, loss: 0.09653432667255402 2023-01-24 02:29:19.335922: step: 142/466, loss: 0.01763550192117691 2023-01-24 02:29:19.897020: step: 144/466, loss: 0.05190001428127289 2023-01-24 02:29:20.540082: step: 146/466, loss: 0.18059158325195312 2023-01-24 02:29:21.050791: step: 148/466, loss: 0.1982903927564621 2023-01-24 02:29:21.680970: step: 150/466, loss: 0.02685152180492878 2023-01-24 02:29:22.314351: step: 152/466, loss: 0.6735672354698181 2023-01-24 02:29:22.965128: step: 154/466, loss: 0.37587738037109375 2023-01-24 02:29:23.556339: step: 156/466, loss: 0.03102908469736576 2023-01-24 02:29:24.115659: step: 158/466, loss: 0.05640149489045143 2023-01-24 02:29:24.737086: step: 160/466, loss: 0.1766110360622406 2023-01-24 02:29:25.341053: step: 162/466, loss: 0.08404644578695297 2023-01-24 02:29:26.058937: step: 164/466, loss: 0.07357435673475266 2023-01-24 02:29:26.718593: step: 166/466, loss: 0.047804638743400574 2023-01-24 02:29:27.419171: step: 168/466, loss: 0.022309113293886185 2023-01-24 02:29:28.041881: step: 170/466, loss: 0.016016261652112007 2023-01-24 02:29:28.731757: step: 172/466, loss: 0.29546135663986206 2023-01-24 02:29:29.392009: step: 174/466, loss: 0.03904377669095993 2023-01-24 02:29:29.982672: step: 176/466, loss: 0.014926779083907604 2023-01-24 02:29:30.611209: step: 178/466, loss: 0.07515985518693924 2023-01-24 02:29:31.311326: step: 180/466, loss: 0.024379044771194458 2023-01-24 02:29:31.995023: step: 182/466, loss: 0.05204268544912338 2023-01-24 02:29:32.593977: step: 184/466, loss: 0.009238921105861664 2023-01-24 02:29:33.225700: step: 186/466, loss: 0.04101286828517914 2023-01-24 02:29:33.836547: step: 188/466, loss: 0.020316865295171738 2023-01-24 02:29:34.415609: step: 190/466, loss: 0.3560216724872589 2023-01-24 02:29:35.007905: step: 192/466, loss: 0.07066544890403748 2023-01-24 02:29:35.618706: step: 194/466, loss: 0.07007250934839249 2023-01-24 02:29:36.174262: step: 196/466, loss: 0.017157820984721184 2023-01-24 02:29:36.792202: step: 198/466, loss: 0.13828325271606445 2023-01-24 02:29:37.408881: step: 200/466, loss: 0.12338992208242416 2023-01-24 02:29:38.052368: step: 202/466, loss: 0.09103899449110031 2023-01-24 02:29:38.679747: step: 204/466, loss: 0.16109821200370789 2023-01-24 02:29:39.269176: step: 206/466, loss: 0.07542702555656433 2023-01-24 02:29:39.864958: step: 208/466, loss: 0.06226639449596405 2023-01-24 02:29:40.494581: step: 210/466, loss: 0.03805557265877724 2023-01-24 02:29:41.074265: step: 212/466, loss: 0.1261565387248993 2023-01-24 02:29:41.675950: step: 214/466, loss: 0.07257590442895889 2023-01-24 02:29:42.356940: step: 216/466, loss: 0.16820329427719116 2023-01-24 02:29:42.927951: step: 218/466, loss: 0.024088187143206596 2023-01-24 02:29:43.613166: step: 220/466, loss: 0.2577759325504303 2023-01-24 02:29:44.182426: step: 222/466, loss: 0.26061850786209106 2023-01-24 02:29:44.815227: step: 224/466, loss: 0.026563717052340508 2023-01-24 02:29:45.474089: step: 226/466, loss: 0.06560484319925308 2023-01-24 02:29:46.143046: step: 228/466, loss: 0.9863904714584351 2023-01-24 02:29:46.722583: step: 230/466, loss: 0.029565483331680298 2023-01-24 02:29:47.372713: step: 232/466, loss: 0.10892686992883682 2023-01-24 02:29:47.992822: step: 234/466, loss: 0.06505019962787628 2023-01-24 02:29:48.601557: step: 236/466, loss: 0.019741952419281006 2023-01-24 02:29:49.189540: step: 238/466, loss: 0.029368475079536438 2023-01-24 02:29:49.873504: step: 240/466, loss: 0.03153730928897858 2023-01-24 02:29:50.490003: step: 242/466, loss: 0.3902517855167389 2023-01-24 02:29:51.089132: step: 244/466, loss: 0.03810492530465126 2023-01-24 02:29:51.700721: step: 246/466, loss: 0.06299792230129242 2023-01-24 02:29:52.355550: step: 248/466, loss: 0.10881893336772919 2023-01-24 02:29:52.923214: step: 250/466, loss: 0.20353665947914124 2023-01-24 02:29:53.558131: step: 252/466, loss: 0.04542895406484604 2023-01-24 02:29:54.286398: step: 254/466, loss: 0.04115154221653938 2023-01-24 02:29:54.901718: step: 256/466, loss: 0.034337591379880905 2023-01-24 02:29:55.503799: step: 258/466, loss: 0.20907801389694214 2023-01-24 02:29:56.098153: step: 260/466, loss: 0.049190454185009 2023-01-24 02:29:56.751557: step: 262/466, loss: 0.091862253844738 2023-01-24 02:29:57.436379: step: 264/466, loss: 5.106974124908447 2023-01-24 02:29:58.079001: step: 266/466, loss: 0.07528732717037201 2023-01-24 02:29:58.678164: step: 268/466, loss: 0.06413239240646362 2023-01-24 02:29:59.322182: step: 270/466, loss: 1.6005995273590088 2023-01-24 02:29:59.909736: step: 272/466, loss: 0.06114184856414795 2023-01-24 02:30:00.456182: step: 274/466, loss: 0.10123945027589798 2023-01-24 02:30:01.082354: step: 276/466, loss: 0.25699582695961 2023-01-24 02:30:01.694599: step: 278/466, loss: 0.08306179195642471 2023-01-24 02:30:02.304354: step: 280/466, loss: 0.03990897908806801 2023-01-24 02:30:02.907258: step: 282/466, loss: 0.03646804392337799 2023-01-24 02:30:03.531680: step: 284/466, loss: 0.10514121502637863 2023-01-24 02:30:04.139559: step: 286/466, loss: 0.005694656167179346 2023-01-24 02:30:04.874534: step: 288/466, loss: 0.026155471801757812 2023-01-24 02:30:05.445876: step: 290/466, loss: 0.13834121823310852 2023-01-24 02:30:06.062956: step: 292/466, loss: 0.02711542136967182 2023-01-24 02:30:06.674784: step: 294/466, loss: 0.05465665087103844 2023-01-24 02:30:07.279582: step: 296/466, loss: 0.049648333340883255 2023-01-24 02:30:07.874498: step: 298/466, loss: 0.0783451497554779 2023-01-24 02:30:08.445629: step: 300/466, loss: 0.049439627677202225 2023-01-24 02:30:09.111138: step: 302/466, loss: 0.15175487101078033 2023-01-24 02:30:09.708762: step: 304/466, loss: 0.06957145035266876 2023-01-24 02:30:10.334183: step: 306/466, loss: 0.06854043900966644 2023-01-24 02:30:10.945415: step: 308/466, loss: 0.12385958433151245 2023-01-24 02:30:11.599939: step: 310/466, loss: 0.020379463210701942 2023-01-24 02:30:12.231993: step: 312/466, loss: 0.028115959838032722 2023-01-24 02:30:12.920950: step: 314/466, loss: 0.057811226695775986 2023-01-24 02:30:13.499227: step: 316/466, loss: 0.02322530187666416 2023-01-24 02:30:14.107415: step: 318/466, loss: 0.0995943695306778 2023-01-24 02:30:14.690652: step: 320/466, loss: 0.08957862854003906 2023-01-24 02:30:15.323559: step: 322/466, loss: 0.2572743594646454 2023-01-24 02:30:15.944520: step: 324/466, loss: 0.3369157314300537 2023-01-24 02:30:16.558384: step: 326/466, loss: 0.013277021236717701 2023-01-24 02:30:17.208839: step: 328/466, loss: 0.06267574429512024 2023-01-24 02:30:17.879884: step: 330/466, loss: 0.07259294390678406 2023-01-24 02:30:18.575201: step: 332/466, loss: 0.04938018321990967 2023-01-24 02:30:19.217474: step: 334/466, loss: 0.050406381487846375 2023-01-24 02:30:19.818563: step: 336/466, loss: 0.08300729840993881 2023-01-24 02:30:20.508653: step: 338/466, loss: 0.03568604961037636 2023-01-24 02:30:21.061661: step: 340/466, loss: 0.03801991045475006 2023-01-24 02:30:21.635785: step: 342/466, loss: 0.09266859292984009 2023-01-24 02:30:22.299193: step: 344/466, loss: 0.1025686264038086 2023-01-24 02:30:22.910266: step: 346/466, loss: 0.03731289505958557 2023-01-24 02:30:23.608346: step: 348/466, loss: 0.1353796124458313 2023-01-24 02:30:24.215389: step: 350/466, loss: 0.04047910496592522 2023-01-24 02:30:24.816124: step: 352/466, loss: 0.10990084707736969 2023-01-24 02:30:25.465333: step: 354/466, loss: 0.15826545655727386 2023-01-24 02:30:26.047563: step: 356/466, loss: 0.10664297640323639 2023-01-24 02:30:26.742355: step: 358/466, loss: 0.17812858521938324 2023-01-24 02:30:27.326636: step: 360/466, loss: 0.06930878013372421 2023-01-24 02:30:27.935072: step: 362/466, loss: 0.06571146845817566 2023-01-24 02:30:28.453085: step: 364/466, loss: 0.03256172314286232 2023-01-24 02:30:29.067160: step: 366/466, loss: 0.12351781129837036 2023-01-24 02:30:29.677030: step: 368/466, loss: 0.2548113167285919 2023-01-24 02:30:30.288260: step: 370/466, loss: 0.1263582408428192 2023-01-24 02:30:30.855213: step: 372/466, loss: 0.07582645118236542 2023-01-24 02:30:31.565436: step: 374/466, loss: 0.05496755614876747 2023-01-24 02:30:32.157336: step: 376/466, loss: 0.08954503387212753 2023-01-24 02:30:32.768680: step: 378/466, loss: 0.12195761501789093 2023-01-24 02:30:33.395256: step: 380/466, loss: 0.39381787180900574 2023-01-24 02:30:34.047951: step: 382/466, loss: 0.16818027198314667 2023-01-24 02:30:34.646129: step: 384/466, loss: 0.04547747224569321 2023-01-24 02:30:35.213332: step: 386/466, loss: 0.12755371630191803 2023-01-24 02:30:35.847043: step: 388/466, loss: 0.11845839768648148 2023-01-24 02:30:36.414745: step: 390/466, loss: 0.47920721769332886 2023-01-24 02:30:37.035131: step: 392/466, loss: 0.08540887385606766 2023-01-24 02:30:37.641276: step: 394/466, loss: 0.19534656405448914 2023-01-24 02:30:38.275383: step: 396/466, loss: 0.1510317176580429 2023-01-24 02:30:38.905257: step: 398/466, loss: 0.09187552332878113 2023-01-24 02:30:39.501773: step: 400/466, loss: 0.06261609494686127 2023-01-24 02:30:40.151984: step: 402/466, loss: 0.03584316000342369 2023-01-24 02:30:40.786100: step: 404/466, loss: 0.14746913313865662 2023-01-24 02:30:41.429423: step: 406/466, loss: 0.14273861050605774 2023-01-24 02:30:42.041607: step: 408/466, loss: 0.06443028897047043 2023-01-24 02:30:42.710913: step: 410/466, loss: 0.044804513454437256 2023-01-24 02:30:43.371488: step: 412/466, loss: 0.2150534689426422 2023-01-24 02:30:43.939452: step: 414/466, loss: 0.01897166483104229 2023-01-24 02:30:44.527129: step: 416/466, loss: 0.028580371290445328 2023-01-24 02:30:45.213078: step: 418/466, loss: 0.18393245339393616 2023-01-24 02:30:45.850256: step: 420/466, loss: 0.19016799330711365 2023-01-24 02:30:46.420265: step: 422/466, loss: 0.07028649002313614 2023-01-24 02:30:46.986492: step: 424/466, loss: 0.2922304570674896 2023-01-24 02:30:47.697102: step: 426/466, loss: 0.042799223214387894 2023-01-24 02:30:48.307375: step: 428/466, loss: 0.04283499717712402 2023-01-24 02:30:48.952310: step: 430/466, loss: 0.02132521942257881 2023-01-24 02:30:49.664589: step: 432/466, loss: 0.0802520364522934 2023-01-24 02:30:50.296151: step: 434/466, loss: 0.15386797487735748 2023-01-24 02:30:50.951753: step: 436/466, loss: 0.043837256729602814 2023-01-24 02:30:51.570926: step: 438/466, loss: 0.06992541253566742 2023-01-24 02:30:52.175817: step: 440/466, loss: 0.05010535567998886 2023-01-24 02:30:52.745612: step: 442/466, loss: 0.09200336784124374 2023-01-24 02:30:53.333453: step: 444/466, loss: 0.03776991367340088 2023-01-24 02:30:53.974276: step: 446/466, loss: 0.006169892381876707 2023-01-24 02:30:54.627828: step: 448/466, loss: 0.060634829103946686 2023-01-24 02:30:55.319390: step: 450/466, loss: 0.031265951693058014 2023-01-24 02:30:55.962869: step: 452/466, loss: 0.021036386489868164 2023-01-24 02:30:56.571683: step: 454/466, loss: 0.023153940215706825 2023-01-24 02:30:57.207706: step: 456/466, loss: 0.10088212043046951 2023-01-24 02:30:57.858297: step: 458/466, loss: 0.036887239664793015 2023-01-24 02:30:58.504569: step: 460/466, loss: 0.05326121300458908 2023-01-24 02:30:59.061960: step: 462/466, loss: 0.02424703910946846 2023-01-24 02:30:59.661269: step: 464/466, loss: 0.4780868589878082 2023-01-24 02:31:00.383045: step: 466/466, loss: 0.052299145609140396 2023-01-24 02:31:01.018199: step: 468/466, loss: 2.543187141418457 2023-01-24 02:31:01.583301: step: 470/466, loss: 0.15826913714408875 2023-01-24 02:31:02.166498: step: 472/466, loss: 0.027156120166182518 2023-01-24 02:31:02.788708: step: 474/466, loss: 0.018557526171207428 2023-01-24 02:31:03.380743: step: 476/466, loss: 0.19445599615573883 2023-01-24 02:31:04.050296: step: 478/466, loss: 0.09860493242740631 2023-01-24 02:31:04.697851: step: 480/466, loss: 0.04384823516011238 2023-01-24 02:31:05.333466: step: 482/466, loss: 0.04084863141179085 2023-01-24 02:31:05.974299: step: 484/466, loss: 0.34274789690971375 2023-01-24 02:31:06.560178: step: 486/466, loss: 0.0950087234377861 2023-01-24 02:31:07.202421: step: 488/466, loss: 0.10143581032752991 2023-01-24 02:31:07.842220: step: 490/466, loss: 0.07003463804721832 2023-01-24 02:31:08.459453: step: 492/466, loss: 0.08866386115550995 2023-01-24 02:31:09.104692: step: 494/466, loss: 0.06682950258255005 2023-01-24 02:31:09.755379: step: 496/466, loss: 0.11515610665082932 2023-01-24 02:31:10.378966: step: 498/466, loss: 0.758357048034668 2023-01-24 02:31:10.990279: step: 500/466, loss: 0.1596226990222931 2023-01-24 02:31:11.676072: step: 502/466, loss: 0.17213603854179382 2023-01-24 02:31:12.284645: step: 504/466, loss: 0.029680604115128517 2023-01-24 02:31:12.938735: step: 506/466, loss: 0.4964957535266876 2023-01-24 02:31:13.582537: step: 508/466, loss: 0.027684777975082397 2023-01-24 02:31:14.192876: step: 510/466, loss: 0.09656545519828796 2023-01-24 02:31:14.835371: step: 512/466, loss: 0.058670736849308014 2023-01-24 02:31:15.474919: step: 514/466, loss: 0.046255968511104584 2023-01-24 02:31:16.040002: step: 516/466, loss: 0.0634763315320015 2023-01-24 02:31:16.698037: step: 518/466, loss: 0.06766517460346222 2023-01-24 02:31:17.319670: step: 520/466, loss: 0.11908484250307083 2023-01-24 02:31:17.913947: step: 522/466, loss: 0.046817272901535034 2023-01-24 02:31:18.451267: step: 524/466, loss: 0.05099989473819733 2023-01-24 02:31:19.083231: step: 526/466, loss: 0.093465656042099 2023-01-24 02:31:19.663051: step: 528/466, loss: 0.07280781865119934 2023-01-24 02:31:20.242509: step: 530/466, loss: 0.1566302329301834 2023-01-24 02:31:20.886597: step: 532/466, loss: 0.027845079079270363 2023-01-24 02:31:21.523504: step: 534/466, loss: 0.16751457750797272 2023-01-24 02:31:22.122077: step: 536/466, loss: 0.105364590883255 2023-01-24 02:31:22.776029: step: 538/466, loss: 0.022764407098293304 2023-01-24 02:31:23.399519: step: 540/466, loss: 0.016718149185180664 2023-01-24 02:31:24.019288: step: 542/466, loss: 0.01735505647957325 2023-01-24 02:31:24.664861: step: 544/466, loss: 0.0612952895462513 2023-01-24 02:31:25.396714: step: 546/466, loss: 0.022740516811609268 2023-01-24 02:31:26.026644: step: 548/466, loss: 0.07891835272312164 2023-01-24 02:31:26.649009: step: 550/466, loss: 0.03998878598213196 2023-01-24 02:31:27.261782: step: 552/466, loss: 0.07046996057033539 2023-01-24 02:31:27.833659: step: 554/466, loss: 0.09587815403938293 2023-01-24 02:31:28.447742: step: 556/466, loss: 0.10038372874259949 2023-01-24 02:31:29.000840: step: 558/466, loss: 0.08871347457170486 2023-01-24 02:31:29.658963: step: 560/466, loss: 0.5055253505706787 2023-01-24 02:31:30.262428: step: 562/466, loss: 0.512459933757782 2023-01-24 02:31:30.808632: step: 564/466, loss: 0.16453763842582703 2023-01-24 02:31:31.457616: step: 566/466, loss: 0.18959884345531464 2023-01-24 02:31:32.126695: step: 568/466, loss: 0.025106430053710938 2023-01-24 02:31:32.759942: step: 570/466, loss: 0.03585100546479225 2023-01-24 02:31:33.402284: step: 572/466, loss: 1.0049701929092407 2023-01-24 02:31:34.043464: step: 574/466, loss: 0.09710993617773056 2023-01-24 02:31:34.691294: step: 576/466, loss: 0.09198971837759018 2023-01-24 02:31:35.343009: step: 578/466, loss: 0.22630296647548676 2023-01-24 02:31:36.047897: step: 580/466, loss: 0.09247155487537384 2023-01-24 02:31:36.640576: step: 582/466, loss: 0.33415427803993225 2023-01-24 02:31:37.244603: step: 584/466, loss: 0.22004912793636322 2023-01-24 02:31:37.911208: step: 586/466, loss: 0.04450481757521629 2023-01-24 02:31:38.556760: step: 588/466, loss: 0.06292744725942612 2023-01-24 02:31:39.162393: step: 590/466, loss: 0.1736353039741516 2023-01-24 02:31:39.726287: step: 592/466, loss: 0.025799855589866638 2023-01-24 02:31:40.360887: step: 594/466, loss: 0.1439337283372879 2023-01-24 02:31:40.963063: step: 596/466, loss: 0.012740693055093288 2023-01-24 02:31:41.575554: step: 598/466, loss: 0.02078421227633953 2023-01-24 02:31:42.211018: step: 600/466, loss: 0.07006371021270752 2023-01-24 02:31:42.943663: step: 602/466, loss: 0.146609827876091 2023-01-24 02:31:43.581896: step: 604/466, loss: 0.09524525701999664 2023-01-24 02:31:44.250919: step: 606/466, loss: 0.09198599308729172 2023-01-24 02:31:44.868405: step: 608/466, loss: 1.013004183769226 2023-01-24 02:31:45.489597: step: 610/466, loss: 0.02928597293794155 2023-01-24 02:31:46.107187: step: 612/466, loss: 0.06546822190284729 2023-01-24 02:31:46.717384: step: 614/466, loss: 0.16060662269592285 2023-01-24 02:31:47.357432: step: 616/466, loss: 0.13668808341026306 2023-01-24 02:31:48.015716: step: 618/466, loss: 0.04572014883160591 2023-01-24 02:31:48.701775: step: 620/466, loss: 0.09879611432552338 2023-01-24 02:31:49.341343: step: 622/466, loss: 0.0328047052025795 2023-01-24 02:31:50.041269: step: 624/466, loss: 0.14962564408779144 2023-01-24 02:31:50.689261: step: 626/466, loss: 0.5662903189659119 2023-01-24 02:31:51.333172: step: 628/466, loss: 0.0919908806681633 2023-01-24 02:31:51.997173: step: 630/466, loss: 0.051337748765945435 2023-01-24 02:31:52.562694: step: 632/466, loss: 0.0314980186522007 2023-01-24 02:31:53.164395: step: 634/466, loss: 0.40809789299964905 2023-01-24 02:31:53.760603: step: 636/466, loss: 0.04629041627049446 2023-01-24 02:31:54.387645: step: 638/466, loss: 0.12016425281763077 2023-01-24 02:31:55.090190: step: 640/466, loss: 0.025917401537299156 2023-01-24 02:31:55.759079: step: 642/466, loss: 0.24926666915416718 2023-01-24 02:31:56.431486: step: 644/466, loss: 0.11441686749458313 2023-01-24 02:31:56.989193: step: 646/466, loss: 0.09986777603626251 2023-01-24 02:31:57.565433: step: 648/466, loss: 0.01881973072886467 2023-01-24 02:31:58.157589: step: 650/466, loss: 0.029893090948462486 2023-01-24 02:31:58.795273: step: 652/466, loss: 0.041874662041664124 2023-01-24 02:31:59.471765: step: 654/466, loss: 0.05112221837043762 2023-01-24 02:32:00.121108: step: 656/466, loss: 0.06019367277622223 2023-01-24 02:32:00.682035: step: 658/466, loss: 0.06566891819238663 2023-01-24 02:32:01.282685: step: 660/466, loss: 0.3472343683242798 2023-01-24 02:32:01.885026: step: 662/466, loss: 0.08255734294652939 2023-01-24 02:32:02.531570: step: 664/466, loss: 0.03303737938404083 2023-01-24 02:32:03.188360: step: 666/466, loss: 0.33435532450675964 2023-01-24 02:32:03.767086: step: 668/466, loss: 0.04343652352690697 2023-01-24 02:32:04.394459: step: 670/466, loss: 0.5589689612388611 2023-01-24 02:32:05.025713: step: 672/466, loss: 0.1640758216381073 2023-01-24 02:32:05.736606: step: 674/466, loss: 0.09021838754415512 2023-01-24 02:32:06.369013: step: 676/466, loss: 0.06118958070874214 2023-01-24 02:32:07.020210: step: 678/466, loss: 0.3956887125968933 2023-01-24 02:32:07.652211: step: 680/466, loss: 0.026741499081254005 2023-01-24 02:32:08.255483: step: 682/466, loss: 0.17171710729599 2023-01-24 02:32:08.859027: step: 684/466, loss: 0.3079081177711487 2023-01-24 02:32:09.455377: step: 686/466, loss: 0.10954593867063522 2023-01-24 02:32:10.061310: step: 688/466, loss: 0.6995450258255005 2023-01-24 02:32:10.694786: step: 690/466, loss: 0.043250422924757004 2023-01-24 02:32:11.302613: step: 692/466, loss: 0.09776352345943451 2023-01-24 02:32:11.932211: step: 694/466, loss: 0.3069460690021515 2023-01-24 02:32:12.562965: step: 696/466, loss: 0.013332807458937168 2023-01-24 02:32:13.196931: step: 698/466, loss: 0.23258814215660095 2023-01-24 02:32:13.819232: step: 700/466, loss: 0.05831009894609451 2023-01-24 02:32:14.389840: step: 702/466, loss: 0.06570618599653244 2023-01-24 02:32:15.004852: step: 704/466, loss: 0.04204113036394119 2023-01-24 02:32:15.643677: step: 706/466, loss: 0.03426389768719673 2023-01-24 02:32:16.230651: step: 708/466, loss: 0.0980169028043747 2023-01-24 02:32:16.843405: step: 710/466, loss: 0.15948888659477234 2023-01-24 02:32:17.526675: step: 712/466, loss: 0.056519124656915665 2023-01-24 02:32:18.181156: step: 714/466, loss: 0.06070170924067497 2023-01-24 02:32:18.789853: step: 716/466, loss: 0.11161601543426514 2023-01-24 02:32:19.430191: step: 718/466, loss: 0.23625513911247253 2023-01-24 02:32:20.037714: step: 720/466, loss: 0.012628679163753986 2023-01-24 02:32:20.599658: step: 722/466, loss: 0.12756291031837463 2023-01-24 02:32:21.279098: step: 724/466, loss: 0.1475050300359726 2023-01-24 02:32:21.880830: step: 726/466, loss: 0.04249989986419678 2023-01-24 02:32:22.556318: step: 728/466, loss: 0.49722063541412354 2023-01-24 02:32:23.260217: step: 730/466, loss: 0.04375768452882767 2023-01-24 02:32:23.908256: step: 732/466, loss: 0.07949644327163696 2023-01-24 02:32:24.477801: step: 734/466, loss: 0.05384787917137146 2023-01-24 02:32:25.126221: step: 736/466, loss: 0.15113519132137299 2023-01-24 02:32:25.778555: step: 738/466, loss: 0.10131586343050003 2023-01-24 02:32:26.346074: step: 740/466, loss: 0.05296603962779045 2023-01-24 02:32:27.036020: step: 742/466, loss: 0.039442624896764755 2023-01-24 02:32:27.621895: step: 744/466, loss: 0.1297570765018463 2023-01-24 02:32:28.332360: step: 746/466, loss: 0.17817160487174988 2023-01-24 02:32:28.922026: step: 748/466, loss: 0.025621848180890083 2023-01-24 02:32:29.566226: step: 750/466, loss: 1.840409517288208 2023-01-24 02:32:30.156522: step: 752/466, loss: 3.542585849761963 2023-01-24 02:32:30.777608: step: 754/466, loss: 0.08421312272548676 2023-01-24 02:32:31.472663: step: 756/466, loss: 0.14330342411994934 2023-01-24 02:32:32.081450: step: 758/466, loss: 0.09391725063323975 2023-01-24 02:32:32.657300: step: 760/466, loss: 0.1429196298122406 2023-01-24 02:32:33.327207: step: 762/466, loss: 0.20400117337703705 2023-01-24 02:32:34.085649: step: 764/466, loss: 0.06184087693691254 2023-01-24 02:32:34.714766: step: 766/466, loss: 0.19144916534423828 2023-01-24 02:32:35.337324: step: 768/466, loss: 0.7109355330467224 2023-01-24 02:32:35.938756: step: 770/466, loss: 0.03441312536597252 2023-01-24 02:32:36.574145: step: 772/466, loss: 0.06554859131574631 2023-01-24 02:32:37.151718: step: 774/466, loss: 0.12567520141601562 2023-01-24 02:32:37.778365: step: 776/466, loss: 0.14205007255077362 2023-01-24 02:32:38.288782: step: 778/466, loss: 0.009420069865882397 2023-01-24 02:32:38.875994: step: 780/466, loss: 0.10395492613315582 2023-01-24 02:32:39.488025: step: 782/466, loss: 1.3356351852416992 2023-01-24 02:32:40.063933: step: 784/466, loss: 0.06981461495161057 2023-01-24 02:32:40.653131: step: 786/466, loss: 0.04637700691819191 2023-01-24 02:32:41.257688: step: 788/466, loss: 0.0754745677113533 2023-01-24 02:32:41.858059: step: 790/466, loss: 0.03780405595898628 2023-01-24 02:32:42.534118: step: 792/466, loss: 0.04822218418121338 2023-01-24 02:32:43.203053: step: 794/466, loss: 0.022367283701896667 2023-01-24 02:32:43.829334: step: 796/466, loss: 0.2079140990972519 2023-01-24 02:32:44.485527: step: 798/466, loss: 1.3244308233261108 2023-01-24 02:32:45.184772: step: 800/466, loss: 0.34323838353157043 2023-01-24 02:32:45.818761: step: 802/466, loss: 0.14083780348300934 2023-01-24 02:32:46.470801: step: 804/466, loss: 0.22988341748714447 2023-01-24 02:32:47.108592: step: 806/466, loss: 0.04329225793480873 2023-01-24 02:32:47.752820: step: 808/466, loss: 0.032999325543642044 2023-01-24 02:32:48.367170: step: 810/466, loss: 5.826337814331055 2023-01-24 02:32:48.964589: step: 812/466, loss: 0.09369110316038132 2023-01-24 02:32:49.578891: step: 814/466, loss: 0.025686241686344147 2023-01-24 02:32:50.161923: step: 816/466, loss: 0.03150394186377525 2023-01-24 02:32:50.785304: step: 818/466, loss: 0.01394703146070242 2023-01-24 02:32:51.352152: step: 820/466, loss: 0.10388254374265671 2023-01-24 02:32:52.024444: step: 822/466, loss: 0.05112575739622116 2023-01-24 02:32:52.584528: step: 824/466, loss: 0.1600683033466339 2023-01-24 02:32:53.253930: step: 826/466, loss: 0.17425380647182465 2023-01-24 02:32:53.849253: step: 828/466, loss: 0.021439744159579277 2023-01-24 02:32:54.478317: step: 830/466, loss: 0.0466870479285717 2023-01-24 02:32:55.117249: step: 832/466, loss: 0.10297922044992447 2023-01-24 02:32:55.831282: step: 834/466, loss: 0.1491861492395401 2023-01-24 02:32:56.430361: step: 836/466, loss: 0.0686078816652298 2023-01-24 02:32:57.016019: step: 838/466, loss: 0.08716202527284622 2023-01-24 02:32:57.639947: step: 840/466, loss: 0.20605508983135223 2023-01-24 02:32:58.300042: step: 842/466, loss: 0.0744304209947586 2023-01-24 02:32:58.991317: step: 844/466, loss: 0.049539387226104736 2023-01-24 02:32:59.593856: step: 846/466, loss: 0.12177298218011856 2023-01-24 02:33:00.155912: step: 848/466, loss: 0.07692743092775345 2023-01-24 02:33:00.798074: step: 850/466, loss: 0.1394100934267044 2023-01-24 02:33:01.420417: step: 852/466, loss: 0.1861666738986969 2023-01-24 02:33:01.999658: step: 854/466, loss: 0.1190071627497673 2023-01-24 02:33:02.571931: step: 856/466, loss: 0.13761764764785767 2023-01-24 02:33:03.179056: step: 858/466, loss: 0.09148634970188141 2023-01-24 02:33:03.840113: step: 860/466, loss: 0.05858558416366577 2023-01-24 02:33:04.466426: step: 862/466, loss: 0.19387279450893402 2023-01-24 02:33:04.998071: step: 864/466, loss: 0.06237471103668213 2023-01-24 02:33:05.600135: step: 866/466, loss: 0.06779488176107407 2023-01-24 02:33:06.171911: step: 868/466, loss: 0.0751604288816452 2023-01-24 02:33:06.811650: step: 870/466, loss: 0.07606161385774612 2023-01-24 02:33:07.382853: step: 872/466, loss: 0.029564842581748962 2023-01-24 02:33:08.003984: step: 874/466, loss: 0.03874971345067024 2023-01-24 02:33:08.682126: step: 876/466, loss: 0.10743102431297302 2023-01-24 02:33:09.357931: step: 878/466, loss: 0.1099063903093338 2023-01-24 02:33:10.011486: step: 880/466, loss: 0.04139665514230728 2023-01-24 02:33:10.607609: step: 882/466, loss: 0.14583469927310944 2023-01-24 02:33:11.220158: step: 884/466, loss: 0.08397570252418518 2023-01-24 02:33:11.773022: step: 886/466, loss: 0.07745757699012756 2023-01-24 02:33:12.446166: step: 888/466, loss: 0.1580204963684082 2023-01-24 02:33:13.161168: step: 890/466, loss: 0.1032017320394516 2023-01-24 02:33:13.800687: step: 892/466, loss: 0.06455115973949432 2023-01-24 02:33:14.442835: step: 894/466, loss: 0.08590283989906311 2023-01-24 02:33:15.052887: step: 896/466, loss: 0.19114746153354645 2023-01-24 02:33:15.647459: step: 898/466, loss: 0.3613285720348358 2023-01-24 02:33:16.322520: step: 900/466, loss: 0.2982882857322693 2023-01-24 02:33:16.902837: step: 902/466, loss: 0.014200277626514435 2023-01-24 02:33:17.517225: step: 904/466, loss: 0.058966249227523804 2023-01-24 02:33:18.129837: step: 906/466, loss: 0.09376897662878036 2023-01-24 02:33:18.668215: step: 908/466, loss: 0.29971009492874146 2023-01-24 02:33:19.331708: step: 910/466, loss: 0.07468503713607788 2023-01-24 02:33:19.987535: step: 912/466, loss: 0.08336369693279266 2023-01-24 02:33:20.635862: step: 914/466, loss: 0.04851008951663971 2023-01-24 02:33:21.260617: step: 916/466, loss: 0.19046419858932495 2023-01-24 02:33:21.833335: step: 918/466, loss: 0.022284705191850662 2023-01-24 02:33:22.481004: step: 920/466, loss: 0.05849120393395424 2023-01-24 02:33:23.156118: step: 922/466, loss: 0.0836711972951889 2023-01-24 02:33:23.803896: step: 924/466, loss: 0.13933195173740387 2023-01-24 02:33:24.350766: step: 926/466, loss: 0.11811508238315582 2023-01-24 02:33:25.007707: step: 928/466, loss: 0.09238840639591217 2023-01-24 02:33:25.641926: step: 930/466, loss: 0.49136781692504883 2023-01-24 02:33:26.196307: step: 932/466, loss: 0.04988570883870125 ================================================== Loss: 0.167 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3498274946921444, 'r': 0.31265417457305505, 'f1': 0.3301978957915832}, 'combined': 0.24330371268853498, 'epoch': 17} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3630822573186151, 'r': 0.2665874231663706, 'f1': 0.3074410802230351}, 'combined': 0.19250983527984444, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33911273413474974, 'r': 0.32881709135267007, 'f1': 0.3338855628956785}, 'combined': 0.24602094108102626, 'epoch': 17} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36920378222963196, 'r': 0.2819798048228118, 'f1': 0.31975010587781005}, 'combined': 0.19815499519188226, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30083242814171124, 'r': 0.3105367000172503, 'f1': 0.30560754604872253}, 'combined': 0.22518450761484818, 'epoch': 17} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3698220798618687, 'r': 0.2787016576576357, 'f1': 0.31786045978863653}, 'combined': 0.2108579287706797, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32407407407407407, 'r': 0.25, 'f1': 0.282258064516129}, 'combined': 0.18817204301075266, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2638888888888889, 'r': 0.16379310344827586, 'f1': 0.20212765957446807}, 'combined': 0.13475177304964536, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:35:59.298959: step: 2/466, loss: 0.030299948528409004 2023-01-24 02:35:59.922128: step: 4/466, loss: 0.18529173731803894 2023-01-24 02:36:00.549053: step: 6/466, loss: 0.23373645544052124 2023-01-24 02:36:01.169640: step: 8/466, loss: 0.3838213384151459 2023-01-24 02:36:01.771602: step: 10/466, loss: 0.020283006131649017 2023-01-24 02:36:02.450802: step: 12/466, loss: 0.03348774090409279 2023-01-24 02:36:03.069737: step: 14/466, loss: 0.05698230862617493 2023-01-24 02:36:03.675534: step: 16/466, loss: 0.08389558643102646 2023-01-24 02:36:04.340793: step: 18/466, loss: 0.0727858766913414 2023-01-24 02:36:04.917469: step: 20/466, loss: 0.03135587275028229 2023-01-24 02:36:05.519536: step: 22/466, loss: 0.07733876258134842 2023-01-24 02:36:06.112784: step: 24/466, loss: 0.0626508891582489 2023-01-24 02:36:06.739621: step: 26/466, loss: 0.11715943366289139 2023-01-24 02:36:07.326295: step: 28/466, loss: 0.06282183527946472 2023-01-24 02:36:08.024211: step: 30/466, loss: 0.06618069112300873 2023-01-24 02:36:08.642582: step: 32/466, loss: 0.36386919021606445 2023-01-24 02:36:09.280238: step: 34/466, loss: 0.023268923163414 2023-01-24 02:36:09.877601: step: 36/466, loss: 0.026226868852972984 2023-01-24 02:36:10.559354: step: 38/466, loss: 0.1648685336112976 2023-01-24 02:36:11.154995: step: 40/466, loss: 0.09379521757364273 2023-01-24 02:36:11.740049: step: 42/466, loss: 0.009535979479551315 2023-01-24 02:36:12.379375: step: 44/466, loss: 0.022059675306081772 2023-01-24 02:36:12.970110: step: 46/466, loss: 0.04059113934636116 2023-01-24 02:36:13.568082: step: 48/466, loss: 0.017242122441530228 2023-01-24 02:36:14.167744: step: 50/466, loss: 0.053310081362724304 2023-01-24 02:36:14.761221: step: 52/466, loss: 0.029806554317474365 2023-01-24 02:36:15.380554: step: 54/466, loss: 0.3543507158756256 2023-01-24 02:36:15.999472: step: 56/466, loss: 0.014609129168093204 2023-01-24 02:36:16.568499: step: 58/466, loss: 0.03231660649180412 2023-01-24 02:36:17.184867: step: 60/466, loss: 0.04060859978199005 2023-01-24 02:36:17.773450: step: 62/466, loss: 0.3378612995147705 2023-01-24 02:36:18.363000: step: 64/466, loss: 0.114456906914711 2023-01-24 02:36:19.135003: step: 66/466, loss: 0.004361505154520273 2023-01-24 02:36:19.726283: step: 68/466, loss: 0.14874155819416046 2023-01-24 02:36:20.347160: step: 70/466, loss: 0.24420452117919922 2023-01-24 02:36:20.925640: step: 72/466, loss: 0.04582386463880539 2023-01-24 02:36:21.572626: step: 74/466, loss: 0.17232546210289001 2023-01-24 02:36:22.200663: step: 76/466, loss: 0.11485756933689117 2023-01-24 02:36:22.806255: step: 78/466, loss: 0.0269269198179245 2023-01-24 02:36:23.359654: step: 80/466, loss: 0.058198269456624985 2023-01-24 02:36:24.014101: step: 82/466, loss: 0.11508843302726746 2023-01-24 02:36:24.574884: step: 84/466, loss: 0.023328503593802452 2023-01-24 02:36:25.140639: step: 86/466, loss: 0.03980041295289993 2023-01-24 02:36:25.783881: step: 88/466, loss: 0.08690007030963898 2023-01-24 02:36:26.455087: step: 90/466, loss: 0.04720456153154373 2023-01-24 02:36:27.151508: step: 92/466, loss: 0.062441691756248474 2023-01-24 02:36:27.810415: step: 94/466, loss: 0.04656941071152687 2023-01-24 02:36:28.475228: step: 96/466, loss: 0.08133785426616669 2023-01-24 02:36:29.103206: step: 98/466, loss: 0.08496791124343872 2023-01-24 02:36:29.817426: step: 100/466, loss: 0.08659321069717407 2023-01-24 02:36:30.474091: step: 102/466, loss: 0.057392898947000504 2023-01-24 02:36:31.031808: step: 104/466, loss: 0.03435138985514641 2023-01-24 02:36:31.662943: step: 106/466, loss: 0.02512931078672409 2023-01-24 02:36:32.353931: step: 108/466, loss: 0.06552870571613312 2023-01-24 02:36:32.941133: step: 110/466, loss: 0.08875542134046555 2023-01-24 02:36:33.665456: step: 112/466, loss: 3.140612840652466 2023-01-24 02:36:34.337090: step: 114/466, loss: 0.018068477511405945 2023-01-24 02:36:34.962297: step: 116/466, loss: 0.2105257362127304 2023-01-24 02:36:35.614270: step: 118/466, loss: 0.1643773317337036 2023-01-24 02:36:36.231367: step: 120/466, loss: 0.026753926649689674 2023-01-24 02:36:36.984678: step: 122/466, loss: 0.18800696730613708 2023-01-24 02:36:37.639733: step: 124/466, loss: 0.047364406287670135 2023-01-24 02:36:38.275965: step: 126/466, loss: 0.03672315552830696 2023-01-24 02:36:38.905745: step: 128/466, loss: 0.12688279151916504 2023-01-24 02:36:39.655319: step: 130/466, loss: 0.08330725878477097 2023-01-24 02:36:40.224683: step: 132/466, loss: 0.07648205012083054 2023-01-24 02:36:40.808669: step: 134/466, loss: 0.04698558151721954 2023-01-24 02:36:41.469939: step: 136/466, loss: 0.03224561735987663 2023-01-24 02:36:42.106893: step: 138/466, loss: 8.10254192352295 2023-01-24 02:36:42.723012: step: 140/466, loss: 0.041842713952064514 2023-01-24 02:36:43.307026: step: 142/466, loss: 0.2807225286960602 2023-01-24 02:36:43.919312: step: 144/466, loss: 0.04971598833799362 2023-01-24 02:36:44.559025: step: 146/466, loss: 0.11303872615098953 2023-01-24 02:36:45.150661: step: 148/466, loss: 0.0421968437731266 2023-01-24 02:36:45.759701: step: 150/466, loss: 0.03591064363718033 2023-01-24 02:36:46.382447: step: 152/466, loss: 0.07601898908615112 2023-01-24 02:36:47.011322: step: 154/466, loss: 0.14888262748718262 2023-01-24 02:36:47.643575: step: 156/466, loss: 0.05524364858865738 2023-01-24 02:36:48.269500: step: 158/466, loss: 0.04960860311985016 2023-01-24 02:36:48.874993: step: 160/466, loss: 0.10067957639694214 2023-01-24 02:36:49.480636: step: 162/466, loss: 0.44527915120124817 2023-01-24 02:36:50.075699: step: 164/466, loss: 0.05498115345835686 2023-01-24 02:36:50.726691: step: 166/466, loss: 0.14127767086029053 2023-01-24 02:36:51.337325: step: 168/466, loss: 0.057458266615867615 2023-01-24 02:36:51.886686: step: 170/466, loss: 0.06778069585561752 2023-01-24 02:36:52.499719: step: 172/466, loss: 0.03087298944592476 2023-01-24 02:36:53.159270: step: 174/466, loss: 0.05096074193716049 2023-01-24 02:36:53.763875: step: 176/466, loss: 0.010274292901158333 2023-01-24 02:36:54.401661: step: 178/466, loss: 0.046914875507354736 2023-01-24 02:36:55.023470: step: 180/466, loss: 0.06641078740358353 2023-01-24 02:36:55.636535: step: 182/466, loss: 0.060954492539167404 2023-01-24 02:36:56.270703: step: 184/466, loss: 0.14127805829048157 2023-01-24 02:36:56.873144: step: 186/466, loss: 0.07113578915596008 2023-01-24 02:36:57.546553: step: 188/466, loss: 0.5118537545204163 2023-01-24 02:36:58.184370: step: 190/466, loss: 0.18475711345672607 2023-01-24 02:36:58.891349: step: 192/466, loss: 0.009850537404417992 2023-01-24 02:36:59.430851: step: 194/466, loss: 0.06644842028617859 2023-01-24 02:37:00.063258: step: 196/466, loss: 0.3611626625061035 2023-01-24 02:37:00.722959: step: 198/466, loss: 0.03637146204710007 2023-01-24 02:37:01.400176: step: 200/466, loss: 0.02886369824409485 2023-01-24 02:37:01.928049: step: 202/466, loss: 0.08305783569812775 2023-01-24 02:37:02.544487: step: 204/466, loss: 0.06811418384313583 2023-01-24 02:37:03.215326: step: 206/466, loss: 0.04947146400809288 2023-01-24 02:37:03.828031: step: 208/466, loss: 0.03452489525079727 2023-01-24 02:37:04.439618: step: 210/466, loss: 0.01952638477087021 2023-01-24 02:37:05.020294: step: 212/466, loss: 0.03577161207795143 2023-01-24 02:37:05.623984: step: 214/466, loss: 0.8099901676177979 2023-01-24 02:37:06.232984: step: 216/466, loss: 0.1089983657002449 2023-01-24 02:37:06.855794: step: 218/466, loss: 0.05485111102461815 2023-01-24 02:37:07.455671: step: 220/466, loss: 0.2529895603656769 2023-01-24 02:37:08.045455: step: 222/466, loss: 0.08142302185297012 2023-01-24 02:37:08.652078: step: 224/466, loss: 0.05828835442662239 2023-01-24 02:37:09.272290: step: 226/466, loss: 0.08212900906801224 2023-01-24 02:37:09.881133: step: 228/466, loss: 0.03911758214235306 2023-01-24 02:37:10.520236: step: 230/466, loss: 0.11328721046447754 2023-01-24 02:37:11.064376: step: 232/466, loss: 0.09146860986948013 2023-01-24 02:37:11.662019: step: 234/466, loss: 0.042395103722810745 2023-01-24 02:37:12.338391: step: 236/466, loss: 0.04986429214477539 2023-01-24 02:37:12.962617: step: 238/466, loss: 0.09979227930307388 2023-01-24 02:37:13.608816: step: 240/466, loss: 0.08626820892095566 2023-01-24 02:37:14.172031: step: 242/466, loss: 0.03569401800632477 2023-01-24 02:37:14.750786: step: 244/466, loss: 0.7742249965667725 2023-01-24 02:37:15.341833: step: 246/466, loss: 0.2065431922674179 2023-01-24 02:37:15.862219: step: 248/466, loss: 0.02376331388950348 2023-01-24 02:37:16.431064: step: 250/466, loss: 0.2195350080728531 2023-01-24 02:37:17.009538: step: 252/466, loss: 0.032758988440036774 2023-01-24 02:37:17.626288: step: 254/466, loss: 0.8895750045776367 2023-01-24 02:37:18.298150: step: 256/466, loss: 0.032414816319942474 2023-01-24 02:37:18.889599: step: 258/466, loss: 0.06055746227502823 2023-01-24 02:37:19.436450: step: 260/466, loss: 0.028709404170513153 2023-01-24 02:37:20.044086: step: 262/466, loss: 0.05509772151708603 2023-01-24 02:37:20.644648: step: 264/466, loss: 0.05865830183029175 2023-01-24 02:37:21.248492: step: 266/466, loss: 0.1421409249305725 2023-01-24 02:37:21.760960: step: 268/466, loss: 0.05160105600953102 2023-01-24 02:37:22.349858: step: 270/466, loss: 0.017292175441980362 2023-01-24 02:37:23.069559: step: 272/466, loss: 0.4796491861343384 2023-01-24 02:37:23.643972: step: 274/466, loss: 0.3645254075527191 2023-01-24 02:37:24.258318: step: 276/466, loss: 0.01594046875834465 2023-01-24 02:37:24.888657: step: 278/466, loss: 0.12046370655298233 2023-01-24 02:37:25.616477: step: 280/466, loss: 0.0017643040046095848 2023-01-24 02:37:26.234923: step: 282/466, loss: 0.06266909837722778 2023-01-24 02:37:26.836749: step: 284/466, loss: 0.11617766320705414 2023-01-24 02:37:27.463745: step: 286/466, loss: 0.1816425919532776 2023-01-24 02:37:28.073916: step: 288/466, loss: 0.05535324662923813 2023-01-24 02:37:28.699655: step: 290/466, loss: 0.09581460803747177 2023-01-24 02:37:29.318112: step: 292/466, loss: 0.0881056934595108 2023-01-24 02:37:29.885337: step: 294/466, loss: 0.1429542750120163 2023-01-24 02:37:30.468266: step: 296/466, loss: 0.07996713370084763 2023-01-24 02:37:31.056693: step: 298/466, loss: 0.11317909508943558 2023-01-24 02:37:31.698805: step: 300/466, loss: 0.09892372041940689 2023-01-24 02:37:32.339768: step: 302/466, loss: 0.016523847356438637 2023-01-24 02:37:32.917512: step: 304/466, loss: 0.12549863755702972 2023-01-24 02:37:33.550550: step: 306/466, loss: 0.024841489270329475 2023-01-24 02:37:34.214634: step: 308/466, loss: 0.0675235316157341 2023-01-24 02:37:34.832511: step: 310/466, loss: 0.054056644439697266 2023-01-24 02:37:35.440525: step: 312/466, loss: 0.03715312108397484 2023-01-24 02:37:36.030172: step: 314/466, loss: 0.020889652892947197 2023-01-24 02:37:36.718416: step: 316/466, loss: 0.08342738449573517 2023-01-24 02:37:37.294306: step: 318/466, loss: 0.0254612285643816 2023-01-24 02:37:37.978150: step: 320/466, loss: 0.12345464527606964 2023-01-24 02:37:38.570795: step: 322/466, loss: 0.012913811951875687 2023-01-24 02:37:39.248470: step: 324/466, loss: 0.05068105459213257 2023-01-24 02:37:39.808869: step: 326/466, loss: 0.03996245190501213 2023-01-24 02:37:40.366975: step: 328/466, loss: 0.057917702943086624 2023-01-24 02:37:41.048281: step: 330/466, loss: 0.078567273914814 2023-01-24 02:37:41.602345: step: 332/466, loss: 0.032266248017549515 2023-01-24 02:37:42.202131: step: 334/466, loss: 0.015446437522768974 2023-01-24 02:37:42.867481: step: 336/466, loss: 0.01915135607123375 2023-01-24 02:37:43.545637: step: 338/466, loss: 0.07407674938440323 2023-01-24 02:37:44.124331: step: 340/466, loss: 0.055599890649318695 2023-01-24 02:37:44.698824: step: 342/466, loss: 0.06590849161148071 2023-01-24 02:37:45.321586: step: 344/466, loss: 0.030756264925003052 2023-01-24 02:37:45.947105: step: 346/466, loss: 0.09312482923269272 2023-01-24 02:37:46.595008: step: 348/466, loss: 0.04294842481613159 2023-01-24 02:37:47.212566: step: 350/466, loss: 0.023063605651259422 2023-01-24 02:37:47.780324: step: 352/466, loss: 0.050428006798028946 2023-01-24 02:37:48.397738: step: 354/466, loss: 0.035761695355176926 2023-01-24 02:37:49.020172: step: 356/466, loss: 0.4424636960029602 2023-01-24 02:37:49.656279: step: 358/466, loss: 0.061999861150979996 2023-01-24 02:37:50.258019: step: 360/466, loss: 0.06003555655479431 2023-01-24 02:37:50.930251: step: 362/466, loss: 0.09660322964191437 2023-01-24 02:37:51.549081: step: 364/466, loss: 0.32913368940353394 2023-01-24 02:37:52.144944: step: 366/466, loss: 0.06287943571805954 2023-01-24 02:37:52.700215: step: 368/466, loss: 0.04135376960039139 2023-01-24 02:37:53.267404: step: 370/466, loss: 0.12273543328046799 2023-01-24 02:37:53.869908: step: 372/466, loss: 0.09697325527667999 2023-01-24 02:37:54.602390: step: 374/466, loss: 0.20717570185661316 2023-01-24 02:37:55.327213: step: 376/466, loss: 0.17564678192138672 2023-01-24 02:37:55.926287: step: 378/466, loss: 0.02127458155155182 2023-01-24 02:37:56.508995: step: 380/466, loss: 0.03983462601900101 2023-01-24 02:37:57.149326: step: 382/466, loss: 0.0415821298956871 2023-01-24 02:37:57.755891: step: 384/466, loss: 0.03752491623163223 2023-01-24 02:37:58.329912: step: 386/466, loss: 0.20886972546577454 2023-01-24 02:37:58.954163: step: 388/466, loss: 0.06603700667619705 2023-01-24 02:37:59.568456: step: 390/466, loss: 0.659958004951477 2023-01-24 02:38:00.204454: step: 392/466, loss: 0.02154848352074623 2023-01-24 02:38:00.890907: step: 394/466, loss: 0.122312992811203 2023-01-24 02:38:01.445889: step: 396/466, loss: 0.11460147053003311 2023-01-24 02:38:02.029069: step: 398/466, loss: 0.02296714298427105 2023-01-24 02:38:02.687144: step: 400/466, loss: 0.12299858033657074 2023-01-24 02:38:03.227243: step: 402/466, loss: 0.04612415283918381 2023-01-24 02:38:03.863389: step: 404/466, loss: 0.036721426993608475 2023-01-24 02:38:04.459410: step: 406/466, loss: 0.049663905054330826 2023-01-24 02:38:05.133116: step: 408/466, loss: 2.824491024017334 2023-01-24 02:38:05.786780: step: 410/466, loss: 0.06330715864896774 2023-01-24 02:38:06.419859: step: 412/466, loss: 0.08676332980394363 2023-01-24 02:38:07.127812: step: 414/466, loss: 0.0636594295501709 2023-01-24 02:38:07.777150: step: 416/466, loss: 0.082150399684906 2023-01-24 02:38:08.395502: step: 418/466, loss: 0.0154802855104208 2023-01-24 02:38:08.937238: step: 420/466, loss: 0.10068855434656143 2023-01-24 02:38:09.604807: step: 422/466, loss: 0.06975843757390976 2023-01-24 02:38:10.195854: step: 424/466, loss: 0.0383467897772789 2023-01-24 02:38:10.793930: step: 426/466, loss: 0.4652051031589508 2023-01-24 02:38:11.351780: step: 428/466, loss: 0.16508370637893677 2023-01-24 02:38:12.148732: step: 430/466, loss: 0.07636451721191406 2023-01-24 02:38:12.803564: step: 432/466, loss: 0.15149201452732086 2023-01-24 02:38:13.461815: step: 434/466, loss: 0.057252347469329834 2023-01-24 02:38:14.137276: step: 436/466, loss: 0.04258215054869652 2023-01-24 02:38:14.765458: step: 438/466, loss: 0.014870988205075264 2023-01-24 02:38:15.481406: step: 440/466, loss: 0.040493033826351166 2023-01-24 02:38:16.172516: step: 442/466, loss: 0.21843920648097992 2023-01-24 02:38:16.767482: step: 444/466, loss: 0.07427722215652466 2023-01-24 02:38:17.345742: step: 446/466, loss: 0.022667555138468742 2023-01-24 02:38:18.037693: step: 448/466, loss: 0.08415772020816803 2023-01-24 02:38:18.683671: step: 450/466, loss: 0.21720390021800995 2023-01-24 02:38:19.296220: step: 452/466, loss: 0.18814308941364288 2023-01-24 02:38:19.916839: step: 454/466, loss: 0.0605594739317894 2023-01-24 02:38:20.515897: step: 456/466, loss: 1.33790123462677 2023-01-24 02:38:21.172225: step: 458/466, loss: 0.3248315751552582 2023-01-24 02:38:21.808262: step: 460/466, loss: 0.025710759684443474 2023-01-24 02:38:22.390821: step: 462/466, loss: 0.06335184723138809 2023-01-24 02:38:23.017894: step: 464/466, loss: 0.021018141880631447 2023-01-24 02:38:23.648210: step: 466/466, loss: 0.15620258450508118 2023-01-24 02:38:24.307164: step: 468/466, loss: 0.09127360582351685 2023-01-24 02:38:24.937815: step: 470/466, loss: 0.09151540696620941 2023-01-24 02:38:25.638122: step: 472/466, loss: 0.19718727469444275 2023-01-24 02:38:26.279004: step: 474/466, loss: 0.1726331263780594 2023-01-24 02:38:26.940484: step: 476/466, loss: 0.4496327340602875 2023-01-24 02:38:27.525266: step: 478/466, loss: 0.04377531632781029 2023-01-24 02:38:28.117607: step: 480/466, loss: 0.07953672856092453 2023-01-24 02:38:28.750611: step: 482/466, loss: 0.08268735557794571 2023-01-24 02:38:29.437721: step: 484/466, loss: 0.05951434746384621 2023-01-24 02:38:30.045028: step: 486/466, loss: 0.16865141689777374 2023-01-24 02:38:30.601378: step: 488/466, loss: 0.013609996996819973 2023-01-24 02:38:31.275104: step: 490/466, loss: 0.07433077692985535 2023-01-24 02:38:31.845968: step: 492/466, loss: 0.5957027077674866 2023-01-24 02:38:32.421092: step: 494/466, loss: 0.07487273216247559 2023-01-24 02:38:33.080967: step: 496/466, loss: 1.4150335788726807 2023-01-24 02:38:33.619485: step: 498/466, loss: 0.021949447691440582 2023-01-24 02:38:34.266316: step: 500/466, loss: 0.20540384948253632 2023-01-24 02:38:34.899038: step: 502/466, loss: 0.022929711267352104 2023-01-24 02:38:35.471666: step: 504/466, loss: 0.04285015910863876 2023-01-24 02:38:36.146609: step: 506/466, loss: 0.053249284625053406 2023-01-24 02:38:36.747374: step: 508/466, loss: 0.061508845537900925 2023-01-24 02:38:37.321679: step: 510/466, loss: 0.11744923144578934 2023-01-24 02:38:37.948853: step: 512/466, loss: 0.043242596089839935 2023-01-24 02:38:38.529480: step: 514/466, loss: 0.082844078540802 2023-01-24 02:38:39.109420: step: 516/466, loss: 0.5550380349159241 2023-01-24 02:38:39.779224: step: 518/466, loss: 0.09134536981582642 2023-01-24 02:38:40.412375: step: 520/466, loss: 0.050895120948553085 2023-01-24 02:38:41.026452: step: 522/466, loss: 0.08660466969013214 2023-01-24 02:38:41.664866: step: 524/466, loss: 0.1148732453584671 2023-01-24 02:38:42.334501: step: 526/466, loss: 0.06860499083995819 2023-01-24 02:38:42.885783: step: 528/466, loss: 0.04227155074477196 2023-01-24 02:38:43.529901: step: 530/466, loss: 0.02154962532222271 2023-01-24 02:38:44.132319: step: 532/466, loss: 0.016374345868825912 2023-01-24 02:38:44.825031: step: 534/466, loss: 0.00850632879883051 2023-01-24 02:38:45.401674: step: 536/466, loss: 0.07412139326334 2023-01-24 02:38:45.980817: step: 538/466, loss: 0.016608145087957382 2023-01-24 02:38:46.589381: step: 540/466, loss: 0.2312714159488678 2023-01-24 02:38:47.223515: step: 542/466, loss: 0.07605907320976257 2023-01-24 02:38:47.861751: step: 544/466, loss: 0.195210799574852 2023-01-24 02:38:48.474948: step: 546/466, loss: 0.08772149682044983 2023-01-24 02:38:49.079198: step: 548/466, loss: 0.03903600573539734 2023-01-24 02:38:49.689045: step: 550/466, loss: 0.23316924273967743 2023-01-24 02:38:50.319813: step: 552/466, loss: 0.15976104140281677 2023-01-24 02:38:50.927309: step: 554/466, loss: 0.17922474443912506 2023-01-24 02:38:51.693074: step: 556/466, loss: 0.06768238544464111 2023-01-24 02:38:52.323563: step: 558/466, loss: 0.06729494780302048 2023-01-24 02:38:52.947606: step: 560/466, loss: 0.15633723139762878 2023-01-24 02:38:53.635986: step: 562/466, loss: 0.4016426205635071 2023-01-24 02:38:54.248436: step: 564/466, loss: 0.03239607810974121 2023-01-24 02:38:54.917226: step: 566/466, loss: 0.1015559509396553 2023-01-24 02:38:55.515438: step: 568/466, loss: 0.06320659071207047 2023-01-24 02:38:56.177926: step: 570/466, loss: 0.11583807319402695 2023-01-24 02:38:56.825053: step: 572/466, loss: 0.03362603485584259 2023-01-24 02:38:57.500022: step: 574/466, loss: 0.13453516364097595 2023-01-24 02:38:58.067274: step: 576/466, loss: 0.12629076838493347 2023-01-24 02:38:58.708510: step: 578/466, loss: 0.07032449543476105 2023-01-24 02:38:59.299420: step: 580/466, loss: 0.09611865133047104 2023-01-24 02:38:59.904994: step: 582/466, loss: 0.05391501635313034 2023-01-24 02:39:00.603607: step: 584/466, loss: 0.36836880445480347 2023-01-24 02:39:01.337258: step: 586/466, loss: 0.08569643646478653 2023-01-24 02:39:02.006672: step: 588/466, loss: 0.20361623167991638 2023-01-24 02:39:02.682347: step: 590/466, loss: 0.013120735064148903 2023-01-24 02:39:03.348610: step: 592/466, loss: 0.09215152263641357 2023-01-24 02:39:03.938634: step: 594/466, loss: 0.05402675271034241 2023-01-24 02:39:04.609608: step: 596/466, loss: 0.10387832671403885 2023-01-24 02:39:05.202758: step: 598/466, loss: 0.02782166376709938 2023-01-24 02:39:05.798725: step: 600/466, loss: 0.0770563930273056 2023-01-24 02:39:06.412015: step: 602/466, loss: 0.12681631743907928 2023-01-24 02:39:07.028705: step: 604/466, loss: 0.14232604205608368 2023-01-24 02:39:07.593335: step: 606/466, loss: 2.2815752029418945 2023-01-24 02:39:08.190353: step: 608/466, loss: 0.4033258855342865 2023-01-24 02:39:08.820943: step: 610/466, loss: 0.0938134416937828 2023-01-24 02:39:09.436605: step: 612/466, loss: 0.08770494163036346 2023-01-24 02:39:10.085220: step: 614/466, loss: 0.026598917320370674 2023-01-24 02:39:10.750233: step: 616/466, loss: 0.060589879751205444 2023-01-24 02:39:11.365331: step: 618/466, loss: 0.2691894769668579 2023-01-24 02:39:11.932758: step: 620/466, loss: 0.08119969069957733 2023-01-24 02:39:12.542892: step: 622/466, loss: 0.1171530932188034 2023-01-24 02:39:13.202384: step: 624/466, loss: 0.06047392264008522 2023-01-24 02:39:13.829336: step: 626/466, loss: 0.06534136086702347 2023-01-24 02:39:14.474933: step: 628/466, loss: 0.03197570890188217 2023-01-24 02:39:15.102327: step: 630/466, loss: 0.3263348639011383 2023-01-24 02:39:15.795710: step: 632/466, loss: 0.05140954628586769 2023-01-24 02:39:16.405552: step: 634/466, loss: 0.06105520948767662 2023-01-24 02:39:16.938339: step: 636/466, loss: 0.005694460589438677 2023-01-24 02:39:17.586493: step: 638/466, loss: 0.5783259868621826 2023-01-24 02:39:18.269030: step: 640/466, loss: 0.2999393343925476 2023-01-24 02:39:18.882230: step: 642/466, loss: 0.04860677570104599 2023-01-24 02:39:19.483700: step: 644/466, loss: 0.062296394258737564 2023-01-24 02:39:20.074190: step: 646/466, loss: 0.09006703644990921 2023-01-24 02:39:20.731420: step: 648/466, loss: 0.0551469512283802 2023-01-24 02:39:21.311059: step: 650/466, loss: 0.09826161712408066 2023-01-24 02:39:21.955065: step: 652/466, loss: 0.059834785759449005 2023-01-24 02:39:22.570108: step: 654/466, loss: 0.09664934873580933 2023-01-24 02:39:23.238052: step: 656/466, loss: 0.3826269209384918 2023-01-24 02:39:23.850143: step: 658/466, loss: 0.10701795667409897 2023-01-24 02:39:24.535642: step: 660/466, loss: 0.030481388792395592 2023-01-24 02:39:25.198280: step: 662/466, loss: 0.04147927090525627 2023-01-24 02:39:25.828917: step: 664/466, loss: 0.04073699563741684 2023-01-24 02:39:26.423625: step: 666/466, loss: 0.0898548811674118 2023-01-24 02:39:27.060680: step: 668/466, loss: 0.04935910552740097 2023-01-24 02:39:27.733225: step: 670/466, loss: 0.09589585661888123 2023-01-24 02:39:28.327933: step: 672/466, loss: 0.10625513643026352 2023-01-24 02:39:28.942905: step: 674/466, loss: 0.003971959929913282 2023-01-24 02:39:29.568564: step: 676/466, loss: 0.03810294717550278 2023-01-24 02:39:30.262187: step: 678/466, loss: 0.5456706881523132 2023-01-24 02:39:30.905110: step: 680/466, loss: 0.06984337419271469 2023-01-24 02:39:31.557463: step: 682/466, loss: 0.05832561105489731 2023-01-24 02:39:32.200874: step: 684/466, loss: 0.11955627799034119 2023-01-24 02:39:32.839274: step: 686/466, loss: 0.076075479388237 2023-01-24 02:39:33.426163: step: 688/466, loss: 0.04003051295876503 2023-01-24 02:39:34.012078: step: 690/466, loss: 0.02638140693306923 2023-01-24 02:39:34.580823: step: 692/466, loss: 0.034705035388469696 2023-01-24 02:39:35.143082: step: 694/466, loss: 0.07898691296577454 2023-01-24 02:39:35.752442: step: 696/466, loss: 0.3519253730773926 2023-01-24 02:39:36.358991: step: 698/466, loss: 0.043164726346731186 2023-01-24 02:39:36.984884: step: 700/466, loss: 0.13846589624881744 2023-01-24 02:39:37.591623: step: 702/466, loss: 0.04918210208415985 2023-01-24 02:39:38.194004: step: 704/466, loss: 0.2499314844608307 2023-01-24 02:39:38.843792: step: 706/466, loss: 0.031565580517053604 2023-01-24 02:39:39.417611: step: 708/466, loss: 0.03719315677881241 2023-01-24 02:39:40.063763: step: 710/466, loss: 0.0273834727704525 2023-01-24 02:39:40.728814: step: 712/466, loss: 0.9689732789993286 2023-01-24 02:39:41.344797: step: 714/466, loss: 0.046712346374988556 2023-01-24 02:39:41.973070: step: 716/466, loss: 0.11686748266220093 2023-01-24 02:39:42.676500: step: 718/466, loss: 0.0049114711582660675 2023-01-24 02:39:43.249665: step: 720/466, loss: 0.23147809505462646 2023-01-24 02:39:43.911887: step: 722/466, loss: 0.1909041404724121 2023-01-24 02:39:44.463281: step: 724/466, loss: 0.03427097946405411 2023-01-24 02:39:45.096604: step: 726/466, loss: 0.060599882155656815 2023-01-24 02:39:45.787582: step: 728/466, loss: 0.1319151222705841 2023-01-24 02:39:46.412871: step: 730/466, loss: 0.20778167247772217 2023-01-24 02:39:47.062743: step: 732/466, loss: 0.061015255749225616 2023-01-24 02:39:47.665846: step: 734/466, loss: 0.08025775849819183 2023-01-24 02:39:48.348354: step: 736/466, loss: 0.05546239763498306 2023-01-24 02:39:49.011646: step: 738/466, loss: 0.158542200922966 2023-01-24 02:39:49.649308: step: 740/466, loss: 0.06881777942180634 2023-01-24 02:39:50.325334: step: 742/466, loss: 0.15654173493385315 2023-01-24 02:39:50.950542: step: 744/466, loss: 0.9193609952926636 2023-01-24 02:39:51.536203: step: 746/466, loss: 0.007909778505563736 2023-01-24 02:39:52.181503: step: 748/466, loss: 0.022688794881105423 2023-01-24 02:39:52.805564: step: 750/466, loss: 0.071705661714077 2023-01-24 02:39:53.408666: step: 752/466, loss: 0.1914301961660385 2023-01-24 02:39:54.085576: step: 754/466, loss: 0.005395537707954645 2023-01-24 02:39:54.737040: step: 756/466, loss: 0.08468862622976303 2023-01-24 02:39:55.287959: step: 758/466, loss: 0.17027100920677185 2023-01-24 02:39:55.871228: step: 760/466, loss: 0.05055655539035797 2023-01-24 02:39:56.493917: step: 762/466, loss: 0.055712856352329254 2023-01-24 02:39:57.142796: step: 764/466, loss: 0.0562840960919857 2023-01-24 02:39:57.745385: step: 766/466, loss: 0.230503112077713 2023-01-24 02:39:58.343699: step: 768/466, loss: 0.013948547653853893 2023-01-24 02:39:58.951505: step: 770/466, loss: 0.980506420135498 2023-01-24 02:39:59.613202: step: 772/466, loss: 0.04344959929585457 2023-01-24 02:40:00.266918: step: 774/466, loss: 0.4502314031124115 2023-01-24 02:40:00.910999: step: 776/466, loss: 0.09271130710840225 2023-01-24 02:40:01.574971: step: 778/466, loss: 0.09359487891197205 2023-01-24 02:40:02.201603: step: 780/466, loss: 0.0551677905023098 2023-01-24 02:40:02.778611: step: 782/466, loss: 0.06417146325111389 2023-01-24 02:40:03.420506: step: 784/466, loss: 0.1077897772192955 2023-01-24 02:40:04.033600: step: 786/466, loss: 0.048716768622398376 2023-01-24 02:40:04.639339: step: 788/466, loss: 0.004000423010438681 2023-01-24 02:40:05.309428: step: 790/466, loss: 0.18237416446208954 2023-01-24 02:40:05.930346: step: 792/466, loss: 0.040526874363422394 2023-01-24 02:40:06.556876: step: 794/466, loss: 0.0530264787375927 2023-01-24 02:40:07.197781: step: 796/466, loss: 0.06333018839359283 2023-01-24 02:40:07.820493: step: 798/466, loss: 0.07818601280450821 2023-01-24 02:40:08.464526: step: 800/466, loss: 0.06600316613912582 2023-01-24 02:40:09.071054: step: 802/466, loss: 1.0129446983337402 2023-01-24 02:40:09.689940: step: 804/466, loss: 0.09120383113622665 2023-01-24 02:40:10.428464: step: 806/466, loss: 0.09274714440107346 2023-01-24 02:40:11.033553: step: 808/466, loss: 0.049328066408634186 2023-01-24 02:40:11.633544: step: 810/466, loss: 0.07375475764274597 2023-01-24 02:40:12.292758: step: 812/466, loss: 0.5586576461791992 2023-01-24 02:40:12.940765: step: 814/466, loss: 0.384542852640152 2023-01-24 02:40:13.557575: step: 816/466, loss: 0.09888996183872223 2023-01-24 02:40:14.198280: step: 818/466, loss: 0.06262814998626709 2023-01-24 02:40:14.776539: step: 820/466, loss: 0.01604458875954151 2023-01-24 02:40:15.314793: step: 822/466, loss: 0.04279208555817604 2023-01-24 02:40:15.924144: step: 824/466, loss: 0.010919441469013691 2023-01-24 02:40:16.517468: step: 826/466, loss: 0.02665875479578972 2023-01-24 02:40:17.155259: step: 828/466, loss: 0.11534024775028229 2023-01-24 02:40:17.736627: step: 830/466, loss: 0.04270065203309059 2023-01-24 02:40:18.297283: step: 832/466, loss: 0.11828695237636566 2023-01-24 02:40:18.931698: step: 834/466, loss: 0.026358330622315407 2023-01-24 02:40:19.585743: step: 836/466, loss: 0.005910153966397047 2023-01-24 02:40:20.164521: step: 838/466, loss: 0.34486469626426697 2023-01-24 02:40:20.777133: step: 840/466, loss: 0.029074277728796005 2023-01-24 02:40:21.402038: step: 842/466, loss: 0.10085507482290268 2023-01-24 02:40:21.966693: step: 844/466, loss: 0.06384452432394028 2023-01-24 02:40:22.555237: step: 846/466, loss: 0.8601533770561218 2023-01-24 02:40:23.116072: step: 848/466, loss: 0.05942033231258392 2023-01-24 02:40:23.715541: step: 850/466, loss: 0.019594671204686165 2023-01-24 02:40:24.313512: step: 852/466, loss: 0.07780236750841141 2023-01-24 02:40:24.914017: step: 854/466, loss: 0.03243729844689369 2023-01-24 02:40:25.476107: step: 856/466, loss: 0.05148492380976677 2023-01-24 02:40:26.119163: step: 858/466, loss: 0.11528807878494263 2023-01-24 02:40:26.733267: step: 860/466, loss: 0.0014300361508503556 2023-01-24 02:40:27.405926: step: 862/466, loss: 0.09031727910041809 2023-01-24 02:40:27.995110: step: 864/466, loss: 0.19160300493240356 2023-01-24 02:40:28.610701: step: 866/466, loss: 0.12051482498645782 2023-01-24 02:40:29.268613: step: 868/466, loss: 0.07214026898145676 2023-01-24 02:40:29.858332: step: 870/466, loss: 0.08420170843601227 2023-01-24 02:40:30.478940: step: 872/466, loss: 0.3128611743450165 2023-01-24 02:40:31.024815: step: 874/466, loss: 0.0372660830616951 2023-01-24 02:40:31.676363: step: 876/466, loss: 0.08981478214263916 2023-01-24 02:40:32.291374: step: 878/466, loss: 0.05171480029821396 2023-01-24 02:40:32.945028: step: 880/466, loss: 0.16538923978805542 2023-01-24 02:40:33.665334: step: 882/466, loss: 0.09176956862211227 2023-01-24 02:40:34.216439: step: 884/466, loss: 0.028760412707924843 2023-01-24 02:40:34.786201: step: 886/466, loss: 0.052656181156635284 2023-01-24 02:40:35.368280: step: 888/466, loss: 0.24712523818016052 2023-01-24 02:40:36.007157: step: 890/466, loss: 0.07343322783708572 2023-01-24 02:40:36.684831: step: 892/466, loss: 0.038961004465818405 2023-01-24 02:40:37.290582: step: 894/466, loss: 0.0370122492313385 2023-01-24 02:40:37.918476: step: 896/466, loss: 0.11388856917619705 2023-01-24 02:40:38.563999: step: 898/466, loss: 0.30981096625328064 2023-01-24 02:40:39.207621: step: 900/466, loss: 0.03733278065919876 2023-01-24 02:40:39.841510: step: 902/466, loss: 0.1259738802909851 2023-01-24 02:40:40.410987: step: 904/466, loss: 0.025919705629348755 2023-01-24 02:40:41.015942: step: 906/466, loss: 0.04043346643447876 2023-01-24 02:40:41.687524: step: 908/466, loss: 0.48000040650367737 2023-01-24 02:40:42.328646: step: 910/466, loss: 0.028711294755339622 2023-01-24 02:40:42.989698: step: 912/466, loss: 0.05317974463105202 2023-01-24 02:40:43.626996: step: 914/466, loss: 0.15121129155158997 2023-01-24 02:40:44.185453: step: 916/466, loss: 0.02880292758345604 2023-01-24 02:40:44.836074: step: 918/466, loss: 0.23636382818222046 2023-01-24 02:40:45.417690: step: 920/466, loss: 0.06250850856304169 2023-01-24 02:40:46.064902: step: 922/466, loss: 0.10944357514381409 2023-01-24 02:40:46.653108: step: 924/466, loss: 0.06018063798546791 2023-01-24 02:40:47.267156: step: 926/466, loss: 0.07557946443557739 2023-01-24 02:40:47.963937: step: 928/466, loss: 0.11695948988199234 2023-01-24 02:40:48.574072: step: 930/466, loss: 0.8124833703041077 2023-01-24 02:40:49.172707: step: 932/466, loss: 0.12831737101078033 ================================================== Loss: 0.158 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34080075187969927, 'r': 0.30717335321225264, 'f1': 0.3231144853150842}, 'combined': 0.23808435760058833, 'epoch': 18} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3347074453364013, 'r': 0.2790740488215884, 'f1': 0.30436943058669785}, 'combined': 0.19058646588139028, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3269674308079396, 'r': 0.3170405258877745, 'f1': 0.32192747041012937}, 'combined': 0.23720971503904267, 'epoch': 18} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.34662681076601914, 'r': 0.2978237579866879, 'f1': 0.32037740179538854}, 'combined': 0.19854374195770555, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3021635901551632, 'r': 0.3061771482786663, 'f1': 0.3041571293927562}, 'combined': 0.22411577955255718, 'epoch': 18} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.34691598935127127, 'r': 0.2888875462861852, 'f1': 0.3152537012259702}, 'combined': 0.2091286928924753, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3017241379310345, 'r': 0.25, 'f1': 0.2734375}, 'combined': 0.18229166666666666, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2625, 'r': 0.22826086956521738, 'f1': 0.24418604651162792}, 'combined': 0.12209302325581396, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:43:21.653855: step: 2/466, loss: 0.13111266493797302 2023-01-24 02:43:22.280423: step: 4/466, loss: 0.030377304181456566 2023-01-24 02:43:22.939434: step: 6/466, loss: 0.062473639845848083 2023-01-24 02:43:23.640123: step: 8/466, loss: 0.2828475534915924 2023-01-24 02:43:24.289384: step: 10/466, loss: 0.09224869310855865 2023-01-24 02:43:24.861658: step: 12/466, loss: 0.10512147098779678 2023-01-24 02:43:25.437645: step: 14/466, loss: 0.0439586415886879 2023-01-24 02:43:26.046248: step: 16/466, loss: 0.04128061607480049 2023-01-24 02:43:26.605168: step: 18/466, loss: 0.08516164124011993 2023-01-24 02:43:27.152698: step: 20/466, loss: 0.0048162443563342094 2023-01-24 02:43:27.784499: step: 22/466, loss: 0.09766031056642532 2023-01-24 02:43:28.382580: step: 24/466, loss: 0.09914689511060715 2023-01-24 02:43:29.009308: step: 26/466, loss: 0.04481856897473335 2023-01-24 02:43:29.576840: step: 28/466, loss: 0.05663405358791351 2023-01-24 02:43:30.224412: step: 30/466, loss: 0.054416682571172714 2023-01-24 02:43:30.786803: step: 32/466, loss: 0.25426021218299866 2023-01-24 02:43:31.390072: step: 34/466, loss: 0.05254456400871277 2023-01-24 02:43:32.030321: step: 36/466, loss: 0.01980424113571644 2023-01-24 02:43:32.618547: step: 38/466, loss: 0.010959596373140812 2023-01-24 02:43:33.242183: step: 40/466, loss: 0.051106374710798264 2023-01-24 02:43:33.877177: step: 42/466, loss: 0.1551026999950409 2023-01-24 02:43:34.526117: step: 44/466, loss: 0.0789317935705185 2023-01-24 02:43:35.095776: step: 46/466, loss: 0.05944628641009331 2023-01-24 02:43:35.755695: step: 48/466, loss: 0.03127778694033623 2023-01-24 02:43:36.331541: step: 50/466, loss: 0.08062749356031418 2023-01-24 02:43:36.983594: step: 52/466, loss: 5.5594801902771 2023-01-24 02:43:37.640826: step: 54/466, loss: 0.07140307873487473 2023-01-24 02:43:38.279522: step: 56/466, loss: 0.04742332920432091 2023-01-24 02:43:38.836660: step: 58/466, loss: 0.016738969832658768 2023-01-24 02:43:39.439471: step: 60/466, loss: 0.028866413980722427 2023-01-24 02:43:40.050208: step: 62/466, loss: 0.07691535353660583 2023-01-24 02:43:40.642976: step: 64/466, loss: 0.10655559599399567 2023-01-24 02:43:41.255262: step: 66/466, loss: 0.040917158126831055 2023-01-24 02:43:41.861095: step: 68/466, loss: 0.1279160976409912 2023-01-24 02:43:42.463711: step: 70/466, loss: 0.2998110055923462 2023-01-24 02:43:43.049526: step: 72/466, loss: 0.070008784532547 2023-01-24 02:43:43.664106: step: 74/466, loss: 0.09292187541723251 2023-01-24 02:43:44.324004: step: 76/466, loss: 0.10964535176753998 2023-01-24 02:43:44.882139: step: 78/466, loss: 0.049997728317976 2023-01-24 02:43:45.468417: step: 80/466, loss: 0.9389095902442932 2023-01-24 02:43:46.073601: step: 82/466, loss: 0.04234647378325462 2023-01-24 02:43:46.736239: step: 84/466, loss: 0.09408138692378998 2023-01-24 02:43:47.306684: step: 86/466, loss: 0.03672255575656891 2023-01-24 02:43:47.887693: step: 88/466, loss: 0.017950015142560005 2023-01-24 02:43:48.476551: step: 90/466, loss: 0.04448329284787178 2023-01-24 02:43:49.076740: step: 92/466, loss: 0.0492829866707325 2023-01-24 02:43:49.697593: step: 94/466, loss: 0.6904706954956055 2023-01-24 02:43:50.293198: step: 96/466, loss: 0.07388099282979965 2023-01-24 02:43:50.897924: step: 98/466, loss: 0.07274051010608673 2023-01-24 02:43:51.511554: step: 100/466, loss: 0.017620012164115906 2023-01-24 02:43:52.105087: step: 102/466, loss: 0.04263727739453316 2023-01-24 02:43:52.725630: step: 104/466, loss: 0.05030503869056702 2023-01-24 02:43:53.267172: step: 106/466, loss: 0.018043609336018562 2023-01-24 02:43:53.904189: step: 108/466, loss: 0.09793735295534134 2023-01-24 02:43:54.533012: step: 110/466, loss: 0.01932547613978386 2023-01-24 02:43:55.199997: step: 112/466, loss: 0.04864615947008133 2023-01-24 02:43:55.798791: step: 114/466, loss: 0.038060806691646576 2023-01-24 02:43:56.402969: step: 116/466, loss: 0.0922747403383255 2023-01-24 02:43:57.010905: step: 118/466, loss: 0.07914816588163376 2023-01-24 02:43:57.666541: step: 120/466, loss: 0.06315623968839645 2023-01-24 02:43:58.356164: step: 122/466, loss: 0.08343784511089325 2023-01-24 02:43:59.042658: step: 124/466, loss: 0.017767300829291344 2023-01-24 02:43:59.618390: step: 126/466, loss: 0.08279591798782349 2023-01-24 02:44:00.202223: step: 128/466, loss: 0.036457426846027374 2023-01-24 02:44:00.787307: step: 130/466, loss: 0.03488573431968689 2023-01-24 02:44:01.402077: step: 132/466, loss: 0.11147907376289368 2023-01-24 02:44:02.024993: step: 134/466, loss: 0.0624677874147892 2023-01-24 02:44:02.652680: step: 136/466, loss: 0.3967929184436798 2023-01-24 02:44:03.283574: step: 138/466, loss: 0.061561837792396545 2023-01-24 02:44:03.908000: step: 140/466, loss: 0.04596086964011192 2023-01-24 02:44:04.489135: step: 142/466, loss: 0.07178357988595963 2023-01-24 02:44:05.153750: step: 144/466, loss: 0.04006802290678024 2023-01-24 02:44:05.854897: step: 146/466, loss: 0.2670358419418335 2023-01-24 02:44:06.399571: step: 148/466, loss: 0.0967395082116127 2023-01-24 02:44:07.053155: step: 150/466, loss: 0.059438809752464294 2023-01-24 02:44:07.682604: step: 152/466, loss: 0.07806328684091568 2023-01-24 02:44:08.309490: step: 154/466, loss: 0.12640124559402466 2023-01-24 02:44:08.880607: step: 156/466, loss: 0.056182656437158585 2023-01-24 02:44:09.488531: step: 158/466, loss: 0.0118029173463583 2023-01-24 02:44:10.089197: step: 160/466, loss: 0.05292297899723053 2023-01-24 02:44:10.626053: step: 162/466, loss: 0.04758679121732712 2023-01-24 02:44:11.275312: step: 164/466, loss: 0.1466142237186432 2023-01-24 02:44:12.021981: step: 166/466, loss: 0.014361128211021423 2023-01-24 02:44:12.628949: step: 168/466, loss: 0.07442910969257355 2023-01-24 02:44:13.237207: step: 170/466, loss: 0.12469945847988129 2023-01-24 02:44:13.859597: step: 172/466, loss: 0.03124932013452053 2023-01-24 02:44:14.406572: step: 174/466, loss: 0.12064909189939499 2023-01-24 02:44:15.081853: step: 176/466, loss: 0.010970078408718109 2023-01-24 02:44:15.692252: step: 178/466, loss: 0.044288016855716705 2023-01-24 02:44:16.302338: step: 180/466, loss: 0.08396290987730026 2023-01-24 02:44:16.906130: step: 182/466, loss: 0.041987329721450806 2023-01-24 02:44:17.526928: step: 184/466, loss: 0.048830945044755936 2023-01-24 02:44:18.234578: step: 186/466, loss: 0.0017868517898023129 2023-01-24 02:44:18.819930: step: 188/466, loss: 0.03212421387434006 2023-01-24 02:44:19.492855: step: 190/466, loss: 0.12949633598327637 2023-01-24 02:44:20.192028: step: 192/466, loss: 0.04548605531454086 2023-01-24 02:44:20.751514: step: 194/466, loss: 0.059311699122190475 2023-01-24 02:44:21.383062: step: 196/466, loss: 0.019009608775377274 2023-01-24 02:44:22.010415: step: 198/466, loss: 0.004964243154972792 2023-01-24 02:44:22.714157: step: 200/466, loss: 0.09717816114425659 2023-01-24 02:44:23.318125: step: 202/466, loss: 0.08246175199747086 2023-01-24 02:44:23.970923: step: 204/466, loss: 2.0336592197418213 2023-01-24 02:44:24.559287: step: 206/466, loss: 0.027664698660373688 2023-01-24 02:44:25.217761: step: 208/466, loss: 0.036029212176799774 2023-01-24 02:44:25.794831: step: 210/466, loss: 0.0020111678168177605 2023-01-24 02:44:26.364609: step: 212/466, loss: 0.07115798443555832 2023-01-24 02:44:26.987412: step: 214/466, loss: 0.029138866811990738 2023-01-24 02:44:27.669440: step: 216/466, loss: 0.06382667273283005 2023-01-24 02:44:28.266767: step: 218/466, loss: 0.029953237622976303 2023-01-24 02:44:28.841916: step: 220/466, loss: 0.5576077699661255 2023-01-24 02:44:29.484438: step: 222/466, loss: 0.055012043565511703 2023-01-24 02:44:30.133479: step: 224/466, loss: 0.05663612112402916 2023-01-24 02:44:30.823099: step: 226/466, loss: 0.040842924267053604 2023-01-24 02:44:31.434237: step: 228/466, loss: 0.07554465532302856 2023-01-24 02:44:32.079871: step: 230/466, loss: 0.08036981523036957 2023-01-24 02:44:32.774513: step: 232/466, loss: 0.02852747216820717 2023-01-24 02:44:33.413623: step: 234/466, loss: 0.0490492507815361 2023-01-24 02:44:34.037415: step: 236/466, loss: 0.08456084877252579 2023-01-24 02:44:34.730414: step: 238/466, loss: 0.02745949849486351 2023-01-24 02:44:35.303357: step: 240/466, loss: 0.005800113547593355 2023-01-24 02:44:35.982854: step: 242/466, loss: 0.15487007796764374 2023-01-24 02:44:36.578042: step: 244/466, loss: 0.16259633004665375 2023-01-24 02:44:37.348155: step: 246/466, loss: 0.2039099931716919 2023-01-24 02:44:37.895841: step: 248/466, loss: 0.017041269689798355 2023-01-24 02:44:38.463502: step: 250/466, loss: 0.06916677206754684 2023-01-24 02:44:39.214396: step: 252/466, loss: 0.1294555813074112 2023-01-24 02:44:39.889780: step: 254/466, loss: 0.05329286679625511 2023-01-24 02:44:40.600924: step: 256/466, loss: 0.06700216233730316 2023-01-24 02:44:41.220031: step: 258/466, loss: 0.05133777856826782 2023-01-24 02:44:41.826422: step: 260/466, loss: 0.21220001578330994 2023-01-24 02:44:42.538123: step: 262/466, loss: 0.20323196053504944 2023-01-24 02:44:43.150605: step: 264/466, loss: 1.021466612815857 2023-01-24 02:44:43.756943: step: 266/466, loss: 0.29994967579841614 2023-01-24 02:44:44.374560: step: 268/466, loss: 0.04251955449581146 2023-01-24 02:44:45.025460: step: 270/466, loss: 0.03821241855621338 2023-01-24 02:44:45.647918: step: 272/466, loss: 0.27829816937446594 2023-01-24 02:44:46.355635: step: 274/466, loss: 0.017957674339413643 2023-01-24 02:44:47.013276: step: 276/466, loss: 0.032462190836668015 2023-01-24 02:44:47.695637: step: 278/466, loss: 0.05432771518826485 2023-01-24 02:44:48.384212: step: 280/466, loss: 0.06187226250767708 2023-01-24 02:44:49.019897: step: 282/466, loss: 0.10310728847980499 2023-01-24 02:44:49.616518: step: 284/466, loss: 0.00489452388137579 2023-01-24 02:44:50.216522: step: 286/466, loss: 0.1849922090768814 2023-01-24 02:44:50.833816: step: 288/466, loss: 0.06912319362163544 2023-01-24 02:44:51.478971: step: 290/466, loss: 0.06165488809347153 2023-01-24 02:44:52.139149: step: 292/466, loss: 0.03635357692837715 2023-01-24 02:44:52.660748: step: 294/466, loss: 0.025050140917301178 2023-01-24 02:44:53.257879: step: 296/466, loss: 0.05851269140839577 2023-01-24 02:44:53.926414: step: 298/466, loss: 0.013517416082322598 2023-01-24 02:44:54.625503: step: 300/466, loss: 0.03553743287920952 2023-01-24 02:44:55.144055: step: 302/466, loss: 0.0637500062584877 2023-01-24 02:44:55.730106: step: 304/466, loss: 0.20800507068634033 2023-01-24 02:44:56.332518: step: 306/466, loss: 0.04209780693054199 2023-01-24 02:44:56.943625: step: 308/466, loss: 0.09888007491827011 2023-01-24 02:44:57.554788: step: 310/466, loss: 0.07714834809303284 2023-01-24 02:44:58.225615: step: 312/466, loss: 0.3459691107273102 2023-01-24 02:44:58.870171: step: 314/466, loss: 0.10351727902889252 2023-01-24 02:44:59.436735: step: 316/466, loss: 0.021968403831124306 2023-01-24 02:45:00.089323: step: 318/466, loss: 0.025281671434640884 2023-01-24 02:45:00.717426: step: 320/466, loss: 0.22677254676818848 2023-01-24 02:45:01.321169: step: 322/466, loss: 0.038426004350185394 2023-01-24 02:45:01.965870: step: 324/466, loss: 0.13164333999156952 2023-01-24 02:45:02.537298: step: 326/466, loss: 0.014954368583858013 2023-01-24 02:45:03.144685: step: 328/466, loss: 0.09496348351240158 2023-01-24 02:45:03.840346: step: 330/466, loss: 0.038174692541360855 2023-01-24 02:45:04.378413: step: 332/466, loss: 0.009528448805212975 2023-01-24 02:45:04.968357: step: 334/466, loss: 0.046064235270023346 2023-01-24 02:45:05.599811: step: 336/466, loss: 0.07075343281030655 2023-01-24 02:45:06.288407: step: 338/466, loss: 0.021331191062927246 2023-01-24 02:45:06.918033: step: 340/466, loss: 0.05234293267130852 2023-01-24 02:45:07.507531: step: 342/466, loss: 0.006639528088271618 2023-01-24 02:45:08.112555: step: 344/466, loss: 0.02011287584900856 2023-01-24 02:45:08.665208: step: 346/466, loss: 0.06851448863744736 2023-01-24 02:45:09.238562: step: 348/466, loss: 0.05958017334342003 2023-01-24 02:45:09.845171: step: 350/466, loss: 0.026816874742507935 2023-01-24 02:45:10.422712: step: 352/466, loss: 0.022725136950612068 2023-01-24 02:45:11.038802: step: 354/466, loss: 0.040746208280324936 2023-01-24 02:45:11.661563: step: 356/466, loss: 0.06710579246282578 2023-01-24 02:45:12.275663: step: 358/466, loss: 0.03984346240758896 2023-01-24 02:45:12.962533: step: 360/466, loss: 0.44433659315109253 2023-01-24 02:45:13.564934: step: 362/466, loss: 0.06242652237415314 2023-01-24 02:45:14.200741: step: 364/466, loss: 0.16920684278011322 2023-01-24 02:45:14.737534: step: 366/466, loss: 0.05334998667240143 2023-01-24 02:45:15.346829: step: 368/466, loss: 0.1814628690481186 2023-01-24 02:45:15.881546: step: 370/466, loss: 0.029765360057353973 2023-01-24 02:45:16.467007: step: 372/466, loss: 0.07934707403182983 2023-01-24 02:45:17.108629: step: 374/466, loss: 0.11064215749502182 2023-01-24 02:45:17.680399: step: 376/466, loss: 0.04964088648557663 2023-01-24 02:45:18.283099: step: 378/466, loss: 0.030018683522939682 2023-01-24 02:45:18.950972: step: 380/466, loss: 0.014424442313611507 2023-01-24 02:45:19.583896: step: 382/466, loss: 0.0486597903072834 2023-01-24 02:45:20.202922: step: 384/466, loss: 0.17942792177200317 2023-01-24 02:45:20.818887: step: 386/466, loss: 0.16916340589523315 2023-01-24 02:45:21.416001: step: 388/466, loss: 0.029808206483721733 2023-01-24 02:45:21.993086: step: 390/466, loss: 0.020174860954284668 2023-01-24 02:45:22.592474: step: 392/466, loss: 0.02940339222550392 2023-01-24 02:45:23.241782: step: 394/466, loss: 0.1403074562549591 2023-01-24 02:45:23.876924: step: 396/466, loss: 0.04364815354347229 2023-01-24 02:45:24.544034: step: 398/466, loss: 0.08063490688800812 2023-01-24 02:45:25.166469: step: 400/466, loss: 0.05557699501514435 2023-01-24 02:45:25.829039: step: 402/466, loss: 0.03992890194058418 2023-01-24 02:45:26.590871: step: 404/466, loss: 0.010678840801119804 2023-01-24 02:45:27.228760: step: 406/466, loss: 0.05775224417448044 2023-01-24 02:45:27.886794: step: 408/466, loss: 0.15753324329853058 2023-01-24 02:45:28.522725: step: 410/466, loss: 0.026866205036640167 2023-01-24 02:45:29.165688: step: 412/466, loss: 0.0920744314789772 2023-01-24 02:45:29.772994: step: 414/466, loss: 0.009046662598848343 2023-01-24 02:45:30.372704: step: 416/466, loss: 0.5653553009033203 2023-01-24 02:45:30.937240: step: 418/466, loss: 0.02821408212184906 2023-01-24 02:45:31.611678: step: 420/466, loss: 0.01933966390788555 2023-01-24 02:45:32.178841: step: 422/466, loss: 0.08733387291431427 2023-01-24 02:45:32.856221: step: 424/466, loss: 0.06458159536123276 2023-01-24 02:45:33.457024: step: 426/466, loss: 0.020542047917842865 2023-01-24 02:45:34.103722: step: 428/466, loss: 0.01162881962954998 2023-01-24 02:45:34.676671: step: 430/466, loss: 0.08747508376836777 2023-01-24 02:45:35.362232: step: 432/466, loss: 0.01085928175598383 2023-01-24 02:45:35.937434: step: 434/466, loss: 0.045907966792583466 2023-01-24 02:45:36.605683: step: 436/466, loss: 0.03741806745529175 2023-01-24 02:45:37.178171: step: 438/466, loss: 0.14698748290538788 2023-01-24 02:45:37.814429: step: 440/466, loss: 0.010019396431744099 2023-01-24 02:45:38.459971: step: 442/466, loss: 0.06800957024097443 2023-01-24 02:45:39.085975: step: 444/466, loss: 0.027711616829037666 2023-01-24 02:45:39.646559: step: 446/466, loss: 0.03754089027643204 2023-01-24 02:45:40.269072: step: 448/466, loss: 0.1358477920293808 2023-01-24 02:45:40.918760: step: 450/466, loss: 0.04760868102312088 2023-01-24 02:45:41.542044: step: 452/466, loss: 0.009993338957428932 2023-01-24 02:45:42.294928: step: 454/466, loss: 0.03732457756996155 2023-01-24 02:45:42.934928: step: 456/466, loss: 0.025478068739175797 2023-01-24 02:45:43.537137: step: 458/466, loss: 0.10090377926826477 2023-01-24 02:45:44.150860: step: 460/466, loss: 0.02373766154050827 2023-01-24 02:45:44.768754: step: 462/466, loss: 0.049498915672302246 2023-01-24 02:45:45.413958: step: 464/466, loss: 0.19183450937271118 2023-01-24 02:45:46.062517: step: 466/466, loss: 0.050915125757455826 2023-01-24 02:45:46.666863: step: 468/466, loss: 0.2524608373641968 2023-01-24 02:45:47.274549: step: 470/466, loss: 0.09752923250198364 2023-01-24 02:45:47.910374: step: 472/466, loss: 0.01077574584633112 2023-01-24 02:45:48.521500: step: 474/466, loss: 0.02293819561600685 2023-01-24 02:45:49.127398: step: 476/466, loss: 0.11887908726930618 2023-01-24 02:45:49.802799: step: 478/466, loss: 0.03859763592481613 2023-01-24 02:45:50.418172: step: 480/466, loss: 0.024055566638708115 2023-01-24 02:45:51.069173: step: 482/466, loss: 0.08013734221458435 2023-01-24 02:45:51.760662: step: 484/466, loss: 0.11558590829372406 2023-01-24 02:45:52.437172: step: 486/466, loss: 0.02335383929312229 2023-01-24 02:45:53.053962: step: 488/466, loss: 1.0002586841583252 2023-01-24 02:45:53.687099: step: 490/466, loss: 0.14254963397979736 2023-01-24 02:45:54.369297: step: 492/466, loss: 0.07002247869968414 2023-01-24 02:45:54.937812: step: 494/466, loss: 0.014882318675518036 2023-01-24 02:45:55.527775: step: 496/466, loss: 0.016202298924326897 2023-01-24 02:45:56.185982: step: 498/466, loss: 0.10031528770923615 2023-01-24 02:45:56.842622: step: 500/466, loss: 0.10986057668924332 2023-01-24 02:45:57.415543: step: 502/466, loss: 0.052671339362859726 2023-01-24 02:45:58.071941: step: 504/466, loss: 0.02062651887536049 2023-01-24 02:45:58.715400: step: 506/466, loss: 0.04933162406086922 2023-01-24 02:45:59.346444: step: 508/466, loss: 0.4361085593700409 2023-01-24 02:45:59.995836: step: 510/466, loss: 0.2579571008682251 2023-01-24 02:46:00.629349: step: 512/466, loss: 0.07255497574806213 2023-01-24 02:46:01.269053: step: 514/466, loss: 0.08569323271512985 2023-01-24 02:46:01.848820: step: 516/466, loss: 0.03176683187484741 2023-01-24 02:46:02.465370: step: 518/466, loss: 0.06901123374700546 2023-01-24 02:46:03.114158: step: 520/466, loss: 0.2679722011089325 2023-01-24 02:46:03.805420: step: 522/466, loss: 0.15320459008216858 2023-01-24 02:46:04.488974: step: 524/466, loss: 0.02791094221174717 2023-01-24 02:46:05.113251: step: 526/466, loss: 0.17762930691242218 2023-01-24 02:46:05.765021: step: 528/466, loss: 0.04258953034877777 2023-01-24 02:46:06.445143: step: 530/466, loss: 0.01920774206519127 2023-01-24 02:46:07.004156: step: 532/466, loss: 0.18327747285366058 2023-01-24 02:46:07.606977: step: 534/466, loss: 0.02820243313908577 2023-01-24 02:46:08.219980: step: 536/466, loss: 0.267128586769104 2023-01-24 02:46:08.774855: step: 538/466, loss: 0.044478755444288254 2023-01-24 02:46:09.418100: step: 540/466, loss: 0.05204843729734421 2023-01-24 02:46:10.030521: step: 542/466, loss: 0.11069519072771072 2023-01-24 02:46:10.743391: step: 544/466, loss: 0.30489975214004517 2023-01-24 02:46:11.328390: step: 546/466, loss: 0.13504533469676971 2023-01-24 02:46:11.980117: step: 548/466, loss: 0.05704271420836449 2023-01-24 02:46:12.631896: step: 550/466, loss: 0.024533575400710106 2023-01-24 02:46:13.305717: step: 552/466, loss: 0.058383066207170486 2023-01-24 02:46:13.930250: step: 554/466, loss: 0.3767291009426117 2023-01-24 02:46:14.581621: step: 556/466, loss: 0.06400660425424576 2023-01-24 02:46:15.189538: step: 558/466, loss: 3.120527982711792 2023-01-24 02:46:15.821932: step: 560/466, loss: 0.1019875556230545 2023-01-24 02:46:16.532714: step: 562/466, loss: 0.14001332223415375 2023-01-24 02:46:17.119969: step: 564/466, loss: 0.040228210389614105 2023-01-24 02:46:17.750475: step: 566/466, loss: 0.042286038398742676 2023-01-24 02:46:18.370471: step: 568/466, loss: 0.14664529263973236 2023-01-24 02:46:18.910707: step: 570/466, loss: 0.3690696656703949 2023-01-24 02:46:19.612150: step: 572/466, loss: 0.047218356281518936 2023-01-24 02:46:20.254630: step: 574/466, loss: 0.0679393857717514 2023-01-24 02:46:20.890051: step: 576/466, loss: 0.0728268176317215 2023-01-24 02:46:21.498374: step: 578/466, loss: 0.11438603699207306 2023-01-24 02:46:22.062079: step: 580/466, loss: 0.3222803771495819 2023-01-24 02:46:22.769995: step: 582/466, loss: 0.5018194913864136 2023-01-24 02:46:23.403998: step: 584/466, loss: 0.10466251522302628 2023-01-24 02:46:24.030102: step: 586/466, loss: 0.0772814080119133 2023-01-24 02:46:24.713499: step: 588/466, loss: 0.07693363726139069 2023-01-24 02:46:25.370413: step: 590/466, loss: 0.11500084400177002 2023-01-24 02:46:26.025750: step: 592/466, loss: 0.18170012533664703 2023-01-24 02:46:26.646702: step: 594/466, loss: 0.08627156913280487 2023-01-24 02:46:27.243354: step: 596/466, loss: 0.0973271057009697 2023-01-24 02:46:27.878830: step: 598/466, loss: 0.3498533070087433 2023-01-24 02:46:28.500137: step: 600/466, loss: 0.15622715651988983 2023-01-24 02:46:29.087031: step: 602/466, loss: 0.06789826601743698 2023-01-24 02:46:29.715559: step: 604/466, loss: 0.07342401891946793 2023-01-24 02:46:30.390860: step: 606/466, loss: 0.052906207740306854 2023-01-24 02:46:31.019977: step: 608/466, loss: 0.44701021909713745 2023-01-24 02:46:31.684182: step: 610/466, loss: 0.030610734596848488 2023-01-24 02:46:32.330677: step: 612/466, loss: 0.22132834792137146 2023-01-24 02:46:32.953214: step: 614/466, loss: 0.05592351034283638 2023-01-24 02:46:33.550172: step: 616/466, loss: 0.09313730895519257 2023-01-24 02:46:34.138047: step: 618/466, loss: 0.06869268417358398 2023-01-24 02:46:34.739324: step: 620/466, loss: 0.6655417084693909 2023-01-24 02:46:35.384868: step: 622/466, loss: 0.14721828699111938 2023-01-24 02:46:36.081111: step: 624/466, loss: 0.03533070161938667 2023-01-24 02:46:36.702333: step: 626/466, loss: 0.0813283696770668 2023-01-24 02:46:37.314127: step: 628/466, loss: 0.009937165305018425 2023-01-24 02:46:37.916339: step: 630/466, loss: 0.010268451645970345 2023-01-24 02:46:38.542503: step: 632/466, loss: 0.0864536464214325 2023-01-24 02:46:39.125183: step: 634/466, loss: 0.03386939689517021 2023-01-24 02:46:39.758617: step: 636/466, loss: 0.23723642528057098 2023-01-24 02:46:40.432160: step: 638/466, loss: 0.09166768193244934 2023-01-24 02:46:41.123743: step: 640/466, loss: 0.06017732992768288 2023-01-24 02:46:41.716163: step: 642/466, loss: 0.038364145904779434 2023-01-24 02:46:42.377204: step: 644/466, loss: 0.13770458102226257 2023-01-24 02:46:43.127665: step: 646/466, loss: 0.08427499979734421 2023-01-24 02:46:43.740652: step: 648/466, loss: 0.41027942299842834 2023-01-24 02:46:44.391067: step: 650/466, loss: 0.06100025773048401 2023-01-24 02:46:44.958563: step: 652/466, loss: 0.11708735674619675 2023-01-24 02:46:45.504173: step: 654/466, loss: 0.04891982674598694 2023-01-24 02:46:46.118664: step: 656/466, loss: 0.22542575001716614 2023-01-24 02:46:46.778202: step: 658/466, loss: 0.028449928387999535 2023-01-24 02:46:47.387479: step: 660/466, loss: 0.013650071807205677 2023-01-24 02:46:48.040337: step: 662/466, loss: 0.3902254104614258 2023-01-24 02:46:48.710598: step: 664/466, loss: 1.6393153667449951 2023-01-24 02:46:49.360588: step: 666/466, loss: 0.02685406059026718 2023-01-24 02:46:50.048216: step: 668/466, loss: 0.039072226732969284 2023-01-24 02:46:50.743603: step: 670/466, loss: 0.05245056375861168 2023-01-24 02:46:51.368712: step: 672/466, loss: 0.03552987053990364 2023-01-24 02:46:51.989113: step: 674/466, loss: 0.8868280053138733 2023-01-24 02:46:52.568946: step: 676/466, loss: 0.10024195909500122 2023-01-24 02:46:53.157762: step: 678/466, loss: 0.1645973175764084 2023-01-24 02:46:53.719355: step: 680/466, loss: 0.07723838090896606 2023-01-24 02:46:54.330938: step: 682/466, loss: 0.07511638849973679 2023-01-24 02:46:55.004526: step: 684/466, loss: 0.13545194268226624 2023-01-24 02:46:55.578500: step: 686/466, loss: 0.17986997961997986 2023-01-24 02:46:56.198864: step: 688/466, loss: 0.03680276498198509 2023-01-24 02:46:56.801822: step: 690/466, loss: 0.10135433822870255 2023-01-24 02:46:57.415324: step: 692/466, loss: 0.22157587110996246 2023-01-24 02:46:58.009036: step: 694/466, loss: 0.018694933503866196 2023-01-24 02:46:58.634592: step: 696/466, loss: 0.04209217429161072 2023-01-24 02:46:59.232725: step: 698/466, loss: 0.04767581820487976 2023-01-24 02:46:59.888814: step: 700/466, loss: 0.05723273381590843 2023-01-24 02:47:00.575576: step: 702/466, loss: 0.04313664138317108 2023-01-24 02:47:01.300673: step: 704/466, loss: 0.128017857670784 2023-01-24 02:47:01.876358: step: 706/466, loss: 0.06857144832611084 2023-01-24 02:47:02.425876: step: 708/466, loss: 0.0038373505230993032 2023-01-24 02:47:03.065310: step: 710/466, loss: 0.031138163059949875 2023-01-24 02:47:03.693386: step: 712/466, loss: 0.03506476804614067 2023-01-24 02:47:04.338029: step: 714/466, loss: 0.32810282707214355 2023-01-24 02:47:04.990700: step: 716/466, loss: 0.06982269883155823 2023-01-24 02:47:05.690734: step: 718/466, loss: 0.06388357281684875 2023-01-24 02:47:06.354716: step: 720/466, loss: 0.07002855837345123 2023-01-24 02:47:06.936212: step: 722/466, loss: 0.043687477707862854 2023-01-24 02:47:07.513753: step: 724/466, loss: 0.0316498838365078 2023-01-24 02:47:08.148194: step: 726/466, loss: 0.05471200495958328 2023-01-24 02:47:08.770774: step: 728/466, loss: 0.03243080899119377 2023-01-24 02:47:09.397999: step: 730/466, loss: 0.044905055314302444 2023-01-24 02:47:10.043302: step: 732/466, loss: 2.867321729660034 2023-01-24 02:47:10.730281: step: 734/466, loss: 0.2733849883079529 2023-01-24 02:47:11.425527: step: 736/466, loss: 0.06968646496534348 2023-01-24 02:47:12.064015: step: 738/466, loss: 0.03289669007062912 2023-01-24 02:47:12.642760: step: 740/466, loss: 0.05700673535466194 2023-01-24 02:47:13.307482: step: 742/466, loss: 0.05384642258286476 2023-01-24 02:47:13.926193: step: 744/466, loss: 0.09741157293319702 2023-01-24 02:47:14.542539: step: 746/466, loss: 0.14611674845218658 2023-01-24 02:47:15.173616: step: 748/466, loss: 0.33620062470436096 2023-01-24 02:47:15.769273: step: 750/466, loss: 0.03367577865719795 2023-01-24 02:47:16.308616: step: 752/466, loss: 0.45889565348625183 2023-01-24 02:47:16.878016: step: 754/466, loss: 0.9962413311004639 2023-01-24 02:47:17.537892: step: 756/466, loss: 0.07204210758209229 2023-01-24 02:47:18.172346: step: 758/466, loss: 0.03932822868227959 2023-01-24 02:47:18.782905: step: 760/466, loss: 0.040566276758909225 2023-01-24 02:47:19.379582: step: 762/466, loss: 0.06714367866516113 2023-01-24 02:47:19.966651: step: 764/466, loss: 0.04591933637857437 2023-01-24 02:47:20.610514: step: 766/466, loss: 0.055630650371313095 2023-01-24 02:47:21.186565: step: 768/466, loss: 0.06030455604195595 2023-01-24 02:47:21.795057: step: 770/466, loss: 0.03375536948442459 2023-01-24 02:47:22.445205: step: 772/466, loss: 0.17689305543899536 2023-01-24 02:47:23.086583: step: 774/466, loss: 0.2330557405948639 2023-01-24 02:47:23.673057: step: 776/466, loss: 0.5213000774383545 2023-01-24 02:47:24.311890: step: 778/466, loss: 0.4058179557323456 2023-01-24 02:47:24.863198: step: 780/466, loss: 0.11434870958328247 2023-01-24 02:47:25.474312: step: 782/466, loss: 0.037684813141822815 2023-01-24 02:47:26.138682: step: 784/466, loss: 0.08802974224090576 2023-01-24 02:47:26.737218: step: 786/466, loss: 0.03473489359021187 2023-01-24 02:47:27.325317: step: 788/466, loss: 0.027913058176636696 2023-01-24 02:47:27.886421: step: 790/466, loss: 0.05223953723907471 2023-01-24 02:47:28.436489: step: 792/466, loss: 0.04339861124753952 2023-01-24 02:47:29.061503: step: 794/466, loss: 0.08510332554578781 2023-01-24 02:47:29.649311: step: 796/466, loss: 0.11441497504711151 2023-01-24 02:47:30.240493: step: 798/466, loss: 0.04340003430843353 2023-01-24 02:47:30.819617: step: 800/466, loss: 0.07298655062913895 2023-01-24 02:47:31.427053: step: 802/466, loss: 0.06131652370095253 2023-01-24 02:47:32.111367: step: 804/466, loss: 0.1494426429271698 2023-01-24 02:47:32.681616: step: 806/466, loss: 0.07292305678129196 2023-01-24 02:47:33.325660: step: 808/466, loss: 0.43063512444496155 2023-01-24 02:47:33.905096: step: 810/466, loss: 0.09854334592819214 2023-01-24 02:47:34.485991: step: 812/466, loss: 0.027418632060289383 2023-01-24 02:47:35.073638: step: 814/466, loss: 0.0341159962117672 2023-01-24 02:47:35.713321: step: 816/466, loss: 0.08582870662212372 2023-01-24 02:47:36.381943: step: 818/466, loss: 0.08624772727489471 2023-01-24 02:47:37.017603: step: 820/466, loss: 0.07493390142917633 2023-01-24 02:47:37.613223: step: 822/466, loss: 0.035955145955085754 2023-01-24 02:47:38.151240: step: 824/466, loss: 0.01353217102587223 2023-01-24 02:47:38.822083: step: 826/466, loss: 0.14625613391399384 2023-01-24 02:47:39.483031: step: 828/466, loss: 0.0401049479842186 2023-01-24 02:47:40.071349: step: 830/466, loss: 0.02966419607400894 2023-01-24 02:47:40.697170: step: 832/466, loss: 0.02818690799176693 2023-01-24 02:47:41.274712: step: 834/466, loss: 0.15146100521087646 2023-01-24 02:47:41.857797: step: 836/466, loss: 0.045433297753334045 2023-01-24 02:47:42.488322: step: 838/466, loss: 0.0740746334195137 2023-01-24 02:47:43.080028: step: 840/466, loss: 0.09819278866052628 2023-01-24 02:47:43.700684: step: 842/466, loss: 0.1914547085762024 2023-01-24 02:47:44.296969: step: 844/466, loss: 0.008679748512804508 2023-01-24 02:47:44.880748: step: 846/466, loss: 0.017056753858923912 2023-01-24 02:47:45.544033: step: 848/466, loss: 0.04419960081577301 2023-01-24 02:47:46.120146: step: 850/466, loss: 0.05022304132580757 2023-01-24 02:47:46.740879: step: 852/466, loss: 0.08829095959663391 2023-01-24 02:47:47.363488: step: 854/466, loss: 0.05575420334935188 2023-01-24 02:47:48.013618: step: 856/466, loss: 0.013077719137072563 2023-01-24 02:47:48.600570: step: 858/466, loss: 0.0013224079739302397 2023-01-24 02:47:49.228414: step: 860/466, loss: 0.07103028893470764 2023-01-24 02:47:49.846301: step: 862/466, loss: 0.06670407205820084 2023-01-24 02:47:50.553711: step: 864/466, loss: 0.00398715166375041 2023-01-24 02:47:51.114519: step: 866/466, loss: 0.20541343092918396 2023-01-24 02:47:51.710293: step: 868/466, loss: 0.0585695244371891 2023-01-24 02:47:52.339502: step: 870/466, loss: 0.04970893636345863 2023-01-24 02:47:52.960063: step: 872/466, loss: 0.0989101380109787 2023-01-24 02:47:53.654499: step: 874/466, loss: 0.03108326904475689 2023-01-24 02:47:54.250200: step: 876/466, loss: 0.1255584955215454 2023-01-24 02:47:54.987590: step: 878/466, loss: 0.10420676320791245 2023-01-24 02:47:55.622268: step: 880/466, loss: 0.08131679892539978 2023-01-24 02:47:56.266914: step: 882/466, loss: 0.15888020396232605 2023-01-24 02:47:56.862527: step: 884/466, loss: 0.1120786964893341 2023-01-24 02:47:57.449425: step: 886/466, loss: 0.06874912232160568 2023-01-24 02:47:58.028472: step: 888/466, loss: 0.14452779293060303 2023-01-24 02:47:58.668043: step: 890/466, loss: 0.026590008288621902 2023-01-24 02:47:59.245638: step: 892/466, loss: 0.05112864449620247 2023-01-24 02:47:59.934222: step: 894/466, loss: 0.1316065490245819 2023-01-24 02:48:00.494611: step: 896/466, loss: 0.10517143458127975 2023-01-24 02:48:01.099402: step: 898/466, loss: 0.03901936486363411 2023-01-24 02:48:01.771748: step: 900/466, loss: 0.0990578681230545 2023-01-24 02:48:02.378700: step: 902/466, loss: 0.059160854667425156 2023-01-24 02:48:03.036211: step: 904/466, loss: 0.04121256619691849 2023-01-24 02:48:03.569612: step: 906/466, loss: 0.06555764377117157 2023-01-24 02:48:04.208640: step: 908/466, loss: 0.025577547028660774 2023-01-24 02:48:04.817713: step: 910/466, loss: 0.07716955244541168 2023-01-24 02:48:05.408276: step: 912/466, loss: 0.3638467788696289 2023-01-24 02:48:05.967230: step: 914/466, loss: 3.7387709617614746 2023-01-24 02:48:06.567648: step: 916/466, loss: 0.050838202238082886 2023-01-24 02:48:07.132466: step: 918/466, loss: 0.04963570088148117 2023-01-24 02:48:07.743997: step: 920/466, loss: 0.044707197695970535 2023-01-24 02:48:08.283584: step: 922/466, loss: 0.08209022879600525 2023-01-24 02:48:08.922568: step: 924/466, loss: 0.09267618507146835 2023-01-24 02:48:09.554618: step: 926/466, loss: 0.08240016549825668 2023-01-24 02:48:10.193006: step: 928/466, loss: 0.030464094132184982 2023-01-24 02:48:10.805490: step: 930/466, loss: 0.058836594223976135 2023-01-24 02:48:11.473178: step: 932/466, loss: 0.18826398253440857 ================================================== Loss: 0.142 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37647988505747126, 'r': 0.31075664136622394, 'f1': 0.34047557172557175}, 'combined': 0.2508767370609476, 'epoch': 19} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35561704728744054, 'r': 0.27858808397608154, 'f1': 0.312424693365889}, 'combined': 0.19563041547209875, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3521857193732194, 'r': 0.3127569576217584, 'f1': 0.33130234505862655}, 'combined': 0.24411751741161955, 'epoch': 19} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3638454597894836, 'r': 0.2939004390898807, 'f1': 0.3251539555782206}, 'combined': 0.20150385979495358, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33676394628099177, 'r': 0.30928605313092983, 'f1': 0.32244065281899115}, 'combined': 0.2375878494455724, 'epoch': 19} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.35582208144263194, 'r': 0.27796589364053703, 'f1': 0.3121119577324203}, 'combined': 0.20704456602051644, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.2571428571428571, 'f1': 0.30508474576271183}, 'combined': 0.20338983050847453, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44047619047619047, 'r': 0.40217391304347827, 'f1': 0.4204545454545454}, 'combined': 0.2102272727272727, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35714285714285715, 'r': 0.1724137931034483, 'f1': 0.23255813953488377}, 'combined': 0.1550387596899225, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:50:45.030881: step: 2/466, loss: 0.04473452270030975 2023-01-24 02:50:45.579393: step: 4/466, loss: 0.02670433558523655 2023-01-24 02:50:46.202703: step: 6/466, loss: 0.09869610518217087 2023-01-24 02:50:46.879963: step: 8/466, loss: 0.0571296364068985 2023-01-24 02:50:47.526654: step: 10/466, loss: 0.005984506104141474 2023-01-24 02:50:48.194342: step: 12/466, loss: 0.0931580662727356 2023-01-24 02:50:48.827636: step: 14/466, loss: 0.05044613778591156 2023-01-24 02:50:49.435270: step: 16/466, loss: 0.07179475575685501 2023-01-24 02:50:49.969881: step: 18/466, loss: 0.05603202059864998 2023-01-24 02:50:50.617956: step: 20/466, loss: 0.011259501799941063 2023-01-24 02:50:51.191881: step: 22/466, loss: 0.03856699913740158 2023-01-24 02:50:51.833742: step: 24/466, loss: 0.505285382270813 2023-01-24 02:50:52.437189: step: 26/466, loss: 0.030787307769060135 2023-01-24 02:50:53.083623: step: 28/466, loss: 4.232536315917969 2023-01-24 02:50:53.660275: step: 30/466, loss: 0.057020630687475204 2023-01-24 02:50:54.317953: step: 32/466, loss: 0.4303550124168396 2023-01-24 02:50:54.937257: step: 34/466, loss: 2.1507062911987305 2023-01-24 02:50:55.546964: step: 36/466, loss: 0.04527447745203972 2023-01-24 02:50:56.161724: step: 38/466, loss: 0.2569637894630432 2023-01-24 02:50:56.844739: step: 40/466, loss: 0.09066056460142136 2023-01-24 02:50:57.447896: step: 42/466, loss: 0.0494978092610836 2023-01-24 02:50:58.056280: step: 44/466, loss: 0.014772189781069756 2023-01-24 02:50:58.679584: step: 46/466, loss: 0.04391703009605408 2023-01-24 02:50:59.216144: step: 48/466, loss: 0.015545777045190334 2023-01-24 02:50:59.899825: step: 50/466, loss: 0.04105439782142639 2023-01-24 02:51:00.550837: step: 52/466, loss: 0.03398992493748665 2023-01-24 02:51:01.163630: step: 54/466, loss: 0.022285019978880882 2023-01-24 02:51:01.822647: step: 56/466, loss: 0.6243164539337158 2023-01-24 02:51:02.432637: step: 58/466, loss: 0.048682548105716705 2023-01-24 02:51:02.992155: step: 60/466, loss: 0.014602897688746452 2023-01-24 02:51:03.615332: step: 62/466, loss: 0.006099842954427004 2023-01-24 02:51:04.208863: step: 64/466, loss: 0.04590804874897003 2023-01-24 02:51:04.857488: step: 66/466, loss: 0.037340402603149414 2023-01-24 02:51:05.474668: step: 68/466, loss: 0.051009513437747955 2023-01-24 02:51:06.079388: step: 70/466, loss: 0.0889776349067688 2023-01-24 02:51:06.657111: step: 72/466, loss: 0.03151462972164154 2023-01-24 02:51:07.272296: step: 74/466, loss: 0.0397343672811985 2023-01-24 02:51:07.913189: step: 76/466, loss: 0.15170085430145264 2023-01-24 02:51:08.555836: step: 78/466, loss: 0.07274332642555237 2023-01-24 02:51:09.183487: step: 80/466, loss: 0.19480833411216736 2023-01-24 02:51:09.807963: step: 82/466, loss: 0.19183169305324554 2023-01-24 02:51:10.405571: step: 84/466, loss: 3.4221458435058594 2023-01-24 02:51:10.935829: step: 86/466, loss: 0.00953746773302555 2023-01-24 02:51:11.598974: step: 88/466, loss: 0.053085751831531525 2023-01-24 02:51:12.239176: step: 90/466, loss: 1.3711509704589844 2023-01-24 02:51:12.901057: step: 92/466, loss: 0.15095512568950653 2023-01-24 02:51:13.506785: step: 94/466, loss: 0.03161424770951271 2023-01-24 02:51:14.144819: step: 96/466, loss: 0.12189152091741562 2023-01-24 02:51:14.804915: step: 98/466, loss: 0.06684818863868713 2023-01-24 02:51:15.460960: step: 100/466, loss: 0.21442358195781708 2023-01-24 02:51:16.125124: step: 102/466, loss: 0.2841276526451111 2023-01-24 02:51:16.758416: step: 104/466, loss: 0.028847578912973404 2023-01-24 02:51:17.314077: step: 106/466, loss: 0.036764275282621384 2023-01-24 02:51:17.975645: step: 108/466, loss: 0.15567131340503693 2023-01-24 02:51:18.539355: step: 110/466, loss: 0.08534884452819824 2023-01-24 02:51:19.200365: step: 112/466, loss: 0.030880440026521683 2023-01-24 02:51:19.896212: step: 114/466, loss: 0.08005496859550476 2023-01-24 02:51:20.487444: step: 116/466, loss: 0.054005514830350876 2023-01-24 02:51:21.107518: step: 118/466, loss: 0.07104286551475525 2023-01-24 02:51:21.737813: step: 120/466, loss: 0.1706767976284027 2023-01-24 02:51:22.355891: step: 122/466, loss: 0.15481732785701752 2023-01-24 02:51:22.988737: step: 124/466, loss: 0.03148510307073593 2023-01-24 02:51:23.568815: step: 126/466, loss: 0.030918629840016365 2023-01-24 02:51:24.134746: step: 128/466, loss: 0.036177732050418854 2023-01-24 02:51:24.768322: step: 130/466, loss: 0.32562559843063354 2023-01-24 02:51:25.450304: step: 132/466, loss: 0.07243402302265167 2023-01-24 02:51:26.031373: step: 134/466, loss: 0.10035238415002823 2023-01-24 02:51:26.673781: step: 136/466, loss: 0.08090130239725113 2023-01-24 02:51:27.231427: step: 138/466, loss: 0.02899058908224106 2023-01-24 02:51:27.873639: step: 140/466, loss: 0.02151150442659855 2023-01-24 02:51:28.455840: step: 142/466, loss: 0.011760477907955647 2023-01-24 02:51:29.117048: step: 144/466, loss: 0.017362717539072037 2023-01-24 02:51:29.744823: step: 146/466, loss: 0.04929214343428612 2023-01-24 02:51:30.383996: step: 148/466, loss: 0.19189995527267456 2023-01-24 02:51:31.045799: step: 150/466, loss: 0.09975867718458176 2023-01-24 02:51:31.657060: step: 152/466, loss: 0.02128298208117485 2023-01-24 02:51:32.261751: step: 154/466, loss: 0.027812689542770386 2023-01-24 02:51:32.907185: step: 156/466, loss: 0.02206970937550068 2023-01-24 02:51:33.567526: step: 158/466, loss: 0.018312718719244003 2023-01-24 02:51:34.219137: step: 160/466, loss: 0.033462077379226685 2023-01-24 02:51:34.819069: step: 162/466, loss: 0.013343071565032005 2023-01-24 02:51:35.407234: step: 164/466, loss: 0.06444109231233597 2023-01-24 02:51:36.019516: step: 166/466, loss: 0.04069790616631508 2023-01-24 02:51:36.637489: step: 168/466, loss: 0.037937380373477936 2023-01-24 02:51:37.201042: step: 170/466, loss: 0.029808631166815758 2023-01-24 02:51:37.801905: step: 172/466, loss: 0.06813343614339828 2023-01-24 02:51:38.358086: step: 174/466, loss: 0.056821562349796295 2023-01-24 02:51:38.964448: step: 176/466, loss: 0.09389083832502365 2023-01-24 02:51:39.575497: step: 178/466, loss: 0.014280026778578758 2023-01-24 02:51:40.233458: step: 180/466, loss: 0.08231594413518906 2023-01-24 02:51:40.867722: step: 182/466, loss: 0.030400605872273445 2023-01-24 02:51:41.501537: step: 184/466, loss: 0.09933170676231384 2023-01-24 02:51:42.123429: step: 186/466, loss: 0.010236128233373165 2023-01-24 02:51:42.727693: step: 188/466, loss: 0.039357565343379974 2023-01-24 02:51:43.391675: step: 190/466, loss: 0.03649383783340454 2023-01-24 02:51:44.006351: step: 192/466, loss: 0.04769815132021904 2023-01-24 02:51:44.697159: step: 194/466, loss: 0.11039624363183975 2023-01-24 02:51:45.323512: step: 196/466, loss: 0.02091108448803425 2023-01-24 02:51:45.890595: step: 198/466, loss: 0.02651154436171055 2023-01-24 02:51:46.482756: step: 200/466, loss: 0.05700225010514259 2023-01-24 02:51:47.114705: step: 202/466, loss: 0.8323856592178345 2023-01-24 02:51:47.765082: step: 204/466, loss: 0.026068931445479393 2023-01-24 02:51:48.390543: step: 206/466, loss: 0.2673957049846649 2023-01-24 02:51:49.092459: step: 208/466, loss: 0.15241274237632751 2023-01-24 02:51:49.710255: step: 210/466, loss: 0.028414934873580933 2023-01-24 02:51:50.356535: step: 212/466, loss: 0.026978882029652596 2023-01-24 02:51:50.955600: step: 214/466, loss: 0.008560284040868282 2023-01-24 02:51:51.610648: step: 216/466, loss: 0.06276308745145798 2023-01-24 02:51:52.205040: step: 218/466, loss: 0.09750475734472275 2023-01-24 02:51:52.854779: step: 220/466, loss: 0.04805998131632805 2023-01-24 02:51:53.477743: step: 222/466, loss: 0.08236956596374512 2023-01-24 02:51:54.061561: step: 224/466, loss: 0.07440599054098129 2023-01-24 02:51:54.629402: step: 226/466, loss: 0.018338710069656372 2023-01-24 02:51:55.232202: step: 228/466, loss: 0.011853711679577827 2023-01-24 02:51:55.931573: step: 230/466, loss: 0.24913370609283447 2023-01-24 02:51:56.545601: step: 232/466, loss: 0.023227758705615997 2023-01-24 02:51:57.177340: step: 234/466, loss: 0.048622407019138336 2023-01-24 02:51:57.786994: step: 236/466, loss: 0.049675896763801575 2023-01-24 02:51:58.526172: step: 238/466, loss: 0.30055010318756104 2023-01-24 02:51:59.146239: step: 240/466, loss: 0.031212367117404938 2023-01-24 02:51:59.741718: step: 242/466, loss: 0.03245452046394348 2023-01-24 02:52:00.433364: step: 244/466, loss: 0.05381162464618683 2023-01-24 02:52:01.021245: step: 246/466, loss: 0.04562469944357872 2023-01-24 02:52:01.657829: step: 248/466, loss: 0.3888448476791382 2023-01-24 02:52:02.290619: step: 250/466, loss: 0.07395334541797638 2023-01-24 02:52:02.978742: step: 252/466, loss: 0.04723105579614639 2023-01-24 02:52:03.574025: step: 254/466, loss: 0.154044970870018 2023-01-24 02:52:04.153429: step: 256/466, loss: 0.0067335874773561954 2023-01-24 02:52:04.785898: step: 258/466, loss: 0.046946655958890915 2023-01-24 02:52:05.409055: step: 260/466, loss: 0.056307390332221985 2023-01-24 02:52:06.006721: step: 262/466, loss: 0.018942169845104218 2023-01-24 02:52:06.614625: step: 264/466, loss: 0.15772342681884766 2023-01-24 02:52:07.254281: step: 266/466, loss: 0.012910989113152027 2023-01-24 02:52:07.837401: step: 268/466, loss: 0.031117776408791542 2023-01-24 02:52:08.472314: step: 270/466, loss: 0.08422558009624481 2023-01-24 02:52:09.168337: step: 272/466, loss: 0.04051937535405159 2023-01-24 02:52:09.785245: step: 274/466, loss: 0.034043360501527786 2023-01-24 02:52:10.395475: step: 276/466, loss: 0.027019595727324486 2023-01-24 02:52:10.967938: step: 278/466, loss: 0.05760733783245087 2023-01-24 02:52:11.650620: step: 280/466, loss: 0.03902026265859604 2023-01-24 02:52:12.294637: step: 282/466, loss: 0.07579785585403442 2023-01-24 02:52:12.885892: step: 284/466, loss: 0.0660325214266777 2023-01-24 02:52:13.494697: step: 286/466, loss: 0.020649529993534088 2023-01-24 02:52:14.159775: step: 288/466, loss: 0.017145643010735512 2023-01-24 02:52:14.727076: step: 290/466, loss: 0.01371070183813572 2023-01-24 02:52:15.288688: step: 292/466, loss: 0.04645758122205734 2023-01-24 02:52:15.899787: step: 294/466, loss: 0.02898515574634075 2023-01-24 02:52:16.534849: step: 296/466, loss: 0.067694291472435 2023-01-24 02:52:17.141856: step: 298/466, loss: 0.19380968809127808 2023-01-24 02:52:17.668167: step: 300/466, loss: 0.04423229768872261 2023-01-24 02:52:18.298271: step: 302/466, loss: 0.13991779088974 2023-01-24 02:52:18.869762: step: 304/466, loss: 0.030483869835734367 2023-01-24 02:52:19.504951: step: 306/466, loss: 0.03580395504832268 2023-01-24 02:52:20.077964: step: 308/466, loss: 0.04315454140305519 2023-01-24 02:52:20.688931: step: 310/466, loss: 0.03765023499727249 2023-01-24 02:52:21.237906: step: 312/466, loss: 0.013311164453625679 2023-01-24 02:52:21.911947: step: 314/466, loss: 0.022211074829101562 2023-01-24 02:52:22.501148: step: 316/466, loss: 0.022172167897224426 2023-01-24 02:52:23.099010: step: 318/466, loss: 0.027014657855033875 2023-01-24 02:52:23.754885: step: 320/466, loss: 1.5955520868301392 2023-01-24 02:52:24.460114: step: 322/466, loss: 0.1916019171476364 2023-01-24 02:52:25.075402: step: 324/466, loss: 0.2803540825843811 2023-01-24 02:52:25.779408: step: 326/466, loss: 0.08756520599126816 2023-01-24 02:52:26.385268: step: 328/466, loss: 0.037374790757894516 2023-01-24 02:52:27.019324: step: 330/466, loss: 0.03666498512029648 2023-01-24 02:52:27.589780: step: 332/466, loss: 0.014097894541919231 2023-01-24 02:52:28.319968: step: 334/466, loss: 0.05914449319243431 2023-01-24 02:52:28.921227: step: 336/466, loss: 0.25361284613609314 2023-01-24 02:52:29.514436: step: 338/466, loss: 0.050735026597976685 2023-01-24 02:52:30.155803: step: 340/466, loss: 0.123032346367836 2023-01-24 02:52:30.811767: step: 342/466, loss: 0.13411374390125275 2023-01-24 02:52:31.375233: step: 344/466, loss: 0.0036818471271544695 2023-01-24 02:52:31.982041: step: 346/466, loss: 0.18625682592391968 2023-01-24 02:52:32.630776: step: 348/466, loss: 0.09510066360235214 2023-01-24 02:52:33.295587: step: 350/466, loss: 0.011853153817355633 2023-01-24 02:52:33.959624: step: 352/466, loss: 0.06537090241909027 2023-01-24 02:52:34.537378: step: 354/466, loss: 0.04980561509728432 2023-01-24 02:52:35.145937: step: 356/466, loss: 0.0770934671163559 2023-01-24 02:52:35.691794: step: 358/466, loss: 0.08176696300506592 2023-01-24 02:52:36.336471: step: 360/466, loss: 0.015116947703063488 2023-01-24 02:52:36.953334: step: 362/466, loss: 0.08408878743648529 2023-01-24 02:52:37.518349: step: 364/466, loss: 0.017760295420885086 2023-01-24 02:52:38.136956: step: 366/466, loss: 0.4415796399116516 2023-01-24 02:52:38.774658: step: 368/466, loss: 0.07064615935087204 2023-01-24 02:52:39.361364: step: 370/466, loss: 0.05070814490318298 2023-01-24 02:52:39.967282: step: 372/466, loss: 0.04469557851552963 2023-01-24 02:52:40.592413: step: 374/466, loss: 0.5184050798416138 2023-01-24 02:52:41.272625: step: 376/466, loss: 0.10490608960390091 2023-01-24 02:52:41.936959: step: 378/466, loss: 0.017590532079339027 2023-01-24 02:52:42.741312: step: 380/466, loss: 0.043248046189546585 2023-01-24 02:52:43.420903: step: 382/466, loss: 0.05493703484535217 2023-01-24 02:52:44.084452: step: 384/466, loss: 0.10812664777040482 2023-01-24 02:52:44.704678: step: 386/466, loss: 0.012005889788269997 2023-01-24 02:52:45.311323: step: 388/466, loss: 0.03784538805484772 2023-01-24 02:52:45.904113: step: 390/466, loss: 0.021849500015378 2023-01-24 02:52:46.476191: step: 392/466, loss: 0.014305876567959785 2023-01-24 02:52:47.024080: step: 394/466, loss: 0.0313371866941452 2023-01-24 02:52:47.607070: step: 396/466, loss: 0.18692046403884888 2023-01-24 02:52:48.212634: step: 398/466, loss: 0.09531613439321518 2023-01-24 02:52:48.828629: step: 400/466, loss: 0.04120203107595444 2023-01-24 02:52:49.424802: step: 402/466, loss: 0.04370715841650963 2023-01-24 02:52:50.049921: step: 404/466, loss: 0.07401634007692337 2023-01-24 02:52:50.703896: step: 406/466, loss: 0.10191718488931656 2023-01-24 02:52:51.316716: step: 408/466, loss: 0.0735027864575386 2023-01-24 02:52:51.948275: step: 410/466, loss: 0.07235123217105865 2023-01-24 02:52:52.575924: step: 412/466, loss: 0.11868435144424438 2023-01-24 02:52:53.203616: step: 414/466, loss: 0.05378936976194382 2023-01-24 02:52:53.794915: step: 416/466, loss: 0.14694419503211975 2023-01-24 02:52:54.483073: step: 418/466, loss: 0.0017634829273447394 2023-01-24 02:52:55.165955: step: 420/466, loss: 0.0756598636507988 2023-01-24 02:52:55.827037: step: 422/466, loss: 0.054010868072509766 2023-01-24 02:52:56.418082: step: 424/466, loss: 0.017388856038451195 2023-01-24 02:52:56.999498: step: 426/466, loss: 11.393009185791016 2023-01-24 02:52:57.646816: step: 428/466, loss: 0.31075289845466614 2023-01-24 02:52:58.281961: step: 430/466, loss: 0.04381495714187622 2023-01-24 02:52:58.890068: step: 432/466, loss: 0.8955109715461731 2023-01-24 02:52:59.459920: step: 434/466, loss: 0.018910974264144897 2023-01-24 02:53:00.093645: step: 436/466, loss: 0.024038417264819145 2023-01-24 02:53:00.668130: step: 438/466, loss: 0.030103906989097595 2023-01-24 02:53:01.232859: step: 440/466, loss: 0.37985509634017944 2023-01-24 02:53:01.803845: step: 442/466, loss: 0.005558432079851627 2023-01-24 02:53:02.411932: step: 444/466, loss: 0.03839689865708351 2023-01-24 02:53:03.005182: step: 446/466, loss: 0.06781277805566788 2023-01-24 02:53:03.641232: step: 448/466, loss: 0.18135598301887512 2023-01-24 02:53:04.218261: step: 450/466, loss: 0.04757753014564514 2023-01-24 02:53:04.860859: step: 452/466, loss: 0.032391536980867386 2023-01-24 02:53:05.466553: step: 454/466, loss: 0.04910852015018463 2023-01-24 02:53:06.099163: step: 456/466, loss: 0.06771580129861832 2023-01-24 02:53:06.739266: step: 458/466, loss: 0.09586554765701294 2023-01-24 02:53:07.338480: step: 460/466, loss: 0.08000804483890533 2023-01-24 02:53:07.978269: step: 462/466, loss: 0.03900667652487755 2023-01-24 02:53:08.579604: step: 464/466, loss: 0.09564217180013657 2023-01-24 02:53:09.214593: step: 466/466, loss: 0.04399758204817772 2023-01-24 02:53:09.874613: step: 468/466, loss: 0.05565526336431503 2023-01-24 02:53:10.464940: step: 470/466, loss: 0.0949612408876419 2023-01-24 02:53:11.112125: step: 472/466, loss: 0.032630555331707 2023-01-24 02:53:11.776431: step: 474/466, loss: 0.03060712106525898 2023-01-24 02:53:12.433536: step: 476/466, loss: 0.034767478704452515 2023-01-24 02:53:13.068582: step: 478/466, loss: 0.10432400554418564 2023-01-24 02:53:13.701512: step: 480/466, loss: 0.11564582586288452 2023-01-24 02:53:14.370464: step: 482/466, loss: 0.06722190976142883 2023-01-24 02:53:14.978220: step: 484/466, loss: 0.053551219403743744 2023-01-24 02:53:15.598982: step: 486/466, loss: 0.0823635533452034 2023-01-24 02:53:16.242005: step: 488/466, loss: 0.09532275795936584 2023-01-24 02:53:16.828854: step: 490/466, loss: 0.0684138834476471 2023-01-24 02:53:17.418326: step: 492/466, loss: 0.013710126280784607 2023-01-24 02:53:18.024479: step: 494/466, loss: 0.03670593351125717 2023-01-24 02:53:18.607690: step: 496/466, loss: 0.034739699214696884 2023-01-24 02:53:19.201310: step: 498/466, loss: 0.0073441630229353905 2023-01-24 02:53:19.824167: step: 500/466, loss: 0.07757313549518585 2023-01-24 02:53:20.477891: step: 502/466, loss: 0.04221266508102417 2023-01-24 02:53:21.141830: step: 504/466, loss: 0.018677473068237305 2023-01-24 02:53:21.793644: step: 506/466, loss: 0.056042835116386414 2023-01-24 02:53:22.387384: step: 508/466, loss: 0.09111560881137848 2023-01-24 02:53:22.972601: step: 510/466, loss: 0.033317744731903076 2023-01-24 02:53:23.682334: step: 512/466, loss: 0.08932971954345703 2023-01-24 02:53:24.276718: step: 514/466, loss: 0.02863481268286705 2023-01-24 02:53:24.881027: step: 516/466, loss: 0.06317055225372314 2023-01-24 02:53:25.473630: step: 518/466, loss: 0.046415556222200394 2023-01-24 02:53:26.083871: step: 520/466, loss: 0.059857502579689026 2023-01-24 02:53:26.696312: step: 522/466, loss: 0.022902199998497963 2023-01-24 02:53:27.324835: step: 524/466, loss: 0.09407820552587509 2023-01-24 02:53:27.928050: step: 526/466, loss: 0.06468775123357773 2023-01-24 02:53:28.577736: step: 528/466, loss: 0.061519160866737366 2023-01-24 02:53:29.197912: step: 530/466, loss: 0.10522399842739105 2023-01-24 02:53:29.811458: step: 532/466, loss: 0.006822334136813879 2023-01-24 02:53:30.405214: step: 534/466, loss: 0.039750613272190094 2023-01-24 02:53:31.012977: step: 536/466, loss: 0.05738213658332825 2023-01-24 02:53:31.582355: step: 538/466, loss: 0.2611234486103058 2023-01-24 02:53:32.270456: step: 540/466, loss: 0.04813788831233978 2023-01-24 02:53:32.942804: step: 542/466, loss: 0.14306648075580597 2023-01-24 02:53:33.622101: step: 544/466, loss: 0.06339039653539658 2023-01-24 02:53:34.272293: step: 546/466, loss: 0.11885727941989899 2023-01-24 02:53:34.940179: step: 548/466, loss: 0.133851557970047 2023-01-24 02:53:35.544377: step: 550/466, loss: 0.034811291843652725 2023-01-24 02:53:36.153331: step: 552/466, loss: 0.07138923555612564 2023-01-24 02:53:36.772029: step: 554/466, loss: 0.06095288693904877 2023-01-24 02:53:37.450118: step: 556/466, loss: 0.041248105466365814 2023-01-24 02:53:38.061621: step: 558/466, loss: 0.2250605970621109 2023-01-24 02:53:38.672830: step: 560/466, loss: 0.02611805871129036 2023-01-24 02:53:39.351169: step: 562/466, loss: 0.17415495216846466 2023-01-24 02:53:39.939390: step: 564/466, loss: 0.012367604300379753 2023-01-24 02:53:40.514442: step: 566/466, loss: 0.008169536478817463 2023-01-24 02:53:41.141063: step: 568/466, loss: 0.007444975897669792 2023-01-24 02:53:41.812878: step: 570/466, loss: 0.06542429327964783 2023-01-24 02:53:42.438641: step: 572/466, loss: 0.07730428129434586 2023-01-24 02:53:43.026581: step: 574/466, loss: 0.055045854300260544 2023-01-24 02:53:43.663496: step: 576/466, loss: 0.01367559190839529 2023-01-24 02:53:44.289997: step: 578/466, loss: 0.24741590023040771 2023-01-24 02:53:44.884358: step: 580/466, loss: 0.055768102407455444 2023-01-24 02:53:45.494488: step: 582/466, loss: 0.01121440064162016 2023-01-24 02:53:46.097008: step: 584/466, loss: 0.058712225407361984 2023-01-24 02:53:46.708725: step: 586/466, loss: 0.14266453683376312 2023-01-24 02:53:47.314956: step: 588/466, loss: 0.019000612199306488 2023-01-24 02:53:47.946655: step: 590/466, loss: 0.08274511992931366 2023-01-24 02:53:48.534773: step: 592/466, loss: 1.2463836669921875 2023-01-24 02:53:49.158914: step: 594/466, loss: 0.0700952559709549 2023-01-24 02:53:49.751712: step: 596/466, loss: 0.05400775372982025 2023-01-24 02:53:50.358396: step: 598/466, loss: 0.09943843632936478 2023-01-24 02:53:50.961525: step: 600/466, loss: 0.051910433918237686 2023-01-24 02:53:51.582203: step: 602/466, loss: 0.0629982128739357 2023-01-24 02:53:52.318012: step: 604/466, loss: 0.14483165740966797 2023-01-24 02:53:52.917660: step: 606/466, loss: 0.0398201122879982 2023-01-24 02:53:53.544216: step: 608/466, loss: 0.016322026029229164 2023-01-24 02:53:54.130898: step: 610/466, loss: 0.09787385165691376 2023-01-24 02:53:54.687532: step: 612/466, loss: 0.05622246488928795 2023-01-24 02:53:55.275894: step: 614/466, loss: 0.02025759033858776 2023-01-24 02:53:55.885076: step: 616/466, loss: 0.07201140373945236 2023-01-24 02:53:56.530603: step: 618/466, loss: 0.03505564108490944 2023-01-24 02:53:57.107058: step: 620/466, loss: 0.01954931952059269 2023-01-24 02:53:57.855076: step: 622/466, loss: 0.054865192621946335 2023-01-24 02:53:58.488134: step: 624/466, loss: 0.04479478299617767 2023-01-24 02:53:59.090835: step: 626/466, loss: 0.0170596893876791 2023-01-24 02:53:59.714949: step: 628/466, loss: 0.06574743986129761 2023-01-24 02:54:00.315855: step: 630/466, loss: 0.10950388759374619 2023-01-24 02:54:00.918912: step: 632/466, loss: 0.05113056302070618 2023-01-24 02:54:01.564504: step: 634/466, loss: 0.08861953020095825 2023-01-24 02:54:02.220070: step: 636/466, loss: 0.004567514173686504 2023-01-24 02:54:02.859093: step: 638/466, loss: 0.04586312919855118 2023-01-24 02:54:03.522563: step: 640/466, loss: 0.08604729920625687 2023-01-24 02:54:04.143299: step: 642/466, loss: 0.039753928780555725 2023-01-24 02:54:04.734301: step: 644/466, loss: 0.1817084401845932 2023-01-24 02:54:05.303500: step: 646/466, loss: 0.08637432008981705 2023-01-24 02:54:05.902239: step: 648/466, loss: 0.02143004909157753 2023-01-24 02:54:06.558655: step: 650/466, loss: 0.024217985570430756 2023-01-24 02:54:07.212692: step: 652/466, loss: 0.017095118761062622 2023-01-24 02:54:07.835870: step: 654/466, loss: 0.016036296263337135 2023-01-24 02:54:08.467514: step: 656/466, loss: 0.06510784476995468 2023-01-24 02:54:09.036297: step: 658/466, loss: 0.08545298129320145 2023-01-24 02:54:09.637007: step: 660/466, loss: 0.04476512596011162 2023-01-24 02:54:10.253539: step: 662/466, loss: 0.04669211059808731 2023-01-24 02:54:10.959588: step: 664/466, loss: 0.03477967903017998 2023-01-24 02:54:11.574618: step: 666/466, loss: 0.07388802617788315 2023-01-24 02:54:12.205634: step: 668/466, loss: 0.3461158275604248 2023-01-24 02:54:12.903961: step: 670/466, loss: 0.06619387865066528 2023-01-24 02:54:13.522681: step: 672/466, loss: 0.034900009632110596 2023-01-24 02:54:14.103407: step: 674/466, loss: 0.037802595645189285 2023-01-24 02:54:14.711632: step: 676/466, loss: 0.24723856151103973 2023-01-24 02:54:15.288955: step: 678/466, loss: 0.026428377255797386 2023-01-24 02:54:15.954084: step: 680/466, loss: 0.01633252017199993 2023-01-24 02:54:16.607091: step: 682/466, loss: 0.029313204810023308 2023-01-24 02:54:17.342908: step: 684/466, loss: 0.3518146276473999 2023-01-24 02:54:17.929544: step: 686/466, loss: 0.0816594809293747 2023-01-24 02:54:18.588649: step: 688/466, loss: 0.06408290565013885 2023-01-24 02:54:19.247124: step: 690/466, loss: 0.049211613833904266 2023-01-24 02:54:19.892104: step: 692/466, loss: 0.03745852783322334 2023-01-24 02:54:20.491879: step: 694/466, loss: 0.018650511279702187 2023-01-24 02:54:21.122757: step: 696/466, loss: 0.18802715837955475 2023-01-24 02:54:21.754509: step: 698/466, loss: 0.018624436110258102 2023-01-24 02:54:22.379411: step: 700/466, loss: 0.09911294281482697 2023-01-24 02:54:22.985227: step: 702/466, loss: 0.0057364520616829395 2023-01-24 02:54:23.596123: step: 704/466, loss: 0.10185503959655762 2023-01-24 02:54:24.199580: step: 706/466, loss: 0.010374201461672783 2023-01-24 02:54:24.889360: step: 708/466, loss: 0.08099198341369629 2023-01-24 02:54:25.540880: step: 710/466, loss: 0.03127642348408699 2023-01-24 02:54:26.223166: step: 712/466, loss: 0.1152448058128357 2023-01-24 02:54:26.897370: step: 714/466, loss: 0.017789151519536972 2023-01-24 02:54:27.473137: step: 716/466, loss: 0.022697532549500465 2023-01-24 02:54:28.139266: step: 718/466, loss: 0.014777543023228645 2023-01-24 02:54:28.839137: step: 720/466, loss: 0.024037595838308334 2023-01-24 02:54:29.414026: step: 722/466, loss: 0.06626082211732864 2023-01-24 02:54:30.136128: step: 724/466, loss: 0.06400647759437561 2023-01-24 02:54:30.746483: step: 726/466, loss: 0.040571171790361404 2023-01-24 02:54:31.296288: step: 728/466, loss: 0.0280370581895113 2023-01-24 02:54:31.982207: step: 730/466, loss: 0.15912574529647827 2023-01-24 02:54:32.604533: step: 732/466, loss: 0.05670500919222832 2023-01-24 02:54:33.232103: step: 734/466, loss: 0.05155463516712189 2023-01-24 02:54:33.819251: step: 736/466, loss: 0.0762219950556755 2023-01-24 02:54:34.431898: step: 738/466, loss: 0.030081335455179214 2023-01-24 02:54:35.034432: step: 740/466, loss: 0.03961065784096718 2023-01-24 02:54:35.693294: step: 742/466, loss: 0.2696669101715088 2023-01-24 02:54:36.360488: step: 744/466, loss: 0.0510166771709919 2023-01-24 02:54:36.976814: step: 746/466, loss: 0.043876972049474716 2023-01-24 02:54:37.550581: step: 748/466, loss: 0.5507307052612305 2023-01-24 02:54:38.138484: step: 750/466, loss: 0.00416531041264534 2023-01-24 02:54:38.790916: step: 752/466, loss: 0.12272489070892334 2023-01-24 02:54:39.382186: step: 754/466, loss: 0.19147373735904694 2023-01-24 02:54:40.005537: step: 756/466, loss: 0.1723286509513855 2023-01-24 02:54:40.740702: step: 758/466, loss: 0.045513223856687546 2023-01-24 02:54:41.339461: step: 760/466, loss: 0.07953735440969467 2023-01-24 02:54:41.984467: step: 762/466, loss: 0.074615478515625 2023-01-24 02:54:42.571992: step: 764/466, loss: 0.48381203413009644 2023-01-24 02:54:43.238835: step: 766/466, loss: 0.04739003628492355 2023-01-24 02:54:43.857414: step: 768/466, loss: 0.04128978028893471 2023-01-24 02:54:44.476868: step: 770/466, loss: 0.3272797763347626 2023-01-24 02:54:45.069707: step: 772/466, loss: 0.00991341657936573 2023-01-24 02:54:45.679162: step: 774/466, loss: 0.0712292268872261 2023-01-24 02:54:46.387484: step: 776/466, loss: 0.05232344567775726 2023-01-24 02:54:47.099881: step: 778/466, loss: 0.18456433713436127 2023-01-24 02:54:47.705050: step: 780/466, loss: 0.8814983367919922 2023-01-24 02:54:48.328907: step: 782/466, loss: 0.2667369544506073 2023-01-24 02:54:48.967859: step: 784/466, loss: 0.10731915384531021 2023-01-24 02:54:49.530804: step: 786/466, loss: 0.006539562717080116 2023-01-24 02:54:50.152050: step: 788/466, loss: 0.13255411386489868 2023-01-24 02:54:50.851195: step: 790/466, loss: 0.033760346472263336 2023-01-24 02:54:51.492907: step: 792/466, loss: 0.03486182168126106 2023-01-24 02:54:52.057021: step: 794/466, loss: 0.08852153271436691 2023-01-24 02:54:52.710264: step: 796/466, loss: 0.03008638136088848 2023-01-24 02:54:53.275564: step: 798/466, loss: 0.04051116853952408 2023-01-24 02:54:53.883973: step: 800/466, loss: 0.10273575782775879 2023-01-24 02:54:54.556863: step: 802/466, loss: 0.09617326408624649 2023-01-24 02:54:55.223678: step: 804/466, loss: 0.05812990292906761 2023-01-24 02:54:55.781608: step: 806/466, loss: 0.06590338051319122 2023-01-24 02:54:56.400359: step: 808/466, loss: 0.05216827243566513 2023-01-24 02:54:57.033072: step: 810/466, loss: 0.01766282506287098 2023-01-24 02:54:57.607868: step: 812/466, loss: 0.04496309161186218 2023-01-24 02:54:58.209623: step: 814/466, loss: 0.1217527985572815 2023-01-24 02:54:58.845818: step: 816/466, loss: 0.15513049066066742 2023-01-24 02:54:59.445140: step: 818/466, loss: 0.024965690448880196 2023-01-24 02:55:00.062837: step: 820/466, loss: 0.13559532165527344 2023-01-24 02:55:00.674094: step: 822/466, loss: 0.04282497987151146 2023-01-24 02:55:01.272714: step: 824/466, loss: 0.007608668878674507 2023-01-24 02:55:01.985756: step: 826/466, loss: 0.049658216536045074 2023-01-24 02:55:02.595988: step: 828/466, loss: 0.5602688789367676 2023-01-24 02:55:03.131004: step: 830/466, loss: 0.14390172064304352 2023-01-24 02:55:03.674529: step: 832/466, loss: 0.018171867355704308 2023-01-24 02:55:04.231682: step: 834/466, loss: 0.19310128688812256 2023-01-24 02:55:04.843611: step: 836/466, loss: 0.0631764605641365 2023-01-24 02:55:05.383290: step: 838/466, loss: 0.0030215552542358637 2023-01-24 02:55:05.980051: step: 840/466, loss: 0.012556682340800762 2023-01-24 02:55:06.641784: step: 842/466, loss: 0.06738331913948059 2023-01-24 02:55:07.267969: step: 844/466, loss: 0.112983338534832 2023-01-24 02:55:07.890071: step: 846/466, loss: 0.04611273854970932 2023-01-24 02:55:08.489576: step: 848/466, loss: 0.08048969507217407 2023-01-24 02:55:09.092628: step: 850/466, loss: 0.06972993165254593 2023-01-24 02:55:09.698640: step: 852/466, loss: 0.06211049109697342 2023-01-24 02:55:10.412935: step: 854/466, loss: 0.012988202273845673 2023-01-24 02:55:11.066582: step: 856/466, loss: 0.3481960892677307 2023-01-24 02:55:11.653105: step: 858/466, loss: 0.026356499642133713 2023-01-24 02:55:12.277242: step: 860/466, loss: 0.017003362998366356 2023-01-24 02:55:12.879753: step: 862/466, loss: 0.07605820149183273 2023-01-24 02:55:13.526773: step: 864/466, loss: 0.027923665940761566 2023-01-24 02:55:14.149754: step: 866/466, loss: 0.035243429243564606 2023-01-24 02:55:14.783661: step: 868/466, loss: 0.024010855704545975 2023-01-24 02:55:15.393286: step: 870/466, loss: 0.30430516600608826 2023-01-24 02:55:16.005203: step: 872/466, loss: 0.021546028554439545 2023-01-24 02:55:16.674110: step: 874/466, loss: 0.056758493185043335 2023-01-24 02:55:17.285680: step: 876/466, loss: 0.01230048481374979 2023-01-24 02:55:17.953103: step: 878/466, loss: 0.2607000470161438 2023-01-24 02:55:18.696353: step: 880/466, loss: 0.02006744220852852 2023-01-24 02:55:19.286967: step: 882/466, loss: 0.2876383066177368 2023-01-24 02:55:19.893749: step: 884/466, loss: 0.8681106567382812 2023-01-24 02:55:20.442339: step: 886/466, loss: 0.015471463091671467 2023-01-24 02:55:21.002056: step: 888/466, loss: 0.017921268939971924 2023-01-24 02:55:21.618384: step: 890/466, loss: 0.12608306109905243 2023-01-24 02:55:22.222076: step: 892/466, loss: 0.11684809625148773 2023-01-24 02:55:22.884066: step: 894/466, loss: 0.03564350679516792 2023-01-24 02:55:23.466546: step: 896/466, loss: 0.06272316724061966 2023-01-24 02:55:24.129853: step: 898/466, loss: 0.021497024223208427 2023-01-24 02:55:24.749348: step: 900/466, loss: 0.11995331943035126 2023-01-24 02:55:25.405808: step: 902/466, loss: 0.013620620593428612 2023-01-24 02:55:26.069706: step: 904/466, loss: 0.3535931408405304 2023-01-24 02:55:26.641630: step: 906/466, loss: 0.04088161885738373 2023-01-24 02:55:27.250391: step: 908/466, loss: 0.0372898168861866 2023-01-24 02:55:27.890808: step: 910/466, loss: 0.0914231464266777 2023-01-24 02:55:28.522448: step: 912/466, loss: 0.24533428251743317 2023-01-24 02:55:29.167324: step: 914/466, loss: 0.8424860835075378 2023-01-24 02:55:29.786791: step: 916/466, loss: 0.03754405677318573 2023-01-24 02:55:30.416755: step: 918/466, loss: 1.756027340888977 2023-01-24 02:55:31.038328: step: 920/466, loss: 0.030273964628577232 2023-01-24 02:55:31.656167: step: 922/466, loss: 0.04297087714076042 2023-01-24 02:55:32.305796: step: 924/466, loss: 3.040848731994629 2023-01-24 02:55:32.991122: step: 926/466, loss: 0.16071191430091858 2023-01-24 02:55:33.622777: step: 928/466, loss: 0.026798753067851067 2023-01-24 02:55:34.197072: step: 930/466, loss: 0.014087713323533535 2023-01-24 02:55:34.877020: step: 932/466, loss: 0.24279646575450897 ================================================== Loss: 0.153 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35668096784168213, 'r': 0.3126880590946056, 'f1': 0.33323884154268385}, 'combined': 0.24554440955776705, 'epoch': 20} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3506305707668762, 'r': 0.2730994427543449, 'f1': 0.3070463547151843}, 'combined': 0.19226267070950795, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3398558097109178, 'r': 0.32824783139062075, 'f1': 0.3339509790402648}, 'combined': 0.2460691424507214, 'epoch': 20} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.35996283456899286, 'r': 0.2865408123554618, 'f1': 0.31908263325613234}, 'combined': 0.1977413501868989, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31390400955921405, 'r': 0.3204560856600705, 'f1': 0.3171462105969149}, 'combined': 0.23368668149246358, 'epoch': 20} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3599981510552939, 'r': 0.284808663809568, 'f1': 0.31801956807004766}, 'combined': 0.21096347584844746, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.325, 'r': 0.325, 'f1': 0.325}, 'combined': 0.21666666666666667, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.358695652173913, 'f1': 0.3666666666666666}, 'combined': 0.1833333333333333, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.12931034482758622, 'f1': 0.1829268292682927}, 'combined': 0.12195121951219513, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36041468253968256, 'r': 0.30717160443722946, 'f1': 0.3316699532573766}, 'combined': 0.24438838661069853, 'epoch': 9} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34616060989632147, 'r': 0.26180739268331893, 'f1': 0.29813215939683185}, 'combined': 0.18668088485596016, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4375, 'r': 0.275, 'f1': 0.3377192982456141}, 'combined': 0.22514619883040937, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:58:08.039613: step: 2/466, loss: 0.2881387174129486 2023-01-24 02:58:08.685903: step: 4/466, loss: 0.10788802802562714 2023-01-24 02:58:09.319864: step: 6/466, loss: 0.060809627175331116 2023-01-24 02:58:09.919691: step: 8/466, loss: 0.07192735373973846 2023-01-24 02:58:10.555064: step: 10/466, loss: 0.035257741808891296 2023-01-24 02:58:11.115262: step: 12/466, loss: 0.042483024299144745 2023-01-24 02:58:11.744069: step: 14/466, loss: 0.1682446449995041 2023-01-24 02:58:12.302149: step: 16/466, loss: 0.02340201660990715 2023-01-24 02:58:12.952333: step: 18/466, loss: 0.05219205096364021 2023-01-24 02:58:13.492689: step: 20/466, loss: 0.1132228821516037 2023-01-24 02:58:14.046342: step: 22/466, loss: 0.10254693776369095 2023-01-24 02:58:14.632623: step: 24/466, loss: 0.031528573483228683 2023-01-24 02:58:15.222796: step: 26/466, loss: 1.483457088470459 2023-01-24 02:58:15.897911: step: 28/466, loss: 0.06329408288002014 2023-01-24 02:58:16.616298: step: 30/466, loss: 0.021416379138827324 2023-01-24 02:58:17.220078: step: 32/466, loss: 0.03439817950129509 2023-01-24 02:58:17.893826: step: 34/466, loss: 0.04783051088452339 2023-01-24 02:58:18.518898: step: 36/466, loss: 0.04394936561584473 2023-01-24 02:58:19.174804: step: 38/466, loss: 0.049832724034786224 2023-01-24 02:58:19.791604: step: 40/466, loss: 0.02321450226008892 2023-01-24 02:58:20.393465: step: 42/466, loss: 0.08828366547822952 2023-01-24 02:58:20.989312: step: 44/466, loss: 0.08183683454990387 2023-01-24 02:58:21.610445: step: 46/466, loss: 0.36498403549194336 2023-01-24 02:58:22.202794: step: 48/466, loss: 0.005173651501536369 2023-01-24 02:58:22.843213: step: 50/466, loss: 0.0514238141477108 2023-01-24 02:58:23.471917: step: 52/466, loss: 0.002969948807731271 2023-01-24 02:58:24.219130: step: 54/466, loss: 0.09550666809082031 2023-01-24 02:58:24.792670: step: 56/466, loss: 0.023871449753642082 2023-01-24 02:58:25.399051: step: 58/466, loss: 0.03539581596851349 2023-01-24 02:58:25.994832: step: 60/466, loss: 0.0438140369951725 2023-01-24 02:58:26.587409: step: 62/466, loss: 0.040493231266736984 2023-01-24 02:58:27.168498: step: 64/466, loss: 0.10182592272758484 2023-01-24 02:58:27.769458: step: 66/466, loss: 0.02831822633743286 2023-01-24 02:58:28.420557: step: 68/466, loss: 0.06584684550762177 2023-01-24 02:58:29.007554: step: 70/466, loss: 0.004456068854779005 2023-01-24 02:58:29.622818: step: 72/466, loss: 0.15545929968357086 2023-01-24 02:58:30.279251: step: 74/466, loss: 0.0639757588505745 2023-01-24 02:58:30.871699: step: 76/466, loss: 0.012895762920379639 2023-01-24 02:58:31.486123: step: 78/466, loss: 0.044522590935230255 2023-01-24 02:58:32.162384: step: 80/466, loss: 0.18327957391738892 2023-01-24 02:58:32.737586: step: 82/466, loss: 0.1578586846590042 2023-01-24 02:58:33.335646: step: 84/466, loss: 2.384281635284424 2023-01-24 02:58:33.861607: step: 86/466, loss: 0.11151908338069916 2023-01-24 02:58:34.536825: step: 88/466, loss: 0.10691257566213608 2023-01-24 02:58:35.190252: step: 90/466, loss: 0.10476025938987732 2023-01-24 02:58:35.771518: step: 92/466, loss: 0.01965838298201561 2023-01-24 02:58:36.394653: step: 94/466, loss: 0.1380435824394226 2023-01-24 02:58:37.063059: step: 96/466, loss: 0.01881014183163643 2023-01-24 02:58:37.622028: step: 98/466, loss: 0.03556925803422928 2023-01-24 02:58:38.236742: step: 100/466, loss: 0.32697975635528564 2023-01-24 02:58:38.872598: step: 102/466, loss: 0.03480343520641327 2023-01-24 02:58:39.474574: step: 104/466, loss: 0.08207301050424576 2023-01-24 02:58:40.080260: step: 106/466, loss: 0.2136201113462448 2023-01-24 02:58:40.642212: step: 108/466, loss: 0.09086485207080841 2023-01-24 02:58:41.244839: step: 110/466, loss: 0.050858333706855774 2023-01-24 02:58:41.901823: step: 112/466, loss: 0.09101345390081406 2023-01-24 02:58:42.554900: step: 114/466, loss: 0.025258095934987068 2023-01-24 02:58:43.156477: step: 116/466, loss: 0.05563211441040039 2023-01-24 02:58:43.744968: step: 118/466, loss: 0.0823717713356018 2023-01-24 02:58:44.334553: step: 120/466, loss: 0.029101431369781494 2023-01-24 02:58:44.950620: step: 122/466, loss: 0.021514972671866417 2023-01-24 02:58:45.500069: step: 124/466, loss: 0.03122498281300068 2023-01-24 02:58:46.091942: step: 126/466, loss: 0.07634158432483673 2023-01-24 02:58:46.666140: step: 128/466, loss: 0.04212908074259758 2023-01-24 02:58:47.260615: step: 130/466, loss: 0.009921550750732422 2023-01-24 02:58:47.916976: step: 132/466, loss: 0.01286023948341608 2023-01-24 02:58:48.533851: step: 134/466, loss: 0.04147264361381531 2023-01-24 02:58:49.210930: step: 136/466, loss: 0.06387916952371597 2023-01-24 02:58:49.833321: step: 138/466, loss: 0.03931007161736488 2023-01-24 02:58:50.461560: step: 140/466, loss: 0.0503489151597023 2023-01-24 02:58:51.012187: step: 142/466, loss: 0.0030978983268141747 2023-01-24 02:58:51.721151: step: 144/466, loss: 0.029595546424388885 2023-01-24 02:58:52.309016: step: 146/466, loss: 0.07707898318767548 2023-01-24 02:58:52.899315: step: 148/466, loss: 0.045399896800518036 2023-01-24 02:58:53.442478: step: 150/466, loss: 0.046740684658288956 2023-01-24 02:58:54.033777: step: 152/466, loss: 0.021156247705221176 2023-01-24 02:58:54.685875: step: 154/466, loss: 0.021989645436406136 2023-01-24 02:58:55.334670: step: 156/466, loss: 0.05270089954137802 2023-01-24 02:58:55.987807: step: 158/466, loss: 0.08426899462938309 2023-01-24 02:58:56.582475: step: 160/466, loss: 0.05149581655859947 2023-01-24 02:58:57.115494: step: 162/466, loss: 0.38246864080429077 2023-01-24 02:58:57.795260: step: 164/466, loss: 0.027103828266263008 2023-01-24 02:58:58.431500: step: 166/466, loss: 0.022400904446840286 2023-01-24 02:58:59.145569: step: 168/466, loss: 0.07104605436325073 2023-01-24 02:58:59.770582: step: 170/466, loss: 0.041852835565805435 2023-01-24 02:59:00.329130: step: 172/466, loss: 0.018567143008112907 2023-01-24 02:59:00.928821: step: 174/466, loss: 0.018758349120616913 2023-01-24 02:59:01.553659: step: 176/466, loss: 0.03230084851384163 2023-01-24 02:59:02.223655: step: 178/466, loss: 0.06489178538322449 2023-01-24 02:59:02.798755: step: 180/466, loss: 0.015959657728672028 2023-01-24 02:59:03.448895: step: 182/466, loss: 0.0533185712993145 2023-01-24 02:59:04.140441: step: 184/466, loss: 0.0024304878897964954 2023-01-24 02:59:04.720308: step: 186/466, loss: 0.0210149846971035 2023-01-24 02:59:05.281056: step: 188/466, loss: 0.02649274840950966 2023-01-24 02:59:05.942687: step: 190/466, loss: 0.02087913081049919 2023-01-24 02:59:06.608223: step: 192/466, loss: 0.07188748568296432 2023-01-24 02:59:07.259974: step: 194/466, loss: 0.061781302094459534 2023-01-24 02:59:07.858160: step: 196/466, loss: 0.005796810146421194 2023-01-24 02:59:08.433020: step: 198/466, loss: 0.02908952720463276 2023-01-24 02:59:09.059054: step: 200/466, loss: 0.015466309152543545 2023-01-24 02:59:09.673658: step: 202/466, loss: 0.014555848203599453 2023-01-24 02:59:10.259934: step: 204/466, loss: 0.26382020115852356 2023-01-24 02:59:10.901753: step: 206/466, loss: 0.08366383612155914 2023-01-24 02:59:11.466889: step: 208/466, loss: 0.02350272424519062 2023-01-24 02:59:12.082378: step: 210/466, loss: 0.007414946332573891 2023-01-24 02:59:12.707590: step: 212/466, loss: 0.1358312964439392 2023-01-24 02:59:13.259644: step: 214/466, loss: 0.023835817351937294 2023-01-24 02:59:13.905679: step: 216/466, loss: 0.03659604862332344 2023-01-24 02:59:14.506612: step: 218/466, loss: 0.18813088536262512 2023-01-24 02:59:15.171712: step: 220/466, loss: 0.18920515477657318 2023-01-24 02:59:15.785853: step: 222/466, loss: 0.08653844892978668 2023-01-24 02:59:16.380112: step: 224/466, loss: 0.032764118164777756 2023-01-24 02:59:16.982110: step: 226/466, loss: 0.032470885664224625 2023-01-24 02:59:17.608063: step: 228/466, loss: 0.04481193795800209 2023-01-24 02:59:18.223847: step: 230/466, loss: 0.09721400588750839 2023-01-24 02:59:18.818080: step: 232/466, loss: 0.04957348108291626 2023-01-24 02:59:19.414923: step: 234/466, loss: 0.0030713321175426245 2023-01-24 02:59:20.052576: step: 236/466, loss: 0.06352909654378891 2023-01-24 02:59:20.696132: step: 238/466, loss: 0.07602309435606003 2023-01-24 02:59:21.277350: step: 240/466, loss: 0.05582737177610397 2023-01-24 02:59:21.872060: step: 242/466, loss: 0.08120190352201462 2023-01-24 02:59:22.464825: step: 244/466, loss: 0.376693993806839 2023-01-24 02:59:23.131801: step: 246/466, loss: 0.4345664381980896 2023-01-24 02:59:23.695377: step: 248/466, loss: 0.04912075027823448 2023-01-24 02:59:24.298399: step: 250/466, loss: 0.05676799267530441 2023-01-24 02:59:24.968936: step: 252/466, loss: 0.015716781839728355 2023-01-24 02:59:25.574048: step: 254/466, loss: 0.4627186357975006 2023-01-24 02:59:26.167939: step: 256/466, loss: 0.013965928927063942 2023-01-24 02:59:26.740354: step: 258/466, loss: 0.0017522589769214392 2023-01-24 02:59:27.358406: step: 260/466, loss: 0.2042047530412674 2023-01-24 02:59:27.952907: step: 262/466, loss: 1.5125832557678223 2023-01-24 02:59:28.565155: step: 264/466, loss: 0.01134142279624939 2023-01-24 02:59:29.226578: step: 266/466, loss: 0.04114525020122528 2023-01-24 02:59:29.822216: step: 268/466, loss: 1.2687081098556519 2023-01-24 02:59:30.413065: step: 270/466, loss: 0.050007227808237076 2023-01-24 02:59:30.987132: step: 272/466, loss: 0.007330013904720545 2023-01-24 02:59:31.637656: step: 274/466, loss: 0.01399720087647438 2023-01-24 02:59:32.212385: step: 276/466, loss: 0.23334860801696777 2023-01-24 02:59:32.878377: step: 278/466, loss: 0.0035590690094977617 2023-01-24 02:59:33.515956: step: 280/466, loss: 0.03379582613706589 2023-01-24 02:59:34.080751: step: 282/466, loss: 0.037499357014894485 2023-01-24 02:59:34.664250: step: 284/466, loss: 0.021356504410505295 2023-01-24 02:59:35.261138: step: 286/466, loss: 0.12415394186973572 2023-01-24 02:59:35.834315: step: 288/466, loss: 0.03271034359931946 2023-01-24 02:59:36.475106: step: 290/466, loss: 0.04419732838869095 2023-01-24 02:59:37.137001: step: 292/466, loss: 0.10165723413228989 2023-01-24 02:59:37.877288: step: 294/466, loss: 0.17787736654281616 2023-01-24 02:59:38.484415: step: 296/466, loss: 0.07455705851316452 2023-01-24 02:59:39.141495: step: 298/466, loss: 0.017839863896369934 2023-01-24 02:59:39.872284: step: 300/466, loss: 0.03834192827343941 2023-01-24 02:59:40.536466: step: 302/466, loss: 0.02332369238138199 2023-01-24 02:59:41.150631: step: 304/466, loss: 0.03470650315284729 2023-01-24 02:59:41.742859: step: 306/466, loss: 0.04157089442014694 2023-01-24 02:59:42.433610: step: 308/466, loss: 0.03688224032521248 2023-01-24 02:59:43.039343: step: 310/466, loss: 0.047191083431243896 2023-01-24 02:59:43.690812: step: 312/466, loss: 1.0049819946289062 2023-01-24 02:59:44.262301: step: 314/466, loss: 0.03437206894159317 2023-01-24 02:59:44.846863: step: 316/466, loss: 0.1354771852493286 2023-01-24 02:59:45.482316: step: 318/466, loss: 0.0745021179318428 2023-01-24 02:59:46.081811: step: 320/466, loss: 0.024394439533352852 2023-01-24 02:59:46.711174: step: 322/466, loss: 0.0415155328810215 2023-01-24 02:59:47.339032: step: 324/466, loss: 0.03859580308198929 2023-01-24 02:59:47.901511: step: 326/466, loss: 0.1962372362613678 2023-01-24 02:59:48.565538: step: 328/466, loss: 0.3744176924228668 2023-01-24 02:59:49.153101: step: 330/466, loss: 0.12103322893381119 2023-01-24 02:59:49.797589: step: 332/466, loss: 0.07620114833116531 2023-01-24 02:59:50.554592: step: 334/466, loss: 0.08342382311820984 2023-01-24 02:59:51.144535: step: 336/466, loss: 0.04669582098722458 2023-01-24 02:59:51.716860: step: 338/466, loss: 0.01537937019020319 2023-01-24 02:59:52.397387: step: 340/466, loss: 0.0483732670545578 2023-01-24 02:59:53.018310: step: 342/466, loss: 0.01084462832659483 2023-01-24 02:59:53.649674: step: 344/466, loss: 0.13782353699207306 2023-01-24 02:59:54.271996: step: 346/466, loss: 0.054718755185604095 2023-01-24 02:59:54.866374: step: 348/466, loss: 0.03365980461239815 2023-01-24 02:59:55.493025: step: 350/466, loss: 0.20594370365142822 2023-01-24 02:59:56.127675: step: 352/466, loss: 0.04973024129867554 2023-01-24 02:59:56.714649: step: 354/466, loss: 0.03241625428199768 2023-01-24 02:59:57.377049: step: 356/466, loss: 0.01917138136923313 2023-01-24 02:59:58.001797: step: 358/466, loss: 0.04949241504073143 2023-01-24 02:59:58.606412: step: 360/466, loss: 0.04784351959824562 2023-01-24 02:59:59.226418: step: 362/466, loss: 0.25273874402046204 2023-01-24 02:59:59.856419: step: 364/466, loss: 0.596697211265564 2023-01-24 03:00:00.533023: step: 366/466, loss: 0.0008260474423877895 2023-01-24 03:00:01.196569: step: 368/466, loss: 0.25643035769462585 2023-01-24 03:00:01.769130: step: 370/466, loss: 0.09255601465702057 2023-01-24 03:00:02.454115: step: 372/466, loss: 0.05651896074414253 2023-01-24 03:00:03.052645: step: 374/466, loss: 0.039041806012392044 2023-01-24 03:00:03.877428: step: 376/466, loss: 0.1036134660243988 2023-01-24 03:00:04.482217: step: 378/466, loss: 0.14990819990634918 2023-01-24 03:00:05.111102: step: 380/466, loss: 0.02866818755865097 2023-01-24 03:00:05.750005: step: 382/466, loss: 0.2643183767795563 2023-01-24 03:00:06.361948: step: 384/466, loss: 0.3558858633041382 2023-01-24 03:00:07.043034: step: 386/466, loss: 0.043460335582494736 2023-01-24 03:00:07.616265: step: 388/466, loss: 0.03281353414058685 2023-01-24 03:00:08.255609: step: 390/466, loss: 0.14346811175346375 2023-01-24 03:00:08.950195: step: 392/466, loss: 0.7667785882949829 2023-01-24 03:00:09.581105: step: 394/466, loss: 0.025008998811244965 2023-01-24 03:00:10.157771: step: 396/466, loss: 0.02988959103822708 2023-01-24 03:00:10.693780: step: 398/466, loss: 0.38024312257766724 2023-01-24 03:00:11.316932: step: 400/466, loss: 0.03910621628165245 2023-01-24 03:00:11.979836: step: 402/466, loss: 0.0872785747051239 2023-01-24 03:00:12.593854: step: 404/466, loss: 0.048010170459747314 2023-01-24 03:00:13.222039: step: 406/466, loss: 0.07352911680936813 2023-01-24 03:00:13.870782: step: 408/466, loss: 0.022242622449994087 2023-01-24 03:00:14.481649: step: 410/466, loss: 0.007695019245147705 2023-01-24 03:00:15.122537: step: 412/466, loss: 0.027991732582449913 2023-01-24 03:00:15.789991: step: 414/466, loss: 0.013311603106558323 2023-01-24 03:00:16.425657: step: 416/466, loss: 0.016815369948744774 2023-01-24 03:00:17.054429: step: 418/466, loss: 0.06997986137866974 2023-01-24 03:00:17.675323: step: 420/466, loss: 0.039529308676719666 2023-01-24 03:00:18.340419: step: 422/466, loss: 0.057546790689229965 2023-01-24 03:00:18.949500: step: 424/466, loss: 0.024760199710726738 2023-01-24 03:00:19.566005: step: 426/466, loss: 0.020182352513074875 2023-01-24 03:00:20.262073: step: 428/466, loss: 0.019982418045401573 2023-01-24 03:00:20.931426: step: 430/466, loss: 0.04188135638833046 2023-01-24 03:00:21.510336: step: 432/466, loss: 0.033656615763902664 2023-01-24 03:00:22.117551: step: 434/466, loss: 0.02355225943028927 2023-01-24 03:00:22.733262: step: 436/466, loss: 0.0621369406580925 2023-01-24 03:00:23.341956: step: 438/466, loss: 0.1696689873933792 2023-01-24 03:00:24.011692: step: 440/466, loss: 0.084506556391716 2023-01-24 03:00:24.673989: step: 442/466, loss: 0.13824161887168884 2023-01-24 03:00:25.348937: step: 444/466, loss: 0.023126086220145226 2023-01-24 03:00:25.929286: step: 446/466, loss: 0.04749936982989311 2023-01-24 03:00:26.617510: step: 448/466, loss: 0.031942762434482574 2023-01-24 03:00:27.296352: step: 450/466, loss: 0.03469008207321167 2023-01-24 03:00:27.859580: step: 452/466, loss: 0.008654058910906315 2023-01-24 03:00:28.514161: step: 454/466, loss: 0.09825482219457626 2023-01-24 03:00:29.174961: step: 456/466, loss: 0.034265968948602676 2023-01-24 03:00:29.760973: step: 458/466, loss: 0.06974327564239502 2023-01-24 03:00:30.444289: step: 460/466, loss: 0.031080160290002823 2023-01-24 03:00:30.979970: step: 462/466, loss: 0.41150692105293274 2023-01-24 03:00:31.612703: step: 464/466, loss: 0.006570714525878429 2023-01-24 03:00:32.234106: step: 466/466, loss: 0.16019433736801147 2023-01-24 03:00:32.794964: step: 468/466, loss: 0.023206396028399467 2023-01-24 03:00:33.494362: step: 470/466, loss: 0.053858887404203415 2023-01-24 03:00:34.105350: step: 472/466, loss: 0.3436448276042938 2023-01-24 03:00:34.756039: step: 474/466, loss: 0.30463162064552307 2023-01-24 03:00:35.417475: step: 476/466, loss: 0.5427336096763611 2023-01-24 03:00:36.068702: step: 478/466, loss: 0.06205464527010918 2023-01-24 03:00:36.653531: step: 480/466, loss: 0.03354858234524727 2023-01-24 03:00:37.295206: step: 482/466, loss: 0.015020878054201603 2023-01-24 03:00:38.012904: step: 484/466, loss: 0.014658765867352486 2023-01-24 03:00:38.636039: step: 486/466, loss: 0.06817291676998138 2023-01-24 03:00:39.333247: step: 488/466, loss: 0.02162042073905468 2023-01-24 03:00:39.933315: step: 490/466, loss: 0.07436501979827881 2023-01-24 03:00:40.488153: step: 492/466, loss: 0.043860066682100296 2023-01-24 03:00:41.097370: step: 494/466, loss: 0.028184695169329643 2023-01-24 03:00:41.759175: step: 496/466, loss: 0.0674021914601326 2023-01-24 03:00:42.385050: step: 498/466, loss: 0.05346820876002312 2023-01-24 03:00:43.059806: step: 500/466, loss: 0.020384909585118294 2023-01-24 03:00:43.707819: step: 502/466, loss: 0.02656802535057068 2023-01-24 03:00:44.324995: step: 504/466, loss: 0.09113605320453644 2023-01-24 03:00:44.914738: step: 506/466, loss: 0.031240740790963173 2023-01-24 03:00:45.588548: step: 508/466, loss: 0.09123338013887405 2023-01-24 03:00:46.220783: step: 510/466, loss: 0.634196400642395 2023-01-24 03:00:46.805578: step: 512/466, loss: 0.04012421891093254 2023-01-24 03:00:47.418536: step: 514/466, loss: 0.0730554610490799 2023-01-24 03:00:48.087181: step: 516/466, loss: 0.009127141907811165 2023-01-24 03:00:48.685256: step: 518/466, loss: 0.02245831862092018 2023-01-24 03:00:49.299965: step: 520/466, loss: 0.11491833627223969 2023-01-24 03:00:49.911741: step: 522/466, loss: 0.014328841120004654 2023-01-24 03:00:50.510504: step: 524/466, loss: 0.13443446159362793 2023-01-24 03:00:51.121486: step: 526/466, loss: 0.01783270388841629 2023-01-24 03:00:51.770539: step: 528/466, loss: 0.026168670505285263 2023-01-24 03:00:52.469222: step: 530/466, loss: 0.006085870787501335 2023-01-24 03:00:53.091264: step: 532/466, loss: 0.03878147900104523 2023-01-24 03:00:53.664015: step: 534/466, loss: 0.02244793437421322 2023-01-24 03:00:54.288930: step: 536/466, loss: 0.05535934120416641 2023-01-24 03:00:54.873580: step: 538/466, loss: 0.020007774233818054 2023-01-24 03:00:55.525987: step: 540/466, loss: 0.23324796557426453 2023-01-24 03:00:56.165175: step: 542/466, loss: 0.01911095529794693 2023-01-24 03:00:56.780979: step: 544/466, loss: 0.08553896099328995 2023-01-24 03:00:57.444832: step: 546/466, loss: 0.010592618025839329 2023-01-24 03:00:58.048293: step: 548/466, loss: 0.020420879125595093 2023-01-24 03:00:58.670402: step: 550/466, loss: 0.22198089957237244 2023-01-24 03:00:59.289685: step: 552/466, loss: 0.005745941307395697 2023-01-24 03:00:59.900146: step: 554/466, loss: 0.1390320509672165 2023-01-24 03:01:00.525407: step: 556/466, loss: 0.0484815314412117 2023-01-24 03:01:01.135710: step: 558/466, loss: 0.03415696322917938 2023-01-24 03:01:01.779178: step: 560/466, loss: 0.049011725932359695 2023-01-24 03:01:02.409952: step: 562/466, loss: 0.07793086767196655 2023-01-24 03:01:03.058408: step: 564/466, loss: 0.02373851090669632 2023-01-24 03:01:03.604705: step: 566/466, loss: 0.0372571125626564 2023-01-24 03:01:04.202639: step: 568/466, loss: 0.21455547213554382 2023-01-24 03:01:04.777243: step: 570/466, loss: 0.03172042965888977 2023-01-24 03:01:05.396375: step: 572/466, loss: 0.01622224971652031 2023-01-24 03:01:05.998586: step: 574/466, loss: 0.05237264558672905 2023-01-24 03:01:06.633270: step: 576/466, loss: 0.045655809342861176 2023-01-24 03:01:07.271974: step: 578/466, loss: 0.020912673324346542 2023-01-24 03:01:07.873501: step: 580/466, loss: 0.8630627989768982 2023-01-24 03:01:08.511222: step: 582/466, loss: 0.0424136221408844 2023-01-24 03:01:09.120508: step: 584/466, loss: 0.03257325664162636 2023-01-24 03:01:09.789833: step: 586/466, loss: 0.034630049020051956 2023-01-24 03:01:10.406913: step: 588/466, loss: 0.6312510371208191 2023-01-24 03:01:11.044293: step: 590/466, loss: 0.044650305062532425 2023-01-24 03:01:11.654083: step: 592/466, loss: 0.010726863518357277 2023-01-24 03:01:12.360068: step: 594/466, loss: 0.040749356150627136 2023-01-24 03:01:12.974954: step: 596/466, loss: 0.06677427142858505 2023-01-24 03:01:13.643212: step: 598/466, loss: 0.05118778720498085 2023-01-24 03:01:14.277752: step: 600/466, loss: 0.0253734327852726 2023-01-24 03:01:14.889062: step: 602/466, loss: 0.04381844028830528 2023-01-24 03:01:15.503398: step: 604/466, loss: 0.04041266813874245 2023-01-24 03:01:16.158684: step: 606/466, loss: 0.1804942786693573 2023-01-24 03:01:16.786957: step: 608/466, loss: 0.016160905361175537 2023-01-24 03:01:17.391516: step: 610/466, loss: 0.04123891890048981 2023-01-24 03:01:18.012071: step: 612/466, loss: 0.03186152130365372 2023-01-24 03:01:18.554654: step: 614/466, loss: 0.041749026626348495 2023-01-24 03:01:19.158701: step: 616/466, loss: 0.046115558594465256 2023-01-24 03:01:19.713848: step: 618/466, loss: 0.17633561789989471 2023-01-24 03:01:20.296985: step: 620/466, loss: 0.28329288959503174 2023-01-24 03:01:20.909671: step: 622/466, loss: 0.015036552213132381 2023-01-24 03:01:21.588488: step: 624/466, loss: 0.17075170576572418 2023-01-24 03:01:22.177853: step: 626/466, loss: 0.025837387889623642 2023-01-24 03:01:22.782047: step: 628/466, loss: 0.06743304431438446 2023-01-24 03:01:23.454545: step: 630/466, loss: 0.16815988719463348 2023-01-24 03:01:24.076805: step: 632/466, loss: 0.08669286221265793 2023-01-24 03:01:24.698907: step: 634/466, loss: 0.03428836911916733 2023-01-24 03:01:25.359481: step: 636/466, loss: 0.214237779378891 2023-01-24 03:01:25.969086: step: 638/466, loss: 0.02262129820883274 2023-01-24 03:01:26.606869: step: 640/466, loss: 0.01413120049983263 2023-01-24 03:01:27.201862: step: 642/466, loss: 0.024058640003204346 2023-01-24 03:01:27.795847: step: 644/466, loss: 0.03418533504009247 2023-01-24 03:01:28.426575: step: 646/466, loss: 0.07237999141216278 2023-01-24 03:01:29.023199: step: 648/466, loss: 0.008366351947188377 2023-01-24 03:01:29.604859: step: 650/466, loss: 0.08939099311828613 2023-01-24 03:01:30.198719: step: 652/466, loss: 0.040893878787755966 2023-01-24 03:01:30.799680: step: 654/466, loss: 0.1322600394487381 2023-01-24 03:01:31.381695: step: 656/466, loss: 0.08454293757677078 2023-01-24 03:01:32.000780: step: 658/466, loss: 0.032387323677539825 2023-01-24 03:01:32.663718: step: 660/466, loss: 0.2031060755252838 2023-01-24 03:01:33.271407: step: 662/466, loss: 0.49967092275619507 2023-01-24 03:01:33.901013: step: 664/466, loss: 0.042758241295814514 2023-01-24 03:01:34.532050: step: 666/466, loss: 0.02878168784081936 2023-01-24 03:01:35.226226: step: 668/466, loss: 0.0734516903758049 2023-01-24 03:01:35.869560: step: 670/466, loss: 3.0360729694366455 2023-01-24 03:01:36.524654: step: 672/466, loss: 0.2565597891807556 2023-01-24 03:01:37.149025: step: 674/466, loss: 0.60455721616745 2023-01-24 03:01:37.807230: step: 676/466, loss: 0.05530927702784538 2023-01-24 03:01:38.455985: step: 678/466, loss: 0.07635991275310516 2023-01-24 03:01:39.023107: step: 680/466, loss: 0.04688085615634918 2023-01-24 03:01:39.612084: step: 682/466, loss: 0.016532668843865395 2023-01-24 03:01:40.268515: step: 684/466, loss: 0.06713685393333435 2023-01-24 03:01:40.864380: step: 686/466, loss: 0.028215663507580757 2023-01-24 03:01:41.493617: step: 688/466, loss: 0.027146853506565094 2023-01-24 03:01:42.116497: step: 690/466, loss: 0.04736965522170067 2023-01-24 03:01:42.740674: step: 692/466, loss: 0.03607270121574402 2023-01-24 03:01:43.402483: step: 694/466, loss: 0.04344771057367325 2023-01-24 03:01:43.999144: step: 696/466, loss: 0.004855440929532051 2023-01-24 03:01:44.610631: step: 698/466, loss: 0.05644207075238228 2023-01-24 03:01:45.251135: step: 700/466, loss: 0.2104901671409607 2023-01-24 03:01:45.929158: step: 702/466, loss: 0.11141925305128098 2023-01-24 03:01:46.603146: step: 704/466, loss: 0.0320793054997921 2023-01-24 03:01:47.312921: step: 706/466, loss: 0.013713311403989792 2023-01-24 03:01:47.853709: step: 708/466, loss: 0.3742595911026001 2023-01-24 03:01:48.443681: step: 710/466, loss: 0.027933279052376747 2023-01-24 03:01:49.048877: step: 712/466, loss: 0.049460165202617645 2023-01-24 03:01:49.585989: step: 714/466, loss: 0.005260720383375883 2023-01-24 03:01:50.173019: step: 716/466, loss: 0.04703853651881218 2023-01-24 03:01:50.823247: step: 718/466, loss: 0.12700912356376648 2023-01-24 03:01:51.503569: step: 720/466, loss: 0.04476194083690643 2023-01-24 03:01:52.124050: step: 722/466, loss: 0.8264212608337402 2023-01-24 03:01:52.745724: step: 724/466, loss: 0.007151505909860134 2023-01-24 03:01:53.341210: step: 726/466, loss: 0.032925426959991455 2023-01-24 03:01:53.974112: step: 728/466, loss: 0.09729457646608353 2023-01-24 03:01:54.573442: step: 730/466, loss: 0.015224629081785679 2023-01-24 03:01:55.195562: step: 732/466, loss: 0.023833435028791428 2023-01-24 03:01:55.765608: step: 734/466, loss: 0.04177038371562958 2023-01-24 03:01:56.394225: step: 736/466, loss: 0.012526067905128002 2023-01-24 03:01:57.007828: step: 738/466, loss: 0.21370099484920502 2023-01-24 03:01:57.546629: step: 740/466, loss: 0.04333192855119705 2023-01-24 03:01:58.156498: step: 742/466, loss: 0.019405148923397064 2023-01-24 03:01:58.741740: step: 744/466, loss: 0.047963518649339676 2023-01-24 03:01:59.391840: step: 746/466, loss: 0.09783299267292023 2023-01-24 03:01:59.985479: step: 748/466, loss: 0.1445675790309906 2023-01-24 03:02:00.632185: step: 750/466, loss: 0.07998822629451752 2023-01-24 03:02:01.241541: step: 752/466, loss: 0.03509335592389107 2023-01-24 03:02:01.846098: step: 754/466, loss: 0.042283378541469574 2023-01-24 03:02:02.385223: step: 756/466, loss: 0.16924165189266205 2023-01-24 03:02:03.031902: step: 758/466, loss: 0.003952855244278908 2023-01-24 03:02:03.680049: step: 760/466, loss: 0.07449666410684586 2023-01-24 03:02:04.317741: step: 762/466, loss: 0.0128124188631773 2023-01-24 03:02:04.985479: step: 764/466, loss: 0.5852197408676147 2023-01-24 03:02:05.642192: step: 766/466, loss: 0.10924971848726273 2023-01-24 03:02:06.340547: step: 768/466, loss: 0.013517715968191624 2023-01-24 03:02:06.919337: step: 770/466, loss: 0.040156129747629166 2023-01-24 03:02:07.495435: step: 772/466, loss: 0.26231157779693604 2023-01-24 03:02:08.185261: step: 774/466, loss: 0.035481423139572144 2023-01-24 03:02:08.771261: step: 776/466, loss: 0.8418871164321899 2023-01-24 03:02:09.435587: step: 778/466, loss: 0.027692481875419617 2023-01-24 03:02:10.023097: step: 780/466, loss: 0.05802244693040848 2023-01-24 03:02:10.663797: step: 782/466, loss: 0.10543683171272278 2023-01-24 03:02:11.225893: step: 784/466, loss: 0.031873684376478195 2023-01-24 03:02:11.878329: step: 786/466, loss: 0.10513757169246674 2023-01-24 03:02:12.532499: step: 788/466, loss: 0.08614762127399445 2023-01-24 03:02:13.140520: step: 790/466, loss: 0.11027763038873672 2023-01-24 03:02:13.759253: step: 792/466, loss: 0.1143607497215271 2023-01-24 03:02:14.418122: step: 794/466, loss: 0.17218486964702606 2023-01-24 03:02:15.021686: step: 796/466, loss: 0.08547357469797134 2023-01-24 03:02:15.795960: step: 798/466, loss: 0.03696130961179733 2023-01-24 03:02:16.465169: step: 800/466, loss: 0.13679207861423492 2023-01-24 03:02:17.103520: step: 802/466, loss: 0.06206807866692543 2023-01-24 03:02:17.696985: step: 804/466, loss: 0.01966715045273304 2023-01-24 03:02:18.329674: step: 806/466, loss: 0.09318049997091293 2023-01-24 03:02:19.004838: step: 808/466, loss: 0.02171410620212555 2023-01-24 03:02:19.518992: step: 810/466, loss: 0.012220603413879871 2023-01-24 03:02:20.120103: step: 812/466, loss: 0.03538200631737709 2023-01-24 03:02:20.818377: step: 814/466, loss: 0.04497019946575165 2023-01-24 03:02:21.447123: step: 816/466, loss: 0.08776964992284775 2023-01-24 03:02:22.095405: step: 818/466, loss: 0.049796875566244125 2023-01-24 03:02:22.685626: step: 820/466, loss: 0.11578883230686188 2023-01-24 03:02:23.327319: step: 822/466, loss: 0.02360844425857067 2023-01-24 03:02:23.976027: step: 824/466, loss: 0.07886414974927902 2023-01-24 03:02:24.567703: step: 826/466, loss: 0.2746346890926361 2023-01-24 03:02:25.238232: step: 828/466, loss: 0.053636591881513596 2023-01-24 03:02:25.827426: step: 830/466, loss: 0.0729004293680191 2023-01-24 03:02:26.481514: step: 832/466, loss: 0.013451708480715752 2023-01-24 03:02:27.132829: step: 834/466, loss: 0.031762100756168365 2023-01-24 03:02:27.678891: step: 836/466, loss: 0.024036163464188576 2023-01-24 03:02:28.286459: step: 838/466, loss: 0.04187922924757004 2023-01-24 03:02:28.886691: step: 840/466, loss: 0.016345703974366188 2023-01-24 03:02:29.494750: step: 842/466, loss: 0.02965148165822029 2023-01-24 03:02:30.132862: step: 844/466, loss: 0.050337281078100204 2023-01-24 03:02:30.706317: step: 846/466, loss: 0.005305310245603323 2023-01-24 03:02:31.410310: step: 848/466, loss: 0.05893997102975845 2023-01-24 03:02:31.997317: step: 850/466, loss: 0.017510700970888138 2023-01-24 03:02:32.614619: step: 852/466, loss: 0.043549079447984695 2023-01-24 03:02:33.240256: step: 854/466, loss: 0.05888461694121361 2023-01-24 03:02:33.900187: step: 856/466, loss: 0.12205497175455093 2023-01-24 03:02:34.494203: step: 858/466, loss: 0.035270705819129944 2023-01-24 03:02:35.087933: step: 860/466, loss: 0.014527016319334507 2023-01-24 03:02:35.738332: step: 862/466, loss: 0.031558044254779816 2023-01-24 03:02:36.374893: step: 864/466, loss: 0.04664739593863487 2023-01-24 03:02:36.979528: step: 866/466, loss: 0.08443605154752731 2023-01-24 03:02:37.570242: step: 868/466, loss: 0.11665617674589157 2023-01-24 03:02:38.229880: step: 870/466, loss: 0.06290258467197418 2023-01-24 03:02:38.811957: step: 872/466, loss: 0.01301049068570137 2023-01-24 03:02:39.439369: step: 874/466, loss: 0.031586259603500366 2023-01-24 03:02:40.168050: step: 876/466, loss: 0.0244061928242445 2023-01-24 03:02:40.789049: step: 878/466, loss: 0.05870399996638298 2023-01-24 03:02:41.388397: step: 880/466, loss: 0.007997308857738972 2023-01-24 03:02:42.046330: step: 882/466, loss: 0.16636857390403748 2023-01-24 03:02:42.639641: step: 884/466, loss: 0.030749928206205368 2023-01-24 03:02:43.289171: step: 886/466, loss: 0.014747374691069126 2023-01-24 03:02:43.884769: step: 888/466, loss: 0.05380203574895859 2023-01-24 03:02:44.495595: step: 890/466, loss: 0.057087745517492294 2023-01-24 03:02:45.155258: step: 892/466, loss: 0.5643709897994995 2023-01-24 03:02:45.812579: step: 894/466, loss: 0.10381991416215897 2023-01-24 03:02:46.433456: step: 896/466, loss: 0.044120799750089645 2023-01-24 03:02:47.118128: step: 898/466, loss: 0.051240790635347366 2023-01-24 03:02:47.813711: step: 900/466, loss: 0.1393282115459442 2023-01-24 03:02:48.407246: step: 902/466, loss: 0.03885339945554733 2023-01-24 03:02:49.068999: step: 904/466, loss: 0.04772276058793068 2023-01-24 03:02:49.703494: step: 906/466, loss: 0.029068568721413612 2023-01-24 03:02:50.353121: step: 908/466, loss: 0.013651649467647076 2023-01-24 03:02:51.037649: step: 910/466, loss: 0.13033856451511383 2023-01-24 03:02:51.649499: step: 912/466, loss: 0.011761067435145378 2023-01-24 03:02:52.252736: step: 914/466, loss: 0.49115467071533203 2023-01-24 03:02:52.955377: step: 916/466, loss: 0.02538359723985195 2023-01-24 03:02:53.557045: step: 918/466, loss: 0.061281025409698486 2023-01-24 03:02:54.180413: step: 920/466, loss: 0.27115190029144287 2023-01-24 03:02:54.776365: step: 922/466, loss: 0.06859371811151505 2023-01-24 03:02:55.445301: step: 924/466, loss: 0.0548187717795372 2023-01-24 03:02:56.066150: step: 926/466, loss: 0.06278951466083527 2023-01-24 03:02:56.659859: step: 928/466, loss: 0.038607917726039886 2023-01-24 03:02:57.237891: step: 930/466, loss: 0.02120407484471798 2023-01-24 03:02:57.875403: step: 932/466, loss: 0.3725205063819885 ================================================== Loss: 0.110 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491647627064294, 'r': 0.32200014169890834, 'f1': 0.33503272393943667}, 'combined': 0.2468662176395849, 'epoch': 21} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32932177105318117, 'r': 0.2710662953073116, 'f1': 0.2973677774262388}, 'combined': 0.1862022531547477, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34057620575988895, 'r': 0.33863744178023114, 'f1': 0.33960405674249633}, 'combined': 0.25023456812604994, 'epoch': 21} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.33981061120638145, 'r': 0.2772462026449177, 'f1': 0.30535665261487954}, 'combined': 0.18923510866274224, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3119048105671307, 'r': 0.3290684528943542, 'f1': 0.32025683227206775}, 'combined': 0.23597871851626043, 'epoch': 21} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.33388307345214147, 'r': 0.2735609282936696, 'f1': 0.30072685960386525}, 'combined': 0.1994920751827621, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.32857142857142857, 'f1': 0.34328358208955223}, 'combined': 0.2288557213930348, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30952380952380953, 'r': 0.2826086956521739, 'f1': 0.29545454545454547}, 'combined': 0.14772727272727273, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.1724137931034483, 'f1': 0.22727272727272724}, 'combined': 0.1515151515151515, 'epoch': 21} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491647627064294, 'r': 0.32200014169890834, 'f1': 0.33503272393943667}, 'combined': 0.2468662176395849, 'epoch': 21} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32932177105318117, 'r': 0.2710662953073116, 'f1': 0.2973677774262388}, 'combined': 0.1862022531547477, 'epoch': 21} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.32857142857142857, 'f1': 0.34328358208955223}, 'combined': 0.2288557213930348, 'epoch': 21} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:05:39.500497: step: 2/466, loss: 0.06524524092674255 2023-01-24 03:05:40.041314: step: 4/466, loss: 0.9549713730812073 2023-01-24 03:05:40.746258: step: 6/466, loss: 0.10257505625486374 2023-01-24 03:05:41.361299: step: 8/466, loss: 0.05847657844424248 2023-01-24 03:05:41.932642: step: 10/466, loss: 0.047362662851810455 2023-01-24 03:05:42.556144: step: 12/466, loss: 0.03749975562095642 2023-01-24 03:05:43.133488: step: 14/466, loss: 0.0317390151321888 2023-01-24 03:05:43.774309: step: 16/466, loss: 0.026071008294820786 2023-01-24 03:05:44.383234: step: 18/466, loss: 0.049456655979156494 2023-01-24 03:05:45.019907: step: 20/466, loss: 1.5443713665008545 2023-01-24 03:05:45.637822: step: 22/466, loss: 2.770106792449951 2023-01-24 03:05:46.327208: step: 24/466, loss: 0.012899891473352909 2023-01-24 03:05:46.975281: step: 26/466, loss: 0.03178015723824501 2023-01-24 03:05:47.529686: step: 28/466, loss: 0.006744012236595154 2023-01-24 03:05:48.102854: step: 30/466, loss: 0.01676984131336212 2023-01-24 03:05:48.749756: step: 32/466, loss: 0.03753257915377617 2023-01-24 03:05:49.402047: step: 34/466, loss: 0.05055082589387894 2023-01-24 03:05:49.972147: step: 36/466, loss: 0.006336488761007786 2023-01-24 03:05:50.564369: step: 38/466, loss: 0.01944642700254917 2023-01-24 03:05:51.209597: step: 40/466, loss: 0.08281132578849792 2023-01-24 03:05:51.818831: step: 42/466, loss: 0.03852911293506622 2023-01-24 03:05:52.437889: step: 44/466, loss: 0.11461681872606277 2023-01-24 03:05:53.036523: step: 46/466, loss: 0.05935616046190262 2023-01-24 03:05:53.647832: step: 48/466, loss: 0.07514059543609619 2023-01-24 03:05:54.247216: step: 50/466, loss: 0.015336104668676853 2023-01-24 03:05:54.844619: step: 52/466, loss: 0.011880473233759403 2023-01-24 03:05:55.454508: step: 54/466, loss: 0.04381778836250305 2023-01-24 03:05:56.130994: step: 56/466, loss: 0.07939602434635162 2023-01-24 03:05:56.713676: step: 58/466, loss: 0.002874930389225483 2023-01-24 03:05:57.324361: step: 60/466, loss: 0.011842741630971432 2023-01-24 03:05:57.900552: step: 62/466, loss: 0.324677437543869 2023-01-24 03:05:58.529645: step: 64/466, loss: 0.26164790987968445 2023-01-24 03:05:59.088437: step: 66/466, loss: 0.04579608887434006 2023-01-24 03:05:59.731853: step: 68/466, loss: 0.8473932147026062 2023-01-24 03:06:00.385144: step: 70/466, loss: 0.0588836707174778 2023-01-24 03:06:01.008652: step: 72/466, loss: 0.029760386794805527 2023-01-24 03:06:01.591256: step: 74/466, loss: 0.05991767719388008 2023-01-24 03:06:02.201596: step: 76/466, loss: 0.0347471684217453 2023-01-24 03:06:02.881907: step: 78/466, loss: 0.11526650935411453 2023-01-24 03:06:03.460109: step: 80/466, loss: 0.01211107149720192 2023-01-24 03:06:04.050350: step: 82/466, loss: 0.06731019914150238 2023-01-24 03:06:04.672162: step: 84/466, loss: 0.020972801372408867 2023-01-24 03:06:05.296691: step: 86/466, loss: 0.07082726061344147 2023-01-24 03:06:05.966974: step: 88/466, loss: 0.010794714093208313 2023-01-24 03:06:06.547628: step: 90/466, loss: 0.024650154635310173 2023-01-24 03:06:07.162404: step: 92/466, loss: 0.019832108169794083 2023-01-24 03:06:07.758008: step: 94/466, loss: 0.009915913455188274 2023-01-24 03:06:08.374628: step: 96/466, loss: 0.08327384293079376 2023-01-24 03:06:09.041774: step: 98/466, loss: 0.09288603067398071 2023-01-24 03:06:09.631659: step: 100/466, loss: 0.013974449597299099 2023-01-24 03:06:10.297592: step: 102/466, loss: 0.04457048699259758 2023-01-24 03:06:10.858142: step: 104/466, loss: 0.4012412428855896 2023-01-24 03:06:11.419040: step: 106/466, loss: 0.1333913803100586 2023-01-24 03:06:11.992829: step: 108/466, loss: 0.07510756701231003 2023-01-24 03:06:12.652114: step: 110/466, loss: 0.008668056689202785 2023-01-24 03:06:13.288452: step: 112/466, loss: 0.03645727410912514 2023-01-24 03:06:13.889970: step: 114/466, loss: 0.08467361330986023 2023-01-24 03:06:14.526273: step: 116/466, loss: 0.07070888578891754 2023-01-24 03:06:15.112928: step: 118/466, loss: 0.020062562078237534 2023-01-24 03:06:15.662374: step: 120/466, loss: 0.016420189291238785 2023-01-24 03:06:16.281957: step: 122/466, loss: 0.07351458072662354 2023-01-24 03:06:16.886900: step: 124/466, loss: 0.04343784973025322 2023-01-24 03:06:17.503674: step: 126/466, loss: 0.007278779987245798 2023-01-24 03:06:18.105892: step: 128/466, loss: 0.10511089116334915 2023-01-24 03:06:18.750058: step: 130/466, loss: 0.0025861081667244434 2023-01-24 03:06:19.359418: step: 132/466, loss: 0.046648066490888596 2023-01-24 03:06:20.000058: step: 134/466, loss: 0.153986856341362 2023-01-24 03:06:20.645374: step: 136/466, loss: 0.01985854096710682 2023-01-24 03:06:21.215869: step: 138/466, loss: 0.015150061808526516 2023-01-24 03:06:21.836425: step: 140/466, loss: 0.17932286858558655 2023-01-24 03:06:22.420490: step: 142/466, loss: 0.013499271124601364 2023-01-24 03:06:23.052588: step: 144/466, loss: 0.30745261907577515 2023-01-24 03:06:23.662088: step: 146/466, loss: 0.014704683795571327 2023-01-24 03:06:24.301981: step: 148/466, loss: 0.12307847291231155 2023-01-24 03:06:24.877625: step: 150/466, loss: 0.004720141179859638 2023-01-24 03:06:25.608621: step: 152/466, loss: 0.043396204710006714 2023-01-24 03:06:26.195943: step: 154/466, loss: 0.02459942363202572 2023-01-24 03:06:26.843374: step: 156/466, loss: 0.06305425614118576 2023-01-24 03:06:27.382553: step: 158/466, loss: 0.031243721023201942 2023-01-24 03:06:27.979224: step: 160/466, loss: 0.05132858082652092 2023-01-24 03:06:28.572753: step: 162/466, loss: 0.07695545256137848 2023-01-24 03:06:29.175723: step: 164/466, loss: 0.4492061734199524 2023-01-24 03:06:29.780717: step: 166/466, loss: 0.02572566270828247 2023-01-24 03:06:30.414329: step: 168/466, loss: 0.00399467209354043 2023-01-24 03:06:30.995595: step: 170/466, loss: 0.0033108617644757032 2023-01-24 03:06:31.575241: step: 172/466, loss: 0.032420262694358826 2023-01-24 03:06:32.137871: step: 174/466, loss: 0.02495727315545082 2023-01-24 03:06:32.778494: step: 176/466, loss: 0.024452194571495056 2023-01-24 03:06:33.367675: step: 178/466, loss: 0.044989731162786484 2023-01-24 03:06:33.971032: step: 180/466, loss: 0.06311078369617462 2023-01-24 03:06:34.619532: step: 182/466, loss: 0.1478491574525833 2023-01-24 03:06:35.175429: step: 184/466, loss: 0.03128127381205559 2023-01-24 03:06:35.793970: step: 186/466, loss: 0.011426354758441448 2023-01-24 03:06:36.392301: step: 188/466, loss: 0.030414700508117676 2023-01-24 03:06:37.006343: step: 190/466, loss: 0.04100172221660614 2023-01-24 03:06:37.598910: step: 192/466, loss: 0.03840643912553787 2023-01-24 03:06:38.209250: step: 194/466, loss: 0.8019331097602844 2023-01-24 03:06:38.855400: step: 196/466, loss: 0.05144786834716797 2023-01-24 03:06:39.458875: step: 198/466, loss: 0.03573480620980263 2023-01-24 03:06:40.082372: step: 200/466, loss: 0.2057233452796936 2023-01-24 03:06:40.646357: step: 202/466, loss: 0.03584648296236992 2023-01-24 03:06:41.331255: step: 204/466, loss: 0.02789546735584736 2023-01-24 03:06:41.989052: step: 206/466, loss: 0.04161622375249863 2023-01-24 03:06:42.620029: step: 208/466, loss: 0.032335065305233 2023-01-24 03:06:43.207629: step: 210/466, loss: 0.43722274899482727 2023-01-24 03:06:43.835623: step: 212/466, loss: 0.015449839644134045 2023-01-24 03:06:44.479367: step: 214/466, loss: 0.027153804898262024 2023-01-24 03:06:45.050696: step: 216/466, loss: 0.028914693742990494 2023-01-24 03:06:45.692017: step: 218/466, loss: 0.03848309814929962 2023-01-24 03:06:46.313595: step: 220/466, loss: 0.04102660343050957 2023-01-24 03:06:46.913137: step: 222/466, loss: 0.2741301953792572 2023-01-24 03:06:47.581375: step: 224/466, loss: 0.03221369534730911 2023-01-24 03:06:48.226888: step: 226/466, loss: 0.0764458030462265 2023-01-24 03:06:48.851971: step: 228/466, loss: 0.03933442011475563 2023-01-24 03:06:49.450327: step: 230/466, loss: 0.03705897182226181 2023-01-24 03:06:50.045025: step: 232/466, loss: 0.006504240911453962 2023-01-24 03:06:50.680915: step: 234/466, loss: 0.05261344462633133 2023-01-24 03:06:51.292420: step: 236/466, loss: 0.08687302470207214 2023-01-24 03:06:51.951601: step: 238/466, loss: 0.05586743727326393 2023-01-24 03:06:52.545318: step: 240/466, loss: 0.0024894988164305687 2023-01-24 03:06:53.157766: step: 242/466, loss: 0.3717498481273651 2023-01-24 03:06:53.788799: step: 244/466, loss: 0.05251733958721161 2023-01-24 03:06:54.347098: step: 246/466, loss: 0.006758150178939104 2023-01-24 03:06:54.991172: step: 248/466, loss: 0.10216856747865677 2023-01-24 03:06:55.690102: step: 250/466, loss: 0.009869629517197609 2023-01-24 03:06:56.373821: step: 252/466, loss: 0.05833861976861954 2023-01-24 03:06:56.986482: step: 254/466, loss: 0.01736012101173401 2023-01-24 03:06:57.570913: step: 256/466, loss: 0.022860318422317505 2023-01-24 03:06:58.172263: step: 258/466, loss: 0.009119870141148567 2023-01-24 03:06:58.731543: step: 260/466, loss: 0.015188897959887981 2023-01-24 03:06:59.336918: step: 262/466, loss: 0.3766849935054779 2023-01-24 03:06:59.968095: step: 264/466, loss: 0.08701756596565247 2023-01-24 03:07:00.582843: step: 266/466, loss: 0.011309411376714706 2023-01-24 03:07:01.177351: step: 268/466, loss: 0.0732068195939064 2023-01-24 03:07:01.802201: step: 270/466, loss: 0.01978411339223385 2023-01-24 03:07:02.458577: step: 272/466, loss: 0.1555345356464386 2023-01-24 03:07:03.106140: step: 274/466, loss: 0.3639795780181885 2023-01-24 03:07:03.783512: step: 276/466, loss: 0.15518170595169067 2023-01-24 03:07:04.409202: step: 278/466, loss: 0.027007892727851868 2023-01-24 03:07:05.026223: step: 280/466, loss: 0.09517539292573929 2023-01-24 03:07:05.639640: step: 282/466, loss: 0.03286255896091461 2023-01-24 03:07:06.245684: step: 284/466, loss: 0.0063650342635810375 2023-01-24 03:07:06.898407: step: 286/466, loss: 0.045704491436481476 2023-01-24 03:07:07.503567: step: 288/466, loss: 0.00865288358181715 2023-01-24 03:07:08.148996: step: 290/466, loss: 0.13274767994880676 2023-01-24 03:07:08.733609: step: 292/466, loss: 0.056273847818374634 2023-01-24 03:07:09.302590: step: 294/466, loss: 0.038613658398389816 2023-01-24 03:07:09.965804: step: 296/466, loss: 0.07675481587648392 2023-01-24 03:07:10.602819: step: 298/466, loss: 0.10594279319047928 2023-01-24 03:07:11.252725: step: 300/466, loss: 0.029124725610017776 2023-01-24 03:07:11.885931: step: 302/466, loss: 0.08076826483011246 2023-01-24 03:07:12.482989: step: 304/466, loss: 0.008677887730300426 2023-01-24 03:07:13.125357: step: 306/466, loss: 0.04437677562236786 2023-01-24 03:07:13.722868: step: 308/466, loss: 0.03545172140002251 2023-01-24 03:07:14.340453: step: 310/466, loss: 0.0892721489071846 2023-01-24 03:07:14.946364: step: 312/466, loss: 0.09720361232757568 2023-01-24 03:07:15.565204: step: 314/466, loss: 0.02379939705133438 2023-01-24 03:07:16.170629: step: 316/466, loss: 0.07661573588848114 2023-01-24 03:07:16.796012: step: 318/466, loss: 0.18681415915489197 2023-01-24 03:07:17.439850: step: 320/466, loss: 0.0562223419547081 2023-01-24 03:07:18.011814: step: 322/466, loss: 0.056488998234272 2023-01-24 03:07:18.615225: step: 324/466, loss: 0.016898782923817635 2023-01-24 03:07:19.243749: step: 326/466, loss: 0.015045711770653725 2023-01-24 03:07:19.789361: step: 328/466, loss: 0.012584555894136429 2023-01-24 03:07:20.505074: step: 330/466, loss: 0.027717135846614838 2023-01-24 03:07:21.177182: step: 332/466, loss: 0.033037345856428146 2023-01-24 03:07:21.798609: step: 334/466, loss: 0.06914247572422028 2023-01-24 03:07:22.447998: step: 336/466, loss: 0.06818067282438278 2023-01-24 03:07:23.066375: step: 338/466, loss: 0.041885361075401306 2023-01-24 03:07:23.720167: step: 340/466, loss: 0.019851483404636383 2023-01-24 03:07:24.349428: step: 342/466, loss: 0.01451868750154972 2023-01-24 03:07:24.914697: step: 344/466, loss: 0.015311257913708687 2023-01-24 03:07:25.513649: step: 346/466, loss: 0.04596361145377159 2023-01-24 03:07:26.108428: step: 348/466, loss: 0.007450594566762447 2023-01-24 03:07:26.795836: step: 350/466, loss: 0.003928270190954208 2023-01-24 03:07:27.358085: step: 352/466, loss: 0.035604193806648254 2023-01-24 03:07:28.064288: step: 354/466, loss: 0.023971788585186005 2023-01-24 03:07:28.680435: step: 356/466, loss: 0.3693704307079315 2023-01-24 03:07:29.331930: step: 358/466, loss: 0.031583916395902634 2023-01-24 03:07:29.944379: step: 360/466, loss: 0.05496159940958023 2023-01-24 03:07:30.510952: step: 362/466, loss: 0.06164189800620079 2023-01-24 03:07:31.075748: step: 364/466, loss: 0.3457597494125366 2023-01-24 03:07:31.712540: step: 366/466, loss: 0.10331287235021591 2023-01-24 03:07:32.283783: step: 368/466, loss: 0.08303102850914001 2023-01-24 03:07:32.912883: step: 370/466, loss: 0.040373966097831726 2023-01-24 03:07:33.524169: step: 372/466, loss: 0.4742472767829895 2023-01-24 03:07:34.186670: step: 374/466, loss: 0.04618077352643013 2023-01-24 03:07:34.757812: step: 376/466, loss: 0.014955085702240467 2023-01-24 03:07:35.389347: step: 378/466, loss: 0.020503893494606018 2023-01-24 03:07:35.995395: step: 380/466, loss: 0.0315471813082695 2023-01-24 03:07:36.641265: step: 382/466, loss: 0.0067911832593381405 2023-01-24 03:07:37.258647: step: 384/466, loss: 0.03241651877760887 2023-01-24 03:07:37.810578: step: 386/466, loss: 0.03235870972275734 2023-01-24 03:07:38.472433: step: 388/466, loss: 0.07316027581691742 2023-01-24 03:07:39.125661: step: 390/466, loss: 0.11621598899364471 2023-01-24 03:07:39.742369: step: 392/466, loss: 0.03541046753525734 2023-01-24 03:07:40.359115: step: 394/466, loss: 0.02560034766793251 2023-01-24 03:07:40.957605: step: 396/466, loss: 0.032955411821603775 2023-01-24 03:07:41.553185: step: 398/466, loss: 0.2330137938261032 2023-01-24 03:07:42.201113: step: 400/466, loss: 0.1967945694923401 2023-01-24 03:07:42.852982: step: 402/466, loss: 0.03604523837566376 2023-01-24 03:07:43.386098: step: 404/466, loss: 0.1295190453529358 2023-01-24 03:07:44.011796: step: 406/466, loss: 0.05102947726845741 2023-01-24 03:07:44.742626: step: 408/466, loss: 0.1345944106578827 2023-01-24 03:07:45.437611: step: 410/466, loss: 0.05486585572361946 2023-01-24 03:07:46.062862: step: 412/466, loss: 0.05415716767311096 2023-01-24 03:07:46.768461: step: 414/466, loss: 0.03243786841630936 2023-01-24 03:07:47.356680: step: 416/466, loss: 0.9294023513793945 2023-01-24 03:07:47.968700: step: 418/466, loss: 0.03535958379507065 2023-01-24 03:07:48.604900: step: 420/466, loss: 0.0102820610627532 2023-01-24 03:07:49.204816: step: 422/466, loss: 0.27872762084007263 2023-01-24 03:07:49.831864: step: 424/466, loss: 0.014268352650105953 2023-01-24 03:07:50.523261: step: 426/466, loss: 0.055146560072898865 2023-01-24 03:07:51.101260: step: 428/466, loss: 0.05941939726471901 2023-01-24 03:07:51.726038: step: 430/466, loss: 0.0571625716984272 2023-01-24 03:07:52.315881: step: 432/466, loss: 0.1478642076253891 2023-01-24 03:07:52.889862: step: 434/466, loss: 0.017696944996714592 2023-01-24 03:07:53.532908: step: 436/466, loss: 0.02196957729756832 2023-01-24 03:07:54.086846: step: 438/466, loss: 0.03287990763783455 2023-01-24 03:07:54.644816: step: 440/466, loss: 0.3364804685115814 2023-01-24 03:07:55.280419: step: 442/466, loss: 0.018891671672463417 2023-01-24 03:07:55.945304: step: 444/466, loss: 0.079230397939682 2023-01-24 03:07:56.568063: step: 446/466, loss: 0.14929592609405518 2023-01-24 03:07:57.193905: step: 448/466, loss: 0.0891253799200058 2023-01-24 03:07:57.892192: step: 450/466, loss: 0.008315548300743103 2023-01-24 03:07:58.476407: step: 452/466, loss: 0.1016860380768776 2023-01-24 03:07:59.086250: step: 454/466, loss: 0.07582006603479385 2023-01-24 03:07:59.689423: step: 456/466, loss: 0.009356861934065819 2023-01-24 03:08:00.336828: step: 458/466, loss: 0.059819843620061874 2023-01-24 03:08:00.953732: step: 460/466, loss: 0.12198459357023239 2023-01-24 03:08:01.586456: step: 462/466, loss: 0.01760128140449524 2023-01-24 03:08:02.166959: step: 464/466, loss: 0.08083579689264297 2023-01-24 03:08:02.745394: step: 466/466, loss: 0.05634992569684982 2023-01-24 03:08:03.348502: step: 468/466, loss: 0.010742071084678173 2023-01-24 03:08:03.936414: step: 470/466, loss: 0.038598641753196716 2023-01-24 03:08:04.603528: step: 472/466, loss: 0.08225518465042114 2023-01-24 03:08:05.194527: step: 474/466, loss: 0.066253662109375 2023-01-24 03:08:05.747607: step: 476/466, loss: 0.004400709178298712 2023-01-24 03:08:06.333486: step: 478/466, loss: 0.019654320552945137 2023-01-24 03:08:06.953191: step: 480/466, loss: 0.029162848368287086 2023-01-24 03:08:07.584460: step: 482/466, loss: 0.006798680406063795 2023-01-24 03:08:08.171002: step: 484/466, loss: 0.11523490399122238 2023-01-24 03:08:08.831563: step: 486/466, loss: 0.0820143073797226 2023-01-24 03:08:09.448230: step: 488/466, loss: 0.016324078664183617 2023-01-24 03:08:10.037833: step: 490/466, loss: 0.011203078553080559 2023-01-24 03:08:10.610854: step: 492/466, loss: 0.07054407149553299 2023-01-24 03:08:11.167556: step: 494/466, loss: 0.026088794693350792 2023-01-24 03:08:11.778593: step: 496/466, loss: 0.01417471095919609 2023-01-24 03:08:12.469602: step: 498/466, loss: 0.473919153213501 2023-01-24 03:08:13.091204: step: 500/466, loss: 0.011972896754741669 2023-01-24 03:08:13.803370: step: 502/466, loss: 0.028252488002181053 2023-01-24 03:08:14.458781: step: 504/466, loss: 0.019589383155107498 2023-01-24 03:08:15.141261: step: 506/466, loss: 0.18996228277683258 2023-01-24 03:08:15.804086: step: 508/466, loss: 0.04515201225876808 2023-01-24 03:08:16.386497: step: 510/466, loss: 0.08754381537437439 2023-01-24 03:08:17.030583: step: 512/466, loss: 0.002444115001708269 2023-01-24 03:08:17.720606: step: 514/466, loss: 0.14031344652175903 2023-01-24 03:08:18.357878: step: 516/466, loss: 0.04026828706264496 2023-01-24 03:08:18.969740: step: 518/466, loss: 0.13847778737545013 2023-01-24 03:08:19.563313: step: 520/466, loss: 0.006101786624640226 2023-01-24 03:08:20.169108: step: 522/466, loss: 0.03475275635719299 2023-01-24 03:08:20.744576: step: 524/466, loss: 0.010136638768017292 2023-01-24 03:08:21.397808: step: 526/466, loss: 0.09662459045648575 2023-01-24 03:08:21.989088: step: 528/466, loss: 0.09632852673530579 2023-01-24 03:08:22.565007: step: 530/466, loss: 0.04891626909375191 2023-01-24 03:08:23.163664: step: 532/466, loss: 0.02533416822552681 2023-01-24 03:08:23.842372: step: 534/466, loss: 0.06768819689750671 2023-01-24 03:08:24.434757: step: 536/466, loss: 0.037999678403139114 2023-01-24 03:08:25.038011: step: 538/466, loss: 0.16249126195907593 2023-01-24 03:08:25.730224: step: 540/466, loss: 0.05323609337210655 2023-01-24 03:08:26.393180: step: 542/466, loss: 0.03427635878324509 2023-01-24 03:08:27.008548: step: 544/466, loss: 0.029624156653881073 2023-01-24 03:08:27.715612: step: 546/466, loss: 0.5451293587684631 2023-01-24 03:08:28.311972: step: 548/466, loss: 0.028565261512994766 2023-01-24 03:08:28.905632: step: 550/466, loss: 0.05299180746078491 2023-01-24 03:08:29.519476: step: 552/466, loss: 0.053857531398534775 2023-01-24 03:08:30.148793: step: 554/466, loss: 0.0026278712321072817 2023-01-24 03:08:30.832602: step: 556/466, loss: 0.04956957325339317 2023-01-24 03:08:31.456654: step: 558/466, loss: 0.048391908407211304 2023-01-24 03:08:32.150941: step: 560/466, loss: 0.053652457892894745 2023-01-24 03:08:32.796462: step: 562/466, loss: 0.05441490560770035 2023-01-24 03:08:33.431135: step: 564/466, loss: 0.00899538304656744 2023-01-24 03:08:34.017774: step: 566/466, loss: 0.30511531233787537 2023-01-24 03:08:34.628442: step: 568/466, loss: 0.025552192702889442 2023-01-24 03:08:35.229108: step: 570/466, loss: 0.17114883661270142 2023-01-24 03:08:35.884098: step: 572/466, loss: 0.0558360256254673 2023-01-24 03:08:36.549201: step: 574/466, loss: 0.032410457730293274 2023-01-24 03:08:37.191943: step: 576/466, loss: 0.0480712465941906 2023-01-24 03:08:37.873366: step: 578/466, loss: 0.026203026995062828 2023-01-24 03:08:38.460948: step: 580/466, loss: 0.0036527966149151325 2023-01-24 03:08:39.099894: step: 582/466, loss: 0.006195830646902323 2023-01-24 03:08:39.695892: step: 584/466, loss: 0.10995900630950928 2023-01-24 03:08:40.282932: step: 586/466, loss: 0.02922908402979374 2023-01-24 03:08:40.867066: step: 588/466, loss: 0.039502378553152084 2023-01-24 03:08:41.461154: step: 590/466, loss: 0.05805331841111183 2023-01-24 03:08:42.071285: step: 592/466, loss: 0.026140891015529633 2023-01-24 03:08:42.655905: step: 594/466, loss: 0.049084533005952835 2023-01-24 03:08:43.344276: step: 596/466, loss: 0.09888540953397751 2023-01-24 03:08:43.962455: step: 598/466, loss: 0.016056597232818604 2023-01-24 03:08:44.604573: step: 600/466, loss: 0.03200465440750122 2023-01-24 03:08:45.197648: step: 602/466, loss: 0.03161884844303131 2023-01-24 03:08:45.815368: step: 604/466, loss: 0.1447422206401825 2023-01-24 03:08:46.415032: step: 606/466, loss: 0.027576405555009842 2023-01-24 03:08:47.131034: step: 608/466, loss: 0.05594842508435249 2023-01-24 03:08:47.752878: step: 610/466, loss: 0.012763570062816143 2023-01-24 03:08:48.348834: step: 612/466, loss: 1.0796951055526733 2023-01-24 03:08:48.954872: step: 614/466, loss: 0.03139471262693405 2023-01-24 03:08:49.605711: step: 616/466, loss: 0.07075747102499008 2023-01-24 03:08:50.210056: step: 618/466, loss: 0.03813979774713516 2023-01-24 03:08:50.866498: step: 620/466, loss: 0.08887746185064316 2023-01-24 03:08:51.478934: step: 622/466, loss: 2.5260872840881348 2023-01-24 03:08:52.257719: step: 624/466, loss: 0.01851993054151535 2023-01-24 03:08:53.027248: step: 626/466, loss: 0.05724343657493591 2023-01-24 03:08:53.635238: step: 628/466, loss: 0.012389592826366425 2023-01-24 03:08:54.164112: step: 630/466, loss: 0.015039799734950066 2023-01-24 03:08:54.767393: step: 632/466, loss: 0.03112381137907505 2023-01-24 03:08:55.463509: step: 634/466, loss: 0.016538362950086594 2023-01-24 03:08:56.048148: step: 636/466, loss: 0.01323335338383913 2023-01-24 03:08:56.663764: step: 638/466, loss: 0.033239446580410004 2023-01-24 03:08:57.282121: step: 640/466, loss: 0.0349995419383049 2023-01-24 03:08:58.005833: step: 642/466, loss: 0.12855033576488495 2023-01-24 03:08:58.684640: step: 644/466, loss: 0.04696233570575714 2023-01-24 03:08:59.267000: step: 646/466, loss: 0.009281965903937817 2023-01-24 03:08:59.842055: step: 648/466, loss: 0.000963771715760231 2023-01-24 03:09:00.437473: step: 650/466, loss: 0.07592719793319702 2023-01-24 03:09:01.046429: step: 652/466, loss: 0.08561275899410248 2023-01-24 03:09:01.646126: step: 654/466, loss: 0.0015793698839843273 2023-01-24 03:09:02.270285: step: 656/466, loss: 0.09655793756246567 2023-01-24 03:09:02.878973: step: 658/466, loss: 0.028067471459507942 2023-01-24 03:09:03.440450: step: 660/466, loss: 0.04503018036484718 2023-01-24 03:09:04.075163: step: 662/466, loss: 0.03728168457746506 2023-01-24 03:09:04.722166: step: 664/466, loss: 0.3822230100631714 2023-01-24 03:09:05.382841: step: 666/466, loss: 0.0555257648229599 2023-01-24 03:09:06.128551: step: 668/466, loss: 0.06059182435274124 2023-01-24 03:09:06.769597: step: 670/466, loss: 0.027598911896348 2023-01-24 03:09:07.333686: step: 672/466, loss: 0.10212715715169907 2023-01-24 03:09:07.944675: step: 674/466, loss: 0.22003017365932465 2023-01-24 03:09:08.625260: step: 676/466, loss: 0.0722040981054306 2023-01-24 03:09:09.230843: step: 678/466, loss: 0.1438433974981308 2023-01-24 03:09:09.894708: step: 680/466, loss: 0.04073747992515564 2023-01-24 03:09:10.528167: step: 682/466, loss: 0.03020535409450531 2023-01-24 03:09:11.156965: step: 684/466, loss: 0.007457078900188208 2023-01-24 03:09:11.832321: step: 686/466, loss: 0.06697667390108109 2023-01-24 03:09:12.439070: step: 688/466, loss: 0.031219815835356712 2023-01-24 03:09:13.065802: step: 690/466, loss: 0.09386976063251495 2023-01-24 03:09:13.679920: step: 692/466, loss: 0.060721199959516525 2023-01-24 03:09:14.314406: step: 694/466, loss: 0.036306802183389664 2023-01-24 03:09:14.959229: step: 696/466, loss: 0.15226911008358002 2023-01-24 03:09:15.578523: step: 698/466, loss: 0.052767571061849594 2023-01-24 03:09:16.192598: step: 700/466, loss: 0.14551600813865662 2023-01-24 03:09:16.772133: step: 702/466, loss: 0.07331140339374542 2023-01-24 03:09:17.344734: step: 704/466, loss: 0.09488560259342194 2023-01-24 03:09:18.027483: step: 706/466, loss: 0.07346636801958084 2023-01-24 03:09:18.617402: step: 708/466, loss: 0.02112002670764923 2023-01-24 03:09:19.296738: step: 710/466, loss: 0.1378161758184433 2023-01-24 03:09:19.934439: step: 712/466, loss: 0.022499412298202515 2023-01-24 03:09:20.499149: step: 714/466, loss: 0.013000545091927052 2023-01-24 03:09:21.122315: step: 716/466, loss: 0.030715223401784897 2023-01-24 03:09:21.709936: step: 718/466, loss: 0.03894434869289398 2023-01-24 03:09:22.349272: step: 720/466, loss: 0.031011324375867844 2023-01-24 03:09:23.006252: step: 722/466, loss: 0.028001470491290092 2023-01-24 03:09:23.621599: step: 724/466, loss: 0.060471419245004654 2023-01-24 03:09:24.291305: step: 726/466, loss: 0.0355912409722805 2023-01-24 03:09:24.865878: step: 728/466, loss: 0.014043898321688175 2023-01-24 03:09:25.520353: step: 730/466, loss: 0.03688360005617142 2023-01-24 03:09:26.222870: step: 732/466, loss: 0.13661432266235352 2023-01-24 03:09:26.832686: step: 734/466, loss: 0.12804485857486725 2023-01-24 03:09:27.490619: step: 736/466, loss: 0.01817631907761097 2023-01-24 03:09:28.082105: step: 738/466, loss: 0.09710384160280228 2023-01-24 03:09:28.724580: step: 740/466, loss: 0.0344148613512516 2023-01-24 03:09:29.268035: step: 742/466, loss: 0.01592305302619934 2023-01-24 03:09:29.901703: step: 744/466, loss: 0.042749933898448944 2023-01-24 03:09:30.529721: step: 746/466, loss: 0.04967391863465309 2023-01-24 03:09:31.150788: step: 748/466, loss: 0.2840249836444855 2023-01-24 03:09:31.795423: step: 750/466, loss: 0.007397348526865244 2023-01-24 03:09:32.458534: step: 752/466, loss: 0.15602250397205353 2023-01-24 03:09:33.086575: step: 754/466, loss: 0.04240616410970688 2023-01-24 03:09:33.684577: step: 756/466, loss: 0.008856425061821938 2023-01-24 03:09:34.260686: step: 758/466, loss: 0.015032918192446232 2023-01-24 03:09:34.871542: step: 760/466, loss: 0.0599614754319191 2023-01-24 03:09:35.470458: step: 762/466, loss: 0.09908628463745117 2023-01-24 03:09:36.090247: step: 764/466, loss: 0.20481857657432556 2023-01-24 03:09:36.669093: step: 766/466, loss: 0.03759913519024849 2023-01-24 03:09:37.304994: step: 768/466, loss: 0.019100012257695198 2023-01-24 03:09:37.936691: step: 770/466, loss: 0.047631192952394485 2023-01-24 03:09:38.552228: step: 772/466, loss: 0.013122456148266792 2023-01-24 03:09:39.164211: step: 774/466, loss: 0.05599507689476013 2023-01-24 03:09:39.787559: step: 776/466, loss: 0.04072127491235733 2023-01-24 03:09:40.493137: step: 778/466, loss: 0.41633787751197815 2023-01-24 03:09:41.107950: step: 780/466, loss: 0.3024740517139435 2023-01-24 03:09:41.734229: step: 782/466, loss: 0.08349002152681351 2023-01-24 03:09:42.331316: step: 784/466, loss: 0.0031556785106658936 2023-01-24 03:09:42.959677: step: 786/466, loss: 0.06271356344223022 2023-01-24 03:09:43.623264: step: 788/466, loss: 0.06998440623283386 2023-01-24 03:09:44.197632: step: 790/466, loss: 0.028869180008769035 2023-01-24 03:09:44.861279: step: 792/466, loss: 0.055056989192962646 2023-01-24 03:09:45.581837: step: 794/466, loss: 0.1301547735929489 2023-01-24 03:09:46.153997: step: 796/466, loss: 0.019010493531823158 2023-01-24 03:09:46.759815: step: 798/466, loss: 0.04248470067977905 2023-01-24 03:09:47.399889: step: 800/466, loss: 0.0068243108689785 2023-01-24 03:09:47.993939: step: 802/466, loss: 0.028648989275097847 2023-01-24 03:09:48.606150: step: 804/466, loss: 0.01490448322147131 2023-01-24 03:09:49.286160: step: 806/466, loss: 0.08014669269323349 2023-01-24 03:09:49.907558: step: 808/466, loss: 0.06051415577530861 2023-01-24 03:09:50.529242: step: 810/466, loss: 0.050217900425195694 2023-01-24 03:09:51.124560: step: 812/466, loss: 0.010280923917889595 2023-01-24 03:09:51.750516: step: 814/466, loss: 0.0070090824738144875 2023-01-24 03:09:52.400875: step: 816/466, loss: 0.029951807111501694 2023-01-24 03:09:53.019758: step: 818/466, loss: 0.09266568720340729 2023-01-24 03:09:53.644514: step: 820/466, loss: 0.04831046238541603 2023-01-24 03:09:54.274828: step: 822/466, loss: 0.08939031511545181 2023-01-24 03:09:54.952236: step: 824/466, loss: 0.3320868909358978 2023-01-24 03:09:55.607465: step: 826/466, loss: 0.0803196057677269 2023-01-24 03:09:56.270432: step: 828/466, loss: 0.012898766435682774 2023-01-24 03:09:56.950572: step: 830/466, loss: 0.06513816863298416 2023-01-24 03:09:57.560609: step: 832/466, loss: 0.019175000488758087 2023-01-24 03:09:58.176121: step: 834/466, loss: 0.05179853364825249 2023-01-24 03:09:58.823552: step: 836/466, loss: 0.010971073992550373 2023-01-24 03:09:59.386091: step: 838/466, loss: 0.014240525662899017 2023-01-24 03:09:59.939234: step: 840/466, loss: 0.005053781438618898 2023-01-24 03:10:00.576555: step: 842/466, loss: 0.01923801749944687 2023-01-24 03:10:01.166605: step: 844/466, loss: 0.008006543852388859 2023-01-24 03:10:01.769306: step: 846/466, loss: 0.004818916320800781 2023-01-24 03:10:02.389826: step: 848/466, loss: 0.06736943870782852 2023-01-24 03:10:03.058316: step: 850/466, loss: 0.06640961021184921 2023-01-24 03:10:03.685467: step: 852/466, loss: 0.04604225233197212 2023-01-24 03:10:04.343680: step: 854/466, loss: 0.027721967548131943 2023-01-24 03:10:04.937678: step: 856/466, loss: 0.014347330667078495 2023-01-24 03:10:05.613449: step: 858/466, loss: 0.06664622575044632 2023-01-24 03:10:06.238362: step: 860/466, loss: 0.041405536234378815 2023-01-24 03:10:06.937846: step: 862/466, loss: 0.035035353153944016 2023-01-24 03:10:07.554848: step: 864/466, loss: 0.0188984926789999 2023-01-24 03:10:08.150117: step: 866/466, loss: 0.044538602232933044 2023-01-24 03:10:08.784102: step: 868/466, loss: 0.05718390271067619 2023-01-24 03:10:09.427769: step: 870/466, loss: 0.006046994123607874 2023-01-24 03:10:10.062205: step: 872/466, loss: 0.007537276484072208 2023-01-24 03:10:10.698527: step: 874/466, loss: 0.056369852274656296 2023-01-24 03:10:11.293257: step: 876/466, loss: 0.02191542647778988 2023-01-24 03:10:11.903763: step: 878/466, loss: 0.04729026183485985 2023-01-24 03:10:12.536430: step: 880/466, loss: 0.04058125242590904 2023-01-24 03:10:13.165708: step: 882/466, loss: 0.004619591869413853 2023-01-24 03:10:13.783233: step: 884/466, loss: 0.10923773050308228 2023-01-24 03:10:14.399055: step: 886/466, loss: 0.22798533737659454 2023-01-24 03:10:15.067214: step: 888/466, loss: 0.01707821525633335 2023-01-24 03:10:15.690834: step: 890/466, loss: 0.07693028450012207 2023-01-24 03:10:16.337957: step: 892/466, loss: 0.05920485407114029 2023-01-24 03:10:17.025965: step: 894/466, loss: 0.03817473351955414 2023-01-24 03:10:17.629792: step: 896/466, loss: 0.03102230280637741 2023-01-24 03:10:18.296958: step: 898/466, loss: 0.04021405428647995 2023-01-24 03:10:18.992890: step: 900/466, loss: 0.005757685285061598 2023-01-24 03:10:19.641368: step: 902/466, loss: 0.11351823061704636 2023-01-24 03:10:20.322479: step: 904/466, loss: 0.04362649470567703 2023-01-24 03:10:20.944923: step: 906/466, loss: 0.033703770488500595 2023-01-24 03:10:21.580223: step: 908/466, loss: 0.051264479756355286 2023-01-24 03:10:22.202743: step: 910/466, loss: 0.05457863211631775 2023-01-24 03:10:22.748972: step: 912/466, loss: 0.6209095120429993 2023-01-24 03:10:23.352975: step: 914/466, loss: 0.028589626774191856 2023-01-24 03:10:23.999420: step: 916/466, loss: 0.05819375813007355 2023-01-24 03:10:24.586018: step: 918/466, loss: 0.07678476721048355 2023-01-24 03:10:25.233173: step: 920/466, loss: 0.035971250385046005 2023-01-24 03:10:25.824009: step: 922/466, loss: 0.0950455442070961 2023-01-24 03:10:26.413595: step: 924/466, loss: 0.01911776140332222 2023-01-24 03:10:27.103350: step: 926/466, loss: 0.042217954993247986 2023-01-24 03:10:27.651911: step: 928/466, loss: 0.013078675605356693 2023-01-24 03:10:28.277832: step: 930/466, loss: 0.0354284793138504 2023-01-24 03:10:28.842653: step: 932/466, loss: 0.09293639659881592 ================================================== Loss: 0.091 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33973418056838567, 'r': 0.30814599300130613, 'f1': 0.32317002649092214}, 'combined': 0.23812528267752156, 'epoch': 22} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3311050158222272, 'r': 0.2776624569113357, 'f1': 0.30203792517378253}, 'combined': 0.18912655127704142, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3353774979865096, 'r': 0.32837720865472286, 'f1': 0.33184043904321947}, 'combined': 0.24451400771605644, 'epoch': 22} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3383678162644092, 'r': 0.2919909830162666, 'f1': 0.3134733786689638}, 'combined': 0.1942651924145691, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32100950313731214, 'r': 0.325273386480692, 'f1': 0.3231273792183311}, 'combined': 0.23809385837140187, 'epoch': 22} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3362348294226863, 'r': 0.2827774401016449, 'f1': 0.3071978667247507}, 'combined': 0.20378472347087423, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30514705882352944, 'r': 0.29642857142857143, 'f1': 0.3007246376811594}, 'combined': 0.20048309178743962, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3472222222222222, 'r': 0.2717391304347826, 'f1': 0.3048780487804878}, 'combined': 0.1524390243902439, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.13793103448275862, 'f1': 0.1951219512195122}, 'combined': 0.13008130081300812, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491647627064294, 'r': 0.32200014169890834, 'f1': 0.33503272393943667}, 'combined': 0.2468662176395849, 'epoch': 21} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32932177105318117, 'r': 0.2710662953073116, 'f1': 0.2973677774262388}, 'combined': 0.1862022531547477, 'epoch': 21} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.32857142857142857, 'f1': 0.34328358208955223}, 'combined': 0.2288557213930348, 'epoch': 21} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:13:01.848581: step: 2/466, loss: 0.37519821524620056 2023-01-24 03:13:02.486661: step: 4/466, loss: 0.04106681048870087 2023-01-24 03:13:03.118525: step: 6/466, loss: 0.015577426180243492 2023-01-24 03:13:03.803775: step: 8/466, loss: 0.07751546055078506 2023-01-24 03:13:04.422410: step: 10/466, loss: 0.049695976078510284 2023-01-24 03:13:05.062918: step: 12/466, loss: 0.018751537427306175 2023-01-24 03:13:05.725318: step: 14/466, loss: 0.05414804071187973 2023-01-24 03:13:06.315808: step: 16/466, loss: 0.2026587277650833 2023-01-24 03:13:06.964318: step: 18/466, loss: 0.0006938294973224401 2023-01-24 03:13:07.550702: step: 20/466, loss: 0.15780635178089142 2023-01-24 03:13:08.173783: step: 22/466, loss: 0.052532535046339035 2023-01-24 03:13:08.762358: step: 24/466, loss: 0.033414844423532486 2023-01-24 03:13:09.379422: step: 26/466, loss: 0.11205101013183594 2023-01-24 03:13:09.980838: step: 28/466, loss: 0.030356595292687416 2023-01-24 03:13:10.612115: step: 30/466, loss: 0.26295825839042664 2023-01-24 03:13:11.253100: step: 32/466, loss: 0.04312417656183243 2023-01-24 03:13:11.933947: step: 34/466, loss: 0.26988208293914795 2023-01-24 03:13:12.578057: step: 36/466, loss: 0.10601172596216202 2023-01-24 03:13:13.182316: step: 38/466, loss: 0.046130288392305374 2023-01-24 03:13:13.819110: step: 40/466, loss: 0.04392627626657486 2023-01-24 03:13:14.412928: step: 42/466, loss: 0.026971695944666862 2023-01-24 03:13:15.119130: step: 44/466, loss: 0.006729075685143471 2023-01-24 03:13:15.724846: step: 46/466, loss: 0.007654865272343159 2023-01-24 03:13:16.327221: step: 48/466, loss: 0.04854927211999893 2023-01-24 03:13:16.942580: step: 50/466, loss: 0.0065819318406283855 2023-01-24 03:13:17.527642: step: 52/466, loss: 0.061822593212127686 2023-01-24 03:13:18.101832: step: 54/466, loss: 0.1821979433298111 2023-01-24 03:13:18.753073: step: 56/466, loss: 0.028851419687271118 2023-01-24 03:13:19.366905: step: 58/466, loss: 0.05924450606107712 2023-01-24 03:13:19.993421: step: 60/466, loss: 0.055872879922389984 2023-01-24 03:13:20.616185: step: 62/466, loss: 0.048712216317653656 2023-01-24 03:13:21.183470: step: 64/466, loss: 0.007741500623524189 2023-01-24 03:13:21.779615: step: 66/466, loss: 0.025018535554409027 2023-01-24 03:13:22.378457: step: 68/466, loss: 0.10492322593927383 2023-01-24 03:13:22.948126: step: 70/466, loss: 0.21613642573356628 2023-01-24 03:13:23.644417: step: 72/466, loss: 0.029166538268327713 2023-01-24 03:13:24.243635: step: 74/466, loss: 0.016935860738158226 2023-01-24 03:13:24.888524: step: 76/466, loss: 0.04437658563256264 2023-01-24 03:13:25.588167: step: 78/466, loss: 0.025598792359232903 2023-01-24 03:13:26.255256: step: 80/466, loss: 0.09755132347345352 2023-01-24 03:13:26.871651: step: 82/466, loss: 0.02875247597694397 2023-01-24 03:13:27.446104: step: 84/466, loss: 0.021137593314051628 2023-01-24 03:13:28.041374: step: 86/466, loss: 0.03565197065472603 2023-01-24 03:13:28.750386: step: 88/466, loss: 0.1278599053621292 2023-01-24 03:13:29.384132: step: 90/466, loss: 0.02374625951051712 2023-01-24 03:13:29.993509: step: 92/466, loss: 0.026628822088241577 2023-01-24 03:13:30.637466: step: 94/466, loss: 0.019938306882977486 2023-01-24 03:13:31.284469: step: 96/466, loss: 0.007408472243696451 2023-01-24 03:13:31.929305: step: 98/466, loss: 0.02977028861641884 2023-01-24 03:13:32.577876: step: 100/466, loss: 0.004461156204342842 2023-01-24 03:13:33.245216: step: 102/466, loss: 0.10266581177711487 2023-01-24 03:13:33.839221: step: 104/466, loss: 0.1725711077451706 2023-01-24 03:13:34.443375: step: 106/466, loss: 0.0576268695294857 2023-01-24 03:13:35.071914: step: 108/466, loss: 0.2895353138446808 2023-01-24 03:13:35.648527: step: 110/466, loss: 0.026378493756055832 2023-01-24 03:13:36.268318: step: 112/466, loss: 0.045850664377212524 2023-01-24 03:13:36.974152: step: 114/466, loss: 0.051356494426727295 2023-01-24 03:13:37.570109: step: 116/466, loss: 0.030543841421604156 2023-01-24 03:13:38.184890: step: 118/466, loss: 0.14664997160434723 2023-01-24 03:13:38.831830: step: 120/466, loss: 0.006917271763086319 2023-01-24 03:13:39.488999: step: 122/466, loss: 0.07905556261539459 2023-01-24 03:13:40.120035: step: 124/466, loss: 0.08073782175779343 2023-01-24 03:13:40.712170: step: 126/466, loss: 0.03974530100822449 2023-01-24 03:13:41.325683: step: 128/466, loss: 0.013782481662929058 2023-01-24 03:13:41.978100: step: 130/466, loss: 0.022367702797055244 2023-01-24 03:13:42.557721: step: 132/466, loss: 0.015513092279434204 2023-01-24 03:13:43.177623: step: 134/466, loss: 0.04052796587347984 2023-01-24 03:13:43.862138: step: 136/466, loss: 0.17355284094810486 2023-01-24 03:13:44.490175: step: 138/466, loss: 0.04959815740585327 2023-01-24 03:13:45.080940: step: 140/466, loss: 0.06585326790809631 2023-01-24 03:13:45.682562: step: 142/466, loss: 0.01629745215177536 2023-01-24 03:13:46.325634: step: 144/466, loss: 0.029959937557578087 2023-01-24 03:13:46.930685: step: 146/466, loss: 0.1523573398590088 2023-01-24 03:13:47.608074: step: 148/466, loss: 0.07513323426246643 2023-01-24 03:13:48.166768: step: 150/466, loss: 0.0007255334639921784 2023-01-24 03:13:48.770061: step: 152/466, loss: 0.005202392116189003 2023-01-24 03:13:49.323449: step: 154/466, loss: 0.122276172041893 2023-01-24 03:13:49.851393: step: 156/466, loss: 0.036976199597120285 2023-01-24 03:13:50.467834: step: 158/466, loss: 0.05452893674373627 2023-01-24 03:13:51.099653: step: 160/466, loss: 0.08069787919521332 2023-01-24 03:13:51.659819: step: 162/466, loss: 0.3329411745071411 2023-01-24 03:13:52.278914: step: 164/466, loss: 0.05058800056576729 2023-01-24 03:13:52.933920: step: 166/466, loss: 0.14589150249958038 2023-01-24 03:13:53.603103: step: 168/466, loss: 0.03127635642886162 2023-01-24 03:13:54.260778: step: 170/466, loss: 0.006756802089512348 2023-01-24 03:13:54.816476: step: 172/466, loss: 0.005486187059432268 2023-01-24 03:13:55.436990: step: 174/466, loss: 0.024932648986577988 2023-01-24 03:13:56.040151: step: 176/466, loss: 0.008025525137782097 2023-01-24 03:13:56.651164: step: 178/466, loss: 0.05440286546945572 2023-01-24 03:13:57.226333: step: 180/466, loss: 0.0334198996424675 2023-01-24 03:13:57.859612: step: 182/466, loss: 0.0066267079673707485 2023-01-24 03:13:58.454640: step: 184/466, loss: 0.014033726416528225 2023-01-24 03:13:58.991514: step: 186/466, loss: 0.053195107728242874 2023-01-24 03:13:59.614410: step: 188/466, loss: 0.04765544459223747 2023-01-24 03:14:00.301835: step: 190/466, loss: 0.06990660727024078 2023-01-24 03:14:00.922638: step: 192/466, loss: 0.1254531294107437 2023-01-24 03:14:01.517604: step: 194/466, loss: 0.08785541355609894 2023-01-24 03:14:02.084599: step: 196/466, loss: 0.036871835589408875 2023-01-24 03:14:02.684821: step: 198/466, loss: 0.02268662303686142 2023-01-24 03:14:03.214438: step: 200/466, loss: 0.013539185747504234 2023-01-24 03:14:03.803313: step: 202/466, loss: 0.037191201001405716 2023-01-24 03:14:04.422009: step: 204/466, loss: 10.831572532653809 2023-01-24 03:14:05.053182: step: 206/466, loss: 0.23229451477527618 2023-01-24 03:14:05.657461: step: 208/466, loss: 0.029476964846253395 2023-01-24 03:14:06.301121: step: 210/466, loss: 0.07312937080860138 2023-01-24 03:14:06.876397: step: 212/466, loss: 0.004163483157753944 2023-01-24 03:14:07.516593: step: 214/466, loss: 0.1760241836309433 2023-01-24 03:14:08.157349: step: 216/466, loss: 0.07934526354074478 2023-01-24 03:14:08.831644: step: 218/466, loss: 0.07286553829908371 2023-01-24 03:14:09.413760: step: 220/466, loss: 0.008473890833556652 2023-01-24 03:14:10.033721: step: 222/466, loss: 0.006945169530808926 2023-01-24 03:14:10.632337: step: 224/466, loss: 0.07344811409711838 2023-01-24 03:14:11.253364: step: 226/466, loss: 0.09052682667970657 2023-01-24 03:14:11.932460: step: 228/466, loss: 0.0714201033115387 2023-01-24 03:14:12.528710: step: 230/466, loss: 0.09957793354988098 2023-01-24 03:14:13.142565: step: 232/466, loss: 0.050654392689466476 2023-01-24 03:14:13.717857: step: 234/466, loss: 0.057822201400995255 2023-01-24 03:14:14.316706: step: 236/466, loss: 0.04982614144682884 2023-01-24 03:14:14.890846: step: 238/466, loss: 0.016358835622668266 2023-01-24 03:14:15.535153: step: 240/466, loss: 0.07440569251775742 2023-01-24 03:14:16.103503: step: 242/466, loss: 0.009837755002081394 2023-01-24 03:14:16.755148: step: 244/466, loss: 0.4549831449985504 2023-01-24 03:14:17.419565: step: 246/466, loss: 0.06924940645694733 2023-01-24 03:14:17.959867: step: 248/466, loss: 0.0137610649690032 2023-01-24 03:14:18.553348: step: 250/466, loss: 0.06074821576476097 2023-01-24 03:14:19.128536: step: 252/466, loss: 0.002604379318654537 2023-01-24 03:14:19.719884: step: 254/466, loss: 0.011386437341570854 2023-01-24 03:14:20.272895: step: 256/466, loss: 0.08085306733846664 2023-01-24 03:14:20.898977: step: 258/466, loss: 0.02583848498761654 2023-01-24 03:14:21.547088: step: 260/466, loss: 0.06034468859434128 2023-01-24 03:14:22.174170: step: 262/466, loss: 0.04320215806365013 2023-01-24 03:14:22.777821: step: 264/466, loss: 0.0151560939848423 2023-01-24 03:14:23.382951: step: 266/466, loss: 0.029136555269360542 2023-01-24 03:14:23.972489: step: 268/466, loss: 0.03350706771016121 2023-01-24 03:14:24.585330: step: 270/466, loss: 0.03152776136994362 2023-01-24 03:14:25.168239: step: 272/466, loss: 0.0074542067013680935 2023-01-24 03:14:25.880299: step: 274/466, loss: 0.03669152408838272 2023-01-24 03:14:26.521902: step: 276/466, loss: 0.09760794043540955 2023-01-24 03:14:27.169003: step: 278/466, loss: 0.017591552808880806 2023-01-24 03:14:27.859967: step: 280/466, loss: 0.3281540274620056 2023-01-24 03:14:28.491311: step: 282/466, loss: 0.04125038906931877 2023-01-24 03:14:29.151964: step: 284/466, loss: 0.0051215835846960545 2023-01-24 03:14:29.760487: step: 286/466, loss: 0.044143643230199814 2023-01-24 03:14:30.369839: step: 288/466, loss: 0.02551931142807007 2023-01-24 03:14:30.997461: step: 290/466, loss: 0.04671537131071091 2023-01-24 03:14:31.616128: step: 292/466, loss: 0.029295897111296654 2023-01-24 03:14:32.254968: step: 294/466, loss: 0.04605097696185112 2023-01-24 03:14:32.853861: step: 296/466, loss: 1.6217180490493774 2023-01-24 03:14:33.500303: step: 298/466, loss: 0.04257981479167938 2023-01-24 03:14:34.149032: step: 300/466, loss: 0.1730869859457016 2023-01-24 03:14:34.710422: step: 302/466, loss: 0.02628973498940468 2023-01-24 03:14:35.350128: step: 304/466, loss: 0.031872548162937164 2023-01-24 03:14:35.984472: step: 306/466, loss: 0.026138199493288994 2023-01-24 03:14:36.649276: step: 308/466, loss: 0.022182194516062737 2023-01-24 03:14:37.180482: step: 310/466, loss: 0.002576672239229083 2023-01-24 03:14:37.834399: step: 312/466, loss: 0.01716276817023754 2023-01-24 03:14:38.513203: step: 314/466, loss: 0.017801720649003983 2023-01-24 03:14:39.143125: step: 316/466, loss: 0.0345219187438488 2023-01-24 03:14:39.771569: step: 318/466, loss: 0.07362405210733414 2023-01-24 03:14:40.458895: step: 320/466, loss: 0.20261883735656738 2023-01-24 03:14:41.028371: step: 322/466, loss: 0.028519850224256516 2023-01-24 03:14:41.650405: step: 324/466, loss: 0.009284978732466698 2023-01-24 03:14:42.344266: step: 326/466, loss: 0.01323692873120308 2023-01-24 03:14:42.935020: step: 328/466, loss: 0.08233549445867538 2023-01-24 03:14:43.551024: step: 330/466, loss: 0.08232059329748154 2023-01-24 03:14:44.107696: step: 332/466, loss: 0.030916700139641762 2023-01-24 03:14:44.799479: step: 334/466, loss: 0.12881356477737427 2023-01-24 03:14:45.432623: step: 336/466, loss: 0.02648826315999031 2023-01-24 03:14:46.047672: step: 338/466, loss: 0.04425083100795746 2023-01-24 03:14:46.644524: step: 340/466, loss: 0.026341862976551056 2023-01-24 03:14:47.259786: step: 342/466, loss: 0.5152478814125061 2023-01-24 03:14:47.920815: step: 344/466, loss: 0.04249029606580734 2023-01-24 03:14:48.568142: step: 346/466, loss: 0.012758921831846237 2023-01-24 03:14:49.196990: step: 348/466, loss: 0.007210403680801392 2023-01-24 03:14:49.787024: step: 350/466, loss: 0.008182523772120476 2023-01-24 03:14:50.395464: step: 352/466, loss: 0.03701353818178177 2023-01-24 03:14:51.011363: step: 354/466, loss: 0.018237052485346794 2023-01-24 03:14:51.669527: step: 356/466, loss: 0.02028246596455574 2023-01-24 03:14:52.276544: step: 358/466, loss: 0.029217390343546867 2023-01-24 03:14:52.887695: step: 360/466, loss: 0.03913299739360809 2023-01-24 03:14:53.563061: step: 362/466, loss: 0.08187693357467651 2023-01-24 03:14:54.259485: step: 364/466, loss: 0.02423919178545475 2023-01-24 03:14:54.854026: step: 366/466, loss: 0.03357991576194763 2023-01-24 03:14:55.483140: step: 368/466, loss: 0.1829199641942978 2023-01-24 03:14:56.133785: step: 370/466, loss: 0.05848422273993492 2023-01-24 03:14:56.814914: step: 372/466, loss: 0.024354638531804085 2023-01-24 03:14:57.430211: step: 374/466, loss: 0.026888463646173477 2023-01-24 03:14:58.041174: step: 376/466, loss: 0.10239247232675552 2023-01-24 03:14:58.684641: step: 378/466, loss: 0.03820658475160599 2023-01-24 03:14:59.337774: step: 380/466, loss: 0.009456473402678967 2023-01-24 03:15:00.016753: step: 382/466, loss: 0.025725068524479866 2023-01-24 03:15:00.625117: step: 384/466, loss: 0.021409517154097557 2023-01-24 03:15:01.261305: step: 386/466, loss: 0.04017370939254761 2023-01-24 03:15:01.879520: step: 388/466, loss: 0.11360134929418564 2023-01-24 03:15:02.584007: step: 390/466, loss: 0.37669089436531067 2023-01-24 03:15:03.270155: step: 392/466, loss: 0.01950979046523571 2023-01-24 03:15:03.897735: step: 394/466, loss: 0.6004852056503296 2023-01-24 03:15:04.532268: step: 396/466, loss: 0.03134963661432266 2023-01-24 03:15:05.137687: step: 398/466, loss: 0.06417876482009888 2023-01-24 03:15:05.793548: step: 400/466, loss: 0.039448097348213196 2023-01-24 03:15:06.418737: step: 402/466, loss: 0.1267458200454712 2023-01-24 03:15:07.103290: step: 404/466, loss: 0.151803657412529 2023-01-24 03:15:07.723857: step: 406/466, loss: 0.05627970024943352 2023-01-24 03:15:08.372522: step: 408/466, loss: 0.0677432119846344 2023-01-24 03:15:09.060120: step: 410/466, loss: 0.0646536648273468 2023-01-24 03:15:09.645910: step: 412/466, loss: 0.024825256317853928 2023-01-24 03:15:10.264853: step: 414/466, loss: 0.023025576025247574 2023-01-24 03:15:10.821514: step: 416/466, loss: 0.00923093594610691 2023-01-24 03:15:11.476028: step: 418/466, loss: 0.056759439408779144 2023-01-24 03:15:12.069132: step: 420/466, loss: 0.00021462456788867712 2023-01-24 03:15:12.719720: step: 422/466, loss: 0.01586374267935753 2023-01-24 03:15:13.435560: step: 424/466, loss: 0.06646231561899185 2023-01-24 03:15:14.074183: step: 426/466, loss: 0.0559365339577198 2023-01-24 03:15:14.676732: step: 428/466, loss: 0.005602554883807898 2023-01-24 03:15:15.344346: step: 430/466, loss: 0.006315540987998247 2023-01-24 03:15:16.036786: step: 432/466, loss: 0.016006488353013992 2023-01-24 03:15:16.684091: step: 434/466, loss: 0.12325073778629303 2023-01-24 03:15:17.307270: step: 436/466, loss: 0.0388457253575325 2023-01-24 03:15:17.980374: step: 438/466, loss: 0.03605816140770912 2023-01-24 03:15:18.652914: step: 440/466, loss: 0.010683774948120117 2023-01-24 03:15:19.289549: step: 442/466, loss: 0.03860093653202057 2023-01-24 03:15:19.852192: step: 444/466, loss: 0.001220116508193314 2023-01-24 03:15:20.504225: step: 446/466, loss: 0.021517273038625717 2023-01-24 03:15:21.149093: step: 448/466, loss: 0.19745062291622162 2023-01-24 03:15:21.706388: step: 450/466, loss: 0.0030312389135360718 2023-01-24 03:15:22.376547: step: 452/466, loss: 1.378182053565979 2023-01-24 03:15:23.018064: step: 454/466, loss: 0.027963416650891304 2023-01-24 03:15:23.593101: step: 456/466, loss: 0.010036383755505085 2023-01-24 03:15:24.307245: step: 458/466, loss: 0.10758908838033676 2023-01-24 03:15:24.923694: step: 460/466, loss: 0.09921170771121979 2023-01-24 03:15:25.509934: step: 462/466, loss: 0.013041533529758453 2023-01-24 03:15:26.111395: step: 464/466, loss: 0.07727940380573273 2023-01-24 03:15:26.717899: step: 466/466, loss: 0.06255663186311722 2023-01-24 03:15:27.313259: step: 468/466, loss: 0.012556467205286026 2023-01-24 03:15:27.922284: step: 470/466, loss: 0.0022834744304418564 2023-01-24 03:15:28.588612: step: 472/466, loss: 0.016521736979484558 2023-01-24 03:15:29.250017: step: 474/466, loss: 0.039565179497003555 2023-01-24 03:15:29.886469: step: 476/466, loss: 0.07813805341720581 2023-01-24 03:15:30.478034: step: 478/466, loss: 0.011786703020334244 2023-01-24 03:15:31.097443: step: 480/466, loss: 0.02669355645775795 2023-01-24 03:15:31.698848: step: 482/466, loss: 0.2168130725622177 2023-01-24 03:15:32.316874: step: 484/466, loss: 0.030019201338291168 2023-01-24 03:15:32.879323: step: 486/466, loss: 0.040079835802316666 2023-01-24 03:15:33.482087: step: 488/466, loss: 0.04554833844304085 2023-01-24 03:15:34.099511: step: 490/466, loss: 0.0638691708445549 2023-01-24 03:15:34.698005: step: 492/466, loss: 0.025603465735912323 2023-01-24 03:15:35.377534: step: 494/466, loss: 0.06134669482707977 2023-01-24 03:15:36.064633: step: 496/466, loss: 0.03263123333454132 2023-01-24 03:15:36.691393: step: 498/466, loss: 0.004851019941270351 2023-01-24 03:15:37.339071: step: 500/466, loss: 0.018387088552117348 2023-01-24 03:15:37.994601: step: 502/466, loss: 0.06409452110528946 2023-01-24 03:15:38.683646: step: 504/466, loss: 0.038857266306877136 2023-01-24 03:15:39.252723: step: 506/466, loss: 0.051083534955978394 2023-01-24 03:15:39.840206: step: 508/466, loss: 0.013608801178634167 2023-01-24 03:15:40.469669: step: 510/466, loss: 0.028269052505493164 2023-01-24 03:15:41.058485: step: 512/466, loss: 0.09277983009815216 2023-01-24 03:15:41.604531: step: 514/466, loss: 0.01589254103600979 2023-01-24 03:15:42.307879: step: 516/466, loss: 0.05243847891688347 2023-01-24 03:15:42.898848: step: 518/466, loss: 0.08677437901496887 2023-01-24 03:15:43.524252: step: 520/466, loss: 0.01228324044495821 2023-01-24 03:15:44.161015: step: 522/466, loss: 0.08975356817245483 2023-01-24 03:15:44.840137: step: 524/466, loss: 0.4436494708061218 2023-01-24 03:15:45.448596: step: 526/466, loss: 0.17401957511901855 2023-01-24 03:15:46.074757: step: 528/466, loss: 0.005692127626389265 2023-01-24 03:15:46.663157: step: 530/466, loss: 0.03703230246901512 2023-01-24 03:15:47.250148: step: 532/466, loss: 0.03678973764181137 2023-01-24 03:15:47.844584: step: 534/466, loss: 0.016979314386844635 2023-01-24 03:15:48.458752: step: 536/466, loss: 0.061010606586933136 2023-01-24 03:15:49.030143: step: 538/466, loss: 0.004714383743703365 2023-01-24 03:15:49.825613: step: 540/466, loss: 0.0018672322621569037 2023-01-24 03:15:50.495161: step: 542/466, loss: 0.009897676296532154 2023-01-24 03:15:51.049569: step: 544/466, loss: 0.0023059670347720385 2023-01-24 03:15:51.684969: step: 546/466, loss: 0.03340055048465729 2023-01-24 03:15:52.361677: step: 548/466, loss: 0.058384593576192856 2023-01-24 03:15:53.000920: step: 550/466, loss: 0.0411825068295002 2023-01-24 03:15:53.564159: step: 552/466, loss: 0.15804874897003174 2023-01-24 03:15:54.140599: step: 554/466, loss: 0.012139519676566124 2023-01-24 03:15:54.802463: step: 556/466, loss: 0.2800336480140686 2023-01-24 03:15:55.423490: step: 558/466, loss: 0.038609229028224945 2023-01-24 03:15:56.018511: step: 560/466, loss: 0.05266398936510086 2023-01-24 03:15:56.627465: step: 562/466, loss: 0.011620165780186653 2023-01-24 03:15:57.277419: step: 564/466, loss: 0.07682330906391144 2023-01-24 03:15:57.920490: step: 566/466, loss: 0.010016935877501965 2023-01-24 03:15:58.533608: step: 568/466, loss: 0.017387090250849724 2023-01-24 03:15:59.269440: step: 570/466, loss: 0.2580850422382355 2023-01-24 03:15:59.876665: step: 572/466, loss: 0.0044228374026715755 2023-01-24 03:16:00.512646: step: 574/466, loss: 0.03284839168190956 2023-01-24 03:16:01.121685: step: 576/466, loss: 0.017334576696157455 2023-01-24 03:16:01.778667: step: 578/466, loss: 0.013471720740199089 2023-01-24 03:16:02.380568: step: 580/466, loss: 0.022119492292404175 2023-01-24 03:16:02.916326: step: 582/466, loss: 0.006941903382539749 2023-01-24 03:16:03.567007: step: 584/466, loss: 0.11759119480848312 2023-01-24 03:16:04.207591: step: 586/466, loss: 0.0009638160699978471 2023-01-24 03:16:04.869399: step: 588/466, loss: 0.04819343984127045 2023-01-24 03:16:05.429093: step: 590/466, loss: 0.13402827084064484 2023-01-24 03:16:06.072608: step: 592/466, loss: 0.09971284121274948 2023-01-24 03:16:06.692405: step: 594/466, loss: 0.05317353084683418 2023-01-24 03:16:07.325929: step: 596/466, loss: 0.08394022285938263 2023-01-24 03:16:07.912198: step: 598/466, loss: 0.006317528896033764 2023-01-24 03:16:08.567781: step: 600/466, loss: 0.7620943188667297 2023-01-24 03:16:09.238314: step: 602/466, loss: 0.17191177606582642 2023-01-24 03:16:10.004673: step: 604/466, loss: 0.02348541095852852 2023-01-24 03:16:10.617166: step: 606/466, loss: 14.203178405761719 2023-01-24 03:16:11.292542: step: 608/466, loss: 0.03364052623510361 2023-01-24 03:16:11.905595: step: 610/466, loss: 1.0802329778671265 2023-01-24 03:16:12.536984: step: 612/466, loss: 0.00660161767154932 2023-01-24 03:16:13.093632: step: 614/466, loss: 0.07215036451816559 2023-01-24 03:16:13.664483: step: 616/466, loss: 0.03529845178127289 2023-01-24 03:16:14.240749: step: 618/466, loss: 0.004194928798824549 2023-01-24 03:16:14.809100: step: 620/466, loss: 0.017437612637877464 2023-01-24 03:16:15.431899: step: 622/466, loss: 0.010304673574864864 2023-01-24 03:16:16.165182: step: 624/466, loss: 0.010235766880214214 2023-01-24 03:16:16.838476: step: 626/466, loss: 0.07017090171575546 2023-01-24 03:16:17.448102: step: 628/466, loss: 0.006158484611660242 2023-01-24 03:16:18.051076: step: 630/466, loss: 0.626251220703125 2023-01-24 03:16:18.632731: step: 632/466, loss: 0.09788559377193451 2023-01-24 03:16:19.314187: step: 634/466, loss: 0.032463692128658295 2023-01-24 03:16:19.959103: step: 636/466, loss: 0.012290447019040585 2023-01-24 03:16:20.592451: step: 638/466, loss: 2.016979932785034 2023-01-24 03:16:21.217591: step: 640/466, loss: 0.10255640745162964 2023-01-24 03:16:21.815613: step: 642/466, loss: 0.0026204304303973913 2023-01-24 03:16:22.338597: step: 644/466, loss: 0.018390139564871788 2023-01-24 03:16:22.946580: step: 646/466, loss: 0.009616916999220848 2023-01-24 03:16:23.560750: step: 648/466, loss: 0.023441944271326065 2023-01-24 03:16:24.134094: step: 650/466, loss: 0.04400390759110451 2023-01-24 03:16:24.787661: step: 652/466, loss: 0.03810415789484978 2023-01-24 03:16:25.445850: step: 654/466, loss: 0.08098503947257996 2023-01-24 03:16:26.040362: step: 656/466, loss: 0.008242843672633171 2023-01-24 03:16:26.693734: step: 658/466, loss: 0.16538627445697784 2023-01-24 03:16:27.342989: step: 660/466, loss: 0.0010274339001625776 2023-01-24 03:16:27.941922: step: 662/466, loss: 0.05155831202864647 2023-01-24 03:16:28.555481: step: 664/466, loss: 0.03093663789331913 2023-01-24 03:16:29.173741: step: 666/466, loss: 0.016186198219656944 2023-01-24 03:16:29.810570: step: 668/466, loss: 0.08217496424913406 2023-01-24 03:16:30.377487: step: 670/466, loss: 0.09058801829814911 2023-01-24 03:16:30.992973: step: 672/466, loss: 0.05601709708571434 2023-01-24 03:16:31.594319: step: 674/466, loss: 0.007798232138156891 2023-01-24 03:16:32.307281: step: 676/466, loss: 0.02868696302175522 2023-01-24 03:16:32.941098: step: 678/466, loss: 0.04802197962999344 2023-01-24 03:16:33.611240: step: 680/466, loss: 0.01739462837576866 2023-01-24 03:16:34.255884: step: 682/466, loss: 0.0069674295373260975 2023-01-24 03:16:34.933145: step: 684/466, loss: 0.0967487096786499 2023-01-24 03:16:35.538722: step: 686/466, loss: 0.08254416286945343 2023-01-24 03:16:36.144952: step: 688/466, loss: 0.033013731241226196 2023-01-24 03:16:36.799363: step: 690/466, loss: 0.040913742035627365 2023-01-24 03:16:37.323160: step: 692/466, loss: 0.010079664178192616 2023-01-24 03:16:37.895356: step: 694/466, loss: 0.05389615520834923 2023-01-24 03:16:38.494917: step: 696/466, loss: 0.012297890149056911 2023-01-24 03:16:39.050704: step: 698/466, loss: 0.005239218007773161 2023-01-24 03:16:39.712645: step: 700/466, loss: 1.0167995691299438 2023-01-24 03:16:40.363433: step: 702/466, loss: 0.10525007545948029 2023-01-24 03:16:40.922457: step: 704/466, loss: 0.027654528617858887 2023-01-24 03:16:41.602213: step: 706/466, loss: 0.04541835933923721 2023-01-24 03:16:42.266755: step: 708/466, loss: 0.01958831585943699 2023-01-24 03:16:42.946355: step: 710/466, loss: 0.11447075754404068 2023-01-24 03:16:43.567442: step: 712/466, loss: 0.0112985298037529 2023-01-24 03:16:44.164818: step: 714/466, loss: 0.016465893015265465 2023-01-24 03:16:44.768657: step: 716/466, loss: 0.029620099812746048 2023-01-24 03:16:45.361059: step: 718/466, loss: 0.004684635903686285 2023-01-24 03:16:45.952891: step: 720/466, loss: 0.01591295748949051 2023-01-24 03:16:46.611678: step: 722/466, loss: 0.023893725126981735 2023-01-24 03:16:47.212607: step: 724/466, loss: 0.5566367506980896 2023-01-24 03:16:47.819667: step: 726/466, loss: 0.04075148329138756 2023-01-24 03:16:48.501435: step: 728/466, loss: 0.01116813812404871 2023-01-24 03:16:49.116610: step: 730/466, loss: 0.17533671855926514 2023-01-24 03:16:49.768008: step: 732/466, loss: 0.05816694349050522 2023-01-24 03:16:50.400602: step: 734/466, loss: 0.033278536051511765 2023-01-24 03:16:50.965586: step: 736/466, loss: 0.04196491837501526 2023-01-24 03:16:51.567514: step: 738/466, loss: 0.08225072175264359 2023-01-24 03:16:52.214698: step: 740/466, loss: 0.10632101446390152 2023-01-24 03:16:52.778557: step: 742/466, loss: 0.004922711756080389 2023-01-24 03:16:53.362922: step: 744/466, loss: 0.017099060118198395 2023-01-24 03:16:53.966233: step: 746/466, loss: 0.06865949928760529 2023-01-24 03:16:54.587971: step: 748/466, loss: 0.13417190313339233 2023-01-24 03:16:55.238417: step: 750/466, loss: 0.03777005895972252 2023-01-24 03:16:55.893141: step: 752/466, loss: 0.05155256763100624 2023-01-24 03:16:56.496304: step: 754/466, loss: 0.016800545156002045 2023-01-24 03:16:57.170844: step: 756/466, loss: 0.05773625150322914 2023-01-24 03:16:57.826055: step: 758/466, loss: 0.026250790804624557 2023-01-24 03:16:58.492055: step: 760/466, loss: 0.017661169171333313 2023-01-24 03:16:59.096123: step: 762/466, loss: 0.02000456675887108 2023-01-24 03:16:59.733663: step: 764/466, loss: 0.0435657873749733 2023-01-24 03:17:00.438864: step: 766/466, loss: 0.015171305276453495 2023-01-24 03:17:01.018597: step: 768/466, loss: 0.061575550585985184 2023-01-24 03:17:01.645288: step: 770/466, loss: 0.014146787114441395 2023-01-24 03:17:02.225090: step: 772/466, loss: 0.04964570701122284 2023-01-24 03:17:02.786695: step: 774/466, loss: 0.025675414130091667 2023-01-24 03:17:03.409483: step: 776/466, loss: 0.06615062803030014 2023-01-24 03:17:04.050734: step: 778/466, loss: 0.06605000793933868 2023-01-24 03:17:04.613927: step: 780/466, loss: 0.008641757071018219 2023-01-24 03:17:05.204616: step: 782/466, loss: 0.007029538508504629 2023-01-24 03:17:05.781340: step: 784/466, loss: 0.04293414577841759 2023-01-24 03:17:06.347860: step: 786/466, loss: 0.011798088438808918 2023-01-24 03:17:06.932809: step: 788/466, loss: 0.05605228990316391 2023-01-24 03:17:07.515906: step: 790/466, loss: 0.0408843532204628 2023-01-24 03:17:08.144935: step: 792/466, loss: 0.06712520122528076 2023-01-24 03:17:08.698867: step: 794/466, loss: 0.014473170042037964 2023-01-24 03:17:09.287375: step: 796/466, loss: 0.00806282926350832 2023-01-24 03:17:09.930193: step: 798/466, loss: 0.04116816446185112 2023-01-24 03:17:10.553381: step: 800/466, loss: 0.023592161014676094 2023-01-24 03:17:11.164822: step: 802/466, loss: 0.055174462497234344 2023-01-24 03:17:11.833473: step: 804/466, loss: 0.08948253095149994 2023-01-24 03:17:12.448417: step: 806/466, loss: 0.034423716366291046 2023-01-24 03:17:13.048608: step: 808/466, loss: 0.04152939096093178 2023-01-24 03:17:13.654480: step: 810/466, loss: 0.25082412362098694 2023-01-24 03:17:14.264845: step: 812/466, loss: 0.01173321157693863 2023-01-24 03:17:14.812836: step: 814/466, loss: 0.011800372041761875 2023-01-24 03:17:15.451218: step: 816/466, loss: 0.06533826142549515 2023-01-24 03:17:16.069969: step: 818/466, loss: 0.007304074242711067 2023-01-24 03:17:16.752191: step: 820/466, loss: 0.04298927262425423 2023-01-24 03:17:17.330100: step: 822/466, loss: 0.010678119026124477 2023-01-24 03:17:17.945326: step: 824/466, loss: 0.11303430050611496 2023-01-24 03:17:18.592577: step: 826/466, loss: 0.06516654789447784 2023-01-24 03:17:19.189206: step: 828/466, loss: 0.03973430395126343 2023-01-24 03:17:19.870725: step: 830/466, loss: 0.050120484083890915 2023-01-24 03:17:20.458240: step: 832/466, loss: 0.058797042816877365 2023-01-24 03:17:21.038430: step: 834/466, loss: 0.11180714517831802 2023-01-24 03:17:21.718954: step: 836/466, loss: 0.044810060411691666 2023-01-24 03:17:22.313647: step: 838/466, loss: 0.42404794692993164 2023-01-24 03:17:23.021250: step: 840/466, loss: 0.046611882746219635 2023-01-24 03:17:23.651045: step: 842/466, loss: 0.015883365646004677 2023-01-24 03:17:24.294369: step: 844/466, loss: 0.03104478307068348 2023-01-24 03:17:24.889869: step: 846/466, loss: 0.010571775957942009 2023-01-24 03:17:25.458897: step: 848/466, loss: 0.00489027751609683 2023-01-24 03:17:26.045935: step: 850/466, loss: 0.006423996761441231 2023-01-24 03:17:26.679988: step: 852/466, loss: 0.10496664047241211 2023-01-24 03:17:27.275370: step: 854/466, loss: 0.0379096120595932 2023-01-24 03:17:27.848441: step: 856/466, loss: 0.0010135396150872111 2023-01-24 03:17:28.671690: step: 858/466, loss: 0.15507668256759644 2023-01-24 03:17:29.320905: step: 860/466, loss: 0.041714731603860855 2023-01-24 03:17:29.956304: step: 862/466, loss: 0.04817929491400719 2023-01-24 03:17:30.511027: step: 864/466, loss: 0.003338088747113943 2023-01-24 03:17:31.141141: step: 866/466, loss: 0.09543223679065704 2023-01-24 03:17:31.713147: step: 868/466, loss: 0.07428394258022308 2023-01-24 03:17:32.322415: step: 870/466, loss: 0.04204535484313965 2023-01-24 03:17:32.908800: step: 872/466, loss: 0.10000576078891754 2023-01-24 03:17:33.518725: step: 874/466, loss: 0.03834407776594162 2023-01-24 03:17:34.180215: step: 876/466, loss: 0.046376850455999374 2023-01-24 03:17:34.771762: step: 878/466, loss: 0.027334975078701973 2023-01-24 03:17:35.404916: step: 880/466, loss: 0.2535788118839264 2023-01-24 03:17:36.040104: step: 882/466, loss: 0.023781809955835342 2023-01-24 03:17:36.631369: step: 884/466, loss: 0.464969277381897 2023-01-24 03:17:37.300937: step: 886/466, loss: 0.030364157631993294 2023-01-24 03:17:37.909959: step: 888/466, loss: 0.10395995527505875 2023-01-24 03:17:38.572680: step: 890/466, loss: 0.0058978162705898285 2023-01-24 03:17:39.236701: step: 892/466, loss: 0.016039518639445305 2023-01-24 03:17:39.863479: step: 894/466, loss: 0.40252816677093506 2023-01-24 03:17:40.471759: step: 896/466, loss: 0.014576230198144913 2023-01-24 03:17:41.127221: step: 898/466, loss: 0.057729076594114304 2023-01-24 03:17:41.714332: step: 900/466, loss: 0.023492462933063507 2023-01-24 03:17:42.431235: step: 902/466, loss: 0.09623562544584274 2023-01-24 03:17:43.031291: step: 904/466, loss: 0.018250683322548866 2023-01-24 03:17:43.661940: step: 906/466, loss: 0.04059088975191116 2023-01-24 03:17:44.307458: step: 908/466, loss: 0.07524502277374268 2023-01-24 03:17:45.003101: step: 910/466, loss: 0.04556982219219208 2023-01-24 03:17:45.581027: step: 912/466, loss: 0.012496293522417545 2023-01-24 03:17:46.248670: step: 914/466, loss: 1.0032553672790527 2023-01-24 03:17:46.844136: step: 916/466, loss: 0.20766794681549072 2023-01-24 03:17:47.469007: step: 918/466, loss: 0.550917387008667 2023-01-24 03:17:48.052469: step: 920/466, loss: 0.030320841819047928 2023-01-24 03:17:48.727634: step: 922/466, loss: 0.08618208765983582 2023-01-24 03:17:49.326495: step: 924/466, loss: 0.5151544213294983 2023-01-24 03:17:49.934067: step: 926/466, loss: 0.045670926570892334 2023-01-24 03:17:50.605529: step: 928/466, loss: 0.016454020515084267 2023-01-24 03:17:51.354254: step: 930/466, loss: 0.06444823741912842 2023-01-24 03:17:51.956779: step: 932/466, loss: 0.11253959685564041 ================================================== Loss: 0.137 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3590219307017715, 'r': 0.328365409104846, 'f1': 0.3430100507398491}, 'combined': 0.252744247913573, 'epoch': 23} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3239267684184419, 'r': 0.2806585719148955, 'f1': 0.3007443884736018}, 'combined': 0.18831657969842358, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34398239390356067, 'r': 0.3368024957385907, 'f1': 0.3403545834213563}, 'combined': 0.2507875877841573, 'epoch': 23} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.32425918914562474, 'r': 0.2917747396915053, 'f1': 0.307160486060036}, 'combined': 0.190352977276642, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31027917179560016, 'r': 0.31616682211430225, 'f1': 0.313195329425258}, 'combined': 0.23077550589229537, 'epoch': 23} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3248785033339627, 'r': 0.2875732864050176, 'f1': 0.3050897407807669}, 'combined': 0.20238626368625132, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3142857142857143, 'r': 0.3142857142857143, 'f1': 0.3142857142857143}, 'combined': 0.2095238095238095, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44047619047619047, 'r': 0.40217391304347827, 'f1': 0.4204545454545454}, 'combined': 0.2102272727272727, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.1724137931034483, 'f1': 0.22727272727272724}, 'combined': 0.1515151515151515, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491647627064294, 'r': 0.32200014169890834, 'f1': 0.33503272393943667}, 'combined': 0.2468662176395849, 'epoch': 21} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32932177105318117, 'r': 0.2710662953073116, 'f1': 0.2973677774262388}, 'combined': 0.1862022531547477, 'epoch': 21} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.32857142857142857, 'f1': 0.34328358208955223}, 'combined': 0.2288557213930348, 'epoch': 21} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:20:25.336541: step: 2/466, loss: 0.01775951124727726 2023-01-24 03:20:25.969350: step: 4/466, loss: 0.06998228281736374 2023-01-24 03:20:26.579866: step: 6/466, loss: 0.023496080189943314 2023-01-24 03:20:27.219724: step: 8/466, loss: 0.03171665966510773 2023-01-24 03:20:27.820235: step: 10/466, loss: 0.21160852909088135 2023-01-24 03:20:28.463939: step: 12/466, loss: 0.011140208691358566 2023-01-24 03:20:29.115844: step: 14/466, loss: 0.0045223054476082325 2023-01-24 03:20:29.733070: step: 16/466, loss: 0.02822500467300415 2023-01-24 03:20:30.337703: step: 18/466, loss: 0.007921488024294376 2023-01-24 03:20:30.972544: step: 20/466, loss: 0.07815907150506973 2023-01-24 03:20:31.604526: step: 22/466, loss: 0.08718613535165787 2023-01-24 03:20:32.260332: step: 24/466, loss: 0.08531629294157028 2023-01-24 03:20:32.846436: step: 26/466, loss: 0.01115855947136879 2023-01-24 03:20:33.530353: step: 28/466, loss: 0.3936383128166199 2023-01-24 03:20:34.155406: step: 30/466, loss: 0.04807283356785774 2023-01-24 03:20:34.773216: step: 32/466, loss: 0.0031118232291191816 2023-01-24 03:20:35.426977: step: 34/466, loss: 0.04471299797296524 2023-01-24 03:20:36.116562: step: 36/466, loss: 0.09689126163721085 2023-01-24 03:20:36.807971: step: 38/466, loss: 0.004671262577176094 2023-01-24 03:20:37.441141: step: 40/466, loss: 0.11291002482175827 2023-01-24 03:20:38.051350: step: 42/466, loss: 0.02706337347626686 2023-01-24 03:20:38.634838: step: 44/466, loss: 0.053012263029813766 2023-01-24 03:20:39.260294: step: 46/466, loss: 0.033150896430015564 2023-01-24 03:20:39.822056: step: 48/466, loss: 0.025658225640654564 2023-01-24 03:20:40.449853: step: 50/466, loss: 0.06108486279845238 2023-01-24 03:20:41.095375: step: 52/466, loss: 0.12495020031929016 2023-01-24 03:20:41.724655: step: 54/466, loss: 0.05012280493974686 2023-01-24 03:20:42.426809: step: 56/466, loss: 0.10500068217515945 2023-01-24 03:20:43.010190: step: 58/466, loss: 0.046825896948575974 2023-01-24 03:20:43.695781: step: 60/466, loss: 0.027875442057847977 2023-01-24 03:20:44.380636: step: 62/466, loss: 0.053261008113622665 2023-01-24 03:20:45.065346: step: 64/466, loss: 0.10042470693588257 2023-01-24 03:20:45.682660: step: 66/466, loss: 1.264203429222107 2023-01-24 03:20:46.363793: step: 68/466, loss: 0.024331819266080856 2023-01-24 03:20:47.004437: step: 70/466, loss: 0.015103347599506378 2023-01-24 03:20:47.575889: step: 72/466, loss: 0.05606008321046829 2023-01-24 03:20:48.154067: step: 74/466, loss: 0.004255786072462797 2023-01-24 03:20:48.700663: step: 76/466, loss: 0.015752147883176804 2023-01-24 03:20:49.296545: step: 78/466, loss: 0.01863991469144821 2023-01-24 03:20:49.967275: step: 80/466, loss: 0.02777590975165367 2023-01-24 03:20:50.565464: step: 82/466, loss: 0.019921375438570976 2023-01-24 03:20:51.174184: step: 84/466, loss: 0.004241253715008497 2023-01-24 03:20:51.777767: step: 86/466, loss: 0.1516450047492981 2023-01-24 03:20:52.427892: step: 88/466, loss: 0.03991668298840523 2023-01-24 03:20:53.085830: step: 90/466, loss: 0.05472749099135399 2023-01-24 03:20:53.747181: step: 92/466, loss: 0.00998813845217228 2023-01-24 03:20:54.376248: step: 94/466, loss: 0.16966433823108673 2023-01-24 03:20:54.958333: step: 96/466, loss: 0.03190416842699051 2023-01-24 03:20:55.604655: step: 98/466, loss: 0.06969668716192245 2023-01-24 03:20:56.274370: step: 100/466, loss: 0.010890313424170017 2023-01-24 03:20:56.891890: step: 102/466, loss: 0.02054757997393608 2023-01-24 03:20:57.555333: step: 104/466, loss: 0.026327362284064293 2023-01-24 03:20:58.119945: step: 106/466, loss: 0.0259767584502697 2023-01-24 03:20:58.754406: step: 108/466, loss: 0.01270345039665699 2023-01-24 03:20:59.380547: step: 110/466, loss: 0.012134579010307789 2023-01-24 03:21:00.018593: step: 112/466, loss: 0.23109854757785797 2023-01-24 03:21:00.669531: step: 114/466, loss: 0.05138760060071945 2023-01-24 03:21:01.268820: step: 116/466, loss: 0.03566274419426918 2023-01-24 03:21:02.053276: step: 118/466, loss: 0.05119818449020386 2023-01-24 03:21:02.672354: step: 120/466, loss: 0.011486800387501717 2023-01-24 03:21:03.333178: step: 122/466, loss: 0.1277124136686325 2023-01-24 03:21:03.903070: step: 124/466, loss: 0.07726099342107773 2023-01-24 03:21:04.495018: step: 126/466, loss: 0.016203083097934723 2023-01-24 03:21:05.080416: step: 128/466, loss: 0.044843513518571854 2023-01-24 03:21:05.677901: step: 130/466, loss: 0.03943625092506409 2023-01-24 03:21:06.366650: step: 132/466, loss: 0.7324963808059692 2023-01-24 03:21:07.169868: step: 134/466, loss: 0.2553439140319824 2023-01-24 03:21:07.765788: step: 136/466, loss: 0.012169379740953445 2023-01-24 03:21:08.401639: step: 138/466, loss: 0.03544780984520912 2023-01-24 03:21:09.007140: step: 140/466, loss: 0.03327156603336334 2023-01-24 03:21:09.665246: step: 142/466, loss: 0.2415328174829483 2023-01-24 03:21:10.219466: step: 144/466, loss: 0.013524964451789856 2023-01-24 03:21:10.857115: step: 146/466, loss: 0.052955932915210724 2023-01-24 03:21:11.419369: step: 148/466, loss: 0.0393313430249691 2023-01-24 03:21:12.129945: step: 150/466, loss: 0.006056379992514849 2023-01-24 03:21:12.776468: step: 152/466, loss: 0.028438430279493332 2023-01-24 03:21:13.395509: step: 154/466, loss: 0.11125431954860687 2023-01-24 03:21:14.028936: step: 156/466, loss: 0.022854281589388847 2023-01-24 03:21:14.617579: step: 158/466, loss: 0.3715655207633972 2023-01-24 03:21:15.235692: step: 160/466, loss: 0.020477047190070152 2023-01-24 03:21:15.847547: step: 162/466, loss: 0.01693480834364891 2023-01-24 03:21:16.458437: step: 164/466, loss: 0.06682530045509338 2023-01-24 03:21:17.065785: step: 166/466, loss: 0.042962782084941864 2023-01-24 03:21:17.665746: step: 168/466, loss: 0.05419673025608063 2023-01-24 03:21:18.310159: step: 170/466, loss: 0.027051566168665886 2023-01-24 03:21:18.933351: step: 172/466, loss: 0.01761796325445175 2023-01-24 03:21:19.519257: step: 174/466, loss: 0.03369941934943199 2023-01-24 03:21:20.086999: step: 176/466, loss: 0.02449605241417885 2023-01-24 03:21:20.700090: step: 178/466, loss: 0.02717144787311554 2023-01-24 03:21:21.311131: step: 180/466, loss: 0.03494774550199509 2023-01-24 03:21:21.913261: step: 182/466, loss: 0.03236817568540573 2023-01-24 03:21:22.529248: step: 184/466, loss: 0.005535303149372339 2023-01-24 03:21:23.197555: step: 186/466, loss: 0.05312425643205643 2023-01-24 03:21:23.878614: step: 188/466, loss: 0.016179973259568214 2023-01-24 03:21:24.484614: step: 190/466, loss: 0.04713601991534233 2023-01-24 03:21:25.083837: step: 192/466, loss: 0.10981413722038269 2023-01-24 03:21:25.738471: step: 194/466, loss: 0.01001281850039959 2023-01-24 03:21:26.329346: step: 196/466, loss: 0.000854157842695713 2023-01-24 03:21:26.969542: step: 198/466, loss: 0.02450506016612053 2023-01-24 03:21:27.611574: step: 200/466, loss: 0.0063180020079016685 2023-01-24 03:21:28.213416: step: 202/466, loss: 0.017017902806401253 2023-01-24 03:21:28.901915: step: 204/466, loss: 0.04628724977374077 2023-01-24 03:21:29.521543: step: 206/466, loss: 0.02292279526591301 2023-01-24 03:21:30.161167: step: 208/466, loss: 0.011091941967606544 2023-01-24 03:21:30.791346: step: 210/466, loss: 0.027954787015914917 2023-01-24 03:21:31.429692: step: 212/466, loss: 0.003656168933957815 2023-01-24 03:21:31.943851: step: 214/466, loss: 0.044199857860803604 2023-01-24 03:21:32.614851: step: 216/466, loss: 0.056119684129953384 2023-01-24 03:21:33.235595: step: 218/466, loss: 0.030617576092481613 2023-01-24 03:21:33.803763: step: 220/466, loss: 0.05026071518659592 2023-01-24 03:21:34.432949: step: 222/466, loss: 0.0207377802580595 2023-01-24 03:21:35.027908: step: 224/466, loss: 0.06010458990931511 2023-01-24 03:21:35.605123: step: 226/466, loss: 0.04278656095266342 2023-01-24 03:21:36.224598: step: 228/466, loss: 0.034164175391197205 2023-01-24 03:21:36.859164: step: 230/466, loss: 0.08308444172143936 2023-01-24 03:21:37.520319: step: 232/466, loss: 0.0037873126566410065 2023-01-24 03:21:38.146983: step: 234/466, loss: 0.1749260425567627 2023-01-24 03:21:38.757300: step: 236/466, loss: 0.03200647607445717 2023-01-24 03:21:39.344976: step: 238/466, loss: 0.2304929941892624 2023-01-24 03:21:39.949050: step: 240/466, loss: 0.024186624214053154 2023-01-24 03:21:40.558047: step: 242/466, loss: 0.006505788769572973 2023-01-24 03:21:41.147716: step: 244/466, loss: 5.718115329742432 2023-01-24 03:21:41.762384: step: 246/466, loss: 0.0025009613018482924 2023-01-24 03:21:42.403435: step: 248/466, loss: 0.042459696531295776 2023-01-24 03:21:43.077484: step: 250/466, loss: 0.07881811261177063 2023-01-24 03:21:43.726848: step: 252/466, loss: 0.02184082195162773 2023-01-24 03:21:44.352324: step: 254/466, loss: 0.01406814344227314 2023-01-24 03:21:45.002595: step: 256/466, loss: 0.021873051300644875 2023-01-24 03:21:45.582714: step: 258/466, loss: 0.024203473702073097 2023-01-24 03:21:46.196687: step: 260/466, loss: 0.046456798911094666 2023-01-24 03:21:46.881294: step: 262/466, loss: 0.006486671045422554 2023-01-24 03:21:47.438293: step: 264/466, loss: 0.08957996964454651 2023-01-24 03:21:48.134199: step: 266/466, loss: 0.05139755457639694 2023-01-24 03:21:48.771922: step: 268/466, loss: 0.02413560450077057 2023-01-24 03:21:49.398076: step: 270/466, loss: 0.06363941729068756 2023-01-24 03:21:50.057654: step: 272/466, loss: 0.5705989003181458 2023-01-24 03:21:50.696641: step: 274/466, loss: 0.03353706747293472 2023-01-24 03:21:51.275425: step: 276/466, loss: 0.0007393390405923128 2023-01-24 03:21:51.922562: step: 278/466, loss: 0.006027671508491039 2023-01-24 03:21:52.588173: step: 280/466, loss: 0.012460839934647083 2023-01-24 03:21:53.224724: step: 282/466, loss: 0.029581815004348755 2023-01-24 03:21:53.869827: step: 284/466, loss: 0.09199880063533783 2023-01-24 03:21:54.456234: step: 286/466, loss: 0.02445092611014843 2023-01-24 03:21:55.057008: step: 288/466, loss: 0.08971263468265533 2023-01-24 03:21:55.636399: step: 290/466, loss: 0.01842050813138485 2023-01-24 03:21:56.183599: step: 292/466, loss: 0.022412758320569992 2023-01-24 03:21:56.844991: step: 294/466, loss: 0.8509193658828735 2023-01-24 03:21:57.483557: step: 296/466, loss: 0.18079602718353271 2023-01-24 03:21:58.098330: step: 298/466, loss: 0.09790819883346558 2023-01-24 03:21:58.655014: step: 300/466, loss: 0.02296244353055954 2023-01-24 03:21:59.259548: step: 302/466, loss: 0.007325155194848776 2023-01-24 03:21:59.851299: step: 304/466, loss: 0.037404581904411316 2023-01-24 03:22:00.487812: step: 306/466, loss: 0.02718578651547432 2023-01-24 03:22:01.170358: step: 308/466, loss: 0.03230520710349083 2023-01-24 03:22:01.815930: step: 310/466, loss: 0.028744935989379883 2023-01-24 03:22:02.480344: step: 312/466, loss: 0.04959486797451973 2023-01-24 03:22:03.115146: step: 314/466, loss: 0.03979591652750969 2023-01-24 03:22:03.785667: step: 316/466, loss: 0.01729343831539154 2023-01-24 03:22:04.361221: step: 318/466, loss: 0.007263426668941975 2023-01-24 03:22:05.047804: step: 320/466, loss: 0.04841906949877739 2023-01-24 03:22:05.661277: step: 322/466, loss: 0.02187824435532093 2023-01-24 03:22:06.282870: step: 324/466, loss: 0.02896566316485405 2023-01-24 03:22:06.873733: step: 326/466, loss: 0.10468301177024841 2023-01-24 03:22:07.490345: step: 328/466, loss: 0.0009681862429715693 2023-01-24 03:22:08.097590: step: 330/466, loss: 0.03846053034067154 2023-01-24 03:22:08.736980: step: 332/466, loss: 0.08358601480722427 2023-01-24 03:22:09.280796: step: 334/466, loss: 0.06232349947094917 2023-01-24 03:22:09.908806: step: 336/466, loss: 0.00839509442448616 2023-01-24 03:22:10.587163: step: 338/466, loss: 0.018033072352409363 2023-01-24 03:22:11.200734: step: 340/466, loss: 0.02174694463610649 2023-01-24 03:22:11.812287: step: 342/466, loss: 0.03843015804886818 2023-01-24 03:22:12.435776: step: 344/466, loss: 0.02696356177330017 2023-01-24 03:22:13.068120: step: 346/466, loss: 0.0023052766919136047 2023-01-24 03:22:13.743118: step: 348/466, loss: 0.09128759056329727 2023-01-24 03:22:14.324485: step: 350/466, loss: 0.034821297973394394 2023-01-24 03:22:14.939768: step: 352/466, loss: 0.04999520629644394 2023-01-24 03:22:15.511162: step: 354/466, loss: 0.0011430811136960983 2023-01-24 03:22:16.134379: step: 356/466, loss: 0.012842460535466671 2023-01-24 03:22:16.731498: step: 358/466, loss: 0.010257087647914886 2023-01-24 03:22:17.478309: step: 360/466, loss: 0.06453787535429001 2023-01-24 03:22:18.182178: step: 362/466, loss: 0.0472383126616478 2023-01-24 03:22:18.775964: step: 364/466, loss: 0.013958015479147434 2023-01-24 03:22:19.448071: step: 366/466, loss: 0.023365404456853867 2023-01-24 03:22:20.019462: step: 368/466, loss: 0.12553706765174866 2023-01-24 03:22:20.618084: step: 370/466, loss: 0.03481516242027283 2023-01-24 03:22:21.202252: step: 372/466, loss: 0.016188135370612144 2023-01-24 03:22:21.809552: step: 374/466, loss: 0.03400895744562149 2023-01-24 03:22:22.451915: step: 376/466, loss: 0.061151888221502304 2023-01-24 03:22:23.141512: step: 378/466, loss: 0.0034502113703638315 2023-01-24 03:22:23.753843: step: 380/466, loss: 0.5659694075584412 2023-01-24 03:22:24.406829: step: 382/466, loss: 0.07413993030786514 2023-01-24 03:22:24.994202: step: 384/466, loss: 0.008955743163824081 2023-01-24 03:22:25.624163: step: 386/466, loss: 0.04835180565714836 2023-01-24 03:22:26.304611: step: 388/466, loss: 0.5826194882392883 2023-01-24 03:22:26.945004: step: 390/466, loss: 0.43313485383987427 2023-01-24 03:22:27.570300: step: 392/466, loss: 4.415528746903874e-05 2023-01-24 03:22:28.121597: step: 394/466, loss: 0.03372623771429062 2023-01-24 03:22:28.708164: step: 396/466, loss: 0.0779719427227974 2023-01-24 03:22:29.287670: step: 398/466, loss: 0.03113323450088501 2023-01-24 03:22:29.920394: step: 400/466, loss: 0.014071152545511723 2023-01-24 03:22:30.537514: step: 402/466, loss: 0.26610639691352844 2023-01-24 03:22:31.108287: step: 404/466, loss: 0.026780832558870316 2023-01-24 03:22:31.757025: step: 406/466, loss: 0.042376335710287094 2023-01-24 03:22:32.396627: step: 408/466, loss: 0.04931849613785744 2023-01-24 03:22:33.038110: step: 410/466, loss: 0.03209669142961502 2023-01-24 03:22:33.652495: step: 412/466, loss: 0.04621882364153862 2023-01-24 03:22:34.989595: step: 414/466, loss: 0.23924817144870758 2023-01-24 03:22:35.515838: step: 416/466, loss: 0.004419934935867786 2023-01-24 03:22:36.126708: step: 418/466, loss: 0.018524905666708946 2023-01-24 03:22:36.659305: step: 420/466, loss: 0.04037218168377876 2023-01-24 03:22:37.263225: step: 422/466, loss: 0.005396811757236719 2023-01-24 03:22:37.849943: step: 424/466, loss: 0.025286972522735596 2023-01-24 03:22:38.496415: step: 426/466, loss: 0.006728660315275192 2023-01-24 03:22:39.136685: step: 428/466, loss: 0.033965032547712326 2023-01-24 03:22:39.766686: step: 430/466, loss: 0.1107817143201828 2023-01-24 03:22:40.320146: step: 432/466, loss: 0.03414522856473923 2023-01-24 03:22:40.903603: step: 434/466, loss: 0.11841520667076111 2023-01-24 03:22:41.560186: step: 436/466, loss: 0.029670242220163345 2023-01-24 03:22:42.185870: step: 438/466, loss: 0.07413268089294434 2023-01-24 03:22:42.791453: step: 440/466, loss: 0.05652602016925812 2023-01-24 03:22:43.449529: step: 442/466, loss: 0.004003919195383787 2023-01-24 03:22:44.032205: step: 444/466, loss: 0.20921221375465393 2023-01-24 03:22:44.667417: step: 446/466, loss: 0.01874626986682415 2023-01-24 03:22:45.335953: step: 448/466, loss: 0.2473910003900528 2023-01-24 03:22:46.011301: step: 450/466, loss: 0.2558498680591583 2023-01-24 03:22:46.640604: step: 452/466, loss: 0.04292437061667442 2023-01-24 03:22:47.255127: step: 454/466, loss: 0.040209073573350906 2023-01-24 03:22:47.870699: step: 456/466, loss: 0.02306332066655159 2023-01-24 03:22:48.468102: step: 458/466, loss: 0.020379625260829926 2023-01-24 03:22:49.021814: step: 460/466, loss: 0.0014458958758041263 2023-01-24 03:22:49.608941: step: 462/466, loss: 0.02728479914367199 2023-01-24 03:22:50.189687: step: 464/466, loss: 0.055573392659425735 2023-01-24 03:22:50.761473: step: 466/466, loss: 0.013155962340533733 2023-01-24 03:22:51.435974: step: 468/466, loss: 0.02111225575208664 2023-01-24 03:22:51.990409: step: 470/466, loss: 0.00214797118678689 2023-01-24 03:22:52.580281: step: 472/466, loss: 0.0030293106101453304 2023-01-24 03:22:53.203937: step: 474/466, loss: 0.014872650615870953 2023-01-24 03:22:53.810089: step: 476/466, loss: 0.006019831635057926 2023-01-24 03:22:54.431252: step: 478/466, loss: 0.03690674155950546 2023-01-24 03:22:55.058841: step: 480/466, loss: 0.04174504429101944 2023-01-24 03:22:55.714980: step: 482/466, loss: 0.04920300841331482 2023-01-24 03:22:56.316029: step: 484/466, loss: 0.01668628863990307 2023-01-24 03:22:56.899294: step: 486/466, loss: 0.03446260094642639 2023-01-24 03:22:57.561942: step: 488/466, loss: 0.13341861963272095 2023-01-24 03:22:58.268866: step: 490/466, loss: 0.05224217474460602 2023-01-24 03:22:58.914706: step: 492/466, loss: 0.05532006546854973 2023-01-24 03:22:59.543576: step: 494/466, loss: 0.015033143572509289 2023-01-24 03:23:00.105310: step: 496/466, loss: 0.01447263266891241 2023-01-24 03:23:00.739566: step: 498/466, loss: 0.9773015975952148 2023-01-24 03:23:01.376925: step: 500/466, loss: 0.01731417328119278 2023-01-24 03:23:02.014172: step: 502/466, loss: 0.23520046472549438 2023-01-24 03:23:02.621229: step: 504/466, loss: 0.01694115251302719 2023-01-24 03:23:03.349481: step: 506/466, loss: 0.023830056190490723 2023-01-24 03:23:04.000773: step: 508/466, loss: 0.013624468818306923 2023-01-24 03:23:04.672541: step: 510/466, loss: 0.08544586598873138 2023-01-24 03:23:05.377505: step: 512/466, loss: 0.2895181179046631 2023-01-24 03:23:06.017875: step: 514/466, loss: 0.3380913734436035 2023-01-24 03:23:06.588411: step: 516/466, loss: 0.010832487605512142 2023-01-24 03:23:07.195829: step: 518/466, loss: 0.019181370735168457 2023-01-24 03:23:07.878377: step: 520/466, loss: 0.03397896885871887 2023-01-24 03:23:08.505562: step: 522/466, loss: 0.008898376487195492 2023-01-24 03:23:09.121677: step: 524/466, loss: 0.001290146610699594 2023-01-24 03:23:09.792607: step: 526/466, loss: 0.015028296038508415 2023-01-24 03:23:10.425461: step: 528/466, loss: 0.02278532274067402 2023-01-24 03:23:11.063365: step: 530/466, loss: 0.05829382687807083 2023-01-24 03:23:11.637138: step: 532/466, loss: 0.011463063769042492 2023-01-24 03:23:12.256700: step: 534/466, loss: 0.5001119375228882 2023-01-24 03:23:12.821342: step: 536/466, loss: 0.023553278297185898 2023-01-24 03:23:13.499704: step: 538/466, loss: 0.024624977260828018 2023-01-24 03:23:14.102578: step: 540/466, loss: 0.037908170372247696 2023-01-24 03:23:14.637644: step: 542/466, loss: 0.045436758548021317 2023-01-24 03:23:15.222299: step: 544/466, loss: 1.2077014446258545 2023-01-24 03:23:15.809268: step: 546/466, loss: 0.11791954934597015 2023-01-24 03:23:16.431663: step: 548/466, loss: 0.07331138849258423 2023-01-24 03:23:17.135684: step: 550/466, loss: 0.005301305558532476 2023-01-24 03:23:17.792833: step: 552/466, loss: 0.2202800214290619 2023-01-24 03:23:18.414941: step: 554/466, loss: 0.04418746754527092 2023-01-24 03:23:19.086836: step: 556/466, loss: 0.023405754938721657 2023-01-24 03:23:19.692304: step: 558/466, loss: 0.0036683911457657814 2023-01-24 03:23:20.310482: step: 560/466, loss: 1.2640928030014038 2023-01-24 03:23:20.977243: step: 562/466, loss: 0.026077618822455406 2023-01-24 03:23:21.537847: step: 564/466, loss: 0.05308159068226814 2023-01-24 03:23:22.132877: step: 566/466, loss: 0.015728816390037537 2023-01-24 03:23:22.773395: step: 568/466, loss: 0.028387639671564102 2023-01-24 03:23:23.423633: step: 570/466, loss: 0.043330736458301544 2023-01-24 03:23:24.086452: step: 572/466, loss: 0.06042530760169029 2023-01-24 03:23:24.700493: step: 574/466, loss: 0.013226497918367386 2023-01-24 03:23:25.289921: step: 576/466, loss: 0.03364450857043266 2023-01-24 03:23:25.837995: step: 578/466, loss: 0.054037388414144516 2023-01-24 03:23:26.476242: step: 580/466, loss: 0.01870799995958805 2023-01-24 03:23:27.071523: step: 582/466, loss: 0.04428953677415848 2023-01-24 03:23:27.672403: step: 584/466, loss: 0.024038489907979965 2023-01-24 03:23:28.287245: step: 586/466, loss: 0.01239954773336649 2023-01-24 03:23:28.944712: step: 588/466, loss: 0.010824295692145824 2023-01-24 03:23:29.536710: step: 590/466, loss: 0.049315545707941055 2023-01-24 03:23:30.246336: step: 592/466, loss: 4.014024257659912 2023-01-24 03:23:30.843810: step: 594/466, loss: 0.05163121968507767 2023-01-24 03:23:31.447658: step: 596/466, loss: 0.031344205141067505 2023-01-24 03:23:32.119588: step: 598/466, loss: 0.08736274391412735 2023-01-24 03:23:32.721671: step: 600/466, loss: 0.08165568113327026 2023-01-24 03:23:33.392005: step: 602/466, loss: 0.08569925278425217 2023-01-24 03:23:34.003147: step: 604/466, loss: 0.05095534771680832 2023-01-24 03:23:34.615132: step: 606/466, loss: 0.045369550585746765 2023-01-24 03:23:35.287373: step: 608/466, loss: 0.01282559148967266 2023-01-24 03:23:35.906852: step: 610/466, loss: 0.0015983064658939838 2023-01-24 03:23:36.434579: step: 612/466, loss: 0.01708504930138588 2023-01-24 03:23:37.045837: step: 614/466, loss: 0.018791068345308304 2023-01-24 03:23:37.700935: step: 616/466, loss: 0.07073036581277847 2023-01-24 03:23:38.343336: step: 618/466, loss: 0.029045097529888153 2023-01-24 03:23:39.004833: step: 620/466, loss: 0.020562678575515747 2023-01-24 03:23:39.639621: step: 622/466, loss: 0.05618838593363762 2023-01-24 03:23:40.296470: step: 624/466, loss: 0.04314511641860008 2023-01-24 03:23:40.948994: step: 626/466, loss: 0.0668528750538826 2023-01-24 03:23:41.638934: step: 628/466, loss: 0.010574841871857643 2023-01-24 03:23:42.296106: step: 630/466, loss: 0.01126186829060316 2023-01-24 03:23:42.927965: step: 632/466, loss: 0.016938580200076103 2023-01-24 03:23:43.555455: step: 634/466, loss: 0.03811643272638321 2023-01-24 03:23:44.207353: step: 636/466, loss: 0.03264860808849335 2023-01-24 03:23:44.794586: step: 638/466, loss: 0.04469931125640869 2023-01-24 03:23:45.331065: step: 640/466, loss: 0.08351390063762665 2023-01-24 03:23:45.940797: step: 642/466, loss: 0.018482796847820282 2023-01-24 03:23:46.533922: step: 644/466, loss: 0.027051081880927086 2023-01-24 03:23:47.175486: step: 646/466, loss: 0.014741193503141403 2023-01-24 03:23:47.802594: step: 648/466, loss: 0.02814878523349762 2023-01-24 03:23:48.376347: step: 650/466, loss: 1.3097721338272095 2023-01-24 03:23:48.953133: step: 652/466, loss: 0.0610317625105381 2023-01-24 03:23:49.585650: step: 654/466, loss: 0.03113587759435177 2023-01-24 03:23:50.233103: step: 656/466, loss: 0.2117530256509781 2023-01-24 03:23:50.867202: step: 658/466, loss: 0.6282182931900024 2023-01-24 03:23:51.489124: step: 660/466, loss: 0.012697635218501091 2023-01-24 03:23:52.120223: step: 662/466, loss: 0.010068407282233238 2023-01-24 03:23:52.711641: step: 664/466, loss: 0.03171693906188011 2023-01-24 03:23:53.315841: step: 666/466, loss: 0.35073983669281006 2023-01-24 03:23:53.913222: step: 668/466, loss: 0.027183786034584045 2023-01-24 03:23:54.609290: step: 670/466, loss: 0.005791663192212582 2023-01-24 03:23:55.264536: step: 672/466, loss: 0.033167000859975815 2023-01-24 03:23:55.940926: step: 674/466, loss: 0.02182154357433319 2023-01-24 03:23:56.549228: step: 676/466, loss: 0.015532762743532658 2023-01-24 03:23:57.154357: step: 678/466, loss: 0.14364345371723175 2023-01-24 03:23:57.750747: step: 680/466, loss: 0.03017961047589779 2023-01-24 03:23:58.353899: step: 682/466, loss: 0.03674246743321419 2023-01-24 03:23:58.939240: step: 684/466, loss: 0.3185787498950958 2023-01-24 03:23:59.520069: step: 686/466, loss: 0.010634418576955795 2023-01-24 03:24:00.086362: step: 688/466, loss: 0.7276974320411682 2023-01-24 03:24:00.696061: step: 690/466, loss: 0.01223810762166977 2023-01-24 03:24:01.303525: step: 692/466, loss: 0.025081532076001167 2023-01-24 03:24:01.907097: step: 694/466, loss: 0.016418136656284332 2023-01-24 03:24:02.444053: step: 696/466, loss: 0.014432492665946484 2023-01-24 03:24:03.013923: step: 698/466, loss: 0.022362738847732544 2023-01-24 03:24:03.628907: step: 700/466, loss: 0.036238569766283035 2023-01-24 03:24:04.254013: step: 702/466, loss: 0.022475963458418846 2023-01-24 03:24:04.800754: step: 704/466, loss: 0.009691936895251274 2023-01-24 03:24:05.448742: step: 706/466, loss: 0.019086472690105438 2023-01-24 03:24:06.088750: step: 708/466, loss: 0.03635561466217041 2023-01-24 03:24:06.682765: step: 710/466, loss: 0.05995665863156319 2023-01-24 03:24:07.308884: step: 712/466, loss: 0.08793265372514725 2023-01-24 03:24:07.871110: step: 714/466, loss: 0.04483603686094284 2023-01-24 03:24:08.502651: step: 716/466, loss: 0.0006318181985989213 2023-01-24 03:24:09.104805: step: 718/466, loss: 0.04581335559487343 2023-01-24 03:24:09.750815: step: 720/466, loss: 0.054784007370471954 2023-01-24 03:24:10.340809: step: 722/466, loss: 0.008994770236313343 2023-01-24 03:24:11.050980: step: 724/466, loss: 0.07038211822509766 2023-01-24 03:24:11.728394: step: 726/466, loss: 0.02739771641790867 2023-01-24 03:24:12.415710: step: 728/466, loss: 0.0066716414876282215 2023-01-24 03:24:12.999610: step: 730/466, loss: 0.023032061755657196 2023-01-24 03:24:13.669452: step: 732/466, loss: 0.05693968012928963 2023-01-24 03:24:14.299701: step: 734/466, loss: 0.038457415997982025 2023-01-24 03:24:14.882615: step: 736/466, loss: 0.04100510850548744 2023-01-24 03:24:15.513517: step: 738/466, loss: 0.04011070355772972 2023-01-24 03:24:16.070283: step: 740/466, loss: 0.008213753812015057 2023-01-24 03:24:16.680308: step: 742/466, loss: 0.06475730240345001 2023-01-24 03:24:17.285503: step: 744/466, loss: 0.05782656744122505 2023-01-24 03:24:17.871698: step: 746/466, loss: 0.00479760579764843 2023-01-24 03:24:18.508683: step: 748/466, loss: 0.3707916736602783 2023-01-24 03:24:19.221715: step: 750/466, loss: 0.010022311471402645 2023-01-24 03:24:19.838796: step: 752/466, loss: 0.0010830792598426342 2023-01-24 03:24:20.380971: step: 754/466, loss: 0.017043301835656166 2023-01-24 03:24:21.060399: step: 756/466, loss: 0.03057851642370224 2023-01-24 03:24:21.684440: step: 758/466, loss: 0.07336274534463882 2023-01-24 03:24:22.278739: step: 760/466, loss: 0.18320348858833313 2023-01-24 03:24:23.021396: step: 762/466, loss: 0.0185698289424181 2023-01-24 03:24:23.680518: step: 764/466, loss: 0.017747841775417328 2023-01-24 03:24:24.366457: step: 766/466, loss: 0.04033486172556877 2023-01-24 03:24:24.917814: step: 768/466, loss: 0.030373042449355125 2023-01-24 03:24:25.577876: step: 770/466, loss: 0.02626446820795536 2023-01-24 03:24:26.217900: step: 772/466, loss: 0.0635000541806221 2023-01-24 03:24:26.847442: step: 774/466, loss: 0.028376979753375053 2023-01-24 03:24:27.452857: step: 776/466, loss: 0.015307756140828133 2023-01-24 03:24:28.120811: step: 778/466, loss: 0.031209155917167664 2023-01-24 03:24:28.764119: step: 780/466, loss: 0.01880793273448944 2023-01-24 03:24:29.384647: step: 782/466, loss: 0.04064683988690376 2023-01-24 03:24:29.984667: step: 784/466, loss: 0.02077857404947281 2023-01-24 03:24:30.544289: step: 786/466, loss: 0.07937052100896835 2023-01-24 03:24:31.092960: step: 788/466, loss: 0.017864972352981567 2023-01-24 03:24:31.686832: step: 790/466, loss: 0.06696239113807678 2023-01-24 03:24:32.309833: step: 792/466, loss: 0.10035181045532227 2023-01-24 03:24:32.904324: step: 794/466, loss: 0.01624724082648754 2023-01-24 03:24:33.499491: step: 796/466, loss: 0.016496853902935982 2023-01-24 03:24:34.096371: step: 798/466, loss: 0.021680563688278198 2023-01-24 03:24:34.690724: step: 800/466, loss: 0.13155917823314667 2023-01-24 03:24:35.359445: step: 802/466, loss: 0.0448790118098259 2023-01-24 03:24:36.028654: step: 804/466, loss: 0.04087737947702408 2023-01-24 03:24:36.634988: step: 806/466, loss: 0.06466640532016754 2023-01-24 03:24:37.237653: step: 808/466, loss: 0.021056322380900383 2023-01-24 03:24:37.859049: step: 810/466, loss: 0.05157526209950447 2023-01-24 03:24:38.565505: step: 812/466, loss: 0.15065328776836395 2023-01-24 03:24:39.162236: step: 814/466, loss: 0.11482340097427368 2023-01-24 03:24:39.739605: step: 816/466, loss: 0.015318148769438267 2023-01-24 03:24:40.409912: step: 818/466, loss: 0.06725523620843887 2023-01-24 03:24:41.137598: step: 820/466, loss: 0.12060696631669998 2023-01-24 03:24:41.729714: step: 822/466, loss: 0.03064657375216484 2023-01-24 03:24:42.364515: step: 824/466, loss: 0.004661441780626774 2023-01-24 03:24:42.945411: step: 826/466, loss: 0.04126424714922905 2023-01-24 03:24:43.513815: step: 828/466, loss: 0.009859644807875156 2023-01-24 03:24:44.148972: step: 830/466, loss: 0.03717637434601784 2023-01-24 03:24:44.792445: step: 832/466, loss: 0.025209451094269753 2023-01-24 03:24:45.412748: step: 834/466, loss: 0.015618618577718735 2023-01-24 03:24:46.036681: step: 836/466, loss: 0.042017657309770584 2023-01-24 03:24:46.594422: step: 838/466, loss: 0.05587295815348625 2023-01-24 03:24:47.269264: step: 840/466, loss: 0.09808378666639328 2023-01-24 03:24:47.981141: step: 842/466, loss: 0.019855311140418053 2023-01-24 03:24:48.663961: step: 844/466, loss: 0.18799708783626556 2023-01-24 03:24:49.355152: step: 846/466, loss: 0.18601791560649872 2023-01-24 03:24:49.969292: step: 848/466, loss: 0.057044170796871185 2023-01-24 03:24:50.568384: step: 850/466, loss: 0.04101067781448364 2023-01-24 03:24:51.160309: step: 852/466, loss: 0.033392854034900665 2023-01-24 03:24:51.734991: step: 854/466, loss: 1.741076111793518 2023-01-24 03:24:52.284822: step: 856/466, loss: 0.05415608361363411 2023-01-24 03:24:52.894539: step: 858/466, loss: 0.03163951262831688 2023-01-24 03:24:53.512277: step: 860/466, loss: 0.03953757509589195 2023-01-24 03:24:54.174666: step: 862/466, loss: 0.03556588664650917 2023-01-24 03:24:54.771284: step: 864/466, loss: 0.010855968110263348 2023-01-24 03:24:55.425225: step: 866/466, loss: 0.033087357878685 2023-01-24 03:24:56.116636: step: 868/466, loss: 0.1575131118297577 2023-01-24 03:24:56.686594: step: 870/466, loss: 0.042996399104595184 2023-01-24 03:24:57.290429: step: 872/466, loss: 0.1106543093919754 2023-01-24 03:24:57.966240: step: 874/466, loss: 0.0015727959107607603 2023-01-24 03:24:58.586109: step: 876/466, loss: 0.014399182051420212 2023-01-24 03:24:59.180794: step: 878/466, loss: 0.039629943668842316 2023-01-24 03:24:59.757102: step: 880/466, loss: 0.08232701569795609 2023-01-24 03:25:00.367324: step: 882/466, loss: 0.026552898809313774 2023-01-24 03:25:00.981258: step: 884/466, loss: 0.012258796021342278 2023-01-24 03:25:01.551365: step: 886/466, loss: 0.020021259784698486 2023-01-24 03:25:02.195856: step: 888/466, loss: 0.035042934119701385 2023-01-24 03:25:02.835637: step: 890/466, loss: 0.001964666647836566 2023-01-24 03:25:03.435427: step: 892/466, loss: 0.07394883036613464 2023-01-24 03:25:04.023529: step: 894/466, loss: 0.33917370438575745 2023-01-24 03:25:04.637360: step: 896/466, loss: 0.031111711636185646 2023-01-24 03:25:05.242969: step: 898/466, loss: 0.03684708848595619 2023-01-24 03:25:05.915948: step: 900/466, loss: 0.038802359253168106 2023-01-24 03:25:06.488358: step: 902/466, loss: 0.023610800504684448 2023-01-24 03:25:07.139175: step: 904/466, loss: 0.02906438708305359 2023-01-24 03:25:07.840186: step: 906/466, loss: 0.0029160077683627605 2023-01-24 03:25:08.419670: step: 908/466, loss: 0.020668020471930504 2023-01-24 03:25:09.029566: step: 910/466, loss: 0.026013070717453957 2023-01-24 03:25:09.662537: step: 912/466, loss: 0.0015297239879146218 2023-01-24 03:25:10.273899: step: 914/466, loss: 0.03978642448782921 2023-01-24 03:25:10.864048: step: 916/466, loss: 0.020224010571837425 2023-01-24 03:25:11.476888: step: 918/466, loss: 0.049149610102176666 2023-01-24 03:25:12.072265: step: 920/466, loss: 0.05288805440068245 2023-01-24 03:25:12.639190: step: 922/466, loss: 0.0071567329578101635 2023-01-24 03:25:13.299200: step: 924/466, loss: 0.055217765271663666 2023-01-24 03:25:13.906507: step: 926/466, loss: 0.03580210730433464 2023-01-24 03:25:14.544391: step: 928/466, loss: 0.04881547391414642 2023-01-24 03:25:15.255885: step: 930/466, loss: 0.020376309752464294 2023-01-24 03:25:15.864874: step: 932/466, loss: 0.402281254529953 ================================================== Loss: 0.099 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3523164730006835, 'r': 0.3175527982453979, 'f1': 0.3340325841822848}, 'combined': 0.24612927255536776, 'epoch': 24} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34543108433211595, 'r': 0.2815200985125458, 'f1': 0.3102180697681758}, 'combined': 0.19424869789222224, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33198906356801094, 'r': 0.3351388649301363, 'f1': 0.33355652845737827}, 'combined': 0.24577849465280502, 'epoch': 24} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.353948602826708, 'r': 0.2964479272772428, 'f1': 0.3226564866632466}, 'combined': 0.19995613258004014, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3018315093586341, 'r': 0.3167226274674092, 'f1': 0.30909782347282344}, 'combined': 0.22775629097997516, 'epoch': 24} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3467809817809817, 'r': 0.28976730636622233, 'f1': 0.3157208741608348}, 'combined': 0.2094385996908508, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2840909090909091, 'r': 0.26785714285714285, 'f1': 0.2757352941176471}, 'combined': 0.18382352941176472, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491647627064294, 'r': 0.32200014169890834, 'f1': 0.33503272393943667}, 'combined': 0.2468662176395849, 'epoch': 21} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32932177105318117, 'r': 0.2710662953073116, 'f1': 0.2973677774262388}, 'combined': 0.1862022531547477, 'epoch': 21} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.32857142857142857, 'f1': 0.34328358208955223}, 'combined': 0.2288557213930348, 'epoch': 21} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:27:48.050924: step: 2/466, loss: 0.123256616294384 2023-01-24 03:27:48.681493: step: 4/466, loss: 0.11478867381811142 2023-01-24 03:27:49.363791: step: 6/466, loss: 0.15547913312911987 2023-01-24 03:27:49.969035: step: 8/466, loss: 0.04502660036087036 2023-01-24 03:27:50.671145: step: 10/466, loss: 0.06763220578432083 2023-01-24 03:27:51.312539: step: 12/466, loss: 0.043466079980134964 2023-01-24 03:27:52.045317: step: 14/466, loss: 0.3706084191799164 2023-01-24 03:27:52.713269: step: 16/466, loss: 0.06069587543606758 2023-01-24 03:27:53.437039: step: 18/466, loss: 0.0064912224188447 2023-01-24 03:27:54.078504: step: 20/466, loss: 0.007945464923977852 2023-01-24 03:27:54.674974: step: 22/466, loss: 0.00964177306741476 2023-01-24 03:27:55.351838: step: 24/466, loss: 0.003734796540811658 2023-01-24 03:27:55.974639: step: 26/466, loss: 0.02065412513911724 2023-01-24 03:27:56.646579: step: 28/466, loss: 0.08720247447490692 2023-01-24 03:27:57.289811: step: 30/466, loss: 0.022077567875385284 2023-01-24 03:27:57.932468: step: 32/466, loss: 0.1807323545217514 2023-01-24 03:27:58.537970: step: 34/466, loss: 0.006703019607812166 2023-01-24 03:27:59.154849: step: 36/466, loss: 0.00502686807885766 2023-01-24 03:27:59.787665: step: 38/466, loss: 0.0813484638929367 2023-01-24 03:28:00.464706: step: 40/466, loss: 0.0033198362216353416 2023-01-24 03:28:01.088769: step: 42/466, loss: 0.8578417897224426 2023-01-24 03:28:01.671922: step: 44/466, loss: 0.013072691857814789 2023-01-24 03:28:02.297789: step: 46/466, loss: 0.01905766874551773 2023-01-24 03:28:02.924893: step: 48/466, loss: 0.4790077209472656 2023-01-24 03:28:03.573176: step: 50/466, loss: 0.05062977224588394 2023-01-24 03:28:04.180691: step: 52/466, loss: 0.025456015020608902 2023-01-24 03:28:04.735123: step: 54/466, loss: 0.002291387179866433 2023-01-24 03:28:05.350123: step: 56/466, loss: 0.01267669815570116 2023-01-24 03:28:05.951221: step: 58/466, loss: 0.0442313551902771 2023-01-24 03:28:06.548154: step: 60/466, loss: 0.01942724548280239 2023-01-24 03:28:07.126097: step: 62/466, loss: 0.06036658585071564 2023-01-24 03:28:07.692362: step: 64/466, loss: 0.021152399480342865 2023-01-24 03:28:08.243742: step: 66/466, loss: 0.0027935849502682686 2023-01-24 03:28:08.883154: step: 68/466, loss: 2.237856864929199 2023-01-24 03:28:09.627258: step: 70/466, loss: 0.08581312745809555 2023-01-24 03:28:10.251984: step: 72/466, loss: 0.024593474343419075 2023-01-24 03:28:10.811301: step: 74/466, loss: 0.06295838207006454 2023-01-24 03:28:11.475848: step: 76/466, loss: 0.01236707903444767 2023-01-24 03:28:12.188709: step: 78/466, loss: 0.048196546733379364 2023-01-24 03:28:12.864577: step: 80/466, loss: 0.15854839980602264 2023-01-24 03:28:13.487316: step: 82/466, loss: 0.0425398051738739 2023-01-24 03:28:14.209108: step: 84/466, loss: 0.041842229664325714 2023-01-24 03:28:14.771640: step: 86/466, loss: 0.05728701874613762 2023-01-24 03:28:15.331371: step: 88/466, loss: 0.034428469836711884 2023-01-24 03:28:15.985124: step: 90/466, loss: 0.06219211965799332 2023-01-24 03:28:16.689255: step: 92/466, loss: 0.02116026170551777 2023-01-24 03:28:17.285269: step: 94/466, loss: 0.017197037115693092 2023-01-24 03:28:17.932379: step: 96/466, loss: 0.02358037605881691 2023-01-24 03:28:18.574041: step: 98/466, loss: 0.0025120610371232033 2023-01-24 03:28:19.181333: step: 100/466, loss: 0.03598516061902046 2023-01-24 03:28:19.752267: step: 102/466, loss: 0.06651560962200165 2023-01-24 03:28:20.406447: step: 104/466, loss: 0.01988566666841507 2023-01-24 03:28:20.962276: step: 106/466, loss: 0.055231306701898575 2023-01-24 03:28:21.615165: step: 108/466, loss: 0.013693037442862988 2023-01-24 03:28:22.288585: step: 110/466, loss: 0.016922691836953163 2023-01-24 03:28:22.916516: step: 112/466, loss: 0.02427794225513935 2023-01-24 03:28:23.537579: step: 114/466, loss: 1.463797688484192 2023-01-24 03:28:24.145806: step: 116/466, loss: 0.011846937239170074 2023-01-24 03:28:24.798117: step: 118/466, loss: 0.03153500333428383 2023-01-24 03:28:25.454381: step: 120/466, loss: 0.008501111529767513 2023-01-24 03:28:26.078250: step: 122/466, loss: 0.0016625624848529696 2023-01-24 03:28:26.707640: step: 124/466, loss: 1.526236653327942 2023-01-24 03:28:27.334846: step: 126/466, loss: 0.01810932531952858 2023-01-24 03:28:28.045554: step: 128/466, loss: 0.07274865359067917 2023-01-24 03:28:28.690047: step: 130/466, loss: 0.021032212302088737 2023-01-24 03:28:29.427996: step: 132/466, loss: 0.03922904282808304 2023-01-24 03:28:30.027544: step: 134/466, loss: 0.06585580110549927 2023-01-24 03:28:30.676737: step: 136/466, loss: 0.00184637529309839 2023-01-24 03:28:31.265977: step: 138/466, loss: 0.09909527748823166 2023-01-24 03:28:31.913250: step: 140/466, loss: 0.03606913983821869 2023-01-24 03:28:32.564541: step: 142/466, loss: 0.035890597850084305 2023-01-24 03:28:33.174183: step: 144/466, loss: 0.0817774310708046 2023-01-24 03:28:33.820802: step: 146/466, loss: 0.04412094131112099 2023-01-24 03:28:34.449950: step: 148/466, loss: 0.01928979717195034 2023-01-24 03:28:34.998019: step: 150/466, loss: 0.004080347716808319 2023-01-24 03:28:35.609519: step: 152/466, loss: 0.03950808197259903 2023-01-24 03:28:36.229861: step: 154/466, loss: 0.03697212040424347 2023-01-24 03:28:36.797950: step: 156/466, loss: 0.07765532284975052 2023-01-24 03:28:37.418845: step: 158/466, loss: 0.2336684912443161 2023-01-24 03:28:38.036105: step: 160/466, loss: 0.2615325152873993 2023-01-24 03:28:38.648668: step: 162/466, loss: 0.009940821677446365 2023-01-24 03:28:39.288294: step: 164/466, loss: 0.03197099268436432 2023-01-24 03:28:39.901105: step: 166/466, loss: 0.05685582011938095 2023-01-24 03:28:40.504349: step: 168/466, loss: 0.0019060398917645216 2023-01-24 03:28:41.026694: step: 170/466, loss: 0.003438710467889905 2023-01-24 03:28:41.673255: step: 172/466, loss: 0.04050370305776596 2023-01-24 03:28:42.258439: step: 174/466, loss: 0.015799373388290405 2023-01-24 03:28:42.899082: step: 176/466, loss: 0.012722530402243137 2023-01-24 03:28:43.534181: step: 178/466, loss: 0.06495097279548645 2023-01-24 03:28:44.136790: step: 180/466, loss: 0.012860220856964588 2023-01-24 03:28:44.745177: step: 182/466, loss: 0.09234242886304855 2023-01-24 03:28:45.463442: step: 184/466, loss: 0.028211582452058792 2023-01-24 03:28:46.077204: step: 186/466, loss: 0.24828922748565674 2023-01-24 03:28:46.767097: step: 188/466, loss: 0.011880680918693542 2023-01-24 03:28:47.323259: step: 190/466, loss: 0.18462541699409485 2023-01-24 03:28:48.046079: step: 192/466, loss: 0.019765596836805344 2023-01-24 03:28:48.694661: step: 194/466, loss: 0.027054965496063232 2023-01-24 03:28:49.313501: step: 196/466, loss: 0.013658122159540653 2023-01-24 03:28:49.951027: step: 198/466, loss: 0.12429645657539368 2023-01-24 03:28:50.577623: step: 200/466, loss: 0.03704677149653435 2023-01-24 03:28:51.149817: step: 202/466, loss: 0.004386530257761478 2023-01-24 03:28:51.750314: step: 204/466, loss: 0.0045953174121677876 2023-01-24 03:28:52.379551: step: 206/466, loss: 0.06506485491991043 2023-01-24 03:28:53.081693: step: 208/466, loss: 0.027044648304581642 2023-01-24 03:28:53.719390: step: 210/466, loss: 0.007475194986909628 2023-01-24 03:28:54.377563: step: 212/466, loss: 0.03475631773471832 2023-01-24 03:28:54.994619: step: 214/466, loss: 0.037433333694934845 2023-01-24 03:28:55.631520: step: 216/466, loss: 0.09614378213882446 2023-01-24 03:28:56.280563: step: 218/466, loss: 0.014291762374341488 2023-01-24 03:28:56.883198: step: 220/466, loss: 0.0066920192912220955 2023-01-24 03:28:57.533199: step: 222/466, loss: 0.0016438337042927742 2023-01-24 03:28:58.185434: step: 224/466, loss: 0.01268971711397171 2023-01-24 03:28:58.726642: step: 226/466, loss: 0.019960829988121986 2023-01-24 03:28:59.292415: step: 228/466, loss: 0.02156595140695572 2023-01-24 03:28:59.898314: step: 230/466, loss: 0.041490305215120316 2023-01-24 03:29:00.532939: step: 232/466, loss: 0.007419609930366278 2023-01-24 03:29:01.202380: step: 234/466, loss: 0.06077762693166733 2023-01-24 03:29:01.849566: step: 236/466, loss: 0.04927678778767586 2023-01-24 03:29:02.483197: step: 238/466, loss: 0.007565382402390242 2023-01-24 03:29:03.036339: step: 240/466, loss: 0.00638133380562067 2023-01-24 03:29:03.629556: step: 242/466, loss: 0.05720380321145058 2023-01-24 03:29:04.215748: step: 244/466, loss: 0.027775298804044724 2023-01-24 03:29:04.813527: step: 246/466, loss: 0.08375570178031921 2023-01-24 03:29:05.430984: step: 248/466, loss: 0.014697118662297726 2023-01-24 03:29:06.138132: step: 250/466, loss: 0.08780642598867416 2023-01-24 03:29:06.809959: step: 252/466, loss: 0.016955697908997536 2023-01-24 03:29:07.405862: step: 254/466, loss: 0.05962974950671196 2023-01-24 03:29:08.029103: step: 256/466, loss: 0.015877990052103996 2023-01-24 03:29:08.631327: step: 258/466, loss: 0.05827377364039421 2023-01-24 03:29:09.273563: step: 260/466, loss: 0.03235466778278351 2023-01-24 03:29:09.873756: step: 262/466, loss: 0.011585214175283909 2023-01-24 03:29:10.533948: step: 264/466, loss: 0.058018896728754044 2023-01-24 03:29:11.123390: step: 266/466, loss: 0.06219848245382309 2023-01-24 03:29:11.727036: step: 268/466, loss: 0.07465940713882446 2023-01-24 03:29:12.493116: step: 270/466, loss: 0.01665102317929268 2023-01-24 03:29:13.306594: step: 272/466, loss: 0.005275152623653412 2023-01-24 03:29:13.972699: step: 274/466, loss: 0.05236566439270973 2023-01-24 03:29:14.587075: step: 276/466, loss: 0.005467272363603115 2023-01-24 03:29:15.140113: step: 278/466, loss: 0.03000018000602722 2023-01-24 03:29:15.673258: step: 280/466, loss: 1.1425138711929321 2023-01-24 03:29:16.289222: step: 282/466, loss: 0.039501119405031204 2023-01-24 03:29:16.872409: step: 284/466, loss: 0.44520309567451477 2023-01-24 03:29:17.474988: step: 286/466, loss: 0.000496347201988101 2023-01-24 03:29:18.061246: step: 288/466, loss: 0.0023207715712487698 2023-01-24 03:29:18.640781: step: 290/466, loss: 0.02163492515683174 2023-01-24 03:29:19.316785: step: 292/466, loss: 0.0733761414885521 2023-01-24 03:29:19.970685: step: 294/466, loss: 0.013693487271666527 2023-01-24 03:29:20.567621: step: 296/466, loss: 0.025295032188296318 2023-01-24 03:29:21.223770: step: 298/466, loss: 0.030743736773729324 2023-01-24 03:29:21.830071: step: 300/466, loss: 0.01697983592748642 2023-01-24 03:29:22.442365: step: 302/466, loss: 0.006202060729265213 2023-01-24 03:29:23.061211: step: 304/466, loss: 0.002101277466863394 2023-01-24 03:29:23.693362: step: 306/466, loss: 0.036038950085639954 2023-01-24 03:29:24.313474: step: 308/466, loss: 0.010649413801729679 2023-01-24 03:29:24.911719: step: 310/466, loss: 0.0036623121704906225 2023-01-24 03:29:25.511427: step: 312/466, loss: 0.006318286992609501 2023-01-24 03:29:26.127829: step: 314/466, loss: 0.019612329080700874 2023-01-24 03:29:26.698405: step: 316/466, loss: 0.08130209147930145 2023-01-24 03:29:27.328570: step: 318/466, loss: 0.6466153860092163 2023-01-24 03:29:27.946015: step: 320/466, loss: 0.017690975219011307 2023-01-24 03:29:28.589193: step: 322/466, loss: 0.0048448131419718266 2023-01-24 03:29:29.275546: step: 324/466, loss: 0.8842586278915405 2023-01-24 03:29:29.919837: step: 326/466, loss: 0.020062141120433807 2023-01-24 03:29:30.532202: step: 328/466, loss: 0.011810514144599438 2023-01-24 03:29:31.140324: step: 330/466, loss: 0.04219945892691612 2023-01-24 03:29:31.689746: step: 332/466, loss: 0.02550136111676693 2023-01-24 03:29:32.281440: step: 334/466, loss: 0.01426367275416851 2023-01-24 03:29:32.829312: step: 336/466, loss: 0.03547900542616844 2023-01-24 03:29:33.438566: step: 338/466, loss: 0.0798686221241951 2023-01-24 03:29:34.147815: step: 340/466, loss: 0.06922107934951782 2023-01-24 03:29:34.772733: step: 342/466, loss: 0.08623357862234116 2023-01-24 03:29:35.306448: step: 344/466, loss: 0.0029127050656825304 2023-01-24 03:29:35.948133: step: 346/466, loss: 0.0047578406520187855 2023-01-24 03:29:36.576777: step: 348/466, loss: 0.030754726380109787 2023-01-24 03:29:37.138698: step: 350/466, loss: 0.09280386567115784 2023-01-24 03:29:37.735134: step: 352/466, loss: 0.0332859605550766 2023-01-24 03:29:38.349445: step: 354/466, loss: 0.11619105935096741 2023-01-24 03:29:38.900232: step: 356/466, loss: 0.024161159992218018 2023-01-24 03:29:39.507766: step: 358/466, loss: 0.008979431353509426 2023-01-24 03:29:40.137079: step: 360/466, loss: 0.06891241669654846 2023-01-24 03:29:40.808101: step: 362/466, loss: 0.022034132853150368 2023-01-24 03:29:41.382931: step: 364/466, loss: 0.08202984184026718 2023-01-24 03:29:42.072323: step: 366/466, loss: 0.039208486676216125 2023-01-24 03:29:42.657270: step: 368/466, loss: 0.010029909200966358 2023-01-24 03:29:43.290919: step: 370/466, loss: 0.044931963086128235 2023-01-24 03:29:43.863840: step: 372/466, loss: 0.017695071175694466 2023-01-24 03:29:44.506351: step: 374/466, loss: 0.07123210281133652 2023-01-24 03:29:45.170291: step: 376/466, loss: 0.010910790413618088 2023-01-24 03:29:45.786102: step: 378/466, loss: 0.07261643558740616 2023-01-24 03:29:46.405840: step: 380/466, loss: 0.03362992778420448 2023-01-24 03:29:47.123056: step: 382/466, loss: 0.030723733827471733 2023-01-24 03:29:47.695948: step: 384/466, loss: 0.0033833698835223913 2023-01-24 03:29:48.307179: step: 386/466, loss: 0.07037527114152908 2023-01-24 03:29:48.945748: step: 388/466, loss: 0.04732706397771835 2023-01-24 03:29:49.599900: step: 390/466, loss: 0.03024684637784958 2023-01-24 03:29:50.301493: step: 392/466, loss: 0.010613604448735714 2023-01-24 03:29:50.985056: step: 394/466, loss: 0.024162225425243378 2023-01-24 03:29:51.568559: step: 396/466, loss: 0.009866451844573021 2023-01-24 03:29:52.182475: step: 398/466, loss: 0.04842867702245712 2023-01-24 03:29:52.808815: step: 400/466, loss: 0.04632077366113663 2023-01-24 03:29:53.389924: step: 402/466, loss: 0.08502992242574692 2023-01-24 03:29:53.992422: step: 404/466, loss: 0.00219887588173151 2023-01-24 03:29:54.663915: step: 406/466, loss: 0.05578460171818733 2023-01-24 03:29:55.321554: step: 408/466, loss: 0.02796776592731476 2023-01-24 03:29:55.971628: step: 410/466, loss: 0.04815340414643288 2023-01-24 03:29:56.564067: step: 412/466, loss: 0.009533676318824291 2023-01-24 03:29:57.253290: step: 414/466, loss: 0.00020531343761831522 2023-01-24 03:29:57.865658: step: 416/466, loss: 0.03974773734807968 2023-01-24 03:29:58.487822: step: 418/466, loss: 0.0235374066978693 2023-01-24 03:29:59.101924: step: 420/466, loss: 0.017376694828271866 2023-01-24 03:29:59.662414: step: 422/466, loss: 0.01427893154323101 2023-01-24 03:30:00.271169: step: 424/466, loss: 0.002878089901059866 2023-01-24 03:30:00.910652: step: 426/466, loss: 0.008732099086046219 2023-01-24 03:30:01.547596: step: 428/466, loss: 0.05201972275972366 2023-01-24 03:30:02.153467: step: 430/466, loss: 0.03940167650580406 2023-01-24 03:30:02.814260: step: 432/466, loss: 0.7123115062713623 2023-01-24 03:30:03.407797: step: 434/466, loss: 0.014425323344767094 2023-01-24 03:30:04.020070: step: 436/466, loss: 0.020876772701740265 2023-01-24 03:30:04.665839: step: 438/466, loss: 0.01232703123241663 2023-01-24 03:30:05.236758: step: 440/466, loss: 0.013305318541824818 2023-01-24 03:30:05.841044: step: 442/466, loss: 0.00580544862896204 2023-01-24 03:30:06.441500: step: 444/466, loss: 0.024085568264126778 2023-01-24 03:30:07.132554: step: 446/466, loss: 0.0445990227162838 2023-01-24 03:30:07.729812: step: 448/466, loss: 0.00036237595486454666 2023-01-24 03:30:08.324557: step: 450/466, loss: 0.15216988325119019 2023-01-24 03:30:09.009319: step: 452/466, loss: 0.02704990655183792 2023-01-24 03:30:09.581992: step: 454/466, loss: 0.012838363647460938 2023-01-24 03:30:10.245772: step: 456/466, loss: 0.06284131854772568 2023-01-24 03:30:10.846229: step: 458/466, loss: 0.10360924154520035 2023-01-24 03:30:11.438903: step: 460/466, loss: 0.00795061606913805 2023-01-24 03:30:12.082996: step: 462/466, loss: 0.030827568843960762 2023-01-24 03:30:12.731971: step: 464/466, loss: 0.0068170600570738316 2023-01-24 03:30:13.356502: step: 466/466, loss: 0.15848875045776367 2023-01-24 03:30:13.991757: step: 468/466, loss: 0.004579485859721899 2023-01-24 03:30:14.625812: step: 470/466, loss: 0.006150723434984684 2023-01-24 03:30:15.226818: step: 472/466, loss: 0.031934954226017 2023-01-24 03:30:15.861122: step: 474/466, loss: 0.014982946217060089 2023-01-24 03:30:16.502497: step: 476/466, loss: 0.041599661111831665 2023-01-24 03:30:17.156586: step: 478/466, loss: 0.03959587588906288 2023-01-24 03:30:17.800563: step: 480/466, loss: 0.001282591954804957 2023-01-24 03:30:18.372618: step: 482/466, loss: 0.2651715576648712 2023-01-24 03:30:19.022647: step: 484/466, loss: 0.017963455989956856 2023-01-24 03:30:19.676741: step: 486/466, loss: 0.08299870043992996 2023-01-24 03:30:20.275970: step: 488/466, loss: 0.010541216470301151 2023-01-24 03:30:20.869155: step: 490/466, loss: 0.0055192383006215096 2023-01-24 03:30:21.552765: step: 492/466, loss: 0.009777909144759178 2023-01-24 03:30:22.167640: step: 494/466, loss: 0.11229853332042694 2023-01-24 03:30:22.673118: step: 496/466, loss: 0.001413665246218443 2023-01-24 03:30:23.328730: step: 498/466, loss: 0.12335145473480225 2023-01-24 03:30:23.958580: step: 500/466, loss: 0.030117841437458992 2023-01-24 03:30:24.542288: step: 502/466, loss: 0.10821045935153961 2023-01-24 03:30:25.149940: step: 504/466, loss: 0.016559399664402008 2023-01-24 03:30:25.720465: step: 506/466, loss: 0.0051531922072172165 2023-01-24 03:30:26.321926: step: 508/466, loss: 0.05999673530459404 2023-01-24 03:30:26.950459: step: 510/466, loss: 0.08033227920532227 2023-01-24 03:30:27.581718: step: 512/466, loss: 0.05756360664963722 2023-01-24 03:30:28.189125: step: 514/466, loss: 0.1250261515378952 2023-01-24 03:30:28.827720: step: 516/466, loss: 0.024026280269026756 2023-01-24 03:30:29.419600: step: 518/466, loss: 0.02750120684504509 2023-01-24 03:30:30.012402: step: 520/466, loss: 0.05190858244895935 2023-01-24 03:30:30.643738: step: 522/466, loss: 0.08374769240617752 2023-01-24 03:30:31.282241: step: 524/466, loss: 0.19721168279647827 2023-01-24 03:30:31.992382: step: 526/466, loss: 0.14855492115020752 2023-01-24 03:30:32.560389: step: 528/466, loss: 0.029247550293803215 2023-01-24 03:30:33.176201: step: 530/466, loss: 0.028942137956619263 2023-01-24 03:30:33.771677: step: 532/466, loss: 0.1819128692150116 2023-01-24 03:30:34.406572: step: 534/466, loss: 0.006035921163856983 2023-01-24 03:30:35.005718: step: 536/466, loss: 0.16624990105628967 2023-01-24 03:30:35.645999: step: 538/466, loss: 0.029659925028681755 2023-01-24 03:30:36.269633: step: 540/466, loss: 0.0641593262553215 2023-01-24 03:30:36.932392: step: 542/466, loss: 0.0009827445028349757 2023-01-24 03:30:37.577290: step: 544/466, loss: 0.17054708302021027 2023-01-24 03:30:38.151835: step: 546/466, loss: 0.010677242651581764 2023-01-24 03:30:38.753501: step: 548/466, loss: 0.0017253371188417077 2023-01-24 03:30:39.401221: step: 550/466, loss: 0.5802269577980042 2023-01-24 03:30:40.026282: step: 552/466, loss: 0.009611350484192371 2023-01-24 03:30:40.579336: step: 554/466, loss: 0.01147443987429142 2023-01-24 03:30:41.238811: step: 556/466, loss: 0.08705990016460419 2023-01-24 03:30:41.919187: step: 558/466, loss: 1.0124412775039673 2023-01-24 03:30:42.512087: step: 560/466, loss: 0.022383833304047585 2023-01-24 03:30:43.093070: step: 562/466, loss: 0.007344966288655996 2023-01-24 03:30:43.634099: step: 564/466, loss: 0.03636467829346657 2023-01-24 03:30:44.263339: step: 566/466, loss: 0.009142805822193623 2023-01-24 03:30:44.846465: step: 568/466, loss: 0.017209192737936974 2023-01-24 03:30:45.477349: step: 570/466, loss: 0.008467994630336761 2023-01-24 03:30:46.140461: step: 572/466, loss: 0.018438834697008133 2023-01-24 03:30:46.753805: step: 574/466, loss: 0.31713053584098816 2023-01-24 03:30:47.368480: step: 576/466, loss: 0.024290455505251884 2023-01-24 03:30:48.022514: step: 578/466, loss: 0.01887284778058529 2023-01-24 03:30:48.669385: step: 580/466, loss: 0.5228666067123413 2023-01-24 03:30:49.234792: step: 582/466, loss: 0.003434462007135153 2023-01-24 03:30:49.883409: step: 584/466, loss: 0.08324345201253891 2023-01-24 03:30:50.478610: step: 586/466, loss: 0.031138503924012184 2023-01-24 03:30:51.049287: step: 588/466, loss: 0.004604414105415344 2023-01-24 03:30:51.699185: step: 590/466, loss: 0.07915446907281876 2023-01-24 03:30:52.304670: step: 592/466, loss: 0.012631416320800781 2023-01-24 03:30:53.006850: step: 594/466, loss: 0.01816081814467907 2023-01-24 03:30:53.594170: step: 596/466, loss: 0.05987480282783508 2023-01-24 03:30:54.135093: step: 598/466, loss: 0.061415087431669235 2023-01-24 03:30:54.679731: step: 600/466, loss: 0.013928325846791267 2023-01-24 03:30:55.257268: step: 602/466, loss: 0.03612973541021347 2023-01-24 03:30:55.881795: step: 604/466, loss: 0.003916529472917318 2023-01-24 03:30:56.543055: step: 606/466, loss: 0.39525341987609863 2023-01-24 03:30:57.171319: step: 608/466, loss: 0.002376685617491603 2023-01-24 03:30:57.749084: step: 610/466, loss: 0.07487525790929794 2023-01-24 03:30:58.442931: step: 612/466, loss: 0.037955593317747116 2023-01-24 03:30:59.087001: step: 614/466, loss: 0.019716570153832436 2023-01-24 03:30:59.720014: step: 616/466, loss: 0.020672595128417015 2023-01-24 03:31:00.327837: step: 618/466, loss: 0.01101340726017952 2023-01-24 03:31:00.981079: step: 620/466, loss: 0.03404736891388893 2023-01-24 03:31:01.660718: step: 622/466, loss: 0.005758011247962713 2023-01-24 03:31:02.198233: step: 624/466, loss: 0.0057716164737939835 2023-01-24 03:31:02.865212: step: 626/466, loss: 0.037827637046575546 2023-01-24 03:31:03.484046: step: 628/466, loss: 0.07281064242124557 2023-01-24 03:31:04.151527: step: 630/466, loss: 0.024857493117451668 2023-01-24 03:31:04.733411: step: 632/466, loss: 0.004130253568291664 2023-01-24 03:31:05.338033: step: 634/466, loss: 0.013181922025978565 2023-01-24 03:31:05.911124: step: 636/466, loss: 0.003704882925376296 2023-01-24 03:31:06.532151: step: 638/466, loss: 0.033294033259153366 2023-01-24 03:31:07.137938: step: 640/466, loss: 0.787654459476471 2023-01-24 03:31:07.733296: step: 642/466, loss: 0.07885098457336426 2023-01-24 03:31:08.367114: step: 644/466, loss: 0.0641399398446083 2023-01-24 03:31:08.945756: step: 646/466, loss: 0.009171229787170887 2023-01-24 03:31:09.600830: step: 648/466, loss: 0.03740779682993889 2023-01-24 03:31:10.324689: step: 650/466, loss: 0.04201372340321541 2023-01-24 03:31:10.922587: step: 652/466, loss: 0.04712774604558945 2023-01-24 03:31:11.486377: step: 654/466, loss: 0.08313298225402832 2023-01-24 03:31:12.081648: step: 656/466, loss: 0.028765540570020676 2023-01-24 03:31:12.638057: step: 658/466, loss: 0.03196149691939354 2023-01-24 03:31:13.244541: step: 660/466, loss: 0.020120587199926376 2023-01-24 03:31:13.859365: step: 662/466, loss: 0.02490849420428276 2023-01-24 03:31:14.457824: step: 664/466, loss: 0.26444730162620544 2023-01-24 03:31:15.047108: step: 666/466, loss: 0.02657054178416729 2023-01-24 03:31:15.598634: step: 668/466, loss: 0.006359726656228304 2023-01-24 03:31:16.226929: step: 670/466, loss: 0.06964357197284698 2023-01-24 03:31:16.885860: step: 672/466, loss: 0.003294349182397127 2023-01-24 03:31:17.477932: step: 674/466, loss: 0.05599857494235039 2023-01-24 03:31:18.163576: step: 676/466, loss: 0.00530225457623601 2023-01-24 03:31:18.763638: step: 678/466, loss: 0.05987514182925224 2023-01-24 03:31:19.321888: step: 680/466, loss: 0.01196238026022911 2023-01-24 03:31:19.918453: step: 682/466, loss: 0.09103018790483475 2023-01-24 03:31:20.499130: step: 684/466, loss: 0.03690921887755394 2023-01-24 03:31:21.119122: step: 686/466, loss: 0.04676826298236847 2023-01-24 03:31:21.689119: step: 688/466, loss: 0.013362089172005653 2023-01-24 03:31:22.320639: step: 690/466, loss: 0.09177672117948532 2023-01-24 03:31:22.946635: step: 692/466, loss: 0.013591652736067772 2023-01-24 03:31:23.547190: step: 694/466, loss: 0.0038507573772221804 2023-01-24 03:31:24.125469: step: 696/466, loss: 0.011765643954277039 2023-01-24 03:31:24.708913: step: 698/466, loss: 0.16771037876605988 2023-01-24 03:31:25.356660: step: 700/466, loss: 0.05011175945401192 2023-01-24 03:31:25.933060: step: 702/466, loss: 0.036982789635658264 2023-01-24 03:31:26.573412: step: 704/466, loss: 0.6828376650810242 2023-01-24 03:31:27.217163: step: 706/466, loss: 0.0290224552154541 2023-01-24 03:31:27.806990: step: 708/466, loss: 0.02623136155307293 2023-01-24 03:31:28.415948: step: 710/466, loss: 0.3259034752845764 2023-01-24 03:31:29.022351: step: 712/466, loss: 0.005587138235569 2023-01-24 03:31:29.643337: step: 714/466, loss: 0.05836469307541847 2023-01-24 03:31:30.237090: step: 716/466, loss: 0.003592146560549736 2023-01-24 03:31:30.864162: step: 718/466, loss: 0.011633865535259247 2023-01-24 03:31:31.438384: step: 720/466, loss: 0.02132820524275303 2023-01-24 03:31:32.041862: step: 722/466, loss: 0.0905800461769104 2023-01-24 03:31:32.669947: step: 724/466, loss: 0.40397319197654724 2023-01-24 03:31:33.319339: step: 726/466, loss: 0.5215640664100647 2023-01-24 03:31:33.961848: step: 728/466, loss: 0.09964409470558167 2023-01-24 03:31:34.563698: step: 730/466, loss: 0.022946283221244812 2023-01-24 03:31:35.168645: step: 732/466, loss: 0.34767013788223267 2023-01-24 03:31:35.715788: step: 734/466, loss: 0.0024455466773360968 2023-01-24 03:31:36.318644: step: 736/466, loss: 0.06245845928788185 2023-01-24 03:31:36.936234: step: 738/466, loss: 0.3352563679218292 2023-01-24 03:31:37.533905: step: 740/466, loss: 0.4687623977661133 2023-01-24 03:31:38.137023: step: 742/466, loss: 0.014673358760774136 2023-01-24 03:31:38.680122: step: 744/466, loss: 0.07676398754119873 2023-01-24 03:31:39.272986: step: 746/466, loss: 0.022580327466130257 2023-01-24 03:31:39.946904: step: 748/466, loss: 0.05122647061944008 2023-01-24 03:31:40.581790: step: 750/466, loss: 0.07465817034244537 2023-01-24 03:31:41.237110: step: 752/466, loss: 0.07189252227544785 2023-01-24 03:31:41.978911: step: 754/466, loss: 0.0055083101615309715 2023-01-24 03:31:42.599043: step: 756/466, loss: 0.03365939110517502 2023-01-24 03:31:43.149644: step: 758/466, loss: 0.07387290894985199 2023-01-24 03:31:43.773510: step: 760/466, loss: 0.029260363429784775 2023-01-24 03:31:44.389685: step: 762/466, loss: 0.014879300259053707 2023-01-24 03:31:45.012591: step: 764/466, loss: 0.05147287994623184 2023-01-24 03:31:45.669436: step: 766/466, loss: 0.025484487414360046 2023-01-24 03:31:46.258573: step: 768/466, loss: 0.06355484575033188 2023-01-24 03:31:46.828874: step: 770/466, loss: 0.007383082527667284 2023-01-24 03:31:47.457487: step: 772/466, loss: 0.0148489223793149 2023-01-24 03:31:48.141064: step: 774/466, loss: 0.18538230657577515 2023-01-24 03:31:48.774853: step: 776/466, loss: 0.03870323672890663 2023-01-24 03:31:49.430847: step: 778/466, loss: 0.0025325315073132515 2023-01-24 03:31:50.067740: step: 780/466, loss: 0.1094198226928711 2023-01-24 03:31:50.686531: step: 782/466, loss: 0.03620237484574318 2023-01-24 03:31:51.388751: step: 784/466, loss: 0.03306031972169876 2023-01-24 03:31:52.062404: step: 786/466, loss: 0.12057071924209595 2023-01-24 03:31:52.737018: step: 788/466, loss: 0.061688702553510666 2023-01-24 03:31:53.364995: step: 790/466, loss: 0.0335828959941864 2023-01-24 03:31:54.011564: step: 792/466, loss: 0.023003384470939636 2023-01-24 03:31:54.682291: step: 794/466, loss: 0.010956516489386559 2023-01-24 03:31:55.375558: step: 796/466, loss: 0.8769637942314148 2023-01-24 03:31:56.010485: step: 798/466, loss: 0.025993596762418747 2023-01-24 03:31:56.625507: step: 800/466, loss: 0.04359062388539314 2023-01-24 03:31:57.209757: step: 802/466, loss: 0.053605817258358 2023-01-24 03:31:57.841731: step: 804/466, loss: 0.06414834409952164 2023-01-24 03:31:58.458627: step: 806/466, loss: 0.02431272529065609 2023-01-24 03:31:59.087316: step: 808/466, loss: 0.008475328795611858 2023-01-24 03:31:59.719250: step: 810/466, loss: 0.012665689922869205 2023-01-24 03:32:00.316331: step: 812/466, loss: 0.027181055396795273 2023-01-24 03:32:00.936483: step: 814/466, loss: 0.002804344752803445 2023-01-24 03:32:01.618335: step: 816/466, loss: 0.026928989216685295 2023-01-24 03:32:02.250090: step: 818/466, loss: 0.046765200793743134 2023-01-24 03:32:02.920073: step: 820/466, loss: 0.06903944909572601 2023-01-24 03:32:03.519912: step: 822/466, loss: 0.01643381454050541 2023-01-24 03:32:04.159632: step: 824/466, loss: 0.17748354375362396 2023-01-24 03:32:04.769290: step: 826/466, loss: 0.07518330216407776 2023-01-24 03:32:05.410337: step: 828/466, loss: 0.044734109193086624 2023-01-24 03:32:06.049426: step: 830/466, loss: 0.08786842226982117 2023-01-24 03:32:06.743111: step: 832/466, loss: 0.014323107898235321 2023-01-24 03:32:07.374243: step: 834/466, loss: 0.006167815998196602 2023-01-24 03:32:08.009680: step: 836/466, loss: 0.04702593386173248 2023-01-24 03:32:08.597853: step: 838/466, loss: 0.028971809893846512 2023-01-24 03:32:09.195498: step: 840/466, loss: 0.21031683683395386 2023-01-24 03:32:09.787168: step: 842/466, loss: 0.006460993085056543 2023-01-24 03:32:10.502934: step: 844/466, loss: 0.019567999988794327 2023-01-24 03:32:11.090880: step: 846/466, loss: 0.8865355253219604 2023-01-24 03:32:11.680088: step: 848/466, loss: 0.011671203188598156 2023-01-24 03:32:12.350550: step: 850/466, loss: 0.0006757083465345204 2023-01-24 03:32:13.077487: step: 852/466, loss: 0.04617739096283913 2023-01-24 03:32:13.727412: step: 854/466, loss: 0.021671177819371223 2023-01-24 03:32:14.356175: step: 856/466, loss: 0.057586777955293655 2023-01-24 03:32:14.965876: step: 858/466, loss: 0.045462507754564285 2023-01-24 03:32:15.600402: step: 860/466, loss: 0.3865273594856262 2023-01-24 03:32:16.238279: step: 862/466, loss: 0.011022298596799374 2023-01-24 03:32:16.829880: step: 864/466, loss: 0.0045396713539958 2023-01-24 03:32:17.466879: step: 866/466, loss: 0.05541210621595383 2023-01-24 03:32:18.000362: step: 868/466, loss: 0.025362443178892136 2023-01-24 03:32:18.542332: step: 870/466, loss: 0.010455988347530365 2023-01-24 03:32:19.191021: step: 872/466, loss: 0.041592665016651154 2023-01-24 03:32:19.805242: step: 874/466, loss: 0.01976536586880684 2023-01-24 03:32:20.377177: step: 876/466, loss: 0.03550330549478531 2023-01-24 03:32:20.988851: step: 878/466, loss: 0.0005371432052925229 2023-01-24 03:32:21.675199: step: 880/466, loss: 0.45359212160110474 2023-01-24 03:32:22.282549: step: 882/466, loss: 0.031113147735595703 2023-01-24 03:32:22.879414: step: 884/466, loss: 0.04073180630803108 2023-01-24 03:32:23.475652: step: 886/466, loss: 0.07079195231199265 2023-01-24 03:32:24.024253: step: 888/466, loss: 0.01618576981127262 2023-01-24 03:32:24.618405: step: 890/466, loss: 2.57959246635437 2023-01-24 03:32:25.200825: step: 892/466, loss: 0.009884090162813663 2023-01-24 03:32:25.772828: step: 894/466, loss: 0.03281768411397934 2023-01-24 03:32:26.398714: step: 896/466, loss: 0.060086511075496674 2023-01-24 03:32:27.016107: step: 898/466, loss: 0.020023701712489128 2023-01-24 03:32:27.682904: step: 900/466, loss: 0.04844291880726814 2023-01-24 03:32:28.351107: step: 902/466, loss: 0.0430406853556633 2023-01-24 03:32:28.923241: step: 904/466, loss: 0.014855911023914814 2023-01-24 03:32:29.521850: step: 906/466, loss: 0.041675664484500885 2023-01-24 03:32:30.085299: step: 908/466, loss: 0.042056307196617126 2023-01-24 03:32:30.871382: step: 910/466, loss: 0.1248590350151062 2023-01-24 03:32:31.524941: step: 912/466, loss: 0.35482367873191833 2023-01-24 03:32:32.108646: step: 914/466, loss: 0.00630500353872776 2023-01-24 03:32:32.690340: step: 916/466, loss: 0.030308715999126434 2023-01-24 03:32:33.294128: step: 918/466, loss: 0.023932820186018944 2023-01-24 03:32:33.933327: step: 920/466, loss: 0.02819758653640747 2023-01-24 03:32:34.536398: step: 922/466, loss: 0.04101166874170303 2023-01-24 03:32:35.138869: step: 924/466, loss: 0.00014712440315634012 2023-01-24 03:32:35.714181: step: 926/466, loss: 0.38159114122390747 2023-01-24 03:32:36.271030: step: 928/466, loss: 0.007751580327749252 2023-01-24 03:32:36.893929: step: 930/466, loss: 0.02519507147371769 2023-01-24 03:32:37.480055: step: 932/466, loss: 0.06680907309055328 ================================================== Loss: 0.089 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3559770613490869, 'r': 0.3289579295578849, 'f1': 0.34193457372190406}, 'combined': 0.25195179116350824, 'epoch': 25} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.33498637262079295, 'r': 0.2730078470005199, 'f1': 0.30083808500902637}, 'combined': 0.18837524949163337, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34378095862470864, 'r': 0.3392146081306423, 'f1': 0.34148251859569917}, 'combined': 0.2516186979126204, 'epoch': 25} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.35906179520653214, 'r': 0.29975826765888286, 'f1': 0.3267409351362933}, 'combined': 0.20248734008446345, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3223685881157948, 'r': 0.3284856391236846, 'f1': 0.32539836807928907}, 'combined': 0.2397672185847393, 'epoch': 25} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.34373247290599257, 'r': 0.2834394351629885, 'f1': 0.310687824866467}, 'combined': 0.20609984421834943, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3101851851851852, 'r': 0.2392857142857143, 'f1': 0.2701612903225806}, 'combined': 0.1801075268817204, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34, 'r': 0.3695652173913043, 'f1': 0.3541666666666667}, 'combined': 0.17708333333333334, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29411764705882354, 'r': 0.1724137931034483, 'f1': 0.2173913043478261}, 'combined': 0.14492753623188406, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491647627064294, 'r': 0.32200014169890834, 'f1': 0.33503272393943667}, 'combined': 0.2468662176395849, 'epoch': 21} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32932177105318117, 'r': 0.2710662953073116, 'f1': 0.2973677774262388}, 'combined': 0.1862022531547477, 'epoch': 21} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.32857142857142857, 'f1': 0.34328358208955223}, 'combined': 0.2288557213930348, 'epoch': 21} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459801534250064, 'r': 0.31249820309355414, 'f1': 0.32838794223390433}, 'combined': 0.24197006269866633, 'epoch': 11} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3781372469196784, 'r': 0.29076979636783934, 'f1': 0.3287478922199653}, 'combined': 0.20373108813631652, 'epoch': 11} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44, 'r': 0.4782608695652174, 'f1': 0.4583333333333333}, 'combined': 0.22916666666666666, 'epoch': 11} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:35:09.997611: step: 2/466, loss: 0.007889525964856148 2023-01-24 03:35:10.570001: step: 4/466, loss: 0.10208336263895035 2023-01-24 03:35:11.159385: step: 6/466, loss: 0.05998808890581131 2023-01-24 03:35:11.767489: step: 8/466, loss: 0.03341453894972801 2023-01-24 03:35:12.389745: step: 10/466, loss: 0.042030151933431625 2023-01-24 03:35:13.032030: step: 12/466, loss: 0.03125926852226257 2023-01-24 03:35:13.626456: step: 14/466, loss: 0.035212282091379166 2023-01-24 03:35:14.224064: step: 16/466, loss: 0.021042432636022568 2023-01-24 03:35:14.943852: step: 18/466, loss: 0.03849077224731445 2023-01-24 03:35:15.579875: step: 20/466, loss: 0.058912985026836395 2023-01-24 03:35:16.134875: step: 22/466, loss: 0.05570070073008537 2023-01-24 03:35:16.732964: step: 24/466, loss: 0.06585846096277237 2023-01-24 03:35:17.408783: step: 26/466, loss: 0.011703751049935818 2023-01-24 03:35:18.003236: step: 28/466, loss: 0.011850082315504551 2023-01-24 03:35:18.656370: step: 30/466, loss: 0.027266433462500572 2023-01-24 03:35:19.280089: step: 32/466, loss: 0.0038200421258807182 2023-01-24 03:35:19.911594: step: 34/466, loss: 0.18678438663482666 2023-01-24 03:35:20.549295: step: 36/466, loss: 0.04685428738594055 2023-01-24 03:35:21.330188: step: 38/466, loss: 1.2945747375488281 2023-01-24 03:35:21.938836: step: 40/466, loss: 0.005321810953319073 2023-01-24 03:35:22.564493: step: 42/466, loss: 0.0058112493716180325 2023-01-24 03:35:23.209054: step: 44/466, loss: 0.03415434807538986 2023-01-24 03:35:23.848631: step: 46/466, loss: 0.0038346522487699986 2023-01-24 03:35:24.467101: step: 48/466, loss: 0.005778027698397636 2023-01-24 03:35:25.071706: step: 50/466, loss: 0.007281619589775801 2023-01-24 03:35:25.731220: step: 52/466, loss: 0.015835389494895935 2023-01-24 03:35:26.372578: step: 54/466, loss: 0.01310531236231327 2023-01-24 03:35:27.012487: step: 56/466, loss: 0.006166370585560799 2023-01-24 03:35:27.627082: step: 58/466, loss: 0.023539837449789047 2023-01-24 03:35:28.231492: step: 60/466, loss: 0.04465335234999657 2023-01-24 03:35:28.801030: step: 62/466, loss: 0.0011540910927578807 2023-01-24 03:35:29.444629: step: 64/466, loss: 0.014557569287717342 2023-01-24 03:35:30.030931: step: 66/466, loss: 0.010300085879862309 2023-01-24 03:35:30.673250: step: 68/466, loss: 0.06224033609032631 2023-01-24 03:35:31.278094: step: 70/466, loss: 0.026488469913601875 2023-01-24 03:35:31.901513: step: 72/466, loss: 0.03434697911143303 2023-01-24 03:35:32.546741: step: 74/466, loss: 0.015540475025773048 2023-01-24 03:35:33.171457: step: 76/466, loss: 0.2161504030227661 2023-01-24 03:35:33.780430: step: 78/466, loss: 0.012048210948705673 2023-01-24 03:35:34.398783: step: 80/466, loss: 0.03682199865579605 2023-01-24 03:35:34.941048: step: 82/466, loss: 0.01592867448925972 2023-01-24 03:35:35.520661: step: 84/466, loss: 0.013426944613456726 2023-01-24 03:35:36.113938: step: 86/466, loss: 0.004081183113157749 2023-01-24 03:35:36.679137: step: 88/466, loss: 0.01988298073410988 2023-01-24 03:35:37.313947: step: 90/466, loss: 0.018079768866300583 2023-01-24 03:35:37.991422: step: 92/466, loss: 0.019703395664691925 2023-01-24 03:35:38.678332: step: 94/466, loss: 0.005619524046778679 2023-01-24 03:35:39.382370: step: 96/466, loss: 0.020142460241913795 2023-01-24 03:35:39.988069: step: 98/466, loss: 0.007503869011998177 2023-01-24 03:35:40.582146: step: 100/466, loss: 0.007635800633579493 2023-01-24 03:35:41.200193: step: 102/466, loss: 0.0380149781703949 2023-01-24 03:35:41.795893: step: 104/466, loss: 0.004585582297295332 2023-01-24 03:35:42.436088: step: 106/466, loss: 0.00025846846983768046 2023-01-24 03:35:43.097130: step: 108/466, loss: 0.018610037863254547 2023-01-24 03:35:43.684586: step: 110/466, loss: 0.013583391904830933 2023-01-24 03:35:44.302821: step: 112/466, loss: 0.014886787161231041 2023-01-24 03:35:44.902349: step: 114/466, loss: 0.1853538304567337 2023-01-24 03:35:45.494174: step: 116/466, loss: 0.011070048436522484 2023-01-24 03:35:46.037398: step: 118/466, loss: 0.007503334898501635 2023-01-24 03:35:46.677259: step: 120/466, loss: 0.4969816207885742 2023-01-24 03:35:47.302842: step: 122/466, loss: 0.013406869024038315 2023-01-24 03:35:47.939221: step: 124/466, loss: 0.23728245496749878 2023-01-24 03:35:48.612773: step: 126/466, loss: 0.0021659706253558397 2023-01-24 03:35:49.223791: step: 128/466, loss: 0.0159104373306036 2023-01-24 03:35:49.868583: step: 130/466, loss: 0.026894891634583473 2023-01-24 03:35:50.531977: step: 132/466, loss: 0.0010676357196643949 2023-01-24 03:35:51.114814: step: 134/466, loss: 0.008388376794755459 2023-01-24 03:35:51.690263: step: 136/466, loss: 0.10600942373275757 2023-01-24 03:35:52.338899: step: 138/466, loss: 0.047140851616859436 2023-01-24 03:35:52.976045: step: 140/466, loss: 0.030893616378307343 2023-01-24 03:35:53.653242: step: 142/466, loss: 0.0019499821355566382 2023-01-24 03:35:54.258965: step: 144/466, loss: 0.04567256569862366 2023-01-24 03:35:54.928093: step: 146/466, loss: 0.027375293895602226 2023-01-24 03:35:55.476042: step: 148/466, loss: 0.017524300143122673 2023-01-24 03:35:56.035935: step: 150/466, loss: 0.0002785766846500337 2023-01-24 03:35:56.606071: step: 152/466, loss: 0.0372467041015625 2023-01-24 03:35:57.184354: step: 154/466, loss: 0.04227850213646889 2023-01-24 03:35:57.798537: step: 156/466, loss: 0.02953212335705757 2023-01-24 03:35:58.413220: step: 158/466, loss: 0.005038856528699398 2023-01-24 03:35:58.980567: step: 160/466, loss: 0.00554104708135128 2023-01-24 03:35:59.597606: step: 162/466, loss: 0.04864320904016495 2023-01-24 03:36:00.214005: step: 164/466, loss: 0.08262515813112259 2023-01-24 03:36:00.913004: step: 166/466, loss: 0.024860626086592674 2023-01-24 03:36:01.521956: step: 168/466, loss: 0.009989991784095764 2023-01-24 03:36:02.150524: step: 170/466, loss: 0.002989133121445775 2023-01-24 03:36:02.841061: step: 172/466, loss: 0.08360762894153595 2023-01-24 03:36:03.450843: step: 174/466, loss: 0.008303227834403515 2023-01-24 03:36:04.093733: step: 176/466, loss: 0.03197474405169487 2023-01-24 03:36:04.695881: step: 178/466, loss: 0.02286393567919731 2023-01-24 03:36:05.257846: step: 180/466, loss: 0.008490473963320255 2023-01-24 03:36:05.918097: step: 182/466, loss: 0.05364244058728218 2023-01-24 03:36:06.572600: step: 184/466, loss: 0.22248348593711853 2023-01-24 03:36:07.174582: step: 186/466, loss: 0.007184356916695833 2023-01-24 03:36:07.862376: step: 188/466, loss: 0.007733544334769249 2023-01-24 03:36:08.487568: step: 190/466, loss: 0.0249953456223011 2023-01-24 03:36:09.101768: step: 192/466, loss: 0.03881276026368141 2023-01-24 03:36:09.725787: step: 194/466, loss: 0.06749670207500458 2023-01-24 03:36:10.308459: step: 196/466, loss: 0.06935339421033859 2023-01-24 03:36:10.948082: step: 198/466, loss: 0.012322738766670227 2023-01-24 03:36:11.580470: step: 200/466, loss: 0.07050473988056183 2023-01-24 03:36:12.229199: step: 202/466, loss: 0.00030367798171937466 2023-01-24 03:36:12.818785: step: 204/466, loss: 0.040786728262901306 2023-01-24 03:36:13.439542: step: 206/466, loss: 0.049529772251844406 2023-01-24 03:36:14.025908: step: 208/466, loss: 0.0509815588593483 2023-01-24 03:36:14.670182: step: 210/466, loss: 0.006703998893499374 2023-01-24 03:36:15.408252: step: 212/466, loss: 0.013644722290337086 2023-01-24 03:36:16.167714: step: 214/466, loss: 0.04323235899209976 2023-01-24 03:36:16.762934: step: 216/466, loss: 0.0813487246632576 2023-01-24 03:36:17.364155: step: 218/466, loss: 0.01943470537662506 2023-01-24 03:36:17.932337: step: 220/466, loss: 0.0996766909956932 2023-01-24 03:36:18.591664: step: 222/466, loss: 0.013173628598451614 2023-01-24 03:36:19.206368: step: 224/466, loss: 0.07100927829742432 2023-01-24 03:36:19.797181: step: 226/466, loss: 0.030978145077824593 2023-01-24 03:36:20.421587: step: 228/466, loss: 0.006432516500353813 2023-01-24 03:36:20.993696: step: 230/466, loss: 0.16904519498348236 2023-01-24 03:36:21.573517: step: 232/466, loss: 0.016514841467142105 2023-01-24 03:36:22.222975: step: 234/466, loss: 0.017402343451976776 2023-01-24 03:36:22.817428: step: 236/466, loss: 0.027484111487865448 2023-01-24 03:36:23.419598: step: 238/466, loss: 0.07576289772987366 2023-01-24 03:36:24.075059: step: 240/466, loss: 3.778850555419922 2023-01-24 03:36:24.686992: step: 242/466, loss: 0.04886449873447418 2023-01-24 03:36:25.314438: step: 244/466, loss: 0.10850292444229126 2023-01-24 03:36:25.977761: step: 246/466, loss: 0.0247701033949852 2023-01-24 03:36:26.525518: step: 248/466, loss: 0.004330090247094631 2023-01-24 03:36:27.172683: step: 250/466, loss: 0.4619436264038086 2023-01-24 03:36:27.784352: step: 252/466, loss: 0.01035249512642622 2023-01-24 03:36:28.348785: step: 254/466, loss: 0.009742227382957935 2023-01-24 03:36:29.019078: step: 256/466, loss: 0.01020122691988945 2023-01-24 03:36:29.591173: step: 258/466, loss: 0.007446271367371082 2023-01-24 03:36:30.194856: step: 260/466, loss: 0.15225790441036224 2023-01-24 03:36:30.797271: step: 262/466, loss: 0.014964156784117222 2023-01-24 03:36:31.353514: step: 264/466, loss: 0.01936171017587185 2023-01-24 03:36:31.898892: step: 266/466, loss: 0.0006788380560465157 2023-01-24 03:36:32.551410: step: 268/466, loss: 0.021012699231505394 2023-01-24 03:36:33.122104: step: 270/466, loss: 0.01303273718804121 2023-01-24 03:36:33.791659: step: 272/466, loss: 0.0294012650847435 2023-01-24 03:36:34.389267: step: 274/466, loss: 0.012099758721888065 2023-01-24 03:36:34.961874: step: 276/466, loss: 0.05515825003385544 2023-01-24 03:36:35.558105: step: 278/466, loss: 0.02570991776883602 2023-01-24 03:36:36.231809: step: 280/466, loss: 0.3937881588935852 2023-01-24 03:36:36.945065: step: 282/466, loss: 0.035539574921131134 2023-01-24 03:36:37.588427: step: 284/466, loss: 0.026720087975263596 2023-01-24 03:36:38.163892: step: 286/466, loss: 0.027191609144210815 2023-01-24 03:36:38.741626: step: 288/466, loss: 0.12927848100662231 2023-01-24 03:36:39.316748: step: 290/466, loss: 0.02109511010348797 2023-01-24 03:36:39.865181: step: 292/466, loss: 0.020149292424321175 2023-01-24 03:36:40.456212: step: 294/466, loss: 0.09865506738424301 2023-01-24 03:36:41.061467: step: 296/466, loss: 0.010992120020091534 2023-01-24 03:36:41.636783: step: 298/466, loss: 0.1111476942896843 2023-01-24 03:36:42.295014: step: 300/466, loss: 0.0034163333475589752 2023-01-24 03:36:42.915044: step: 302/466, loss: 0.00595430051907897 2023-01-24 03:36:43.567544: step: 304/466, loss: 0.025029225274920464 2023-01-24 03:36:44.170695: step: 306/466, loss: 0.028571361675858498 2023-01-24 03:36:44.800718: step: 308/466, loss: 0.17691342532634735 2023-01-24 03:36:45.372213: step: 310/466, loss: 0.020528914406895638 2023-01-24 03:36:45.947316: step: 312/466, loss: 0.049001604318618774 2023-01-24 03:36:46.546302: step: 314/466, loss: 0.0383269339799881 2023-01-24 03:36:47.156218: step: 316/466, loss: 0.02348129078745842 2023-01-24 03:36:47.807887: step: 318/466, loss: 0.059397753328084946 2023-01-24 03:36:48.505009: step: 320/466, loss: 0.04565797373652458 2023-01-24 03:36:49.048400: step: 322/466, loss: 0.057746272534132004 2023-01-24 03:36:49.671508: step: 324/466, loss: 0.026648664847016335 2023-01-24 03:36:50.317180: step: 326/466, loss: 0.16751256585121155 2023-01-24 03:36:50.947990: step: 328/466, loss: 0.030149096623063087 2023-01-24 03:36:51.520906: step: 330/466, loss: 0.029561301693320274 2023-01-24 03:36:52.151871: step: 332/466, loss: 0.029974963515996933 2023-01-24 03:36:52.744303: step: 334/466, loss: 0.040951766073703766 2023-01-24 03:36:53.361461: step: 336/466, loss: 0.06460101902484894 2023-01-24 03:36:53.969692: step: 338/466, loss: 0.3614656925201416 2023-01-24 03:36:54.605044: step: 340/466, loss: 0.061829160898923874 2023-01-24 03:36:55.195327: step: 342/466, loss: 0.0027699971105903387 2023-01-24 03:36:55.822740: step: 344/466, loss: 0.024076269939541817 2023-01-24 03:36:56.443904: step: 346/466, loss: 0.03276991471648216 2023-01-24 03:36:57.014132: step: 348/466, loss: 0.3131476938724518 2023-01-24 03:36:57.693818: step: 350/466, loss: 0.09210637211799622 2023-01-24 03:36:58.310399: step: 352/466, loss: 0.04329919070005417 2023-01-24 03:36:58.902441: step: 354/466, loss: 0.017054257914423943 2023-01-24 03:36:59.490446: step: 356/466, loss: 0.05004601180553436 2023-01-24 03:37:00.078127: step: 358/466, loss: 0.006824565585702658 2023-01-24 03:37:00.677390: step: 360/466, loss: 0.010391207411885262 2023-01-24 03:37:01.309824: step: 362/466, loss: 0.002363370731472969 2023-01-24 03:37:01.949811: step: 364/466, loss: 0.06585758924484253 2023-01-24 03:37:02.595554: step: 366/466, loss: 0.026189126074314117 2023-01-24 03:37:03.244973: step: 368/466, loss: 0.06721070408821106 2023-01-24 03:37:03.862022: step: 370/466, loss: 0.003301021410152316 2023-01-24 03:37:04.452174: step: 372/466, loss: 0.004242087714374065 2023-01-24 03:37:05.073809: step: 374/466, loss: 0.22085796296596527 2023-01-24 03:37:05.697534: step: 376/466, loss: 0.0024112502578645945 2023-01-24 03:37:06.327712: step: 378/466, loss: 0.28338131308555603 2023-01-24 03:37:06.924395: step: 380/466, loss: 0.0014360016211867332 2023-01-24 03:37:07.548485: step: 382/466, loss: 0.0298091322183609 2023-01-24 03:37:08.165002: step: 384/466, loss: 0.04952583461999893 2023-01-24 03:37:08.777586: step: 386/466, loss: 0.05813305824995041 2023-01-24 03:37:09.360492: step: 388/466, loss: 0.020183315500617027 2023-01-24 03:37:09.949241: step: 390/466, loss: 0.009550157934427261 2023-01-24 03:37:10.502000: step: 392/466, loss: 0.006014412268996239 2023-01-24 03:37:11.131834: step: 394/466, loss: 0.003889001440256834 2023-01-24 03:37:11.728279: step: 396/466, loss: 0.06576824188232422 2023-01-24 03:37:12.410245: step: 398/466, loss: 0.6932088136672974 2023-01-24 03:37:13.032508: step: 400/466, loss: 0.03067278116941452 2023-01-24 03:37:13.676318: step: 402/466, loss: 0.0402335599064827 2023-01-24 03:37:14.202593: step: 404/466, loss: 0.024017097428441048 2023-01-24 03:37:14.895762: step: 406/466, loss: 0.025933636352419853 2023-01-24 03:37:15.522912: step: 408/466, loss: 0.007940349169075489 2023-01-24 03:37:16.097979: step: 410/466, loss: 0.28987357020378113 2023-01-24 03:37:16.711924: step: 412/466, loss: 0.048365600407123566 2023-01-24 03:37:17.307635: step: 414/466, loss: 0.0058062439784407616 2023-01-24 03:37:17.927801: step: 416/466, loss: 0.4432596266269684 2023-01-24 03:37:18.592303: step: 418/466, loss: 0.02553720213472843 2023-01-24 03:37:19.226583: step: 420/466, loss: 0.006183108780533075 2023-01-24 03:37:19.792981: step: 422/466, loss: 0.012993517331779003 2023-01-24 03:37:20.454628: step: 424/466, loss: 0.027333056554198265 2023-01-24 03:37:21.053743: step: 426/466, loss: 0.05770184472203255 2023-01-24 03:37:21.705413: step: 428/466, loss: 0.0065458836033940315 2023-01-24 03:37:22.354371: step: 430/466, loss: 0.03590097278356552 2023-01-24 03:37:23.064509: step: 432/466, loss: 0.0036675333976745605 2023-01-24 03:37:23.703129: step: 434/466, loss: 0.004667150788009167 2023-01-24 03:37:24.358394: step: 436/466, loss: 0.039111293852329254 2023-01-24 03:37:24.973250: step: 438/466, loss: 0.07505755126476288 2023-01-24 03:37:25.554056: step: 440/466, loss: 0.015009235590696335 2023-01-24 03:37:26.141946: step: 442/466, loss: 0.012551910243928432 2023-01-24 03:37:26.713684: step: 444/466, loss: 0.0094830971211195 2023-01-24 03:37:27.364659: step: 446/466, loss: 0.01995844766497612 2023-01-24 03:37:27.964103: step: 448/466, loss: 0.06222337484359741 2023-01-24 03:37:28.628781: step: 450/466, loss: 0.4666108787059784 2023-01-24 03:37:29.273691: step: 452/466, loss: 0.08595582097768784 2023-01-24 03:37:29.962833: step: 454/466, loss: 0.06083333492279053 2023-01-24 03:37:30.610962: step: 456/466, loss: 0.017147425562143326 2023-01-24 03:37:31.289161: step: 458/466, loss: 0.008499028161168098 2023-01-24 03:37:31.863645: step: 460/466, loss: 0.003404253860935569 2023-01-24 03:37:32.458428: step: 462/466, loss: 0.059194836765527725 2023-01-24 03:37:33.072470: step: 464/466, loss: 0.023937981575727463 2023-01-24 03:37:33.706372: step: 466/466, loss: 0.24330773949623108 2023-01-24 03:37:34.332242: step: 468/466, loss: 0.7082357406616211 2023-01-24 03:37:34.954590: step: 470/466, loss: 0.1127346083521843 2023-01-24 03:37:35.606130: step: 472/466, loss: 0.004727509338408709 2023-01-24 03:37:36.240474: step: 474/466, loss: 0.019945118576288223 2023-01-24 03:37:36.926572: step: 476/466, loss: 0.013882302679121494 2023-01-24 03:37:37.542469: step: 478/466, loss: 0.02526629902422428 2023-01-24 03:37:38.186721: step: 480/466, loss: 0.01937795802950859 2023-01-24 03:37:38.794775: step: 482/466, loss: 0.007325158454477787 2023-01-24 03:37:39.454508: step: 484/466, loss: 0.13527823984622955 2023-01-24 03:37:40.090311: step: 486/466, loss: 0.04730508476495743 2023-01-24 03:37:40.720663: step: 488/466, loss: 0.024615265429019928 2023-01-24 03:37:41.293740: step: 490/466, loss: 0.020592007786035538 2023-01-24 03:37:41.892495: step: 492/466, loss: 0.024291126057505608 2023-01-24 03:37:42.423308: step: 494/466, loss: 0.0022951976861804724 2023-01-24 03:37:43.032102: step: 496/466, loss: 0.006601238623261452 2023-01-24 03:37:43.618958: step: 498/466, loss: 0.006386002991348505 2023-01-24 03:37:44.208003: step: 500/466, loss: 0.17011430859565735 2023-01-24 03:37:44.799511: step: 502/466, loss: 0.007931244559586048 2023-01-24 03:37:45.438752: step: 504/466, loss: 0.018457433208823204 2023-01-24 03:37:46.044667: step: 506/466, loss: 0.004319499246776104 2023-01-24 03:37:46.657398: step: 508/466, loss: 0.849025547504425 2023-01-24 03:37:47.306263: step: 510/466, loss: 0.03703732788562775 2023-01-24 03:37:47.985797: step: 512/466, loss: 0.6687430739402771 2023-01-24 03:37:48.612643: step: 514/466, loss: 0.0261391568928957 2023-01-24 03:37:49.221513: step: 516/466, loss: 0.058999765664339066 2023-01-24 03:37:49.845160: step: 518/466, loss: 0.4974743723869324 2023-01-24 03:37:50.506145: step: 520/466, loss: 0.02269855886697769 2023-01-24 03:37:51.071173: step: 522/466, loss: 0.041551124304533005 2023-01-24 03:37:51.617976: step: 524/466, loss: 0.031055569648742676 2023-01-24 03:37:52.227514: step: 526/466, loss: 0.020982986316084862 2023-01-24 03:37:52.937345: step: 528/466, loss: 0.037243373692035675 2023-01-24 03:37:53.567082: step: 530/466, loss: 0.06426267325878143 2023-01-24 03:37:54.255002: step: 532/466, loss: 0.03899231553077698 2023-01-24 03:37:54.858252: step: 534/466, loss: 0.038331370800733566 2023-01-24 03:37:55.498056: step: 536/466, loss: 0.012957875616848469 2023-01-24 03:37:56.039319: step: 538/466, loss: 0.0284006018191576 2023-01-24 03:37:56.669130: step: 540/466, loss: 0.042280081659555435 2023-01-24 03:37:57.321348: step: 542/466, loss: 0.09956970810890198 2023-01-24 03:37:58.010364: step: 544/466, loss: 0.02581607550382614 2023-01-24 03:37:58.607289: step: 546/466, loss: 0.0029468508437275887 2023-01-24 03:37:59.233837: step: 548/466, loss: 0.02818090282380581 2023-01-24 03:37:59.798690: step: 550/466, loss: 0.004466727375984192 2023-01-24 03:38:00.398937: step: 552/466, loss: 0.01916109025478363 2023-01-24 03:38:01.000951: step: 554/466, loss: 0.0018156702863052487 2023-01-24 03:38:01.607929: step: 556/466, loss: 0.054775774478912354 2023-01-24 03:38:02.203349: step: 558/466, loss: 0.006064811255782843 2023-01-24 03:38:02.838640: step: 560/466, loss: 0.010558255948126316 2023-01-24 03:38:03.419569: step: 562/466, loss: 0.01179610937833786 2023-01-24 03:38:04.037939: step: 564/466, loss: 0.010723483748733997 2023-01-24 03:38:04.638698: step: 566/466, loss: 0.008093049749732018 2023-01-24 03:38:05.244940: step: 568/466, loss: 0.1264907270669937 2023-01-24 03:38:05.928543: step: 570/466, loss: 0.0295632965862751 2023-01-24 03:38:06.610385: step: 572/466, loss: 0.09722160547971725 2023-01-24 03:38:07.236412: step: 574/466, loss: 0.029360679909586906 2023-01-24 03:38:07.916961: step: 576/466, loss: 0.06597114354372025 2023-01-24 03:38:08.640486: step: 578/466, loss: 0.012875804677605629 2023-01-24 03:38:09.214196: step: 580/466, loss: 0.013631662353873253 2023-01-24 03:38:09.858575: step: 582/466, loss: 0.026635481044650078 2023-01-24 03:38:10.481711: step: 584/466, loss: 0.01695885695517063 2023-01-24 03:38:11.099230: step: 586/466, loss: 0.0008552673971280456 2023-01-24 03:38:11.687245: step: 588/466, loss: 0.0036454915534704924 2023-01-24 03:38:12.267685: step: 590/466, loss: 0.06252723932266235 2023-01-24 03:38:12.960321: step: 592/466, loss: 0.00505690835416317 2023-01-24 03:38:13.627981: step: 594/466, loss: 0.01895541325211525 2023-01-24 03:38:14.205945: step: 596/466, loss: 0.01808973215520382 2023-01-24 03:38:14.871983: step: 598/466, loss: 0.04847077652812004 2023-01-24 03:38:15.449241: step: 600/466, loss: 0.050853267312049866 2023-01-24 03:38:16.054877: step: 602/466, loss: 0.008160309866070747 2023-01-24 03:38:16.719764: step: 604/466, loss: 0.023656044155359268 2023-01-24 03:38:17.375626: step: 606/466, loss: 0.04210818558931351 2023-01-24 03:38:17.953389: step: 608/466, loss: 0.016821881756186485 2023-01-24 03:38:18.534524: step: 610/466, loss: 0.570462703704834 2023-01-24 03:38:19.218015: step: 612/466, loss: 0.055564701557159424 2023-01-24 03:38:19.831543: step: 614/466, loss: 0.04362868517637253 2023-01-24 03:38:20.400720: step: 616/466, loss: 0.019416680559515953 2023-01-24 03:38:20.980024: step: 618/466, loss: 0.016455095261335373 2023-01-24 03:38:21.602035: step: 620/466, loss: 0.008576720021665096 2023-01-24 03:38:22.162623: step: 622/466, loss: 0.04641371965408325 2023-01-24 03:38:22.719061: step: 624/466, loss: 0.14390678703784943 2023-01-24 03:38:23.321256: step: 626/466, loss: 0.0321093313395977 2023-01-24 03:38:23.937586: step: 628/466, loss: 0.06703966856002808 2023-01-24 03:38:24.557067: step: 630/466, loss: 0.05470241233706474 2023-01-24 03:38:25.135997: step: 632/466, loss: 0.022950900718569756 2023-01-24 03:38:25.871823: step: 634/466, loss: 0.02484145201742649 2023-01-24 03:38:26.514123: step: 636/466, loss: 0.08635030686855316 2023-01-24 03:38:27.138439: step: 638/466, loss: 0.0073463390581309795 2023-01-24 03:38:27.776251: step: 640/466, loss: 0.0007146025891415775 2023-01-24 03:38:28.396080: step: 642/466, loss: 0.029871122911572456 2023-01-24 03:38:29.048717: step: 644/466, loss: 0.02813507243990898 2023-01-24 03:38:29.722488: step: 646/466, loss: 0.009793507866561413 2023-01-24 03:38:30.303175: step: 648/466, loss: 0.0316142812371254 2023-01-24 03:38:30.888604: step: 650/466, loss: 0.028614122420549393 2023-01-24 03:38:31.486902: step: 652/466, loss: 0.019259123131632805 2023-01-24 03:38:32.158636: step: 654/466, loss: 0.06576264649629593 2023-01-24 03:38:32.793766: step: 656/466, loss: 0.10803041607141495 2023-01-24 03:38:33.448030: step: 658/466, loss: 0.06235981360077858 2023-01-24 03:38:34.102132: step: 660/466, loss: 0.012615116313099861 2023-01-24 03:38:34.700002: step: 662/466, loss: 0.001386008458212018 2023-01-24 03:38:35.365402: step: 664/466, loss: 0.017653729766607285 2023-01-24 03:38:35.953694: step: 666/466, loss: 0.01049298606812954 2023-01-24 03:38:36.659685: step: 668/466, loss: 0.009629614651203156 2023-01-24 03:38:37.311883: step: 670/466, loss: 0.014639287255704403 2023-01-24 03:38:38.011623: step: 672/466, loss: 0.01156079862266779 2023-01-24 03:38:38.647308: step: 674/466, loss: 0.01873650960624218 2023-01-24 03:38:39.358316: step: 676/466, loss: 0.019368216395378113 2023-01-24 03:38:40.002327: step: 678/466, loss: 0.05814380198717117 2023-01-24 03:38:40.572578: step: 680/466, loss: 0.01892939954996109 2023-01-24 03:38:41.228584: step: 682/466, loss: 0.025773027911782265 2023-01-24 03:38:41.828950: step: 684/466, loss: 0.00873519852757454 2023-01-24 03:38:42.429524: step: 686/466, loss: 0.0031690082978457212 2023-01-24 03:38:43.052726: step: 688/466, loss: 0.029933644458651543 2023-01-24 03:38:43.617028: step: 690/466, loss: 0.011398996226489544 2023-01-24 03:38:44.204526: step: 692/466, loss: 0.039612822234630585 2023-01-24 03:38:44.814967: step: 694/466, loss: 0.03607015311717987 2023-01-24 03:38:45.436900: step: 696/466, loss: 0.008851285092532635 2023-01-24 03:38:46.037915: step: 698/466, loss: 0.0515628457069397 2023-01-24 03:38:46.710407: step: 700/466, loss: 0.021373560652136803 2023-01-24 03:38:47.344378: step: 702/466, loss: 0.13975434005260468 2023-01-24 03:38:47.972034: step: 704/466, loss: 0.04040196165442467 2023-01-24 03:38:48.589422: step: 706/466, loss: 0.03192339837551117 2023-01-24 03:38:49.213602: step: 708/466, loss: 0.04951074719429016 2023-01-24 03:38:49.922242: step: 710/466, loss: 0.02514973282814026 2023-01-24 03:38:50.471724: step: 712/466, loss: 0.24668730795383453 2023-01-24 03:38:51.098934: step: 714/466, loss: 0.0209896732121706 2023-01-24 03:38:51.717665: step: 716/466, loss: 1.013275146484375 2023-01-24 03:38:52.236102: step: 718/466, loss: 0.0603434182703495 2023-01-24 03:38:52.845109: step: 720/466, loss: 0.05530453100800514 2023-01-24 03:38:53.448688: step: 722/466, loss: 0.02282405085861683 2023-01-24 03:38:54.092616: step: 724/466, loss: 0.49871450662612915 2023-01-24 03:38:54.663995: step: 726/466, loss: 0.016372518613934517 2023-01-24 03:38:55.263780: step: 728/466, loss: 0.02990126982331276 2023-01-24 03:38:55.861247: step: 730/466, loss: 0.011705626733601093 2023-01-24 03:38:56.478198: step: 732/466, loss: 0.008233029395341873 2023-01-24 03:38:57.104766: step: 734/466, loss: 0.09673385322093964 2023-01-24 03:38:57.685437: step: 736/466, loss: 0.04583362862467766 2023-01-24 03:38:58.307096: step: 738/466, loss: 0.007076576817780733 2023-01-24 03:38:58.921381: step: 740/466, loss: 0.001303449971601367 2023-01-24 03:38:59.598942: step: 742/466, loss: 0.12852562963962555 2023-01-24 03:39:00.213770: step: 744/466, loss: 0.016642574220895767 2023-01-24 03:39:00.819956: step: 746/466, loss: 0.007977128028869629 2023-01-24 03:39:01.484570: step: 748/466, loss: 0.12341004610061646 2023-01-24 03:39:02.143029: step: 750/466, loss: 0.010951337404549122 2023-01-24 03:39:02.816903: step: 752/466, loss: 0.016577934846282005 2023-01-24 03:39:03.430955: step: 754/466, loss: 0.03315458819270134 2023-01-24 03:39:04.085206: step: 756/466, loss: 0.06137204170227051 2023-01-24 03:39:04.734347: step: 758/466, loss: 0.02198304980993271 2023-01-24 03:39:05.360106: step: 760/466, loss: 0.03844422474503517 2023-01-24 03:39:05.909673: step: 762/466, loss: 0.681039571762085 2023-01-24 03:39:06.524088: step: 764/466, loss: 0.03540460020303726 2023-01-24 03:39:07.058170: step: 766/466, loss: 0.02652108296751976 2023-01-24 03:39:07.664510: step: 768/466, loss: 0.12025786936283112 2023-01-24 03:39:08.303099: step: 770/466, loss: 0.006684943102300167 2023-01-24 03:39:08.921089: step: 772/466, loss: 0.026162046939134598 2023-01-24 03:39:09.531856: step: 774/466, loss: 0.007701580412685871 2023-01-24 03:39:10.262983: step: 776/466, loss: 0.0641896203160286 2023-01-24 03:39:10.894500: step: 778/466, loss: 0.05006628483533859 2023-01-24 03:39:11.507526: step: 780/466, loss: 0.03685498610138893 2023-01-24 03:39:12.240690: step: 782/466, loss: 0.011374561116099358 2023-01-24 03:39:12.973252: step: 784/466, loss: 0.024460364133119583 2023-01-24 03:39:13.541798: step: 786/466, loss: 0.04770936444401741 2023-01-24 03:39:14.171393: step: 788/466, loss: 0.01947230100631714 2023-01-24 03:39:14.780089: step: 790/466, loss: 0.0377841480076313 2023-01-24 03:39:15.329907: step: 792/466, loss: 0.015739483758807182 2023-01-24 03:39:15.945570: step: 794/466, loss: 0.017583288252353668 2023-01-24 03:39:16.588946: step: 796/466, loss: 0.1661037802696228 2023-01-24 03:39:17.356153: step: 798/466, loss: 0.04773499444127083 2023-01-24 03:39:17.985646: step: 800/466, loss: 0.0025100763887166977 2023-01-24 03:39:18.588359: step: 802/466, loss: 0.02382386289536953 2023-01-24 03:39:19.213297: step: 804/466, loss: 0.13822172582149506 2023-01-24 03:39:19.783802: step: 806/466, loss: 0.021848829463124275 2023-01-24 03:39:20.375742: step: 808/466, loss: 0.1822071373462677 2023-01-24 03:39:20.989658: step: 810/466, loss: 0.019787373021245003 2023-01-24 03:39:21.566232: step: 812/466, loss: 0.02656339854001999 2023-01-24 03:39:22.132738: step: 814/466, loss: 0.007962155155837536 2023-01-24 03:39:22.726694: step: 816/466, loss: 0.04875000938773155 2023-01-24 03:39:23.364305: step: 818/466, loss: 0.014167295768857002 2023-01-24 03:39:23.991210: step: 820/466, loss: 0.0020671940874308348 2023-01-24 03:39:24.616499: step: 822/466, loss: 0.0077877468429505825 2023-01-24 03:39:25.348629: step: 824/466, loss: 0.01249811053276062 2023-01-24 03:39:25.931227: step: 826/466, loss: 0.04537919536232948 2023-01-24 03:39:26.632115: step: 828/466, loss: 0.01676964946091175 2023-01-24 03:39:27.265570: step: 830/466, loss: 0.04484226927161217 2023-01-24 03:39:27.926614: step: 832/466, loss: 0.03277130797505379 2023-01-24 03:39:28.553279: step: 834/466, loss: 0.0016892838757485151 2023-01-24 03:39:29.241892: step: 836/466, loss: 0.06757955253124237 2023-01-24 03:39:29.898041: step: 838/466, loss: 0.015835018828511238 2023-01-24 03:39:30.442949: step: 840/466, loss: 0.04168068617582321 2023-01-24 03:39:31.037658: step: 842/466, loss: 0.015160385519266129 2023-01-24 03:39:31.801873: step: 844/466, loss: 0.09161270409822464 2023-01-24 03:39:32.399831: step: 846/466, loss: 0.03056454285979271 2023-01-24 03:39:32.995187: step: 848/466, loss: 0.009426084347069263 2023-01-24 03:39:33.654208: step: 850/466, loss: 0.04484337940812111 2023-01-24 03:39:34.250044: step: 852/466, loss: 0.02648405358195305 2023-01-24 03:39:34.889008: step: 854/466, loss: 0.019934559240937233 2023-01-24 03:39:35.504425: step: 856/466, loss: 0.009772099554538727 2023-01-24 03:39:36.079041: step: 858/466, loss: 0.0010070321150124073 2023-01-24 03:39:36.793172: step: 860/466, loss: 0.0026540574617683887 2023-01-24 03:39:37.452988: step: 862/466, loss: 0.01820238307118416 2023-01-24 03:39:38.029827: step: 864/466, loss: 0.012328501790761948 2023-01-24 03:39:38.652038: step: 866/466, loss: 0.06458301842212677 2023-01-24 03:39:39.262180: step: 868/466, loss: 0.06785338371992111 2023-01-24 03:39:39.911815: step: 870/466, loss: 0.07444152981042862 2023-01-24 03:39:40.510289: step: 872/466, loss: 0.006764355581253767 2023-01-24 03:39:41.086577: step: 874/466, loss: 0.05087079107761383 2023-01-24 03:39:41.641223: step: 876/466, loss: 0.01839734986424446 2023-01-24 03:39:42.215320: step: 878/466, loss: 0.039690256118774414 2023-01-24 03:39:42.882071: step: 880/466, loss: 0.04079141840338707 2023-01-24 03:39:43.472631: step: 882/466, loss: 0.011542139574885368 2023-01-24 03:39:44.051513: step: 884/466, loss: 0.07851341366767883 2023-01-24 03:39:44.675412: step: 886/466, loss: 0.01554762851446867 2023-01-24 03:39:45.208542: step: 888/466, loss: 0.010157089680433273 2023-01-24 03:39:45.843519: step: 890/466, loss: 0.016509560868144035 2023-01-24 03:39:46.475746: step: 892/466, loss: 0.04192443937063217 2023-01-24 03:39:47.084936: step: 894/466, loss: 0.04110085219144821 2023-01-24 03:39:47.689483: step: 896/466, loss: 0.013482372276484966 2023-01-24 03:39:48.300056: step: 898/466, loss: 0.1777517944574356 2023-01-24 03:39:48.952846: step: 900/466, loss: 0.11353351175785065 2023-01-24 03:39:49.552702: step: 902/466, loss: 0.002722186967730522 2023-01-24 03:39:50.222587: step: 904/466, loss: 0.029758060351014137 2023-01-24 03:39:50.801562: step: 906/466, loss: 0.005375860724598169 2023-01-24 03:39:51.403038: step: 908/466, loss: 0.00414345832541585 2023-01-24 03:39:51.992975: step: 910/466, loss: 0.04140179604291916 2023-01-24 03:39:52.606348: step: 912/466, loss: 0.0016417494043707848 2023-01-24 03:39:53.223672: step: 914/466, loss: 0.026578109711408615 2023-01-24 03:39:53.872279: step: 916/466, loss: 0.15591458976268768 2023-01-24 03:39:54.560615: step: 918/466, loss: 0.01798109896481037 2023-01-24 03:39:55.207030: step: 920/466, loss: 0.006656917277723551 2023-01-24 03:39:55.805731: step: 922/466, loss: 0.004008627030998468 2023-01-24 03:39:56.434096: step: 924/466, loss: 0.038623325526714325 2023-01-24 03:39:57.045137: step: 926/466, loss: 0.006900690030306578 2023-01-24 03:39:57.661530: step: 928/466, loss: 0.013307107612490654 2023-01-24 03:39:58.344158: step: 930/466, loss: 0.9667753577232361 2023-01-24 03:39:58.873813: step: 932/466, loss: 0.07520213723182678 ================================================== Loss: 0.069 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35715689109651955, 'r': 0.3347922280866806, 'f1': 0.34561313261837545}, 'combined': 0.25466230824511876, 'epoch': 26} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3613680982251036, 'r': 0.2805571979908153, 'f1': 0.3158760736769687}, 'combined': 0.19779156015286825, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459375840731773, 'r': 0.34856329628625643, 'f1': 0.34724547664056177}, 'combined': 0.2558650880509402, 'epoch': 26} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36646549337427325, 'r': 0.29502456686809725, 'f1': 0.3268872200898517}, 'combined': 0.20257799554864048, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31886262731783754, 'r': 0.3309636757928978, 'f1': 0.32480047885075813}, 'combined': 0.2393266686268744, 'epoch': 26} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3581746565642706, 'r': 0.28505137527834, 'f1': 0.31745661210575693}, 'combined': 0.21059002981272987, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30743243243243246, 'r': 0.325, 'f1': 0.31597222222222227}, 'combined': 0.21064814814814817, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44565217391304346, 'r': 0.44565217391304346, 'f1': 0.44565217391304346}, 'combined': 0.22282608695652173, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36666666666666664, 'r': 0.1896551724137931, 'f1': 0.25}, 'combined': 0.16666666666666666, 'epoch': 26} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491647627064294, 'r': 0.32200014169890834, 'f1': 0.33503272393943667}, 'combined': 0.2468662176395849, 'epoch': 21} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32932177105318117, 'r': 0.2710662953073116, 'f1': 0.2973677774262388}, 'combined': 0.1862022531547477, 'epoch': 21} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.32857142857142857, 'f1': 0.34328358208955223}, 'combined': 0.2288557213930348, 'epoch': 21} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459375840731773, 'r': 0.34856329628625643, 'f1': 0.34724547664056177}, 'combined': 0.2558650880509402, 'epoch': 26} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36646549337427325, 'r': 0.29502456686809725, 'f1': 0.3268872200898517}, 'combined': 0.20257799554864048, 'epoch': 26} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.44565217391304346, 'r': 0.44565217391304346, 'f1': 0.44565217391304346}, 'combined': 0.22282608695652173, 'epoch': 26} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:42:39.258046: step: 2/466, loss: 0.00473618321120739 2023-01-24 03:42:39.968190: step: 4/466, loss: 0.021933015435934067 2023-01-24 03:42:40.522425: step: 6/466, loss: 0.01194464135915041 2023-01-24 03:42:41.138687: step: 8/466, loss: 0.0030708627309650183 2023-01-24 03:42:41.765293: step: 10/466, loss: 0.08510902523994446 2023-01-24 03:42:42.435225: step: 12/466, loss: 0.03904871270060539 2023-01-24 03:42:43.178811: step: 14/466, loss: 0.03710261359810829 2023-01-24 03:42:43.808173: step: 16/466, loss: 0.03612261265516281 2023-01-24 03:42:44.420127: step: 18/466, loss: 0.02608722262084484 2023-01-24 03:42:45.052088: step: 20/466, loss: 0.02009262517094612 2023-01-24 03:42:45.705355: step: 22/466, loss: 0.00886100996285677 2023-01-24 03:42:46.317618: step: 24/466, loss: 0.006947273854166269 2023-01-24 03:42:46.906100: step: 26/466, loss: 0.5168101191520691 2023-01-24 03:42:47.512523: step: 28/466, loss: 0.012647329829633236 2023-01-24 03:42:48.216870: step: 30/466, loss: 0.07058289647102356 2023-01-24 03:42:48.848407: step: 32/466, loss: 0.5412499308586121 2023-01-24 03:42:49.456071: step: 34/466, loss: 1.4842169284820557 2023-01-24 03:42:50.073281: step: 36/466, loss: 0.008719472214579582 2023-01-24 03:42:50.661765: step: 38/466, loss: 0.1013767272233963 2023-01-24 03:42:51.312702: step: 40/466, loss: 0.018471570685505867 2023-01-24 03:42:51.883360: step: 42/466, loss: 0.01053072139620781 2023-01-24 03:42:52.511232: step: 44/466, loss: 0.04705832898616791 2023-01-24 03:42:53.105011: step: 46/466, loss: 0.015392724424600601 2023-01-24 03:42:53.791445: step: 48/466, loss: 0.1451658457517624 2023-01-24 03:42:54.412186: step: 50/466, loss: 0.0006970667745918036 2023-01-24 03:42:55.035532: step: 52/466, loss: 0.017020707949995995 2023-01-24 03:42:55.802510: step: 54/466, loss: 0.01852017641067505 2023-01-24 03:42:56.384466: step: 56/466, loss: 0.0063835615292191505 2023-01-24 03:42:57.057856: step: 58/466, loss: 1.0854027271270752 2023-01-24 03:42:57.693353: step: 60/466, loss: 0.0040357052348554134 2023-01-24 03:42:58.276082: step: 62/466, loss: 0.015983114019036293 2023-01-24 03:42:58.821659: step: 64/466, loss: 0.019109312444925308 2023-01-24 03:42:59.419516: step: 66/466, loss: 0.018166447058320045 2023-01-24 03:43:00.040225: step: 68/466, loss: 0.006958534475415945 2023-01-24 03:43:00.715978: step: 70/466, loss: 0.018533293157815933 2023-01-24 03:43:01.251477: step: 72/466, loss: 0.0011832513846457005 2023-01-24 03:43:01.865813: step: 74/466, loss: 0.00281977909617126 2023-01-24 03:43:02.433921: step: 76/466, loss: 0.02246713638305664 2023-01-24 03:43:03.059191: step: 78/466, loss: 0.010963533073663712 2023-01-24 03:43:03.629753: step: 80/466, loss: 0.1575906127691269 2023-01-24 03:43:04.245599: step: 82/466, loss: 0.004155725706368685 2023-01-24 03:43:04.840772: step: 84/466, loss: 0.004825697746127844 2023-01-24 03:43:05.454971: step: 86/466, loss: 0.09241409599781036 2023-01-24 03:43:06.079268: step: 88/466, loss: 0.036858055740594864 2023-01-24 03:43:06.699374: step: 90/466, loss: 0.025953615084290504 2023-01-24 03:43:07.288914: step: 92/466, loss: 0.06212431564927101 2023-01-24 03:43:07.978048: step: 94/466, loss: 0.009525452740490437 2023-01-24 03:43:08.667202: step: 96/466, loss: 0.009087126702070236 2023-01-24 03:43:09.246640: step: 98/466, loss: 0.11408080905675888 2023-01-24 03:43:09.860306: step: 100/466, loss: 0.0030554882250726223 2023-01-24 03:43:10.624333: step: 102/466, loss: 0.0033673925790935755 2023-01-24 03:43:11.224475: step: 104/466, loss: 0.02337544783949852 2023-01-24 03:43:11.816587: step: 106/466, loss: 0.002787669189274311 2023-01-24 03:43:12.456069: step: 108/466, loss: 0.013979383744299412 2023-01-24 03:43:13.071161: step: 110/466, loss: 0.007575168740004301 2023-01-24 03:43:13.707020: step: 112/466, loss: 0.04922885447740555 2023-01-24 03:43:14.315562: step: 114/466, loss: 0.10226202756166458 2023-01-24 03:43:14.905696: step: 116/466, loss: 0.018735816702246666 2023-01-24 03:43:15.521129: step: 118/466, loss: 0.030529620125889778 2023-01-24 03:43:16.091010: step: 120/466, loss: 0.06682250648736954 2023-01-24 03:43:16.722934: step: 122/466, loss: 0.02575521729886532 2023-01-24 03:43:17.343716: step: 124/466, loss: 0.025370188057422638 2023-01-24 03:43:17.911154: step: 126/466, loss: 0.0212542787194252 2023-01-24 03:43:18.499779: step: 128/466, loss: 0.10935328155755997 2023-01-24 03:43:19.109548: step: 130/466, loss: 0.0003061419993173331 2023-01-24 03:43:19.737509: step: 132/466, loss: 0.08963587135076523 2023-01-24 03:43:20.429238: step: 134/466, loss: 0.01148142758756876 2023-01-24 03:43:21.037537: step: 136/466, loss: 0.10679512470960617 2023-01-24 03:43:21.582451: step: 138/466, loss: 0.02435310184955597 2023-01-24 03:43:22.249312: step: 140/466, loss: 0.013863036409020424 2023-01-24 03:43:22.883379: step: 142/466, loss: 0.0051196301355957985 2023-01-24 03:43:23.477753: step: 144/466, loss: 0.13050124049186707 2023-01-24 03:43:24.138559: step: 146/466, loss: 0.015986185520887375 2023-01-24 03:43:24.772605: step: 148/466, loss: 0.07870206981897354 2023-01-24 03:43:25.395775: step: 150/466, loss: 0.009278064593672752 2023-01-24 03:43:25.974530: step: 152/466, loss: 0.019551480188965797 2023-01-24 03:43:26.587676: step: 154/466, loss: 0.03024390898644924 2023-01-24 03:43:27.119412: step: 156/466, loss: 0.024581238627433777 2023-01-24 03:43:27.770414: step: 158/466, loss: 0.022428328171372414 2023-01-24 03:43:28.432926: step: 160/466, loss: 0.05050790309906006 2023-01-24 03:43:29.062818: step: 162/466, loss: 0.043015267699956894 2023-01-24 03:43:29.687470: step: 164/466, loss: 0.014434296637773514 2023-01-24 03:43:30.323373: step: 166/466, loss: 0.006054923869669437 2023-01-24 03:43:30.890103: step: 168/466, loss: 0.013495230115950108 2023-01-24 03:43:31.525456: step: 170/466, loss: 0.08444388955831528 2023-01-24 03:43:32.138182: step: 172/466, loss: 0.11862921714782715 2023-01-24 03:43:32.761426: step: 174/466, loss: 0.00015126168727874756 2023-01-24 03:43:33.350869: step: 176/466, loss: 0.011310840956866741 2023-01-24 03:43:34.006249: step: 178/466, loss: 0.022712282836437225 2023-01-24 03:43:34.584684: step: 180/466, loss: 0.002795211272314191 2023-01-24 03:43:35.240338: step: 182/466, loss: 0.6242778301239014 2023-01-24 03:43:35.927089: step: 184/466, loss: 0.046207040548324585 2023-01-24 03:43:36.565428: step: 186/466, loss: 0.015958664938807487 2023-01-24 03:43:37.288738: step: 188/466, loss: 0.009933280758559704 2023-01-24 03:43:37.910281: step: 190/466, loss: 0.029680395498871803 2023-01-24 03:43:38.482924: step: 192/466, loss: 0.022134831175208092 2023-01-24 03:43:39.053315: step: 194/466, loss: 0.00868904311209917 2023-01-24 03:43:39.672646: step: 196/466, loss: 0.05578196421265602 2023-01-24 03:43:40.282100: step: 198/466, loss: 0.002785441931337118 2023-01-24 03:43:40.877354: step: 200/466, loss: 0.009689037688076496 2023-01-24 03:43:41.510905: step: 202/466, loss: 0.019714631140232086 2023-01-24 03:43:42.211941: step: 204/466, loss: 0.028808506205677986 2023-01-24 03:43:42.878679: step: 206/466, loss: 0.197043776512146 2023-01-24 03:43:43.478792: step: 208/466, loss: 0.036407601088285446 2023-01-24 03:43:44.103094: step: 210/466, loss: 0.013742203824222088 2023-01-24 03:43:44.661591: step: 212/466, loss: 0.0034784136805683374 2023-01-24 03:43:45.268861: step: 214/466, loss: 0.007334389258176088 2023-01-24 03:43:45.805170: step: 216/466, loss: 2.303227186203003 2023-01-24 03:43:46.383675: step: 218/466, loss: 0.0018344019772484899 2023-01-24 03:43:47.049050: step: 220/466, loss: 0.02117347903549671 2023-01-24 03:43:47.722024: step: 222/466, loss: 0.006467959377914667 2023-01-24 03:43:48.335101: step: 224/466, loss: 0.032383497804403305 2023-01-24 03:43:48.919112: step: 226/466, loss: 0.019396239891648293 2023-01-24 03:43:49.521382: step: 228/466, loss: 0.021252932026982307 2023-01-24 03:43:50.138292: step: 230/466, loss: 0.009483873844146729 2023-01-24 03:43:50.802219: step: 232/466, loss: 0.004131697118282318 2023-01-24 03:43:51.416429: step: 234/466, loss: 0.0014794659800827503 2023-01-24 03:43:52.013998: step: 236/466, loss: 0.04074552655220032 2023-01-24 03:43:52.657167: step: 238/466, loss: 0.005354260094463825 2023-01-24 03:43:53.315591: step: 240/466, loss: 0.0849388837814331 2023-01-24 03:43:53.921630: step: 242/466, loss: 0.02900581806898117 2023-01-24 03:43:54.512449: step: 244/466, loss: 0.022469066083431244 2023-01-24 03:43:55.111945: step: 246/466, loss: 0.007856997661292553 2023-01-24 03:43:55.706686: step: 248/466, loss: 0.23080813884735107 2023-01-24 03:43:56.298378: step: 250/466, loss: 0.030895939096808434 2023-01-24 03:43:56.889437: step: 252/466, loss: 0.03448798879981041 2023-01-24 03:43:57.474461: step: 254/466, loss: 0.03563670441508293 2023-01-24 03:43:58.094155: step: 256/466, loss: 0.00970627460628748 2023-01-24 03:43:58.747174: step: 258/466, loss: 0.01935366541147232 2023-01-24 03:43:59.445076: step: 260/466, loss: 0.006309571210294962 2023-01-24 03:44:00.056465: step: 262/466, loss: 0.06405292451381683 2023-01-24 03:44:00.640564: step: 264/466, loss: 0.006565271411091089 2023-01-24 03:44:01.226692: step: 266/466, loss: 0.057645246386528015 2023-01-24 03:44:01.831514: step: 268/466, loss: 0.024342840537428856 2023-01-24 03:44:02.425564: step: 270/466, loss: 0.0457969568669796 2023-01-24 03:44:03.166239: step: 272/466, loss: 0.1900371015071869 2023-01-24 03:44:03.778827: step: 274/466, loss: 0.034951284527778625 2023-01-24 03:44:04.435455: step: 276/466, loss: 0.048316992819309235 2023-01-24 03:44:05.022625: step: 278/466, loss: 0.04126692935824394 2023-01-24 03:44:05.598291: step: 280/466, loss: 0.04981108009815216 2023-01-24 03:44:06.182108: step: 282/466, loss: 0.0024086672347038984 2023-01-24 03:44:06.810759: step: 284/466, loss: 0.08330139517784119 2023-01-24 03:44:07.419460: step: 286/466, loss: 0.008298112079501152 2023-01-24 03:44:08.028767: step: 288/466, loss: 0.011200077831745148 2023-01-24 03:44:08.603674: step: 290/466, loss: 0.0011140488786622882 2023-01-24 03:44:09.176141: step: 292/466, loss: 0.0024151839315891266 2023-01-24 03:44:09.762439: step: 294/466, loss: 0.008168731816112995 2023-01-24 03:44:10.296126: step: 296/466, loss: 0.010240050032734871 2023-01-24 03:44:10.962873: step: 298/466, loss: 0.014512955211102962 2023-01-24 03:44:11.559616: step: 300/466, loss: 0.009714333340525627 2023-01-24 03:44:12.168781: step: 302/466, loss: 0.08640128374099731 2023-01-24 03:44:12.691896: step: 304/466, loss: 0.04933005943894386 2023-01-24 03:44:13.296959: step: 306/466, loss: 0.05029614269733429 2023-01-24 03:44:13.873409: step: 308/466, loss: 0.05182838439941406 2023-01-24 03:44:14.487969: step: 310/466, loss: 0.005064661148935556 2023-01-24 03:44:15.118453: step: 312/466, loss: 0.026809820905327797 2023-01-24 03:44:15.749978: step: 314/466, loss: 0.002669850131496787 2023-01-24 03:44:16.327990: step: 316/466, loss: 0.027825424447655678 2023-01-24 03:44:16.982767: step: 318/466, loss: 0.0005731211858801544 2023-01-24 03:44:17.621943: step: 320/466, loss: 0.016787687316536903 2023-01-24 03:44:18.251862: step: 322/466, loss: 0.017717620357871056 2023-01-24 03:44:18.877620: step: 324/466, loss: 0.04581868648529053 2023-01-24 03:44:19.466119: step: 326/466, loss: 0.017648017033934593 2023-01-24 03:44:20.095497: step: 328/466, loss: 0.014814457856118679 2023-01-24 03:44:20.703375: step: 330/466, loss: 0.010334369726479053 2023-01-24 03:44:21.365292: step: 332/466, loss: 0.001626111101359129 2023-01-24 03:44:22.002398: step: 334/466, loss: 0.0026082699187099934 2023-01-24 03:44:22.640327: step: 336/466, loss: 0.008969327434897423 2023-01-24 03:44:23.236537: step: 338/466, loss: 0.02104179561138153 2023-01-24 03:44:23.849390: step: 340/466, loss: 0.042626965790987015 2023-01-24 03:44:24.444582: step: 342/466, loss: 0.016393663361668587 2023-01-24 03:44:25.075791: step: 344/466, loss: 0.0356920026242733 2023-01-24 03:44:25.701430: step: 346/466, loss: 0.017763594165444374 2023-01-24 03:44:26.345634: step: 348/466, loss: 0.02152266539633274 2023-01-24 03:44:26.981634: step: 350/466, loss: 0.07866132259368896 2023-01-24 03:44:27.608233: step: 352/466, loss: 0.010813960805535316 2023-01-24 03:44:28.212202: step: 354/466, loss: 0.002389649162068963 2023-01-24 03:44:28.836628: step: 356/466, loss: 0.006263962481170893 2023-01-24 03:44:29.468834: step: 358/466, loss: 0.028872860595583916 2023-01-24 03:44:30.097501: step: 360/466, loss: 0.03761502727866173 2023-01-24 03:44:30.758976: step: 362/466, loss: 0.01260085217654705 2023-01-24 03:44:31.330697: step: 364/466, loss: 0.12152864784002304 2023-01-24 03:44:31.987716: step: 366/466, loss: 0.14280076324939728 2023-01-24 03:44:32.577551: step: 368/466, loss: 0.028131060302257538 2023-01-24 03:44:33.173931: step: 370/466, loss: 0.008670762181282043 2023-01-24 03:44:33.865350: step: 372/466, loss: 0.02223077416419983 2023-01-24 03:44:34.448960: step: 374/466, loss: 0.0015590013936161995 2023-01-24 03:44:35.052882: step: 376/466, loss: 0.04011651873588562 2023-01-24 03:44:35.636482: step: 378/466, loss: 0.012510258704423904 2023-01-24 03:44:36.254927: step: 380/466, loss: 0.010407834313809872 2023-01-24 03:44:36.883830: step: 382/466, loss: 0.016522957012057304 2023-01-24 03:44:37.528221: step: 384/466, loss: 0.04073789343237877 2023-01-24 03:44:38.196778: step: 386/466, loss: 0.06633096933364868 2023-01-24 03:44:38.792060: step: 388/466, loss: 0.013094646856188774 2023-01-24 03:44:39.432640: step: 390/466, loss: 0.01919671520590782 2023-01-24 03:44:40.112817: step: 392/466, loss: 0.28468018770217896 2023-01-24 03:44:40.743902: step: 394/466, loss: 0.025736916810274124 2023-01-24 03:44:41.390540: step: 396/466, loss: 0.10791622847318649 2023-01-24 03:44:42.011834: step: 398/466, loss: 0.03712229058146477 2023-01-24 03:44:42.633343: step: 400/466, loss: 0.04609160125255585 2023-01-24 03:44:43.229100: step: 402/466, loss: 0.028331611305475235 2023-01-24 03:44:43.879678: step: 404/466, loss: 0.006319758482277393 2023-01-24 03:44:44.465495: step: 406/466, loss: 0.001945237978361547 2023-01-24 03:44:45.133296: step: 408/466, loss: 0.04442158341407776 2023-01-24 03:44:45.763064: step: 410/466, loss: 0.0859503522515297 2023-01-24 03:44:46.336899: step: 412/466, loss: 0.033219464123249054 2023-01-24 03:44:47.007191: step: 414/466, loss: 0.05364341661334038 2023-01-24 03:44:47.630531: step: 416/466, loss: 0.10488469153642654 2023-01-24 03:44:48.312800: step: 418/466, loss: 0.0008414004696533084 2023-01-24 03:44:48.948614: step: 420/466, loss: 0.025465134531259537 2023-01-24 03:44:49.513517: step: 422/466, loss: 0.024259982630610466 2023-01-24 03:44:50.183359: step: 424/466, loss: 0.0021938399877399206 2023-01-24 03:44:50.777502: step: 426/466, loss: 0.07630009949207306 2023-01-24 03:44:51.398520: step: 428/466, loss: 0.012023454532027245 2023-01-24 03:44:51.995075: step: 430/466, loss: 0.0048853070475161076 2023-01-24 03:44:52.610724: step: 432/466, loss: 0.4136844277381897 2023-01-24 03:44:53.217002: step: 434/466, loss: 0.007623132783919573 2023-01-24 03:44:53.843410: step: 436/466, loss: 0.0607096329331398 2023-01-24 03:44:54.458850: step: 438/466, loss: 0.004217234905809164 2023-01-24 03:44:55.168167: step: 440/466, loss: 0.018126465380191803 2023-01-24 03:44:55.749424: step: 442/466, loss: 0.014559803530573845 2023-01-24 03:44:56.380898: step: 444/466, loss: 0.007691314909607172 2023-01-24 03:44:56.962720: step: 446/466, loss: 0.012965541332960129 2023-01-24 03:44:57.623663: step: 448/466, loss: 0.05624009296298027 2023-01-24 03:44:58.238697: step: 450/466, loss: 0.1598840355873108 2023-01-24 03:44:58.889259: step: 452/466, loss: 0.037264205515384674 2023-01-24 03:44:59.512135: step: 454/466, loss: 0.005337410140782595 2023-01-24 03:45:00.121539: step: 456/466, loss: 0.0020100458059459925 2023-01-24 03:45:00.661697: step: 458/466, loss: 0.023190123960375786 2023-01-24 03:45:01.243157: step: 460/466, loss: 0.005955295171588659 2023-01-24 03:45:01.865722: step: 462/466, loss: 0.05335367098450661 2023-01-24 03:45:02.382249: step: 464/466, loss: 0.022979112342000008 2023-01-24 03:45:03.069298: step: 466/466, loss: 0.01416210550814867 2023-01-24 03:45:03.683694: step: 468/466, loss: 0.03598571568727493 2023-01-24 03:45:04.316319: step: 470/466, loss: 0.03203276917338371 2023-01-24 03:45:04.960518: step: 472/466, loss: 0.07548796385526657 2023-01-24 03:45:05.606563: step: 474/466, loss: 0.01304719876497984 2023-01-24 03:45:06.213543: step: 476/466, loss: 0.0888737142086029 2023-01-24 03:45:06.783082: step: 478/466, loss: 0.15686488151550293 2023-01-24 03:45:07.435269: step: 480/466, loss: 0.030848558992147446 2023-01-24 03:45:08.085192: step: 482/466, loss: 0.02938656508922577 2023-01-24 03:45:08.682549: step: 484/466, loss: 0.017374135553836823 2023-01-24 03:45:09.306380: step: 486/466, loss: 0.008283546194434166 2023-01-24 03:45:09.924428: step: 488/466, loss: 0.03463687002658844 2023-01-24 03:45:10.644254: step: 490/466, loss: 0.0263261366635561 2023-01-24 03:45:11.303463: step: 492/466, loss: 0.057920608669519424 2023-01-24 03:45:11.896054: step: 494/466, loss: 0.0033012954518198967 2023-01-24 03:45:12.534868: step: 496/466, loss: 0.03663906827569008 2023-01-24 03:45:13.118473: step: 498/466, loss: 0.0053479028865695 2023-01-24 03:45:13.765036: step: 500/466, loss: 0.16667911410331726 2023-01-24 03:45:14.394663: step: 502/466, loss: 0.040800731629133224 2023-01-24 03:45:15.004245: step: 504/466, loss: 0.008014737628400326 2023-01-24 03:45:15.625175: step: 506/466, loss: 0.045118384063243866 2023-01-24 03:45:16.249631: step: 508/466, loss: 0.05529142916202545 2023-01-24 03:45:16.906265: step: 510/466, loss: 0.05914291366934776 2023-01-24 03:45:17.564682: step: 512/466, loss: 0.09540831297636032 2023-01-24 03:45:18.140178: step: 514/466, loss: 0.0009840897982940078 2023-01-24 03:45:18.802186: step: 516/466, loss: 0.061920154839754105 2023-01-24 03:45:19.395742: step: 518/466, loss: 0.029298361390829086 2023-01-24 03:45:20.007302: step: 520/466, loss: 0.08342516422271729 2023-01-24 03:45:20.683290: step: 522/466, loss: 0.5614947080612183 2023-01-24 03:45:21.299086: step: 524/466, loss: 0.005597488954663277 2023-01-24 03:45:21.911019: step: 526/466, loss: 0.014806962572038174 2023-01-24 03:45:22.497248: step: 528/466, loss: 0.0023010680451989174 2023-01-24 03:45:23.045684: step: 530/466, loss: 0.033632270991802216 2023-01-24 03:45:23.697461: step: 532/466, loss: 0.0015359396347776055 2023-01-24 03:45:24.400443: step: 534/466, loss: 0.03112613968551159 2023-01-24 03:45:25.037980: step: 536/466, loss: 0.009359101764857769 2023-01-24 03:45:25.618823: step: 538/466, loss: 0.0030481473077088594 2023-01-24 03:45:26.177073: step: 540/466, loss: 0.004858879372477531 2023-01-24 03:45:26.794986: step: 542/466, loss: 0.006360383238643408 2023-01-24 03:45:27.414531: step: 544/466, loss: 0.04065334051847458 2023-01-24 03:45:28.088725: step: 546/466, loss: 0.014170126989483833 2023-01-24 03:45:28.669900: step: 548/466, loss: 0.006495888810604811 2023-01-24 03:45:29.307939: step: 550/466, loss: 0.009829082526266575 2023-01-24 03:45:29.911966: step: 552/466, loss: 0.011651817709207535 2023-01-24 03:45:30.597400: step: 554/466, loss: 0.7188246846199036 2023-01-24 03:45:31.225696: step: 556/466, loss: 0.15502245724201202 2023-01-24 03:45:31.930003: step: 558/466, loss: 0.0001732190139591694 2023-01-24 03:45:32.545732: step: 560/466, loss: 0.19844211637973785 2023-01-24 03:45:33.201674: step: 562/466, loss: 0.06763464957475662 2023-01-24 03:45:33.826241: step: 564/466, loss: 0.06928513199090958 2023-01-24 03:45:34.458405: step: 566/466, loss: 0.013963966630399227 2023-01-24 03:45:35.142631: step: 568/466, loss: 0.07351906597614288 2023-01-24 03:45:35.757564: step: 570/466, loss: 0.22942838072776794 2023-01-24 03:45:36.345022: step: 572/466, loss: 0.011113603599369526 2023-01-24 03:45:36.936130: step: 574/466, loss: 0.003512877505272627 2023-01-24 03:45:37.543653: step: 576/466, loss: 0.044654857367277145 2023-01-24 03:45:38.141525: step: 578/466, loss: 0.008034838363528252 2023-01-24 03:45:38.859692: step: 580/466, loss: 0.8384864330291748 2023-01-24 03:45:39.499457: step: 582/466, loss: 0.007044475991278887 2023-01-24 03:45:40.061141: step: 584/466, loss: 0.06129581108689308 2023-01-24 03:45:40.670865: step: 586/466, loss: 0.009062034077942371 2023-01-24 03:45:41.259296: step: 588/466, loss: 0.05162311717867851 2023-01-24 03:45:41.901619: step: 590/466, loss: 0.000708371982909739 2023-01-24 03:45:42.501635: step: 592/466, loss: 0.03490292653441429 2023-01-24 03:45:43.131957: step: 594/466, loss: 0.025423798710107803 2023-01-24 03:45:43.789797: step: 596/466, loss: 0.00033729904680512846 2023-01-24 03:45:44.412563: step: 598/466, loss: 0.06699639558792114 2023-01-24 03:45:45.034111: step: 600/466, loss: 0.0021308648865669966 2023-01-24 03:45:45.585138: step: 602/466, loss: 0.003646919270977378 2023-01-24 03:45:46.217829: step: 604/466, loss: 0.02293245680630207 2023-01-24 03:45:46.869671: step: 606/466, loss: 0.013322302140295506 2023-01-24 03:45:47.551916: step: 608/466, loss: 0.043741822242736816 2023-01-24 03:45:48.178703: step: 610/466, loss: 0.0774562880396843 2023-01-24 03:45:48.818665: step: 612/466, loss: 0.0023809592239558697 2023-01-24 03:45:49.409276: step: 614/466, loss: 0.007703673094511032 2023-01-24 03:45:49.982469: step: 616/466, loss: 0.37894806265830994 2023-01-24 03:45:50.673491: step: 618/466, loss: 0.005944686941802502 2023-01-24 03:45:51.276782: step: 620/466, loss: 0.006991254631429911 2023-01-24 03:45:51.928148: step: 622/466, loss: 0.050385165959596634 2023-01-24 03:45:52.620834: step: 624/466, loss: 0.006400011014193296 2023-01-24 03:45:53.238544: step: 626/466, loss: 0.004842943046241999 2023-01-24 03:45:53.980187: step: 628/466, loss: 0.23291310667991638 2023-01-24 03:45:54.626618: step: 630/466, loss: 0.007940417155623436 2023-01-24 03:45:55.203355: step: 632/466, loss: 0.010347411036491394 2023-01-24 03:45:55.871183: step: 634/466, loss: 0.017035510390996933 2023-01-24 03:45:56.517138: step: 636/466, loss: 1.2720946073532104 2023-01-24 03:45:57.122666: step: 638/466, loss: 0.12251061201095581 2023-01-24 03:45:57.694429: step: 640/466, loss: 0.16059669852256775 2023-01-24 03:45:58.320915: step: 642/466, loss: 0.03878507763147354 2023-01-24 03:45:58.999731: step: 644/466, loss: 0.018028240650892258 2023-01-24 03:45:59.603817: step: 646/466, loss: 0.020756619051098824 2023-01-24 03:46:00.248429: step: 648/466, loss: 0.01615186780691147 2023-01-24 03:46:00.859002: step: 650/466, loss: 0.047810375690460205 2023-01-24 03:46:01.477689: step: 652/466, loss: 0.015006436966359615 2023-01-24 03:46:02.107549: step: 654/466, loss: 0.053115393966436386 2023-01-24 03:46:02.748945: step: 656/466, loss: 0.0799861028790474 2023-01-24 03:46:03.353091: step: 658/466, loss: 0.0006414179224520922 2023-01-24 03:46:03.953363: step: 660/466, loss: 0.02255016192793846 2023-01-24 03:46:04.595976: step: 662/466, loss: 0.04278068244457245 2023-01-24 03:46:05.281882: step: 664/466, loss: 0.0029859403148293495 2023-01-24 03:46:05.976380: step: 666/466, loss: 0.06284870207309723 2023-01-24 03:46:06.589679: step: 668/466, loss: 0.015784582123160362 2023-01-24 03:46:07.186967: step: 670/466, loss: 0.022132791578769684 2023-01-24 03:46:07.785706: step: 672/466, loss: 0.0014332809951156378 2023-01-24 03:46:08.427687: step: 674/466, loss: 0.03355376049876213 2023-01-24 03:46:09.045053: step: 676/466, loss: 0.031046321615576744 2023-01-24 03:46:09.663953: step: 678/466, loss: 0.02303227409720421 2023-01-24 03:46:10.245096: step: 680/466, loss: 0.014874064363539219 2023-01-24 03:46:10.827871: step: 682/466, loss: 0.012770193628966808 2023-01-24 03:46:11.441760: step: 684/466, loss: 0.6695691347122192 2023-01-24 03:46:12.087015: step: 686/466, loss: 0.010362708009779453 2023-01-24 03:46:12.797030: step: 688/466, loss: 0.02548399195075035 2023-01-24 03:46:13.449576: step: 690/466, loss: 0.0030976503621786833 2023-01-24 03:46:14.109665: step: 692/466, loss: 0.046513043344020844 2023-01-24 03:46:14.734896: step: 694/466, loss: 0.016862619668245316 2023-01-24 03:46:15.332711: step: 696/466, loss: 0.031665779650211334 2023-01-24 03:46:15.960153: step: 698/466, loss: 0.0098891481757164 2023-01-24 03:46:16.571890: step: 700/466, loss: 0.34672558307647705 2023-01-24 03:46:17.220033: step: 702/466, loss: 0.0001856032758951187 2023-01-24 03:46:17.836263: step: 704/466, loss: 0.020029060542583466 2023-01-24 03:46:18.490881: step: 706/466, loss: 0.0032362567726522684 2023-01-24 03:46:19.104895: step: 708/466, loss: 0.02074238285422325 2023-01-24 03:46:19.756418: step: 710/466, loss: 0.0448627769947052 2023-01-24 03:46:20.393695: step: 712/466, loss: 0.009725132025778294 2023-01-24 03:46:21.100003: step: 714/466, loss: 0.07118083536624908 2023-01-24 03:46:21.731872: step: 716/466, loss: 0.004004801157861948 2023-01-24 03:46:22.366007: step: 718/466, loss: 0.005218241363763809 2023-01-24 03:46:22.988227: step: 720/466, loss: 0.003216929268091917 2023-01-24 03:46:23.638848: step: 722/466, loss: 0.03918952867388725 2023-01-24 03:46:24.352166: step: 724/466, loss: 0.04884127900004387 2023-01-24 03:46:25.016720: step: 726/466, loss: 0.058916158974170685 2023-01-24 03:46:25.554482: step: 728/466, loss: 0.026085475459694862 2023-01-24 03:46:26.192911: step: 730/466, loss: 0.2417844980955124 2023-01-24 03:46:26.772607: step: 732/466, loss: 0.008069354109466076 2023-01-24 03:46:27.374155: step: 734/466, loss: 0.006184478756040335 2023-01-24 03:46:27.973289: step: 736/466, loss: 0.009322668425738811 2023-01-24 03:46:28.552368: step: 738/466, loss: 0.025292107835412025 2023-01-24 03:46:29.201694: step: 740/466, loss: 0.0015159074682742357 2023-01-24 03:46:29.837186: step: 742/466, loss: 0.1141427606344223 2023-01-24 03:46:30.512061: step: 744/466, loss: 0.0584435909986496 2023-01-24 03:46:31.171908: step: 746/466, loss: 0.030674343928694725 2023-01-24 03:46:31.801931: step: 748/466, loss: 0.03274570778012276 2023-01-24 03:46:32.428079: step: 750/466, loss: 0.05589473992586136 2023-01-24 03:46:33.029936: step: 752/466, loss: 0.014640445820987225 2023-01-24 03:46:33.678266: step: 754/466, loss: 0.010607525706291199 2023-01-24 03:46:34.246243: step: 756/466, loss: 0.004988064989447594 2023-01-24 03:46:34.861684: step: 758/466, loss: 0.024294689297676086 2023-01-24 03:46:35.480324: step: 760/466, loss: 0.0023869031574577093 2023-01-24 03:46:36.068130: step: 762/466, loss: 0.001264250953681767 2023-01-24 03:46:36.670826: step: 764/466, loss: 0.009330169297754765 2023-01-24 03:46:37.260098: step: 766/466, loss: 0.006431054789572954 2023-01-24 03:46:37.930034: step: 768/466, loss: 0.020721986889839172 2023-01-24 03:46:38.602670: step: 770/466, loss: 0.24625498056411743 2023-01-24 03:46:39.221855: step: 772/466, loss: 0.013136804103851318 2023-01-24 03:46:39.802102: step: 774/466, loss: 0.011591672897338867 2023-01-24 03:46:40.426642: step: 776/466, loss: 0.0027735396288335323 2023-01-24 03:46:41.067721: step: 778/466, loss: 0.04726502299308777 2023-01-24 03:46:41.691998: step: 780/466, loss: 0.3299539089202881 2023-01-24 03:46:42.376461: step: 782/466, loss: 0.024561433121562004 2023-01-24 03:46:42.889988: step: 784/466, loss: 0.019899209961295128 2023-01-24 03:46:43.513987: step: 786/466, loss: 0.037758611142635345 2023-01-24 03:46:44.121476: step: 788/466, loss: 0.022166717797517776 2023-01-24 03:46:44.756136: step: 790/466, loss: 0.8392122983932495 2023-01-24 03:46:45.421778: step: 792/466, loss: 0.014615439809858799 2023-01-24 03:46:46.045583: step: 794/466, loss: 0.0296429805457592 2023-01-24 03:46:46.674493: step: 796/466, loss: 0.019924568012356758 2023-01-24 03:46:47.262699: step: 798/466, loss: 0.0008745107334107161 2023-01-24 03:46:47.920891: step: 800/466, loss: 0.0896400511264801 2023-01-24 03:46:48.585330: step: 802/466, loss: 0.08619721978902817 2023-01-24 03:46:49.261503: step: 804/466, loss: 0.010402233339846134 2023-01-24 03:46:49.870865: step: 806/466, loss: 0.1356736570596695 2023-01-24 03:46:50.496279: step: 808/466, loss: 0.014810479246079922 2023-01-24 03:46:51.101362: step: 810/466, loss: 0.014135269448161125 2023-01-24 03:46:51.784922: step: 812/466, loss: 0.07043011486530304 2023-01-24 03:46:52.452282: step: 814/466, loss: 0.028513118624687195 2023-01-24 03:46:53.076134: step: 816/466, loss: 0.02211073786020279 2023-01-24 03:46:53.724222: step: 818/466, loss: 0.026882054284214973 2023-01-24 03:46:54.271798: step: 820/466, loss: 0.6213523745536804 2023-01-24 03:46:54.871430: step: 822/466, loss: 0.004070287104696035 2023-01-24 03:46:55.482228: step: 824/466, loss: 0.049383603036403656 2023-01-24 03:46:56.133614: step: 826/466, loss: 0.007738706190139055 2023-01-24 03:46:56.717856: step: 828/466, loss: 0.0017029495211318135 2023-01-24 03:46:57.306393: step: 830/466, loss: 0.20054680109024048 2023-01-24 03:46:57.959907: step: 832/466, loss: 0.019246773794293404 2023-01-24 03:46:58.635033: step: 834/466, loss: 0.01825520396232605 2023-01-24 03:46:59.229892: step: 836/466, loss: 0.012878013774752617 2023-01-24 03:46:59.840645: step: 838/466, loss: 0.009729273617267609 2023-01-24 03:47:00.449590: step: 840/466, loss: 0.026708990335464478 2023-01-24 03:47:01.139906: step: 842/466, loss: 0.004096888471394777 2023-01-24 03:47:01.679084: step: 844/466, loss: 0.0020870447624474764 2023-01-24 03:47:02.346376: step: 846/466, loss: 0.01970016583800316 2023-01-24 03:47:02.963061: step: 848/466, loss: 0.06840378046035767 2023-01-24 03:47:03.661992: step: 850/466, loss: 0.09377360343933105 2023-01-24 03:47:04.274180: step: 852/466, loss: 0.007807820104062557 2023-01-24 03:47:04.887624: step: 854/466, loss: 0.007622482255101204 2023-01-24 03:47:05.504359: step: 856/466, loss: 0.010054780170321465 2023-01-24 03:47:06.106344: step: 858/466, loss: 0.014251005835831165 2023-01-24 03:47:06.715243: step: 860/466, loss: 0.023616626858711243 2023-01-24 03:47:07.340719: step: 862/466, loss: 0.027740897610783577 2023-01-24 03:47:07.972962: step: 864/466, loss: 0.0004946734406985343 2023-01-24 03:47:08.589291: step: 866/466, loss: 0.027191264554858208 2023-01-24 03:47:09.221818: step: 868/466, loss: 0.0035628662444651127 2023-01-24 03:47:09.836589: step: 870/466, loss: 0.00571247236803174 2023-01-24 03:47:10.427228: step: 872/466, loss: 0.008461954072117805 2023-01-24 03:47:11.048755: step: 874/466, loss: 0.008066113106906414 2023-01-24 03:47:11.640896: step: 876/466, loss: 0.003854152048006654 2023-01-24 03:47:12.262275: step: 878/466, loss: 0.0005070980987511575 2023-01-24 03:47:12.896115: step: 880/466, loss: 0.03043699450790882 2023-01-24 03:47:13.555547: step: 882/466, loss: 0.028554614633321762 2023-01-24 03:47:14.158604: step: 884/466, loss: 0.04454214498400688 2023-01-24 03:47:14.769818: step: 886/466, loss: 0.017221003770828247 2023-01-24 03:47:15.405208: step: 888/466, loss: 0.01930846832692623 2023-01-24 03:47:16.037733: step: 890/466, loss: 0.011658270843327045 2023-01-24 03:47:16.586970: step: 892/466, loss: 0.0632919892668724 2023-01-24 03:47:17.173318: step: 894/466, loss: 0.0009290258749388158 2023-01-24 03:47:17.828776: step: 896/466, loss: 0.04633230343461037 2023-01-24 03:47:18.407486: step: 898/466, loss: 0.041435934603214264 2023-01-24 03:47:19.043545: step: 900/466, loss: 0.09117339551448822 2023-01-24 03:47:19.633239: step: 902/466, loss: 0.013194074854254723 2023-01-24 03:47:20.284379: step: 904/466, loss: 1.1540361642837524 2023-01-24 03:47:20.846241: step: 906/466, loss: 0.02288798615336418 2023-01-24 03:47:21.466185: step: 908/466, loss: 0.0001516256743343547 2023-01-24 03:47:22.097521: step: 910/466, loss: 0.019018512219190598 2023-01-24 03:47:22.698955: step: 912/466, loss: 0.028519008308649063 2023-01-24 03:47:23.292279: step: 914/466, loss: 0.016488952562212944 2023-01-24 03:47:23.889110: step: 916/466, loss: 0.04028515890240669 2023-01-24 03:47:24.504326: step: 918/466, loss: 0.11264704912900925 2023-01-24 03:47:25.082201: step: 920/466, loss: 0.09657475352287292 2023-01-24 03:47:25.708313: step: 922/466, loss: 0.0336512066423893 2023-01-24 03:47:26.305060: step: 924/466, loss: 0.024904528632760048 2023-01-24 03:47:26.973409: step: 926/466, loss: 0.0068472339771687984 2023-01-24 03:47:27.533404: step: 928/466, loss: 0.011571408249437809 2023-01-24 03:47:28.121473: step: 930/466, loss: 0.05737202242016792 2023-01-24 03:47:28.654293: step: 932/466, loss: 0.005660996772348881 ================================================== Loss: 0.064 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3662495675431126, 'r': 0.3391457096034895, 'f1': 0.3521769240611605}, 'combined': 0.2594987861503288, 'epoch': 27} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35762610957584895, 'r': 0.2702350584531663, 'f1': 0.3078486695681165}, 'combined': 0.19276505477629727, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31702997936642796, 'r': 0.3218425786736982, 'f1': 0.31941815246899985}, 'combined': 0.2353607439245262, 'epoch': 27} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36628214712486035, 'r': 0.2790091445608142, 'f1': 0.3167439878825637}, 'combined': 0.21011729889239375, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34615384615384615, 'r': 0.2571428571428571, 'f1': 0.29508196721311475}, 'combined': 0.19672131147540983, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 27} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491647627064294, 'r': 0.32200014169890834, 'f1': 0.33503272393943667}, 'combined': 0.2468662176395849, 'epoch': 21} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32932177105318117, 'r': 0.2710662953073116, 'f1': 0.2973677774262388}, 'combined': 0.1862022531547477, 'epoch': 21} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.32857142857142857, 'f1': 0.34328358208955223}, 'combined': 0.2288557213930348, 'epoch': 21} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:50:07.370801: step: 2/466, loss: 0.023768384009599686 2023-01-24 03:50:08.070789: step: 4/466, loss: 0.03996489197015762 2023-01-24 03:50:08.684414: step: 6/466, loss: 0.004306620452553034 2023-01-24 03:50:09.335554: step: 8/466, loss: 0.010168182663619518 2023-01-24 03:50:09.940621: step: 10/466, loss: 0.006507258862257004 2023-01-24 03:50:10.574992: step: 12/466, loss: 0.011234902776777744 2023-01-24 03:50:11.135627: step: 14/466, loss: 0.03960973396897316 2023-01-24 03:50:11.899899: step: 16/466, loss: 0.014154575765132904 2023-01-24 03:50:12.518980: step: 18/466, loss: 0.018108831718564034 2023-01-24 03:50:13.137603: step: 20/466, loss: 0.023137645795941353 2023-01-24 03:50:13.753494: step: 22/466, loss: 0.06897438317537308 2023-01-24 03:50:14.466143: step: 24/466, loss: 0.03519696742296219 2023-01-24 03:50:15.102680: step: 26/466, loss: 0.014579772017896175 2023-01-24 03:50:15.733743: step: 28/466, loss: 0.037931300699710846 2023-01-24 03:50:16.310697: step: 30/466, loss: 0.0020180256105959415 2023-01-24 03:50:16.931367: step: 32/466, loss: 0.0680442526936531 2023-01-24 03:50:17.487770: step: 34/466, loss: 0.01798325590789318 2023-01-24 03:50:18.036969: step: 36/466, loss: 0.03565318137407303 2023-01-24 03:50:18.606488: step: 38/466, loss: 0.048264279961586 2023-01-24 03:50:19.188226: step: 40/466, loss: 0.03383226692676544 2023-01-24 03:50:19.779165: step: 42/466, loss: 0.00869852863252163 2023-01-24 03:50:20.375833: step: 44/466, loss: 0.011189626529812813 2023-01-24 03:50:21.017302: step: 46/466, loss: 0.03855552524328232 2023-01-24 03:50:21.626106: step: 48/466, loss: 0.0033748524729162455 2023-01-24 03:50:22.273869: step: 50/466, loss: 0.02395116351544857 2023-01-24 03:50:22.899269: step: 52/466, loss: 0.00631840992718935 2023-01-24 03:50:23.564518: step: 54/466, loss: 0.02578447014093399 2023-01-24 03:50:24.161128: step: 56/466, loss: 0.041704658418893814 2023-01-24 03:50:24.833908: step: 58/466, loss: 0.0032002837397158146 2023-01-24 03:50:25.453880: step: 60/466, loss: 0.10665503144264221 2023-01-24 03:50:26.092142: step: 62/466, loss: 0.00679376907646656 2023-01-24 03:50:26.641053: step: 64/466, loss: 0.016809426248073578 2023-01-24 03:50:27.286020: step: 66/466, loss: 0.026661166921257973 2023-01-24 03:50:27.927401: step: 68/466, loss: 0.08407910168170929 2023-01-24 03:50:28.488226: step: 70/466, loss: 0.04843861609697342 2023-01-24 03:50:29.080930: step: 72/466, loss: 0.03017924167215824 2023-01-24 03:50:29.708775: step: 74/466, loss: 0.015387943014502525 2023-01-24 03:50:30.368797: step: 76/466, loss: 0.0035162935964763165 2023-01-24 03:50:31.054407: step: 78/466, loss: 0.002334260381758213 2023-01-24 03:50:31.680288: step: 80/466, loss: 0.026451418176293373 2023-01-24 03:50:32.285868: step: 82/466, loss: 0.00722060352563858 2023-01-24 03:50:32.867624: step: 84/466, loss: 0.19554363191127777 2023-01-24 03:50:33.494678: step: 86/466, loss: 0.018334930762648582 2023-01-24 03:50:34.132352: step: 88/466, loss: 0.04861517250537872 2023-01-24 03:50:34.756607: step: 90/466, loss: 0.029225613921880722 2023-01-24 03:50:35.312813: step: 92/466, loss: 0.015543647110462189 2023-01-24 03:50:35.954459: step: 94/466, loss: 0.04949422925710678 2023-01-24 03:50:36.554917: step: 96/466, loss: 0.09246551990509033 2023-01-24 03:50:37.210769: step: 98/466, loss: 0.25158873200416565 2023-01-24 03:50:37.849774: step: 100/466, loss: 0.014198623597621918 2023-01-24 03:50:38.467149: step: 102/466, loss: 0.022452840581536293 2023-01-24 03:50:39.113570: step: 104/466, loss: 0.016129590570926666 2023-01-24 03:50:39.682836: step: 106/466, loss: 0.0017111928900703788 2023-01-24 03:50:40.343651: step: 108/466, loss: 0.0237188171595335 2023-01-24 03:50:40.902314: step: 110/466, loss: 0.0030442920979112387 2023-01-24 03:50:41.607819: step: 112/466, loss: 0.02442362532019615 2023-01-24 03:50:42.226932: step: 114/466, loss: 0.004675176925957203 2023-01-24 03:50:42.844370: step: 116/466, loss: 0.02758142165839672 2023-01-24 03:50:43.456086: step: 118/466, loss: 0.0414704754948616 2023-01-24 03:50:44.116772: step: 120/466, loss: 0.015966974198818207 2023-01-24 03:50:44.730831: step: 122/466, loss: 0.005912418011575937 2023-01-24 03:50:45.376246: step: 124/466, loss: 0.10807689279317856 2023-01-24 03:50:46.010916: step: 126/466, loss: 0.025736931711435318 2023-01-24 03:50:46.676053: step: 128/466, loss: 0.025900406762957573 2023-01-24 03:50:47.269310: step: 130/466, loss: 0.03595322370529175 2023-01-24 03:50:47.857152: step: 132/466, loss: 0.028274891898036003 2023-01-24 03:50:48.448037: step: 134/466, loss: 0.018205879256129265 2023-01-24 03:50:49.130133: step: 136/466, loss: 0.05391043797135353 2023-01-24 03:50:49.738364: step: 138/466, loss: 0.005010449793189764 2023-01-24 03:50:50.345321: step: 140/466, loss: 0.023384299129247665 2023-01-24 03:50:50.964349: step: 142/466, loss: 0.34657207131385803 2023-01-24 03:50:51.597892: step: 144/466, loss: 0.008673430420458317 2023-01-24 03:50:52.330020: step: 146/466, loss: 0.010439866222441196 2023-01-24 03:50:52.933016: step: 148/466, loss: 0.07247832417488098 2023-01-24 03:50:53.570241: step: 150/466, loss: 0.007264253217726946 2023-01-24 03:50:54.195706: step: 152/466, loss: 0.0024536659475415945 2023-01-24 03:50:54.808372: step: 154/466, loss: 0.05248751863837242 2023-01-24 03:50:55.376190: step: 156/466, loss: 0.03346743807196617 2023-01-24 03:50:56.020463: step: 158/466, loss: 0.0344766303896904 2023-01-24 03:50:56.663358: step: 160/466, loss: 0.057996924966573715 2023-01-24 03:50:57.231914: step: 162/466, loss: 0.0343867689371109 2023-01-24 03:50:57.872903: step: 164/466, loss: 0.010756413452327251 2023-01-24 03:50:58.573252: step: 166/466, loss: 0.01722462847828865 2023-01-24 03:50:59.157312: step: 168/466, loss: 0.02536899968981743 2023-01-24 03:50:59.720616: step: 170/466, loss: 0.014726881869137287 2023-01-24 03:51:00.343494: step: 172/466, loss: 0.003492683405056596 2023-01-24 03:51:00.992062: step: 174/466, loss: 0.010319219902157784 2023-01-24 03:51:01.594487: step: 176/466, loss: 0.001274162670597434 2023-01-24 03:51:02.177095: step: 178/466, loss: 0.05954611673951149 2023-01-24 03:51:02.801752: step: 180/466, loss: 0.010346210561692715 2023-01-24 03:51:03.459901: step: 182/466, loss: 0.041230689734220505 2023-01-24 03:51:04.100705: step: 184/466, loss: 0.01227517332881689 2023-01-24 03:51:04.634506: step: 186/466, loss: 0.0498775951564312 2023-01-24 03:51:05.283291: step: 188/466, loss: 0.020214635878801346 2023-01-24 03:51:05.877763: step: 190/466, loss: 0.02066658064723015 2023-01-24 03:51:06.499658: step: 192/466, loss: 0.013288436457514763 2023-01-24 03:51:07.116411: step: 194/466, loss: 0.03310953825712204 2023-01-24 03:51:07.711715: step: 196/466, loss: 0.0349651537835598 2023-01-24 03:51:08.392659: step: 198/466, loss: 0.264168918132782 2023-01-24 03:51:08.959226: step: 200/466, loss: 0.017585033550858498 2023-01-24 03:51:09.529196: step: 202/466, loss: 0.01101447269320488 2023-01-24 03:51:10.214424: step: 204/466, loss: 0.0085613913834095 2023-01-24 03:51:10.835196: step: 206/466, loss: 0.0034275949001312256 2023-01-24 03:51:11.345542: step: 208/466, loss: 0.0016740479040890932 2023-01-24 03:51:11.999530: step: 210/466, loss: 0.004394230432808399 2023-01-24 03:51:12.542625: step: 212/466, loss: 0.006876669824123383 2023-01-24 03:51:13.172656: step: 214/466, loss: 0.0018812270136550069 2023-01-24 03:51:13.715786: step: 216/466, loss: 0.00029821597854606807 2023-01-24 03:51:14.339820: step: 218/466, loss: 0.002942908788099885 2023-01-24 03:51:14.896600: step: 220/466, loss: 0.024115046486258507 2023-01-24 03:51:15.513784: step: 222/466, loss: 0.017866337671875954 2023-01-24 03:51:16.065175: step: 224/466, loss: 0.02098376490175724 2023-01-24 03:51:16.727320: step: 226/466, loss: 0.023650728166103363 2023-01-24 03:51:17.346021: step: 228/466, loss: 0.024871397763490677 2023-01-24 03:51:17.980200: step: 230/466, loss: 0.02011144533753395 2023-01-24 03:51:18.632495: step: 232/466, loss: 0.0009520826861262321 2023-01-24 03:51:19.247509: step: 234/466, loss: 0.00101017439737916 2023-01-24 03:51:19.826676: step: 236/466, loss: 0.03736605867743492 2023-01-24 03:51:20.415778: step: 238/466, loss: 0.000858426617924124 2023-01-24 03:51:21.018583: step: 240/466, loss: 0.055236946791410446 2023-01-24 03:51:21.703770: step: 242/466, loss: 0.0015216044848784804 2023-01-24 03:51:22.294326: step: 244/466, loss: 0.024074794724583626 2023-01-24 03:51:22.844988: step: 246/466, loss: 0.009139341302216053 2023-01-24 03:51:23.495089: step: 248/466, loss: 0.002656628843396902 2023-01-24 03:51:24.091030: step: 250/466, loss: 0.029650507494807243 2023-01-24 03:51:24.704381: step: 252/466, loss: 0.05830903723835945 2023-01-24 03:51:25.274765: step: 254/466, loss: 0.014021525159478188 2023-01-24 03:51:25.874841: step: 256/466, loss: 0.3828524351119995 2023-01-24 03:51:26.482490: step: 258/466, loss: 0.043327976018190384 2023-01-24 03:51:27.101926: step: 260/466, loss: 0.0033573047257959843 2023-01-24 03:51:27.772455: step: 262/466, loss: 0.004619850777089596 2023-01-24 03:51:28.334285: step: 264/466, loss: 0.08685749024152756 2023-01-24 03:51:28.951858: step: 266/466, loss: 0.01593833789229393 2023-01-24 03:51:29.587869: step: 268/466, loss: 0.036680977791547775 2023-01-24 03:51:30.195909: step: 270/466, loss: 0.0330805778503418 2023-01-24 03:51:30.807989: step: 272/466, loss: 0.007293290924280882 2023-01-24 03:51:31.454020: step: 274/466, loss: 0.047484152019023895 2023-01-24 03:51:32.046373: step: 276/466, loss: 0.351840078830719 2023-01-24 03:51:32.614055: step: 278/466, loss: 0.00860503874719143 2023-01-24 03:51:33.234365: step: 280/466, loss: 0.1977149099111557 2023-01-24 03:51:33.786557: step: 282/466, loss: 0.10114485025405884 2023-01-24 03:51:34.407252: step: 284/466, loss: 0.08704552054405212 2023-01-24 03:51:34.982596: step: 286/466, loss: 0.5295833349227905 2023-01-24 03:51:35.585330: step: 288/466, loss: 0.010039645247161388 2023-01-24 03:51:36.152379: step: 290/466, loss: 0.0005082232528366148 2023-01-24 03:51:36.757392: step: 292/466, loss: 0.008479979820549488 2023-01-24 03:51:37.344994: step: 294/466, loss: 3.3063900470733643 2023-01-24 03:51:37.969394: step: 296/466, loss: 0.019657650962471962 2023-01-24 03:51:38.582996: step: 298/466, loss: 0.026188286021351814 2023-01-24 03:51:39.155287: step: 300/466, loss: 0.010561192408204079 2023-01-24 03:51:39.798565: step: 302/466, loss: 0.010517285205423832 2023-01-24 03:51:40.416940: step: 304/466, loss: 0.011019128374755383 2023-01-24 03:51:41.012176: step: 306/466, loss: 0.008257926441729069 2023-01-24 03:51:41.646121: step: 308/466, loss: 0.014237681403756142 2023-01-24 03:51:42.265587: step: 310/466, loss: 0.031133001670241356 2023-01-24 03:51:42.951883: step: 312/466, loss: 0.00708776293322444 2023-01-24 03:51:43.556827: step: 314/466, loss: 0.02494877204298973 2023-01-24 03:51:44.141933: step: 316/466, loss: 0.025167588144540787 2023-01-24 03:51:44.787900: step: 318/466, loss: 0.002418582094833255 2023-01-24 03:51:45.414146: step: 320/466, loss: 0.06174202635884285 2023-01-24 03:51:45.982399: step: 322/466, loss: 0.12775883078575134 2023-01-24 03:51:46.706081: step: 324/466, loss: 0.019592782482504845 2023-01-24 03:51:47.437593: step: 326/466, loss: 0.010396583937108517 2023-01-24 03:51:48.048082: step: 328/466, loss: 0.013660160824656487 2023-01-24 03:51:48.706186: step: 330/466, loss: 0.037008270621299744 2023-01-24 03:51:49.280089: step: 332/466, loss: 0.005264009814709425 2023-01-24 03:51:49.902232: step: 334/466, loss: 2.1258628368377686 2023-01-24 03:51:50.520032: step: 336/466, loss: 0.0020432898309081793 2023-01-24 03:51:51.145135: step: 338/466, loss: 0.8654299974441528 2023-01-24 03:51:51.740484: step: 340/466, loss: 0.015576314181089401 2023-01-24 03:51:52.312870: step: 342/466, loss: 0.0017202608287334442 2023-01-24 03:51:52.934352: step: 344/466, loss: 0.015618101693689823 2023-01-24 03:51:53.536804: step: 346/466, loss: 0.22508034110069275 2023-01-24 03:51:54.149882: step: 348/466, loss: 0.038267288357019424 2023-01-24 03:51:54.768288: step: 350/466, loss: 0.03531248867511749 2023-01-24 03:51:55.414029: step: 352/466, loss: 0.01732778176665306 2023-01-24 03:51:56.046984: step: 354/466, loss: 0.033080875873565674 2023-01-24 03:51:56.748118: step: 356/466, loss: 0.01420635636895895 2023-01-24 03:51:57.359085: step: 358/466, loss: 0.0015354438219219446 2023-01-24 03:51:57.928389: step: 360/466, loss: 0.028320256620645523 2023-01-24 03:51:58.499717: step: 362/466, loss: 0.002205535303801298 2023-01-24 03:51:59.060708: step: 364/466, loss: 0.009863844141364098 2023-01-24 03:51:59.659307: step: 366/466, loss: 26.99533462524414 2023-01-24 03:52:00.249217: step: 368/466, loss: 0.0035730074159801006 2023-01-24 03:52:00.887302: step: 370/466, loss: 0.025294972583651543 2023-01-24 03:52:01.493647: step: 372/466, loss: 0.014734728261828423 2023-01-24 03:52:02.081234: step: 374/466, loss: 0.0021246871910989285 2023-01-24 03:52:02.902706: step: 376/466, loss: 0.005827118642628193 2023-01-24 03:52:03.476634: step: 378/466, loss: 0.012958161532878876 2023-01-24 03:52:04.044732: step: 380/466, loss: 0.037231847643852234 2023-01-24 03:52:04.638960: step: 382/466, loss: 0.021039549261331558 2023-01-24 03:52:05.261446: step: 384/466, loss: 0.006525214295834303 2023-01-24 03:52:05.896153: step: 386/466, loss: 0.08137285709381104 2023-01-24 03:52:06.488163: step: 388/466, loss: 0.007886284962296486 2023-01-24 03:52:07.039414: step: 390/466, loss: 0.002167411847040057 2023-01-24 03:52:07.606443: step: 392/466, loss: 0.011483059264719486 2023-01-24 03:52:08.186546: step: 394/466, loss: 0.0016958725173026323 2023-01-24 03:52:08.775145: step: 396/466, loss: 0.07811151444911957 2023-01-24 03:52:09.358172: step: 398/466, loss: 0.04621563106775284 2023-01-24 03:52:09.939891: step: 400/466, loss: 0.026535989716649055 2023-01-24 03:52:10.558163: step: 402/466, loss: 0.007337826769798994 2023-01-24 03:52:11.064469: step: 404/466, loss: 0.0006064106128178537 2023-01-24 03:52:11.692334: step: 406/466, loss: 0.09214655309915543 2023-01-24 03:52:12.334940: step: 408/466, loss: 0.0458175353705883 2023-01-24 03:52:12.852843: step: 410/466, loss: 0.019851980730891228 2023-01-24 03:52:13.487560: step: 412/466, loss: 0.03482075780630112 2023-01-24 03:52:14.106664: step: 414/466, loss: 0.023295555263757706 2023-01-24 03:52:14.692013: step: 416/466, loss: 0.0010975089389830828 2023-01-24 03:52:15.307112: step: 418/466, loss: 0.010821403004229069 2023-01-24 03:52:15.938032: step: 420/466, loss: 0.024789204820990562 2023-01-24 03:52:16.614030: step: 422/466, loss: 0.0259927399456501 2023-01-24 03:52:17.164112: step: 424/466, loss: 0.009154855273663998 2023-01-24 03:52:17.790444: step: 426/466, loss: 0.09827637672424316 2023-01-24 03:52:18.385074: step: 428/466, loss: 0.019148392602801323 2023-01-24 03:52:19.034710: step: 430/466, loss: 0.02102496474981308 2023-01-24 03:52:19.591181: step: 432/466, loss: 0.002071237191557884 2023-01-24 03:52:20.189421: step: 434/466, loss: 0.005694160703569651 2023-01-24 03:52:20.873099: step: 436/466, loss: 0.011221546679735184 2023-01-24 03:52:21.475412: step: 438/466, loss: 0.043992385268211365 2023-01-24 03:52:22.104581: step: 440/466, loss: 0.05732232332229614 2023-01-24 03:52:22.684075: step: 442/466, loss: 0.01635921187698841 2023-01-24 03:52:23.280224: step: 444/466, loss: 0.009666459634900093 2023-01-24 03:52:23.955753: step: 446/466, loss: 0.05570024624466896 2023-01-24 03:52:24.518095: step: 448/466, loss: 0.05164948105812073 2023-01-24 03:52:25.106481: step: 450/466, loss: 0.02679915726184845 2023-01-24 03:52:25.779609: step: 452/466, loss: 0.024820327758789062 2023-01-24 03:52:26.377802: step: 454/466, loss: 0.006560258101671934 2023-01-24 03:52:27.120422: step: 456/466, loss: 0.22647497057914734 2023-01-24 03:52:27.759329: step: 458/466, loss: 0.0020619293209165335 2023-01-24 03:52:28.395457: step: 460/466, loss: 0.044577330350875854 2023-01-24 03:52:29.025974: step: 462/466, loss: 0.04522687941789627 2023-01-24 03:52:29.613517: step: 464/466, loss: 0.02234252355992794 2023-01-24 03:52:30.216441: step: 466/466, loss: 0.023085763677954674 2023-01-24 03:52:30.893658: step: 468/466, loss: 0.012092499993741512 2023-01-24 03:52:31.498895: step: 470/466, loss: 0.00947714876383543 2023-01-24 03:52:32.101488: step: 472/466, loss: 0.0202737245708704 2023-01-24 03:52:32.676004: step: 474/466, loss: 0.22844865918159485 2023-01-24 03:52:33.269081: step: 476/466, loss: 0.007046691607683897 2023-01-24 03:52:33.871562: step: 478/466, loss: 0.014844018965959549 2023-01-24 03:52:34.501561: step: 480/466, loss: 0.11203937977552414 2023-01-24 03:52:35.138574: step: 482/466, loss: 0.0071283141151070595 2023-01-24 03:52:35.778392: step: 484/466, loss: 0.012620110996067524 2023-01-24 03:52:36.422419: step: 486/466, loss: 0.0929551050066948 2023-01-24 03:52:37.019268: step: 488/466, loss: 0.020535800606012344 2023-01-24 03:52:37.572880: step: 490/466, loss: 0.03097878210246563 2023-01-24 03:52:38.165686: step: 492/466, loss: 0.14330974221229553 2023-01-24 03:52:38.807644: step: 494/466, loss: 0.028692664578557014 2023-01-24 03:52:39.440155: step: 496/466, loss: 0.0004861719498876482 2023-01-24 03:52:40.093852: step: 498/466, loss: 0.048825427889823914 2023-01-24 03:52:40.733545: step: 500/466, loss: 0.17499829828739166 2023-01-24 03:52:41.314518: step: 502/466, loss: 0.017115101218223572 2023-01-24 03:52:41.932385: step: 504/466, loss: 0.04524501413106918 2023-01-24 03:52:42.562130: step: 506/466, loss: 0.03364171087741852 2023-01-24 03:52:43.208197: step: 508/466, loss: 0.03208279609680176 2023-01-24 03:52:43.828419: step: 510/466, loss: 0.023519242182374 2023-01-24 03:52:44.433487: step: 512/466, loss: 0.007820955477654934 2023-01-24 03:52:45.043174: step: 514/466, loss: 0.004060547798871994 2023-01-24 03:52:45.644636: step: 516/466, loss: 0.00632070004940033 2023-01-24 03:52:46.357109: step: 518/466, loss: 0.02671051397919655 2023-01-24 03:52:46.978531: step: 520/466, loss: 0.018239395692944527 2023-01-24 03:52:47.529484: step: 522/466, loss: 0.019363006576895714 2023-01-24 03:52:48.308707: step: 524/466, loss: 0.7857245206832886 2023-01-24 03:52:48.889012: step: 526/466, loss: 0.004246650729328394 2023-01-24 03:52:49.455254: step: 528/466, loss: 0.007724442519247532 2023-01-24 03:52:50.045183: step: 530/466, loss: 0.2277851402759552 2023-01-24 03:52:50.636653: step: 532/466, loss: 0.02044188231229782 2023-01-24 03:52:51.262808: step: 534/466, loss: 0.02166859433054924 2023-01-24 03:52:51.924761: step: 536/466, loss: 0.0062633054330945015 2023-01-24 03:52:52.606859: step: 538/466, loss: 0.08236105740070343 2023-01-24 03:52:53.169593: step: 540/466, loss: 0.1476522982120514 2023-01-24 03:52:53.831143: step: 542/466, loss: 0.004284419119358063 2023-01-24 03:52:54.463716: step: 544/466, loss: 0.011481350287795067 2023-01-24 03:52:55.074962: step: 546/466, loss: 0.0007513607270084321 2023-01-24 03:52:55.667164: step: 548/466, loss: 0.0015099295414984226 2023-01-24 03:52:56.285606: step: 550/466, loss: 0.0057240622118115425 2023-01-24 03:52:56.864181: step: 552/466, loss: 0.05477767065167427 2023-01-24 03:52:57.489394: step: 554/466, loss: 0.019141511991620064 2023-01-24 03:52:58.183531: step: 556/466, loss: 0.11399663239717484 2023-01-24 03:52:58.789056: step: 558/466, loss: 0.02359907701611519 2023-01-24 03:52:59.428971: step: 560/466, loss: 0.04549015685915947 2023-01-24 03:53:00.049264: step: 562/466, loss: 0.0032001808285713196 2023-01-24 03:53:00.682090: step: 564/466, loss: 0.03296215459704399 2023-01-24 03:53:01.281420: step: 566/466, loss: 0.032535843551158905 2023-01-24 03:53:01.827310: step: 568/466, loss: 0.02791605144739151 2023-01-24 03:53:02.466080: step: 570/466, loss: 0.009045847691595554 2023-01-24 03:53:03.048422: step: 572/466, loss: 0.03438599035143852 2023-01-24 03:53:03.637110: step: 574/466, loss: 0.17986418306827545 2023-01-24 03:53:04.247747: step: 576/466, loss: 0.017250701785087585 2023-01-24 03:53:04.856693: step: 578/466, loss: 0.03292156755924225 2023-01-24 03:53:05.451580: step: 580/466, loss: 0.0006128060049377382 2023-01-24 03:53:06.035021: step: 582/466, loss: 0.005637643858790398 2023-01-24 03:53:06.702791: step: 584/466, loss: 0.013439452275633812 2023-01-24 03:53:07.305759: step: 586/466, loss: 0.00701222475618124 2023-01-24 03:53:07.875703: step: 588/466, loss: 0.030753254890441895 2023-01-24 03:53:08.502105: step: 590/466, loss: 0.046054352074861526 2023-01-24 03:53:09.178172: step: 592/466, loss: 0.1185753121972084 2023-01-24 03:53:09.749533: step: 594/466, loss: 0.048997662961483 2023-01-24 03:53:10.400915: step: 596/466, loss: 0.08898276090621948 2023-01-24 03:53:11.060716: step: 598/466, loss: 0.03522208705544472 2023-01-24 03:53:11.605025: step: 600/466, loss: 0.00015154962602537125 2023-01-24 03:53:12.201995: step: 602/466, loss: 0.0046302746050059795 2023-01-24 03:53:12.816256: step: 604/466, loss: 0.22859686613082886 2023-01-24 03:53:13.477297: step: 606/466, loss: 0.04483726620674133 2023-01-24 03:53:14.117254: step: 608/466, loss: 0.054383255541324615 2023-01-24 03:53:14.844192: step: 610/466, loss: 0.030266225337982178 2023-01-24 03:53:15.487870: step: 612/466, loss: 0.0004396865551825613 2023-01-24 03:53:16.104510: step: 614/466, loss: 0.004917386919260025 2023-01-24 03:53:16.756087: step: 616/466, loss: 0.017483513802289963 2023-01-24 03:53:17.380138: step: 618/466, loss: 0.006055546458810568 2023-01-24 03:53:18.002932: step: 620/466, loss: 0.02891460619866848 2023-01-24 03:53:18.737408: step: 622/466, loss: 0.09455475211143494 2023-01-24 03:53:19.344370: step: 624/466, loss: 0.054718174040317535 2023-01-24 03:53:19.971319: step: 626/466, loss: 0.03614216297864914 2023-01-24 03:53:20.619305: step: 628/466, loss: 0.05073446035385132 2023-01-24 03:53:21.193768: step: 630/466, loss: 0.0009193853475153446 2023-01-24 03:53:21.843471: step: 632/466, loss: 0.11036810278892517 2023-01-24 03:53:22.482595: step: 634/466, loss: 0.004744046367704868 2023-01-24 03:53:23.124375: step: 636/466, loss: 0.06734267622232437 2023-01-24 03:53:23.750541: step: 638/466, loss: 0.04062696173787117 2023-01-24 03:53:24.362035: step: 640/466, loss: 0.02793099544942379 2023-01-24 03:53:24.984718: step: 642/466, loss: 0.0048840404488146305 2023-01-24 03:53:25.595769: step: 644/466, loss: 0.06036311760544777 2023-01-24 03:53:26.219132: step: 646/466, loss: 0.0007871698471717536 2023-01-24 03:53:26.848594: step: 648/466, loss: 0.04189624637365341 2023-01-24 03:53:27.448845: step: 650/466, loss: 0.007794898469001055 2023-01-24 03:53:28.004408: step: 652/466, loss: 0.007587234955281019 2023-01-24 03:53:28.558627: step: 654/466, loss: 0.003714426886290312 2023-01-24 03:53:29.271175: step: 656/466, loss: 0.045027803629636765 2023-01-24 03:53:29.950888: step: 658/466, loss: 0.02378426305949688 2023-01-24 03:53:30.588295: step: 660/466, loss: 0.010064369067549706 2023-01-24 03:53:31.180203: step: 662/466, loss: 0.05422085523605347 2023-01-24 03:53:31.756880: step: 664/466, loss: 0.0013482351787388325 2023-01-24 03:53:32.460952: step: 666/466, loss: 0.0004007349780295044 2023-01-24 03:53:33.079049: step: 668/466, loss: 0.3075380027294159 2023-01-24 03:53:33.625906: step: 670/466, loss: 0.02334265038371086 2023-01-24 03:53:34.213677: step: 672/466, loss: 0.2771427035331726 2023-01-24 03:53:34.844182: step: 674/466, loss: 0.011716392822563648 2023-01-24 03:53:35.441225: step: 676/466, loss: 0.0197080560028553 2023-01-24 03:53:36.083314: step: 678/466, loss: 0.0005337827606126666 2023-01-24 03:53:36.747188: step: 680/466, loss: 0.047744929790496826 2023-01-24 03:53:37.389511: step: 682/466, loss: 0.05141134187579155 2023-01-24 03:53:37.950567: step: 684/466, loss: 0.0003885963815264404 2023-01-24 03:53:38.595656: step: 686/466, loss: 0.046781327575445175 2023-01-24 03:53:39.243178: step: 688/466, loss: 0.03787783905863762 2023-01-24 03:53:39.852470: step: 690/466, loss: 0.02537960559129715 2023-01-24 03:53:40.413836: step: 692/466, loss: 0.007958278059959412 2023-01-24 03:53:41.049366: step: 694/466, loss: 0.057035181671381 2023-01-24 03:53:41.615734: step: 696/466, loss: 0.0502951443195343 2023-01-24 03:53:42.236431: step: 698/466, loss: 0.047478318214416504 2023-01-24 03:53:42.882037: step: 700/466, loss: 0.044125545769929886 2023-01-24 03:53:43.467966: step: 702/466, loss: 0.022803746163845062 2023-01-24 03:53:44.113423: step: 704/466, loss: 0.061728183180093765 2023-01-24 03:53:44.750858: step: 706/466, loss: 0.0735589861869812 2023-01-24 03:53:45.346011: step: 708/466, loss: 0.0005398796638473868 2023-01-24 03:53:45.939258: step: 710/466, loss: 0.019195714965462685 2023-01-24 03:53:46.594884: step: 712/466, loss: 0.014919676817953587 2023-01-24 03:53:47.255634: step: 714/466, loss: 0.10128084570169449 2023-01-24 03:53:47.903935: step: 716/466, loss: 0.007795746438205242 2023-01-24 03:53:48.534963: step: 718/466, loss: 0.0025721562560647726 2023-01-24 03:53:49.187415: step: 720/466, loss: 0.012760023586452007 2023-01-24 03:53:49.791499: step: 722/466, loss: 0.03245546668767929 2023-01-24 03:53:50.380611: step: 724/466, loss: 0.03983759135007858 2023-01-24 03:53:51.053732: step: 726/466, loss: 0.04096521437168121 2023-01-24 03:53:51.704511: step: 728/466, loss: 0.011798517778515816 2023-01-24 03:53:52.296110: step: 730/466, loss: 0.009669964201748371 2023-01-24 03:53:52.902847: step: 732/466, loss: 0.06284675002098083 2023-01-24 03:53:53.418439: step: 734/466, loss: 0.022310523316264153 2023-01-24 03:53:54.026429: step: 736/466, loss: 0.01860705018043518 2023-01-24 03:53:54.696262: step: 738/466, loss: 0.0030016773380339146 2023-01-24 03:53:55.324125: step: 740/466, loss: 0.04680800437927246 2023-01-24 03:53:55.933529: step: 742/466, loss: 0.026600049808621407 2023-01-24 03:53:56.574508: step: 744/466, loss: 0.06729486584663391 2023-01-24 03:53:57.156776: step: 746/466, loss: 0.049576953053474426 2023-01-24 03:53:57.784805: step: 748/466, loss: 0.0032886529807001352 2023-01-24 03:53:58.449001: step: 750/466, loss: 0.0006859523709863424 2023-01-24 03:53:59.111211: step: 752/466, loss: 0.04004330560564995 2023-01-24 03:53:59.774005: step: 754/466, loss: 0.03351690620183945 2023-01-24 03:54:00.458294: step: 756/466, loss: 0.001649754587560892 2023-01-24 03:54:01.114655: step: 758/466, loss: 0.3805055320262909 2023-01-24 03:54:01.846908: step: 760/466, loss: 0.011594302952289581 2023-01-24 03:54:02.475588: step: 762/466, loss: 0.014391475357115269 2023-01-24 03:54:03.094009: step: 764/466, loss: 0.033565498888492584 2023-01-24 03:54:03.714956: step: 766/466, loss: 0.07644869387149811 2023-01-24 03:54:04.330715: step: 768/466, loss: 0.006856882944703102 2023-01-24 03:54:04.967362: step: 770/466, loss: 0.005011100322008133 2023-01-24 03:54:05.537478: step: 772/466, loss: 0.04017748683691025 2023-01-24 03:54:06.173884: step: 774/466, loss: 0.05664641037583351 2023-01-24 03:54:06.828762: step: 776/466, loss: 0.009156241081655025 2023-01-24 03:54:07.463453: step: 778/466, loss: 0.0162956565618515 2023-01-24 03:54:08.064180: step: 780/466, loss: 0.024769090116024017 2023-01-24 03:54:08.723482: step: 782/466, loss: 0.0032476421911269426 2023-01-24 03:54:09.359077: step: 784/466, loss: 0.00945268850773573 2023-01-24 03:54:09.961822: step: 786/466, loss: 0.008953304961323738 2023-01-24 03:54:10.647817: step: 788/466, loss: 0.024789387360215187 2023-01-24 03:54:11.264946: step: 790/466, loss: 0.0331110954284668 2023-01-24 03:54:11.931295: step: 792/466, loss: 0.0029637941624969244 2023-01-24 03:54:12.518206: step: 794/466, loss: 0.003647434990853071 2023-01-24 03:54:13.106841: step: 796/466, loss: 0.01982322335243225 2023-01-24 03:54:13.763691: step: 798/466, loss: 0.03863897547125816 2023-01-24 03:54:14.379186: step: 800/466, loss: 0.033416006714105606 2023-01-24 03:54:14.983527: step: 802/466, loss: 0.013230291195213795 2023-01-24 03:54:15.629319: step: 804/466, loss: 0.03807089850306511 2023-01-24 03:54:16.246388: step: 806/466, loss: 0.036580026149749756 2023-01-24 03:54:16.838374: step: 808/466, loss: 0.0015028535854071379 2023-01-24 03:54:17.567089: step: 810/466, loss: 0.0039765858091413975 2023-01-24 03:54:18.305614: step: 812/466, loss: 0.04079240560531616 2023-01-24 03:54:18.965035: step: 814/466, loss: 0.001219570985995233 2023-01-24 03:54:19.589734: step: 816/466, loss: 0.0005592587986029685 2023-01-24 03:54:20.200698: step: 818/466, loss: 0.007907913997769356 2023-01-24 03:54:20.803050: step: 820/466, loss: 0.04388028383255005 2023-01-24 03:54:21.416199: step: 822/466, loss: 0.018512558192014694 2023-01-24 03:54:22.074250: step: 824/466, loss: 0.019101902842521667 2023-01-24 03:54:22.657724: step: 826/466, loss: 0.0003036137786693871 2023-01-24 03:54:23.314815: step: 828/466, loss: 0.013605811633169651 2023-01-24 03:54:23.954487: step: 830/466, loss: 0.0007750964141450822 2023-01-24 03:54:24.545563: step: 832/466, loss: 0.0010865015210583806 2023-01-24 03:54:25.136499: step: 834/466, loss: 1.5557219982147217 2023-01-24 03:54:25.754303: step: 836/466, loss: 0.028510933741927147 2023-01-24 03:54:26.370214: step: 838/466, loss: 0.013181239366531372 2023-01-24 03:54:27.034152: step: 840/466, loss: 0.007881629280745983 2023-01-24 03:54:27.679516: step: 842/466, loss: 0.5158827900886536 2023-01-24 03:54:28.236479: step: 844/466, loss: 0.060511622577905655 2023-01-24 03:54:28.826553: step: 846/466, loss: 0.029197819530963898 2023-01-24 03:54:29.495743: step: 848/466, loss: 0.0028440975584089756 2023-01-24 03:54:30.080481: step: 850/466, loss: 0.012721743434667587 2023-01-24 03:54:30.739231: step: 852/466, loss: 0.013813858851790428 2023-01-24 03:54:31.388745: step: 854/466, loss: 0.017976820468902588 2023-01-24 03:54:31.993951: step: 856/466, loss: 0.028403783217072487 2023-01-24 03:54:32.611116: step: 858/466, loss: 0.0016109542921185493 2023-01-24 03:54:33.242996: step: 860/466, loss: 0.03347580134868622 2023-01-24 03:54:33.984505: step: 862/466, loss: 0.2174401581287384 2023-01-24 03:54:34.593319: step: 864/466, loss: 0.01822667010128498 2023-01-24 03:54:35.171879: step: 866/466, loss: 0.45253312587738037 2023-01-24 03:54:35.815527: step: 868/466, loss: 0.013842754065990448 2023-01-24 03:54:36.390125: step: 870/466, loss: 0.01675495132803917 2023-01-24 03:54:37.046414: step: 872/466, loss: 0.002902305917814374 2023-01-24 03:54:37.674537: step: 874/466, loss: 0.08422892540693283 2023-01-24 03:54:38.310103: step: 876/466, loss: 0.01864977739751339 2023-01-24 03:54:38.935420: step: 878/466, loss: 0.06614600867033005 2023-01-24 03:54:39.555761: step: 880/466, loss: 0.06060094013810158 2023-01-24 03:54:40.185306: step: 882/466, loss: 0.12300093472003937 2023-01-24 03:54:40.809459: step: 884/466, loss: 0.05484561249613762 2023-01-24 03:54:41.378719: step: 886/466, loss: 0.00846915040165186 2023-01-24 03:54:41.985948: step: 888/466, loss: 0.024216674268245697 2023-01-24 03:54:42.571341: step: 890/466, loss: 0.02085905335843563 2023-01-24 03:54:43.205602: step: 892/466, loss: 0.05968824401497841 2023-01-24 03:54:43.830167: step: 894/466, loss: 0.0692824274301529 2023-01-24 03:54:44.364896: step: 896/466, loss: 0.0036277940962463617 2023-01-24 03:54:45.076847: step: 898/466, loss: 0.022601688280701637 2023-01-24 03:54:45.726930: step: 900/466, loss: 0.018142150714993477 2023-01-24 03:54:46.318053: step: 902/466, loss: 0.033256981521844864 2023-01-24 03:54:46.929543: step: 904/466, loss: 0.341894268989563 2023-01-24 03:54:47.579778: step: 906/466, loss: 0.023613912984728813 2023-01-24 03:54:48.180382: step: 908/466, loss: 0.013277239166200161 2023-01-24 03:54:48.766684: step: 910/466, loss: 0.05817018076777458 2023-01-24 03:54:49.391654: step: 912/466, loss: 0.008494351990520954 2023-01-24 03:54:49.992535: step: 914/466, loss: 0.01082751527428627 2023-01-24 03:54:50.538147: step: 916/466, loss: 0.040563251823186874 2023-01-24 03:54:51.200839: step: 918/466, loss: 0.02546183206140995 2023-01-24 03:54:51.822865: step: 920/466, loss: 0.041106607764959335 2023-01-24 03:54:52.418637: step: 922/466, loss: 0.021582873538136482 2023-01-24 03:54:53.024096: step: 924/466, loss: 0.028529338538646698 2023-01-24 03:54:53.664077: step: 926/466, loss: 0.0011957907117903233 2023-01-24 03:54:54.311001: step: 928/466, loss: 0.02395629696547985 2023-01-24 03:54:54.916091: step: 930/466, loss: 0.019546261057257652 2023-01-24 03:54:55.543869: step: 932/466, loss: 0.21111570298671722 ================================================== Loss: 0.116 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3527404662866648, 'r': 0.3333297005896756, 'f1': 0.34276049211855425}, 'combined': 0.25256036261367154, 'epoch': 28} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35041346272385904, 'r': 0.28267319079711484, 'f1': 0.31291922221240615}, 'combined': 0.19594007372178704, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33193629917298195, 'r': 0.3394946209757823, 'f1': 0.3356729179253983}, 'combined': 0.24733793952397767, 'epoch': 28} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3588659944092702, 'r': 0.29700371558962163, 'f1': 0.3250173993810378}, 'combined': 0.20141923341923468, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3000292662470358, 'r': 0.32109393958885807, 'f1': 0.31020441093185736}, 'combined': 0.22857167121294752, 'epoch': 28} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3516372493725595, 'r': 0.2919192702559911, 'f1': 0.3190075342284128}, 'combined': 0.2116188593396402, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2986111111111111, 'r': 0.30714285714285716, 'f1': 0.3028169014084507}, 'combined': 0.20187793427230044, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3148148148148148, 'r': 0.3695652173913043, 'f1': 0.34}, 'combined': 0.17, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.1724137931034483, 'f1': 0.22222222222222224}, 'combined': 0.14814814814814814, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491647627064294, 'r': 0.32200014169890834, 'f1': 0.33503272393943667}, 'combined': 0.2468662176395849, 'epoch': 21} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32932177105318117, 'r': 0.2710662953073116, 'f1': 0.2973677774262388}, 'combined': 0.1862022531547477, 'epoch': 21} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.32857142857142857, 'f1': 0.34328358208955223}, 'combined': 0.2288557213930348, 'epoch': 21} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:57:29.340430: step: 2/466, loss: 0.055458780378103256 2023-01-24 03:57:29.956133: step: 4/466, loss: 0.15405134856700897 2023-01-24 03:57:30.563281: step: 6/466, loss: 0.002567231422290206 2023-01-24 03:57:31.174251: step: 8/466, loss: 0.00362135237082839 2023-01-24 03:57:31.798975: step: 10/466, loss: 0.028957726433873177 2023-01-24 03:57:32.369783: step: 12/466, loss: 0.009880519472062588 2023-01-24 03:57:32.993162: step: 14/466, loss: 0.002181266201660037 2023-01-24 03:57:33.635676: step: 16/466, loss: 0.017766177654266357 2023-01-24 03:57:34.217965: step: 18/466, loss: 0.0003223659878131002 2023-01-24 03:57:34.834715: step: 20/466, loss: 0.03559519350528717 2023-01-24 03:57:35.372103: step: 22/466, loss: 0.0033054545056074858 2023-01-24 03:57:35.966218: step: 24/466, loss: 0.06961570680141449 2023-01-24 03:57:36.538807: step: 26/466, loss: 0.002474683104082942 2023-01-24 03:57:37.150332: step: 28/466, loss: 0.012266730889678001 2023-01-24 03:57:37.811245: step: 30/466, loss: 0.016298441216349602 2023-01-24 03:57:38.452638: step: 32/466, loss: 0.015128509141504765 2023-01-24 03:57:39.079009: step: 34/466, loss: 0.002107497537508607 2023-01-24 03:57:39.715011: step: 36/466, loss: 0.004645375069230795 2023-01-24 03:57:40.367530: step: 38/466, loss: 0.004689569119364023 2023-01-24 03:57:40.971935: step: 40/466, loss: 0.026085158810019493 2023-01-24 03:57:41.635080: step: 42/466, loss: 0.018057744950056076 2023-01-24 03:57:42.251657: step: 44/466, loss: 0.1155918687582016 2023-01-24 03:57:42.881220: step: 46/466, loss: 0.02292424626648426 2023-01-24 03:57:43.502846: step: 48/466, loss: 0.02852936089038849 2023-01-24 03:57:44.137104: step: 50/466, loss: 0.6210425496101379 2023-01-24 03:57:44.761954: step: 52/466, loss: 0.007664544507861137 2023-01-24 03:57:45.366618: step: 54/466, loss: 0.0038290699012577534 2023-01-24 03:57:45.963187: step: 56/466, loss: 0.002938607707619667 2023-01-24 03:57:46.605016: step: 58/466, loss: 0.039176780730485916 2023-01-24 03:57:47.214516: step: 60/466, loss: 0.015739092603325844 2023-01-24 03:57:47.814235: step: 62/466, loss: 0.004881486762315035 2023-01-24 03:57:48.442074: step: 64/466, loss: 0.023610053583979607 2023-01-24 03:57:49.053558: step: 66/466, loss: 0.002876927610486746 2023-01-24 03:57:49.686903: step: 68/466, loss: 0.009272072464227676 2023-01-24 03:57:50.334274: step: 70/466, loss: 0.019138410687446594 2023-01-24 03:57:50.965671: step: 72/466, loss: 0.0038951088208705187 2023-01-24 03:57:51.558730: step: 74/466, loss: 0.0008685383945703506 2023-01-24 03:57:52.242437: step: 76/466, loss: 0.0030801768880337477 2023-01-24 03:57:52.890033: step: 78/466, loss: 0.22644856572151184 2023-01-24 03:57:53.515657: step: 80/466, loss: 0.05866759270429611 2023-01-24 03:57:54.137381: step: 82/466, loss: 0.2155982404947281 2023-01-24 03:57:54.713394: step: 84/466, loss: 0.011491452343761921 2023-01-24 03:57:55.333339: step: 86/466, loss: 0.03202078863978386 2023-01-24 03:57:55.996787: step: 88/466, loss: 0.061069175601005554 2023-01-24 03:57:56.642644: step: 90/466, loss: 0.04516958072781563 2023-01-24 03:57:57.287630: step: 92/466, loss: 0.006477218586951494 2023-01-24 03:57:57.880611: step: 94/466, loss: 0.012834693305194378 2023-01-24 03:57:58.541111: step: 96/466, loss: 0.28558802604675293 2023-01-24 03:57:59.144402: step: 98/466, loss: 0.022234907373785973 2023-01-24 03:57:59.870287: step: 100/466, loss: 0.043298911303281784 2023-01-24 03:58:00.541736: step: 102/466, loss: 0.04294387251138687 2023-01-24 03:58:01.134825: step: 104/466, loss: 0.009048323146998882 2023-01-24 03:58:01.795327: step: 106/466, loss: 0.008321230299770832 2023-01-24 03:58:02.426467: step: 108/466, loss: 0.0036059655249118805 2023-01-24 03:58:03.050969: step: 110/466, loss: 0.0009034308604896069 2023-01-24 03:58:03.647813: step: 112/466, loss: 0.1323184072971344 2023-01-24 03:58:04.275844: step: 114/466, loss: 0.020064985379576683 2023-01-24 03:58:04.896841: step: 116/466, loss: 0.043307989835739136 2023-01-24 03:58:05.500532: step: 118/466, loss: 0.02849368005990982 2023-01-24 03:58:06.114094: step: 120/466, loss: 0.03654780238866806 2023-01-24 03:58:06.800109: step: 122/466, loss: 0.0661456286907196 2023-01-24 03:58:07.456893: step: 124/466, loss: 0.0015133408596739173 2023-01-24 03:58:08.059622: step: 126/466, loss: 0.01084557082504034 2023-01-24 03:58:08.683564: step: 128/466, loss: 0.0015474183019250631 2023-01-24 03:58:09.336259: step: 130/466, loss: 0.008061830885708332 2023-01-24 03:58:09.955040: step: 132/466, loss: 0.013437075540423393 2023-01-24 03:58:10.504853: step: 134/466, loss: 0.01693044602870941 2023-01-24 03:58:11.169518: step: 136/466, loss: 0.02920161932706833 2023-01-24 03:58:11.763465: step: 138/466, loss: 0.0014797192998230457 2023-01-24 03:58:12.453228: step: 140/466, loss: 0.03992298245429993 2023-01-24 03:58:13.029659: step: 142/466, loss: 0.004694981966167688 2023-01-24 03:58:13.629017: step: 144/466, loss: 0.03145284950733185 2023-01-24 03:58:14.305322: step: 146/466, loss: 0.006949796807020903 2023-01-24 03:58:14.919759: step: 148/466, loss: 0.0030344414990395308 2023-01-24 03:58:15.578193: step: 150/466, loss: 0.014086630195379257 2023-01-24 03:58:16.207586: step: 152/466, loss: 0.017930377274751663 2023-01-24 03:58:16.820230: step: 154/466, loss: 0.002964240498840809 2023-01-24 03:58:17.468039: step: 156/466, loss: 0.0010559019865468144 2023-01-24 03:58:18.070334: step: 158/466, loss: 0.03484220802783966 2023-01-24 03:58:18.660674: step: 160/466, loss: 0.018427027389407158 2023-01-24 03:58:19.262422: step: 162/466, loss: 0.012817676179111004 2023-01-24 03:58:19.914376: step: 164/466, loss: 0.5581954121589661 2023-01-24 03:58:20.577566: step: 166/466, loss: 0.014592396095395088 2023-01-24 03:58:21.187015: step: 168/466, loss: 0.0002756986359599978 2023-01-24 03:58:21.836328: step: 170/466, loss: 0.018888216465711594 2023-01-24 03:58:22.458899: step: 172/466, loss: 0.0016108837444335222 2023-01-24 03:58:23.089538: step: 174/466, loss: 0.05931129679083824 2023-01-24 03:58:23.750991: step: 176/466, loss: 0.021158115938305855 2023-01-24 03:58:24.360052: step: 178/466, loss: 0.06505705416202545 2023-01-24 03:58:24.988672: step: 180/466, loss: 0.003004356985911727 2023-01-24 03:58:25.594398: step: 182/466, loss: 0.0010955791221931577 2023-01-24 03:58:26.225843: step: 184/466, loss: 0.3599194586277008 2023-01-24 03:58:26.822339: step: 186/466, loss: 0.0008553531370125711 2023-01-24 03:58:27.385000: step: 188/466, loss: 0.013580508530139923 2023-01-24 03:58:27.988098: step: 190/466, loss: 0.003150248434394598 2023-01-24 03:58:28.573238: step: 192/466, loss: 0.03827475756406784 2023-01-24 03:58:29.216609: step: 194/466, loss: 0.0022326000034809113 2023-01-24 03:58:29.851608: step: 196/466, loss: 0.02307925745844841 2023-01-24 03:58:30.479185: step: 198/466, loss: 0.015358785167336464 2023-01-24 03:58:31.112515: step: 200/466, loss: 0.00849807821214199 2023-01-24 03:58:31.773962: step: 202/466, loss: 0.012090289033949375 2023-01-24 03:58:32.396213: step: 204/466, loss: 0.050176870077848434 2023-01-24 03:58:32.946264: step: 206/466, loss: 1.0662583008524962e-05 2023-01-24 03:58:33.561035: step: 208/466, loss: 0.0032666290644556284 2023-01-24 03:58:34.149391: step: 210/466, loss: 0.021163562312722206 2023-01-24 03:58:34.844240: step: 212/466, loss: 0.0418899804353714 2023-01-24 03:58:35.471291: step: 214/466, loss: 0.025734828785061836 2023-01-24 03:58:36.052748: step: 216/466, loss: 0.001853344147093594 2023-01-24 03:58:36.698417: step: 218/466, loss: 0.02691415511071682 2023-01-24 03:58:37.268842: step: 220/466, loss: 0.023987652733922005 2023-01-24 03:58:37.940755: step: 222/466, loss: 0.11891963332891464 2023-01-24 03:58:38.612385: step: 224/466, loss: 0.024687206372618675 2023-01-24 03:58:39.193139: step: 226/466, loss: 0.034278325736522675 2023-01-24 03:58:39.794824: step: 228/466, loss: 0.4470962584018707 2023-01-24 03:58:40.392678: step: 230/466, loss: 0.00696721114218235 2023-01-24 03:58:40.974957: step: 232/466, loss: 0.09120207279920578 2023-01-24 03:58:41.605947: step: 234/466, loss: 0.01796315237879753 2023-01-24 03:58:42.179477: step: 236/466, loss: 0.02135283127427101 2023-01-24 03:58:42.830168: step: 238/466, loss: 0.026142485439777374 2023-01-24 03:58:43.438558: step: 240/466, loss: 0.012500501237809658 2023-01-24 03:58:44.034171: step: 242/466, loss: 0.003451203927397728 2023-01-24 03:58:44.619253: step: 244/466, loss: 0.009863471612334251 2023-01-24 03:58:45.276903: step: 246/466, loss: 0.00018838015967048705 2023-01-24 03:58:45.887942: step: 248/466, loss: 0.02044462226331234 2023-01-24 03:58:46.534033: step: 250/466, loss: 0.0927266776561737 2023-01-24 03:58:47.131465: step: 252/466, loss: 0.004784509539604187 2023-01-24 03:58:47.764024: step: 254/466, loss: 0.00503066461533308 2023-01-24 03:58:48.405807: step: 256/466, loss: 0.03931424766778946 2023-01-24 03:58:49.019003: step: 258/466, loss: 0.03814157471060753 2023-01-24 03:58:49.674274: step: 260/466, loss: 0.0018558165756985545 2023-01-24 03:58:50.281247: step: 262/466, loss: 0.036332663148641586 2023-01-24 03:58:50.895470: step: 264/466, loss: 0.025819065049290657 2023-01-24 03:58:51.595907: step: 266/466, loss: 0.11118387430906296 2023-01-24 03:58:52.293816: step: 268/466, loss: 0.013089925050735474 2023-01-24 03:58:52.900464: step: 270/466, loss: 0.03624863922595978 2023-01-24 03:58:53.484618: step: 272/466, loss: 0.0682566836476326 2023-01-24 03:58:54.033410: step: 274/466, loss: 0.003062065690755844 2023-01-24 03:58:54.669576: step: 276/466, loss: 0.0594942644238472 2023-01-24 03:58:55.300406: step: 278/466, loss: 0.01250424887984991 2023-01-24 03:58:55.865012: step: 280/466, loss: 0.43067610263824463 2023-01-24 03:58:56.454106: step: 282/466, loss: 0.018741458654403687 2023-01-24 03:58:57.047931: step: 284/466, loss: 0.02757546678185463 2023-01-24 03:58:57.711660: step: 286/466, loss: 0.028583785519003868 2023-01-24 03:58:58.348323: step: 288/466, loss: 0.017362266778945923 2023-01-24 03:58:59.008120: step: 290/466, loss: 0.008303117007017136 2023-01-24 03:58:59.608287: step: 292/466, loss: 0.039173100143671036 2023-01-24 03:59:00.279363: step: 294/466, loss: 0.0017002117820084095 2023-01-24 03:59:00.911587: step: 296/466, loss: 0.0066471220925450325 2023-01-24 03:59:01.557362: step: 298/466, loss: 0.0693783089518547 2023-01-24 03:59:02.132162: step: 300/466, loss: 0.025002075359225273 2023-01-24 03:59:02.767399: step: 302/466, loss: 7.945956167532131e-05 2023-01-24 03:59:03.409386: step: 304/466, loss: 0.0022197526413947344 2023-01-24 03:59:04.017969: step: 306/466, loss: 0.04276476800441742 2023-01-24 03:59:04.627489: step: 308/466, loss: 0.003689938923344016 2023-01-24 03:59:05.224368: step: 310/466, loss: 0.008290175348520279 2023-01-24 03:59:05.791647: step: 312/466, loss: 0.01216474175453186 2023-01-24 03:59:06.423579: step: 314/466, loss: 0.02552063763141632 2023-01-24 03:59:07.072193: step: 316/466, loss: 0.0009075455600395799 2023-01-24 03:59:07.691240: step: 318/466, loss: 0.03220680356025696 2023-01-24 03:59:08.377310: step: 320/466, loss: 0.08709538727998734 2023-01-24 03:59:08.995793: step: 322/466, loss: 0.012743586674332619 2023-01-24 03:59:09.678847: step: 324/466, loss: 0.05030284821987152 2023-01-24 03:59:10.265651: step: 326/466, loss: 0.06070198863744736 2023-01-24 03:59:10.881975: step: 328/466, loss: 0.003046990605071187 2023-01-24 03:59:11.478607: step: 330/466, loss: 0.0012152543058618903 2023-01-24 03:59:12.076628: step: 332/466, loss: 0.006156560033559799 2023-01-24 03:59:12.668022: step: 334/466, loss: 0.015339409001171589 2023-01-24 03:59:13.221946: step: 336/466, loss: 0.02426721528172493 2023-01-24 03:59:13.850006: step: 338/466, loss: 0.009182527661323547 2023-01-24 03:59:14.532632: step: 340/466, loss: 0.009834465570747852 2023-01-24 03:59:15.109386: step: 342/466, loss: 0.011993778869509697 2023-01-24 03:59:15.796369: step: 344/466, loss: 0.07179489731788635 2023-01-24 03:59:16.434144: step: 346/466, loss: 0.027768058702349663 2023-01-24 03:59:17.086420: step: 348/466, loss: 0.19790710508823395 2023-01-24 03:59:17.712379: step: 350/466, loss: 0.14374874532222748 2023-01-24 03:59:18.268120: step: 352/466, loss: 0.0007910241838544607 2023-01-24 03:59:18.901571: step: 354/466, loss: 0.03937764838337898 2023-01-24 03:59:19.550439: step: 356/466, loss: 0.013423875905573368 2023-01-24 03:59:20.131722: step: 358/466, loss: 0.0010883713839575648 2023-01-24 03:59:20.771578: step: 360/466, loss: 0.06256536394357681 2023-01-24 03:59:21.340581: step: 362/466, loss: 0.027907468378543854 2023-01-24 03:59:21.897985: step: 364/466, loss: 0.013393533416092396 2023-01-24 03:59:22.531218: step: 366/466, loss: 0.016689620912075043 2023-01-24 03:59:23.172438: step: 368/466, loss: 0.03434484452009201 2023-01-24 03:59:23.752894: step: 370/466, loss: 0.008721742779016495 2023-01-24 03:59:24.383251: step: 372/466, loss: 0.033357422798871994 2023-01-24 03:59:25.016953: step: 374/466, loss: 0.0007862323545850813 2023-01-24 03:59:25.611156: step: 376/466, loss: 0.014408997260034084 2023-01-24 03:59:26.289408: step: 378/466, loss: 0.0028941782657057047 2023-01-24 03:59:26.881612: step: 380/466, loss: 0.024151837453246117 2023-01-24 03:59:27.530368: step: 382/466, loss: 0.007602432742714882 2023-01-24 03:59:28.103177: step: 384/466, loss: 0.03272564336657524 2023-01-24 03:59:28.749239: step: 386/466, loss: 0.10223985463380814 2023-01-24 03:59:29.304363: step: 388/466, loss: 0.06555671244859695 2023-01-24 03:59:29.847111: step: 390/466, loss: 0.00729301618412137 2023-01-24 03:59:30.445091: step: 392/466, loss: 0.06495516002178192 2023-01-24 03:59:31.022040: step: 394/466, loss: 0.015802623704075813 2023-01-24 03:59:31.623848: step: 396/466, loss: 0.0006967387744225562 2023-01-24 03:59:32.198933: step: 398/466, loss: 0.04170413687825203 2023-01-24 03:59:32.768792: step: 400/466, loss: 0.005518648307770491 2023-01-24 03:59:33.447743: step: 402/466, loss: 0.04218827560544014 2023-01-24 03:59:34.037188: step: 404/466, loss: 0.15756018459796906 2023-01-24 03:59:34.651650: step: 406/466, loss: 0.01472820807248354 2023-01-24 03:59:35.354393: step: 408/466, loss: 0.05414842814207077 2023-01-24 03:59:35.954614: step: 410/466, loss: 0.005775025114417076 2023-01-24 03:59:36.563730: step: 412/466, loss: 0.03359856456518173 2023-01-24 03:59:37.156307: step: 414/466, loss: 0.015876727178692818 2023-01-24 03:59:37.719028: step: 416/466, loss: 0.013207340613007545 2023-01-24 03:59:38.316324: step: 418/466, loss: 0.004693139344453812 2023-01-24 03:59:38.930861: step: 420/466, loss: 0.018074549734592438 2023-01-24 03:59:39.578344: step: 422/466, loss: 0.07072412967681885 2023-01-24 03:59:40.289513: step: 424/466, loss: 0.04876650124788284 2023-01-24 03:59:40.911518: step: 426/466, loss: 0.07074081152677536 2023-01-24 03:59:41.507498: step: 428/466, loss: 0.0248673427850008 2023-01-24 03:59:42.165710: step: 430/466, loss: 0.028261784464120865 2023-01-24 03:59:42.791659: step: 432/466, loss: 0.00232923636212945 2023-01-24 03:59:43.460329: step: 434/466, loss: 0.01668694242835045 2023-01-24 03:59:44.078377: step: 436/466, loss: 0.04731211066246033 2023-01-24 03:59:44.709293: step: 438/466, loss: 0.05076116696000099 2023-01-24 03:59:45.343814: step: 440/466, loss: 0.06597016751766205 2023-01-24 03:59:45.933743: step: 442/466, loss: 0.034215111285448074 2023-01-24 03:59:46.537077: step: 444/466, loss: 0.04960392042994499 2023-01-24 03:59:47.165749: step: 446/466, loss: 0.008371061645448208 2023-01-24 03:59:47.785087: step: 448/466, loss: 0.0910857766866684 2023-01-24 03:59:48.391716: step: 450/466, loss: 0.017675234004855156 2023-01-24 03:59:48.992289: step: 452/466, loss: 0.05704466998577118 2023-01-24 03:59:49.652748: step: 454/466, loss: 0.010572683066129684 2023-01-24 03:59:50.268907: step: 456/466, loss: 0.027380328625440598 2023-01-24 03:59:50.876685: step: 458/466, loss: 0.015812745317816734 2023-01-24 03:59:51.473618: step: 460/466, loss: 0.05687829107046127 2023-01-24 03:59:52.083502: step: 462/466, loss: 0.008855399675667286 2023-01-24 03:59:52.689204: step: 464/466, loss: 0.027346044778823853 2023-01-24 03:59:53.297755: step: 466/466, loss: 0.2506482005119324 2023-01-24 03:59:53.890904: step: 468/466, loss: 0.022388659417629242 2023-01-24 03:59:54.496415: step: 470/466, loss: 0.003716476494446397 2023-01-24 03:59:55.114562: step: 472/466, loss: 0.16157712042331696 2023-01-24 03:59:55.772583: step: 474/466, loss: 0.00471423240378499 2023-01-24 03:59:56.406418: step: 476/466, loss: 0.11767103523015976 2023-01-24 03:59:57.071948: step: 478/466, loss: 0.04689113423228264 2023-01-24 03:59:57.687130: step: 480/466, loss: 0.0326380729675293 2023-01-24 03:59:58.242661: step: 482/466, loss: 0.00017360990750603378 2023-01-24 03:59:58.965125: step: 484/466, loss: 0.006069815717637539 2023-01-24 03:59:59.566889: step: 486/466, loss: 0.00955923181027174 2023-01-24 04:00:00.131407: step: 488/466, loss: 0.0006118972669355571 2023-01-24 04:00:00.832857: step: 490/466, loss: 0.007228366564959288 2023-01-24 04:00:01.401920: step: 492/466, loss: 0.021563038229942322 2023-01-24 04:00:01.970774: step: 494/466, loss: 0.008069528266787529 2023-01-24 04:00:02.543706: step: 496/466, loss: 0.0019346001790836453 2023-01-24 04:00:03.118257: step: 498/466, loss: 0.04826516658067703 2023-01-24 04:00:03.708358: step: 500/466, loss: 0.10739605873823166 2023-01-24 04:00:04.331094: step: 502/466, loss: 0.028787538409233093 2023-01-24 04:00:04.948970: step: 504/466, loss: 0.015625806525349617 2023-01-24 04:00:05.626198: step: 506/466, loss: 0.013151661492884159 2023-01-24 04:00:06.243525: step: 508/466, loss: 0.026067402213811874 2023-01-24 04:00:06.849590: step: 510/466, loss: 0.06838241219520569 2023-01-24 04:00:07.414823: step: 512/466, loss: 0.14713266491889954 2023-01-24 04:00:07.975051: step: 514/466, loss: 0.024375615641474724 2023-01-24 04:00:08.577149: step: 516/466, loss: 0.0011519754771143198 2023-01-24 04:00:09.240800: step: 518/466, loss: 0.0037082030903548002 2023-01-24 04:00:09.835227: step: 520/466, loss: 0.02469959482550621 2023-01-24 04:00:10.368750: step: 522/466, loss: 0.002963092178106308 2023-01-24 04:00:10.930400: step: 524/466, loss: 0.003018419025465846 2023-01-24 04:00:11.456945: step: 526/466, loss: 0.011339960619807243 2023-01-24 04:00:12.088236: step: 528/466, loss: 0.000892937183380127 2023-01-24 04:00:12.669054: step: 530/466, loss: 0.00013537502672988921 2023-01-24 04:00:13.349653: step: 532/466, loss: 0.009031346067786217 2023-01-24 04:00:14.011273: step: 534/466, loss: 0.0035680688451975584 2023-01-24 04:00:14.582016: step: 536/466, loss: 0.024319544434547424 2023-01-24 04:00:15.308460: step: 538/466, loss: 0.013947529718279839 2023-01-24 04:00:15.897198: step: 540/466, loss: 0.019546305760741234 2023-01-24 04:00:16.575559: step: 542/466, loss: 0.029420148581266403 2023-01-24 04:00:17.176515: step: 544/466, loss: 0.016928454861044884 2023-01-24 04:00:17.790580: step: 546/466, loss: 0.004215354565531015 2023-01-24 04:00:18.386394: step: 548/466, loss: 0.036032386124134064 2023-01-24 04:00:19.037271: step: 550/466, loss: 0.11021918803453445 2023-01-24 04:00:19.695672: step: 552/466, loss: 0.13087381422519684 2023-01-24 04:00:20.234843: step: 554/466, loss: 0.003767420072108507 2023-01-24 04:00:20.829563: step: 556/466, loss: 0.01813679188489914 2023-01-24 04:00:21.431081: step: 558/466, loss: 0.005178377032279968 2023-01-24 04:00:22.051170: step: 560/466, loss: 0.00700752018019557 2023-01-24 04:00:22.694485: step: 562/466, loss: 0.024122627452015877 2023-01-24 04:00:23.342435: step: 564/466, loss: 0.006926470436155796 2023-01-24 04:00:23.988459: step: 566/466, loss: 0.002192798303440213 2023-01-24 04:00:24.545934: step: 568/466, loss: 0.005069288890808821 2023-01-24 04:00:25.158418: step: 570/466, loss: 0.0009709860314615071 2023-01-24 04:00:25.779718: step: 572/466, loss: 0.0475449375808239 2023-01-24 04:00:26.399020: step: 574/466, loss: 0.00809780228883028 2023-01-24 04:00:27.025123: step: 576/466, loss: 0.007280854508280754 2023-01-24 04:00:27.630430: step: 578/466, loss: 0.005466064903885126 2023-01-24 04:00:28.212635: step: 580/466, loss: 0.0002499515831004828 2023-01-24 04:00:28.834462: step: 582/466, loss: 0.0017354476731270552 2023-01-24 04:00:29.527128: step: 584/466, loss: 0.02401800826191902 2023-01-24 04:00:30.179290: step: 586/466, loss: 0.05263170227408409 2023-01-24 04:00:30.735569: step: 588/466, loss: 0.010402865707874298 2023-01-24 04:00:31.283219: step: 590/466, loss: 0.012639729306101799 2023-01-24 04:00:31.922868: step: 592/466, loss: 0.0034263322595506907 2023-01-24 04:00:32.570839: step: 594/466, loss: 0.00882495753467083 2023-01-24 04:00:33.224159: step: 596/466, loss: 0.026667889207601547 2023-01-24 04:00:33.802474: step: 598/466, loss: 0.03127731382846832 2023-01-24 04:00:34.428966: step: 600/466, loss: 0.09185895323753357 2023-01-24 04:00:35.040521: step: 602/466, loss: 0.04450776427984238 2023-01-24 04:00:35.661802: step: 604/466, loss: 0.04808002710342407 2023-01-24 04:00:36.348579: step: 606/466, loss: 0.009080737829208374 2023-01-24 04:00:36.966350: step: 608/466, loss: 0.006041831336915493 2023-01-24 04:00:37.593984: step: 610/466, loss: 0.005429220385849476 2023-01-24 04:00:38.179000: step: 612/466, loss: 0.002425620099529624 2023-01-24 04:00:38.781642: step: 614/466, loss: 0.023527776822447777 2023-01-24 04:00:39.381082: step: 616/466, loss: 0.024566251784563065 2023-01-24 04:00:40.020376: step: 618/466, loss: 0.01168688666075468 2023-01-24 04:00:40.792860: step: 620/466, loss: 0.04210988059639931 2023-01-24 04:00:41.388995: step: 622/466, loss: 0.05103759840130806 2023-01-24 04:00:42.011539: step: 624/466, loss: 0.020575957372784615 2023-01-24 04:00:42.631338: step: 626/466, loss: 0.0016736033139750361 2023-01-24 04:00:43.232456: step: 628/466, loss: 0.007483240682631731 2023-01-24 04:00:43.828027: step: 630/466, loss: 0.005836804397404194 2023-01-24 04:00:44.466144: step: 632/466, loss: 0.021505624055862427 2023-01-24 04:00:45.042280: step: 634/466, loss: 0.007295564748346806 2023-01-24 04:00:45.698538: step: 636/466, loss: 0.0020562144927680492 2023-01-24 04:00:46.309804: step: 638/466, loss: 0.08412209153175354 2023-01-24 04:00:46.914178: step: 640/466, loss: 0.00717060687020421 2023-01-24 04:00:47.487491: step: 642/466, loss: 0.03495078906416893 2023-01-24 04:00:48.118480: step: 644/466, loss: 0.013647991232573986 2023-01-24 04:00:48.755898: step: 646/466, loss: 0.08542032539844513 2023-01-24 04:00:49.375597: step: 648/466, loss: 0.05156783014535904 2023-01-24 04:00:50.013687: step: 650/466, loss: 0.011495641432702541 2023-01-24 04:00:50.671810: step: 652/466, loss: 0.021660026162862778 2023-01-24 04:00:51.232362: step: 654/466, loss: 0.004816859494894743 2023-01-24 04:00:51.838071: step: 656/466, loss: 0.0038773231208324432 2023-01-24 04:00:52.432804: step: 658/466, loss: 0.09771724045276642 2023-01-24 04:00:52.965790: step: 660/466, loss: 0.0058814045041799545 2023-01-24 04:00:53.612350: step: 662/466, loss: 0.1017870083451271 2023-01-24 04:00:54.239875: step: 664/466, loss: 0.008702441118657589 2023-01-24 04:00:54.819030: step: 666/466, loss: 0.0007017693133093417 2023-01-24 04:00:55.443246: step: 668/466, loss: 0.06927017867565155 2023-01-24 04:00:56.055952: step: 670/466, loss: 0.0059940507635474205 2023-01-24 04:00:56.640235: step: 672/466, loss: 0.021509302780032158 2023-01-24 04:00:57.222138: step: 674/466, loss: 0.04001140967011452 2023-01-24 04:00:57.823535: step: 676/466, loss: 0.00619617011398077 2023-01-24 04:00:58.494511: step: 678/466, loss: 0.02416771650314331 2023-01-24 04:00:59.123282: step: 680/466, loss: 0.0016998895443975925 2023-01-24 04:00:59.720675: step: 682/466, loss: 0.057930637151002884 2023-01-24 04:01:00.316293: step: 684/466, loss: 0.08490652590990067 2023-01-24 04:01:00.917007: step: 686/466, loss: 0.14410264790058136 2023-01-24 04:01:01.670066: step: 688/466, loss: 0.04748551920056343 2023-01-24 04:01:02.238427: step: 690/466, loss: 0.006760920863598585 2023-01-24 04:01:02.831334: step: 692/466, loss: 0.007869471795856953 2023-01-24 04:01:03.455766: step: 694/466, loss: 0.059117190539836884 2023-01-24 04:01:04.085217: step: 696/466, loss: 0.0007719701388850808 2023-01-24 04:01:04.775926: step: 698/466, loss: 0.03341097757220268 2023-01-24 04:01:05.454504: step: 700/466, loss: 0.003831847570836544 2023-01-24 04:01:06.088158: step: 702/466, loss: 0.040099117904901505 2023-01-24 04:01:06.781558: step: 704/466, loss: 0.022016925737261772 2023-01-24 04:01:07.404097: step: 706/466, loss: 0.004059267230331898 2023-01-24 04:01:08.035980: step: 708/466, loss: 0.1325579285621643 2023-01-24 04:01:08.695993: step: 710/466, loss: 0.04360053315758705 2023-01-24 04:01:09.316997: step: 712/466, loss: 0.0035664194729179144 2023-01-24 04:01:09.909117: step: 714/466, loss: 0.011497018858790398 2023-01-24 04:01:10.626999: step: 716/466, loss: 0.003529996844008565 2023-01-24 04:01:11.182355: step: 718/466, loss: 0.003293456742540002 2023-01-24 04:01:11.791307: step: 720/466, loss: 0.0368904173374176 2023-01-24 04:01:12.426501: step: 722/466, loss: 0.08443581312894821 2023-01-24 04:01:13.063128: step: 724/466, loss: 0.00709934439510107 2023-01-24 04:01:13.667405: step: 726/466, loss: 0.20502182841300964 2023-01-24 04:01:14.260398: step: 728/466, loss: 0.03874938189983368 2023-01-24 04:01:14.861744: step: 730/466, loss: 0.17828691005706787 2023-01-24 04:01:15.439121: step: 732/466, loss: 0.7214789390563965 2023-01-24 04:01:16.110960: step: 734/466, loss: 0.11488200724124908 2023-01-24 04:01:16.870470: step: 736/466, loss: 0.05732061341404915 2023-01-24 04:01:17.518707: step: 738/466, loss: 0.021588746458292007 2023-01-24 04:01:18.172561: step: 740/466, loss: 0.006465683691203594 2023-01-24 04:01:18.771295: step: 742/466, loss: 0.22872889041900635 2023-01-24 04:01:19.404206: step: 744/466, loss: 0.06467809528112411 2023-01-24 04:01:19.954696: step: 746/466, loss: 0.008470224216580391 2023-01-24 04:01:20.569696: step: 748/466, loss: 0.0343211330473423 2023-01-24 04:01:21.157051: step: 750/466, loss: 0.0265874695032835 2023-01-24 04:01:22.022633: step: 752/466, loss: 0.024140940979123116 2023-01-24 04:01:22.576788: step: 754/466, loss: 0.017646193504333496 2023-01-24 04:01:23.167182: step: 756/466, loss: 0.030673718079924583 2023-01-24 04:01:23.777978: step: 758/466, loss: 0.005739795975387096 2023-01-24 04:01:24.375126: step: 760/466, loss: 0.0696202963590622 2023-01-24 04:01:25.019255: step: 762/466, loss: 0.00723445788025856 2023-01-24 04:01:25.658133: step: 764/466, loss: 0.014322903007268906 2023-01-24 04:01:26.246008: step: 766/466, loss: 0.03494839742779732 2023-01-24 04:01:26.908650: step: 768/466, loss: 0.006455695256590843 2023-01-24 04:01:27.553527: step: 770/466, loss: 0.008284795098006725 2023-01-24 04:01:28.220014: step: 772/466, loss: 0.010526874102652073 2023-01-24 04:01:28.862972: step: 774/466, loss: 0.004267999902367592 2023-01-24 04:01:29.487186: step: 776/466, loss: 0.11157587915658951 2023-01-24 04:01:30.127966: step: 778/466, loss: 0.016989924013614655 2023-01-24 04:01:30.812004: step: 780/466, loss: 0.05661598592996597 2023-01-24 04:01:31.450334: step: 782/466, loss: 0.008631090633571148 2023-01-24 04:01:32.108426: step: 784/466, loss: 0.02421271987259388 2023-01-24 04:01:32.715206: step: 786/466, loss: 0.007431394886225462 2023-01-24 04:01:33.341433: step: 788/466, loss: 0.008399268612265587 2023-01-24 04:01:33.884587: step: 790/466, loss: 0.0023318808525800705 2023-01-24 04:01:34.482308: step: 792/466, loss: 0.007587133906781673 2023-01-24 04:01:35.151413: step: 794/466, loss: 0.012428142130374908 2023-01-24 04:01:35.743569: step: 796/466, loss: 0.017512351274490356 2023-01-24 04:01:36.368838: step: 798/466, loss: 0.026235686615109444 2023-01-24 04:01:37.027033: step: 800/466, loss: 0.011647449806332588 2023-01-24 04:01:37.715256: step: 802/466, loss: 0.010017693042755127 2023-01-24 04:01:38.293147: step: 804/466, loss: 0.04299866035580635 2023-01-24 04:01:38.865387: step: 806/466, loss: 0.0018923030002042651 2023-01-24 04:01:39.526268: step: 808/466, loss: 0.043426040560007095 2023-01-24 04:01:40.112075: step: 810/466, loss: 0.07270084321498871 2023-01-24 04:01:40.759938: step: 812/466, loss: 0.09882272034883499 2023-01-24 04:01:41.379767: step: 814/466, loss: 0.060429349541664124 2023-01-24 04:01:41.984130: step: 816/466, loss: 0.007622709032148123 2023-01-24 04:01:42.609115: step: 818/466, loss: 0.05808849260210991 2023-01-24 04:01:43.300203: step: 820/466, loss: 0.02498483657836914 2023-01-24 04:01:43.948340: step: 822/466, loss: 0.01560881920158863 2023-01-24 04:01:44.575082: step: 824/466, loss: 0.015989849343895912 2023-01-24 04:01:45.245956: step: 826/466, loss: 0.019871845841407776 2023-01-24 04:01:45.883201: step: 828/466, loss: 0.01792735420167446 2023-01-24 04:01:46.544624: step: 830/466, loss: 0.0017182455630972981 2023-01-24 04:01:47.189598: step: 832/466, loss: 0.0010431138798594475 2023-01-24 04:01:47.780895: step: 834/466, loss: 0.00454977760091424 2023-01-24 04:01:48.417956: step: 836/466, loss: 0.056652382016181946 2023-01-24 04:01:49.052421: step: 838/466, loss: 0.004974375478923321 2023-01-24 04:01:49.695730: step: 840/466, loss: 0.02224394865334034 2023-01-24 04:01:50.283538: step: 842/466, loss: 0.02503383159637451 2023-01-24 04:01:50.918897: step: 844/466, loss: 0.062120914459228516 2023-01-24 04:01:51.519490: step: 846/466, loss: 0.03201693668961525 2023-01-24 04:01:52.177718: step: 848/466, loss: 0.025600528344511986 2023-01-24 04:01:52.804482: step: 850/466, loss: 0.1290968507528305 2023-01-24 04:01:53.494814: step: 852/466, loss: 0.02363685518503189 2023-01-24 04:01:54.077138: step: 854/466, loss: 0.40937554836273193 2023-01-24 04:01:54.674614: step: 856/466, loss: 0.016148915514349937 2023-01-24 04:01:55.291588: step: 858/466, loss: 0.022587740793824196 2023-01-24 04:01:55.893447: step: 860/466, loss: 0.3588748872280121 2023-01-24 04:01:56.534261: step: 862/466, loss: 0.045865412801504135 2023-01-24 04:01:57.155980: step: 864/466, loss: 0.060815632343292236 2023-01-24 04:01:57.734747: step: 866/466, loss: 0.07046215981245041 2023-01-24 04:01:58.327141: step: 868/466, loss: 0.03324376791715622 2023-01-24 04:01:58.982137: step: 870/466, loss: 0.002355964621528983 2023-01-24 04:01:59.537707: step: 872/466, loss: 0.014346129260957241 2023-01-24 04:02:00.132424: step: 874/466, loss: 0.009171859361231327 2023-01-24 04:02:00.773938: step: 876/466, loss: 0.14728856086730957 2023-01-24 04:02:01.475400: step: 878/466, loss: 0.1031573936343193 2023-01-24 04:02:02.109894: step: 880/466, loss: 0.014084714464843273 2023-01-24 04:02:02.756809: step: 882/466, loss: 0.0013595783384516835 2023-01-24 04:02:03.378017: step: 884/466, loss: 0.017398567870259285 2023-01-24 04:02:03.969316: step: 886/466, loss: 0.028092503547668457 2023-01-24 04:02:04.528272: step: 888/466, loss: 0.014292089268565178 2023-01-24 04:02:05.192499: step: 890/466, loss: 0.05108671635389328 2023-01-24 04:02:05.849804: step: 892/466, loss: 0.02743634209036827 2023-01-24 04:02:06.457519: step: 894/466, loss: 0.2530224323272705 2023-01-24 04:02:07.079761: step: 896/466, loss: 0.03472018241882324 2023-01-24 04:02:07.696653: step: 898/466, loss: 0.003845237661153078 2023-01-24 04:02:08.282627: step: 900/466, loss: 0.032374050468206406 2023-01-24 04:02:08.942396: step: 902/466, loss: 0.008284141309559345 2023-01-24 04:02:09.576947: step: 904/466, loss: 0.024483878165483475 2023-01-24 04:02:10.168814: step: 906/466, loss: 0.007158839143812656 2023-01-24 04:02:10.780308: step: 908/466, loss: 0.015555165708065033 2023-01-24 04:02:11.417149: step: 910/466, loss: 0.0011703958734869957 2023-01-24 04:02:12.044878: step: 912/466, loss: 0.014334444887936115 2023-01-24 04:02:12.651504: step: 914/466, loss: 0.004041311796754599 2023-01-24 04:02:13.289954: step: 916/466, loss: 0.02556944638490677 2023-01-24 04:02:13.926246: step: 918/466, loss: 0.002209881553426385 2023-01-24 04:02:14.503090: step: 920/466, loss: 0.016322242096066475 2023-01-24 04:02:15.143073: step: 922/466, loss: 0.00473765330389142 2023-01-24 04:02:15.750335: step: 924/466, loss: 0.03993857279419899 2023-01-24 04:02:16.289849: step: 926/466, loss: 0.0010968783171847463 2023-01-24 04:02:16.911198: step: 928/466, loss: 0.16139191389083862 2023-01-24 04:02:17.524697: step: 930/466, loss: 0.01240439061075449 2023-01-24 04:02:18.063602: step: 932/466, loss: 0.03332965075969696 ================================================== Loss: 0.040 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36309115609969944, 'r': 0.32313045960295833, 'f1': 0.34194729359590176}, 'combined': 0.25196116370224336, 'epoch': 29} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.347227257915669, 'r': 0.281476379500619, 'f1': 0.31091365026512596}, 'combined': 0.1946842482968546, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35389272737013916, 'r': 0.3364331241222765, 'f1': 0.34494213309813176}, 'combined': 0.2541678875459918, 'epoch': 29} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3565373624472215, 'r': 0.29314579168720106, 'f1': 0.32174892242636827}, 'combined': 0.1993936984050733, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3258387175326627, 'r': 0.3233655583863047, 'f1': 0.3245974271801573}, 'combined': 0.2391770516064317, 'epoch': 29} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.35461154109670345, 'r': 0.28605971653058376, 'f1': 0.3166681061993562}, 'combined': 0.2100669615381868, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.26785714285714285, 'f1': 0.31250000000000006}, 'combined': 0.20833333333333337, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3695652173913043, 'r': 0.3695652173913043, 'f1': 0.36956521739130427}, 'combined': 0.18478260869565213, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491647627064294, 'r': 0.32200014169890834, 'f1': 0.33503272393943667}, 'combined': 0.2468662176395849, 'epoch': 21} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.32932177105318117, 'r': 0.2710662953073116, 'f1': 0.2973677774262388}, 'combined': 0.1862022531547477, 'epoch': 21} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.359375, 'r': 0.32857142857142857, 'f1': 0.34328358208955223}, 'combined': 0.2288557213930348, 'epoch': 21} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:04:50.952051: step: 2/466, loss: 0.25535038113594055 2023-01-24 04:04:51.571060: step: 4/466, loss: 0.059584300965070724 2023-01-24 04:04:52.102089: step: 6/466, loss: 0.0011487414594739676 2023-01-24 04:04:52.729219: step: 8/466, loss: 0.01580643281340599 2023-01-24 04:04:53.344415: step: 10/466, loss: 0.0004533784813247621 2023-01-24 04:04:53.915825: step: 12/466, loss: 0.0019115079194307327 2023-01-24 04:04:54.530353: step: 14/466, loss: 0.08677832782268524 2023-01-24 04:04:55.184688: step: 16/466, loss: 0.9424175024032593 2023-01-24 04:04:55.777708: step: 18/466, loss: 0.005752339493483305 2023-01-24 04:04:56.389107: step: 20/466, loss: 0.011783424764871597 2023-01-24 04:04:57.062464: step: 22/466, loss: 0.012505832128226757 2023-01-24 04:04:57.622244: step: 24/466, loss: 0.021198196336627007 2023-01-24 04:04:58.220175: step: 26/466, loss: 0.003726731287315488 2023-01-24 04:04:58.815651: step: 28/466, loss: 0.00993861723691225 2023-01-24 04:04:59.406757: step: 30/466, loss: 0.009253359399735928 2023-01-24 04:05:00.024597: step: 32/466, loss: 0.02724388800561428 2023-01-24 04:05:00.672939: step: 34/466, loss: 0.05642537400126457 2023-01-24 04:05:01.339343: step: 36/466, loss: 0.18135637044906616 2023-01-24 04:05:01.952118: step: 38/466, loss: 0.0024714407045394182 2023-01-24 04:05:02.558444: step: 40/466, loss: 0.027407290413975716 2023-01-24 04:05:03.168950: step: 42/466, loss: 0.021592652425169945 2023-01-24 04:05:03.758069: step: 44/466, loss: 0.06558530032634735 2023-01-24 04:05:04.514750: step: 46/466, loss: 0.001523285172879696 2023-01-24 04:05:05.093721: step: 48/466, loss: 0.00045692603453062475 2023-01-24 04:05:05.661406: step: 50/466, loss: 0.007932682521641254 2023-01-24 04:05:06.251673: step: 52/466, loss: 0.001439443207345903 2023-01-24 04:05:06.937416: step: 54/466, loss: 0.034045495092868805 2023-01-24 04:05:07.542128: step: 56/466, loss: 0.019762855023145676 2023-01-24 04:05:08.218349: step: 58/466, loss: 0.024518828839063644 2023-01-24 04:05:08.846340: step: 60/466, loss: 0.32017919421195984 2023-01-24 04:05:09.422016: step: 62/466, loss: 0.0019085191888734698 2023-01-24 04:05:10.002593: step: 64/466, loss: 0.0019422724144533277 2023-01-24 04:05:10.695610: step: 66/466, loss: 0.028080932796001434 2023-01-24 04:05:11.290889: step: 68/466, loss: 0.001465092645958066 2023-01-24 04:05:11.912650: step: 70/466, loss: 0.0021644604858011007 2023-01-24 04:05:12.497804: step: 72/466, loss: 0.3165663182735443 2023-01-24 04:05:13.073662: step: 74/466, loss: 0.0019387322245165706 2023-01-24 04:05:13.723333: step: 76/466, loss: 0.00856761448085308 2023-01-24 04:05:14.362392: step: 78/466, loss: 0.011682793498039246 2023-01-24 04:05:14.883848: step: 80/466, loss: 0.0014889666344970465 2023-01-24 04:05:15.524906: step: 82/466, loss: 0.002886301139369607 2023-01-24 04:05:16.181182: step: 84/466, loss: 0.013197853229939938 2023-01-24 04:05:16.837927: step: 86/466, loss: 0.0016110630240291357 2023-01-24 04:05:17.476094: step: 88/466, loss: 0.0007182496483437717 2023-01-24 04:05:18.069129: step: 90/466, loss: 0.021293770521879196 2023-01-24 04:05:18.678211: step: 92/466, loss: 0.0070520141161978245 2023-01-24 04:05:19.298636: step: 94/466, loss: 0.003467913717031479 2023-01-24 04:05:19.931691: step: 96/466, loss: 0.017498059198260307 2023-01-24 04:05:20.566527: step: 98/466, loss: 0.002966963918879628 2023-01-24 04:05:21.132079: step: 100/466, loss: 0.03477673977613449 2023-01-24 04:05:21.673183: step: 102/466, loss: 0.003827991895377636 2023-01-24 04:05:22.303637: step: 104/466, loss: 0.005310960579663515 2023-01-24 04:05:23.006556: step: 106/466, loss: 0.009435564279556274 2023-01-24 04:05:23.588511: step: 108/466, loss: 0.004216838628053665 2023-01-24 04:05:24.161274: step: 110/466, loss: 0.010082358494400978 2023-01-24 04:05:24.796705: step: 112/466, loss: 0.009822693653404713 2023-01-24 04:05:25.467395: step: 114/466, loss: 0.0001807942462619394 2023-01-24 04:05:26.076305: step: 116/466, loss: 0.018664024770259857 2023-01-24 04:05:26.684550: step: 118/466, loss: 0.018917158246040344 2023-01-24 04:05:27.339971: step: 120/466, loss: 0.05617400258779526 2023-01-24 04:05:27.986113: step: 122/466, loss: 0.017198363319039345 2023-01-24 04:05:28.558744: step: 124/466, loss: 0.04158959165215492 2023-01-24 04:05:29.147385: step: 126/466, loss: 0.033165402710437775 2023-01-24 04:05:29.720958: step: 128/466, loss: 0.023463696241378784 2023-01-24 04:05:30.358591: step: 130/466, loss: 0.01603042334318161 2023-01-24 04:05:30.931701: step: 132/466, loss: 0.010675805620849133 2023-01-24 04:05:31.562859: step: 134/466, loss: 0.01999947801232338 2023-01-24 04:05:32.129888: step: 136/466, loss: 0.017424974590539932 2023-01-24 04:05:32.760426: step: 138/466, loss: 0.010514659807085991 2023-01-24 04:05:33.408971: step: 140/466, loss: 0.07198095321655273 2023-01-24 04:05:33.958213: step: 142/466, loss: 0.0015183447394520044 2023-01-24 04:05:34.609781: step: 144/466, loss: 0.015799857676029205 2023-01-24 04:05:35.251529: step: 146/466, loss: 0.011465340852737427 2023-01-24 04:05:35.838956: step: 148/466, loss: 0.005793638527393341 2023-01-24 04:05:36.479697: step: 150/466, loss: 0.2577780783176422 2023-01-24 04:05:37.173937: step: 152/466, loss: 0.05551606044173241 2023-01-24 04:05:37.817592: step: 154/466, loss: 4.4543352127075195 2023-01-24 04:05:38.502486: step: 156/466, loss: 0.011444765143096447 2023-01-24 04:05:39.065914: step: 158/466, loss: 0.0006477702409029007 2023-01-24 04:05:39.631965: step: 160/466, loss: 0.0015633396105840802 2023-01-24 04:05:40.272582: step: 162/466, loss: 0.0035987847950309515 2023-01-24 04:05:40.852287: step: 164/466, loss: 0.0012867094483226538 2023-01-24 04:05:41.456750: step: 166/466, loss: 3.6880690004181815e-06 2023-01-24 04:05:42.143535: step: 168/466, loss: 0.014223197475075722 2023-01-24 04:05:42.779643: step: 170/466, loss: 0.031598109751939774 2023-01-24 04:05:43.396882: step: 172/466, loss: 0.003733185352757573 2023-01-24 04:05:43.974419: step: 174/466, loss: 0.07216422259807587 2023-01-24 04:05:44.591360: step: 176/466, loss: 0.0005256884032860398 2023-01-24 04:05:45.180006: step: 178/466, loss: 0.0060292575508356094 2023-01-24 04:05:45.844540: step: 180/466, loss: 0.004464306868612766 2023-01-24 04:05:46.517490: step: 182/466, loss: 0.05793391540646553 2023-01-24 04:05:47.127663: step: 184/466, loss: 0.06362371891736984 2023-01-24 04:05:47.688674: step: 186/466, loss: 0.0032500093802809715 2023-01-24 04:05:48.320150: step: 188/466, loss: 0.013271069154143333 2023-01-24 04:05:48.949611: step: 190/466, loss: 0.05028381943702698 2023-01-24 04:05:49.601220: step: 192/466, loss: 0.007637789938598871 2023-01-24 04:05:50.270282: step: 194/466, loss: 0.00744094792753458 2023-01-24 04:05:50.894926: step: 196/466, loss: 0.010570245794951916 2023-01-24 04:05:51.544926: step: 198/466, loss: 0.0015127696096897125 2023-01-24 04:05:52.151095: step: 200/466, loss: 0.024606024846434593 2023-01-24 04:05:52.776571: step: 202/466, loss: 0.006593392696231604 2023-01-24 04:05:53.443771: step: 204/466, loss: 0.011315795592963696 2023-01-24 04:05:54.070834: step: 206/466, loss: 0.029257386922836304 2023-01-24 04:05:54.672065: step: 208/466, loss: 0.05389769375324249 2023-01-24 04:05:55.318658: step: 210/466, loss: 0.004224174655973911 2023-01-24 04:05:55.996573: step: 212/466, loss: 0.008780408650636673 2023-01-24 04:05:56.644470: step: 214/466, loss: 0.007662737276405096 2023-01-24 04:05:57.211424: step: 216/466, loss: 0.042978685349226 2023-01-24 04:05:57.839864: step: 218/466, loss: 0.016707254573702812 2023-01-24 04:05:58.496805: step: 220/466, loss: 0.00432699266821146 2023-01-24 04:05:59.139327: step: 222/466, loss: 0.0940069779753685 2023-01-24 04:05:59.749028: step: 224/466, loss: 0.05365337058901787 2023-01-24 04:06:00.398345: step: 226/466, loss: 0.01027167122811079 2023-01-24 04:06:00.970482: step: 228/466, loss: 0.11637594550848007 2023-01-24 04:06:01.601214: step: 230/466, loss: 0.09816039353609085 2023-01-24 04:06:02.267811: step: 232/466, loss: 0.02542124129831791 2023-01-24 04:06:02.922099: step: 234/466, loss: 0.046224284917116165 2023-01-24 04:06:03.610731: step: 236/466, loss: 0.0020608706399798393 2023-01-24 04:06:04.184129: step: 238/466, loss: 0.004918182268738747 2023-01-24 04:06:04.824944: step: 240/466, loss: 0.01395337749272585 2023-01-24 04:06:05.482962: step: 242/466, loss: 0.021894754841923714 2023-01-24 04:06:06.089632: step: 244/466, loss: 0.001072632265277207 2023-01-24 04:06:06.675841: step: 246/466, loss: 0.026571357622742653 2023-01-24 04:06:07.288999: step: 248/466, loss: 0.00011169735807925463 2023-01-24 04:06:07.882200: step: 250/466, loss: 0.012831415981054306 2023-01-24 04:06:08.532430: step: 252/466, loss: 0.035040538758039474 2023-01-24 04:06:09.139552: step: 254/466, loss: 0.011681620962917805 2023-01-24 04:06:09.715229: step: 256/466, loss: 0.018262110650539398 2023-01-24 04:06:10.307649: step: 258/466, loss: 0.0010507949627935886 2023-01-24 04:06:10.918633: step: 260/466, loss: 0.012839422561228275 2023-01-24 04:06:11.565781: step: 262/466, loss: 0.0018101419555023313 2023-01-24 04:06:12.215018: step: 264/466, loss: 0.016770323738455772 2023-01-24 04:06:12.809668: step: 266/466, loss: 0.0018701822264119983 2023-01-24 04:06:13.492345: step: 268/466, loss: 0.010127153247594833 2023-01-24 04:06:14.180137: step: 270/466, loss: 0.12128043174743652 2023-01-24 04:06:14.788026: step: 272/466, loss: 0.002511512953788042 2023-01-24 04:06:15.365641: step: 274/466, loss: 0.012186328880488873 2023-01-24 04:06:15.923022: step: 276/466, loss: 0.005697715096175671 2023-01-24 04:06:16.559789: step: 278/466, loss: 0.005176094360649586 2023-01-24 04:06:17.180577: step: 280/466, loss: 0.0028243553824722767 2023-01-24 04:06:17.841225: step: 282/466, loss: 0.000831534736789763 2023-01-24 04:06:18.469088: step: 284/466, loss: 0.005542241036891937 2023-01-24 04:06:19.114762: step: 286/466, loss: 0.012190895155072212 2023-01-24 04:06:19.724762: step: 288/466, loss: 0.0004219510010443628 2023-01-24 04:06:20.439422: step: 290/466, loss: 0.014722839929163456 2023-01-24 04:06:21.068112: step: 292/466, loss: 0.0005669038509950042 2023-01-24 04:06:21.648122: step: 294/466, loss: 0.027332326397299767 2023-01-24 04:06:22.313562: step: 296/466, loss: 0.013245755806565285 2023-01-24 04:06:22.932546: step: 298/466, loss: 0.004525027237832546 2023-01-24 04:06:23.549123: step: 300/466, loss: 0.011388568207621574 2023-01-24 04:06:24.277504: step: 302/466, loss: 0.16052161157131195 2023-01-24 04:06:24.889529: step: 304/466, loss: 0.025578703731298447 2023-01-24 04:06:25.564882: step: 306/466, loss: 0.0348832868039608 2023-01-24 04:06:26.203748: step: 308/466, loss: 0.006878445856273174 2023-01-24 04:06:26.815273: step: 310/466, loss: 0.001989806769415736 2023-01-24 04:06:27.495057: step: 312/466, loss: 0.0032198973931372166 2023-01-24 04:06:28.104796: step: 314/466, loss: 0.1315160095691681 2023-01-24 04:06:28.787156: step: 316/466, loss: 0.04008382931351662 2023-01-24 04:06:29.423512: step: 318/466, loss: 0.0020131836645305157 2023-01-24 04:06:30.080587: step: 320/466, loss: 0.004674920812249184 2023-01-24 04:06:30.823910: step: 322/466, loss: 0.015164303593337536 2023-01-24 04:06:31.459865: step: 324/466, loss: 0.04455585032701492 2023-01-24 04:06:32.029546: step: 326/466, loss: 0.01877361163496971 2023-01-24 04:06:32.605814: step: 328/466, loss: 0.0015368256717920303 2023-01-24 04:06:33.322201: step: 330/466, loss: 0.040641941130161285 2023-01-24 04:06:34.027149: step: 332/466, loss: 0.2505246102809906 2023-01-24 04:06:34.675334: step: 334/466, loss: 0.005070647224783897 2023-01-24 04:06:35.302107: step: 336/466, loss: 0.00018404850561637431 2023-01-24 04:06:35.908807: step: 338/466, loss: 0.031194377690553665 2023-01-24 04:06:36.545813: step: 340/466, loss: 0.005099698901176453 2023-01-24 04:06:37.098159: step: 342/466, loss: 0.0013495985185727477 2023-01-24 04:06:37.735883: step: 344/466, loss: 0.1284133791923523 2023-01-24 04:06:38.370942: step: 346/466, loss: 0.004485417623072863 2023-01-24 04:06:38.998922: step: 348/466, loss: 0.004613775759935379 2023-01-24 04:06:39.601470: step: 350/466, loss: 0.03906206414103508 2023-01-24 04:06:40.229754: step: 352/466, loss: 0.013930348679423332 2023-01-24 04:06:40.831144: step: 354/466, loss: 0.012603990733623505 2023-01-24 04:06:41.459117: step: 356/466, loss: 0.07953281700611115 2023-01-24 04:06:42.101796: step: 358/466, loss: 0.03879635035991669 2023-01-24 04:06:42.629725: step: 360/466, loss: 0.0024805129505693913 2023-01-24 04:06:43.322258: step: 362/466, loss: 0.014602967537939548 2023-01-24 04:06:43.955010: step: 364/466, loss: 0.0034003867767751217 2023-01-24 04:06:44.484666: step: 366/466, loss: 0.012691373936831951 2023-01-24 04:06:45.073025: step: 368/466, loss: 0.0021625852677971125 2023-01-24 04:06:45.807901: step: 370/466, loss: 0.02687658928334713 2023-01-24 04:06:46.508525: step: 372/466, loss: 0.03532731533050537 2023-01-24 04:06:47.096271: step: 374/466, loss: 0.025283852592110634 2023-01-24 04:06:47.727253: step: 376/466, loss: 0.053645867854356766 2023-01-24 04:06:48.336512: step: 378/466, loss: 0.0241458211094141 2023-01-24 04:06:48.984666: step: 380/466, loss: 0.008507013320922852 2023-01-24 04:06:49.592834: step: 382/466, loss: 0.0024003516882658005 2023-01-24 04:06:50.252135: step: 384/466, loss: 0.06329336762428284 2023-01-24 04:06:50.883942: step: 386/466, loss: 0.04357927665114403 2023-01-24 04:06:51.509304: step: 388/466, loss: 0.009667825885117054 2023-01-24 04:06:52.141368: step: 390/466, loss: 0.032328635454177856 2023-01-24 04:06:52.758543: step: 392/466, loss: 0.009936768561601639 2023-01-24 04:06:53.393694: step: 394/466, loss: 0.001956153428182006 2023-01-24 04:06:54.025119: step: 396/466, loss: 0.021120784804224968 2023-01-24 04:06:54.638105: step: 398/466, loss: 0.025271767750382423 2023-01-24 04:06:55.234883: step: 400/466, loss: 0.004417846444994211 2023-01-24 04:06:55.877419: step: 402/466, loss: 0.031339503824710846 2023-01-24 04:06:56.426552: step: 404/466, loss: 0.02603183314204216 2023-01-24 04:06:57.003482: step: 406/466, loss: 0.03182559832930565 2023-01-24 04:06:57.657158: step: 408/466, loss: 0.0009004041203297675 2023-01-24 04:06:58.248806: step: 410/466, loss: 0.003806175896897912 2023-01-24 04:06:58.826249: step: 412/466, loss: 0.45821917057037354 2023-01-24 04:06:59.513445: step: 414/466, loss: 0.0021742149256169796 2023-01-24 04:07:00.161233: step: 416/466, loss: 0.014737543649971485 2023-01-24 04:07:00.738846: step: 418/466, loss: 0.0005118109402246773 2023-01-24 04:07:01.388416: step: 420/466, loss: 0.03900913894176483 2023-01-24 04:07:02.012480: step: 422/466, loss: 0.0010500759817659855 2023-01-24 04:07:02.729313: step: 424/466, loss: 0.005701580550521612 2023-01-24 04:07:03.286000: step: 426/466, loss: 0.009693636558949947 2023-01-24 04:07:03.924738: step: 428/466, loss: 0.005985029973089695 2023-01-24 04:07:04.509245: step: 430/466, loss: 0.009088157676160336 2023-01-24 04:07:05.100889: step: 432/466, loss: 0.001938398228958249 2023-01-24 04:07:05.708900: step: 434/466, loss: 0.020009316504001617 2023-01-24 04:07:06.405020: step: 436/466, loss: 5.809044887428172e-05 2023-01-24 04:07:06.968687: step: 438/466, loss: 0.01283319666981697 2023-01-24 04:07:07.553650: step: 440/466, loss: 0.010783478617668152 2023-01-24 04:07:08.217998: step: 442/466, loss: 0.6042597889900208 2023-01-24 04:07:08.970653: step: 444/466, loss: 0.020227300003170967 2023-01-24 04:07:09.504176: step: 446/466, loss: 0.05583236739039421 2023-01-24 04:07:10.187464: step: 448/466, loss: 0.17182712256908417 2023-01-24 04:07:10.819040: step: 450/466, loss: 0.033569566905498505 2023-01-24 04:07:11.463853: step: 452/466, loss: 0.3938918113708496 2023-01-24 04:07:12.167214: step: 454/466, loss: 0.028887035325169563 2023-01-24 04:07:12.787557: step: 456/466, loss: 0.004464501515030861 2023-01-24 04:07:13.452711: step: 458/466, loss: 0.008131398819386959 2023-01-24 04:07:14.077072: step: 460/466, loss: 0.11341170966625214 2023-01-24 04:07:14.651160: step: 462/466, loss: 0.0030010126065462828 2023-01-24 04:07:15.312016: step: 464/466, loss: 0.08654288202524185 2023-01-24 04:07:15.896838: step: 466/466, loss: 0.01917041465640068 2023-01-24 04:07:16.435777: step: 468/466, loss: 0.017753876745700836 2023-01-24 04:07:16.977709: step: 470/466, loss: 0.0015732371248304844 2023-01-24 04:07:17.619835: step: 472/466, loss: 0.0013067168183624744 2023-01-24 04:07:18.268485: step: 474/466, loss: 0.007370651233941317 2023-01-24 04:07:18.922943: step: 476/466, loss: 0.012751343660056591 2023-01-24 04:07:19.547285: step: 478/466, loss: 0.008689838461577892 2023-01-24 04:07:20.118000: step: 480/466, loss: 0.08662420511245728 2023-01-24 04:07:20.760205: step: 482/466, loss: 0.07603882998228073 2023-01-24 04:07:21.391501: step: 484/466, loss: 0.023959536105394363 2023-01-24 04:07:22.054659: step: 486/466, loss: 0.055649999529123306 2023-01-24 04:07:22.738550: step: 488/466, loss: 0.0018876695539802313 2023-01-24 04:07:23.290189: step: 490/466, loss: 0.047939665615558624 2023-01-24 04:07:23.926950: step: 492/466, loss: 1.2007006406784058 2023-01-24 04:07:24.535127: step: 494/466, loss: 0.020490868017077446 2023-01-24 04:07:25.160441: step: 496/466, loss: 0.009708845987915993 2023-01-24 04:07:25.777550: step: 498/466, loss: 0.034252651035785675 2023-01-24 04:07:26.388841: step: 500/466, loss: 0.008174884133040905 2023-01-24 04:07:26.979503: step: 502/466, loss: 0.0036410826724022627 2023-01-24 04:07:27.622221: step: 504/466, loss: 0.012882047332823277 2023-01-24 04:07:28.261359: step: 506/466, loss: 0.1272018998861313 2023-01-24 04:07:28.861445: step: 508/466, loss: 0.02409953624010086 2023-01-24 04:07:29.408543: step: 510/466, loss: 0.03641194477677345 2023-01-24 04:07:30.021192: step: 512/466, loss: 0.0027759310323745012 2023-01-24 04:07:30.660814: step: 514/466, loss: 0.008434903807938099 2023-01-24 04:07:31.241475: step: 516/466, loss: 0.004168310668319464 2023-01-24 04:07:31.886494: step: 518/466, loss: 0.024747787043452263 2023-01-24 04:07:32.478045: step: 520/466, loss: 0.4482802748680115 2023-01-24 04:07:33.131050: step: 522/466, loss: 0.019972914829850197 2023-01-24 04:07:33.750780: step: 524/466, loss: 0.00814759824424982 2023-01-24 04:07:34.344203: step: 526/466, loss: 0.021629955619573593 2023-01-24 04:07:34.991238: step: 528/466, loss: 0.008018465712666512 2023-01-24 04:07:35.667999: step: 530/466, loss: 0.025636399164795876 2023-01-24 04:07:36.226541: step: 532/466, loss: 0.0006201571086421609 2023-01-24 04:07:36.833364: step: 534/466, loss: 0.04001878201961517 2023-01-24 04:07:37.419773: step: 536/466, loss: 0.016146061941981316 2023-01-24 04:07:37.982406: step: 538/466, loss: 0.02312307059764862 2023-01-24 04:07:38.585118: step: 540/466, loss: 0.00308585655875504 2023-01-24 04:07:39.176074: step: 542/466, loss: 0.02875497005879879 2023-01-24 04:07:39.707583: step: 544/466, loss: 0.010568222962319851 2023-01-24 04:07:40.327259: step: 546/466, loss: 0.0030713172163814306 2023-01-24 04:07:40.941366: step: 548/466, loss: 0.014920078217983246 2023-01-24 04:07:41.555534: step: 550/466, loss: 0.015367996878921986 2023-01-24 04:07:42.222325: step: 552/466, loss: 0.01573663018643856 2023-01-24 04:07:42.807220: step: 554/466, loss: 0.02735070139169693 2023-01-24 04:07:43.443667: step: 556/466, loss: 0.009266615845263004 2023-01-24 04:07:44.095524: step: 558/466, loss: 0.008889292366802692 2023-01-24 04:07:44.753710: step: 560/466, loss: 0.11103135347366333 2023-01-24 04:07:45.362965: step: 562/466, loss: 0.0005813875468447804 2023-01-24 04:07:46.035139: step: 564/466, loss: 0.7120652198791504 2023-01-24 04:07:46.707205: step: 566/466, loss: 0.013271020725369453 2023-01-24 04:07:47.279870: step: 568/466, loss: 0.05157516524195671 2023-01-24 04:07:47.884968: step: 570/466, loss: 0.00710834376513958 2023-01-24 04:07:48.438858: step: 572/466, loss: 0.008108958601951599 2023-01-24 04:07:49.079673: step: 574/466, loss: 0.039710644632577896 2023-01-24 04:07:49.681819: step: 576/466, loss: 0.008387508802115917 2023-01-24 04:07:50.271431: step: 578/466, loss: 0.00163850758690387 2023-01-24 04:07:50.803511: step: 580/466, loss: 0.0025524578522890806 2023-01-24 04:07:51.322980: step: 582/466, loss: 0.007011826615780592 2023-01-24 04:07:51.914646: step: 584/466, loss: 0.048207804560661316 2023-01-24 04:07:52.482501: step: 586/466, loss: 0.0020360019989311695 2023-01-24 04:07:53.166842: step: 588/466, loss: 0.004026514478027821 2023-01-24 04:07:53.790459: step: 590/466, loss: 0.035144511610269547 2023-01-24 04:07:54.317134: step: 592/466, loss: 0.008531235158443451 2023-01-24 04:07:54.880841: step: 594/466, loss: 0.0019683947321027517 2023-01-24 04:07:55.517990: step: 596/466, loss: 0.040554702281951904 2023-01-24 04:07:56.164717: step: 598/466, loss: 0.015702493488788605 2023-01-24 04:07:56.771560: step: 600/466, loss: 0.0009891258087009192 2023-01-24 04:07:57.357596: step: 602/466, loss: 0.03403882682323456 2023-01-24 04:07:57.986175: step: 604/466, loss: 0.013812286779284477 2023-01-24 04:07:58.635440: step: 606/466, loss: 0.005240297876298428 2023-01-24 04:07:59.316261: step: 608/466, loss: 0.02737869881093502 2023-01-24 04:07:59.916769: step: 610/466, loss: 0.04239679500460625 2023-01-24 04:08:00.583000: step: 612/466, loss: 0.020463278517127037 2023-01-24 04:08:01.144387: step: 614/466, loss: 0.008261370472609997 2023-01-24 04:08:01.717131: step: 616/466, loss: 0.0028720779810100794 2023-01-24 04:08:02.301067: step: 618/466, loss: 0.018434986472129822 2023-01-24 04:08:02.866552: step: 620/466, loss: 0.025052722543478012 2023-01-24 04:08:03.551774: step: 622/466, loss: 0.11167764663696289 2023-01-24 04:08:04.140624: step: 624/466, loss: 0.09473086893558502 2023-01-24 04:08:04.764243: step: 626/466, loss: 0.0028115841560065746 2023-01-24 04:08:05.340843: step: 628/466, loss: 0.07390471547842026 2023-01-24 04:08:05.940835: step: 630/466, loss: 0.03992309048771858 2023-01-24 04:08:06.513175: step: 632/466, loss: 0.06804033368825912 2023-01-24 04:08:07.181776: step: 634/466, loss: 0.06968791037797928 2023-01-24 04:08:07.898799: step: 636/466, loss: 0.0007771103992126882 2023-01-24 04:08:08.517010: step: 638/466, loss: 0.028963560238480568 2023-01-24 04:08:09.172073: step: 640/466, loss: 0.006525433622300625 2023-01-24 04:08:09.888945: step: 642/466, loss: 0.060342513024806976 2023-01-24 04:08:10.521134: step: 644/466, loss: 0.029246270656585693 2023-01-24 04:08:11.047158: step: 646/466, loss: 0.00955758336931467 2023-01-24 04:08:11.671465: step: 648/466, loss: 0.01655466854572296 2023-01-24 04:08:12.345167: step: 650/466, loss: 0.0002035806974163279 2023-01-24 04:08:12.945834: step: 652/466, loss: 0.12468570470809937 2023-01-24 04:08:13.551850: step: 654/466, loss: 0.0026691860985010862 2023-01-24 04:08:14.158916: step: 656/466, loss: 0.0038278503343462944 2023-01-24 04:08:14.733388: step: 658/466, loss: 0.0074297040700912476 2023-01-24 04:08:15.318622: step: 660/466, loss: 0.016870766878128052 2023-01-24 04:08:15.935651: step: 662/466, loss: 0.048004359006881714 2023-01-24 04:08:16.516907: step: 664/466, loss: 0.01672959513962269 2023-01-24 04:08:17.150156: step: 666/466, loss: 0.04428689926862717 2023-01-24 04:08:17.737595: step: 668/466, loss: 0.02727423422038555 2023-01-24 04:08:18.352183: step: 670/466, loss: 0.10729333013296127 2023-01-24 04:08:18.971858: step: 672/466, loss: 0.001751934876665473 2023-01-24 04:08:19.584098: step: 674/466, loss: 0.09825156629085541 2023-01-24 04:08:20.192953: step: 676/466, loss: 0.04435792937874794 2023-01-24 04:08:20.832006: step: 678/466, loss: 0.006695875432342291 2023-01-24 04:08:21.451923: step: 680/466, loss: 0.009862974286079407 2023-01-24 04:08:22.050061: step: 682/466, loss: 0.015884706750512123 2023-01-24 04:08:22.676548: step: 684/466, loss: 0.0009615541785024107 2023-01-24 04:08:23.406273: step: 686/466, loss: 0.03612905740737915 2023-01-24 04:08:24.058870: step: 688/466, loss: 0.03043614886701107 2023-01-24 04:08:24.735521: step: 690/466, loss: 0.01803179271519184 2023-01-24 04:08:25.300933: step: 692/466, loss: 0.0025244855787605047 2023-01-24 04:08:25.837755: step: 694/466, loss: 0.004815980792045593 2023-01-24 04:08:26.425843: step: 696/466, loss: 0.007434464525431395 2023-01-24 04:08:27.039608: step: 698/466, loss: 0.13055790960788727 2023-01-24 04:08:27.605833: step: 700/466, loss: 0.0001918329653562978 2023-01-24 04:08:28.197317: step: 702/466, loss: 0.016086140647530556 2023-01-24 04:08:28.781083: step: 704/466, loss: 0.0005090544000267982 2023-01-24 04:08:29.357132: step: 706/466, loss: 0.003033475251868367 2023-01-24 04:08:29.936698: step: 708/466, loss: 0.0037266057915985584 2023-01-24 04:08:30.525688: step: 710/466, loss: 0.01481724064797163 2023-01-24 04:08:31.205483: step: 712/466, loss: 0.03236960247159004 2023-01-24 04:08:31.720422: step: 714/466, loss: 0.0010790909873321652 2023-01-24 04:08:32.284339: step: 716/466, loss: 0.01934128813445568 2023-01-24 04:08:32.862417: step: 718/466, loss: 0.022884057834744453 2023-01-24 04:08:33.610970: step: 720/466, loss: 0.026454202830791473 2023-01-24 04:08:34.237840: step: 722/466, loss: 0.022250041365623474 2023-01-24 04:08:34.819969: step: 724/466, loss: 0.1101565808057785 2023-01-24 04:08:35.479497: step: 726/466, loss: 0.018408456817269325 2023-01-24 04:08:36.100149: step: 728/466, loss: 0.06344349682331085 2023-01-24 04:08:36.703277: step: 730/466, loss: 0.01834847964346409 2023-01-24 04:08:37.326133: step: 732/466, loss: 0.06949294358491898 2023-01-24 04:08:37.937538: step: 734/466, loss: 0.020755456760525703 2023-01-24 04:08:38.533268: step: 736/466, loss: 0.10671348869800568 2023-01-24 04:08:39.171980: step: 738/466, loss: 0.04026241600513458 2023-01-24 04:08:39.746359: step: 740/466, loss: 0.10532877594232559 2023-01-24 04:08:40.364421: step: 742/466, loss: 1.2018945217132568 2023-01-24 04:08:40.947520: step: 744/466, loss: 0.03034796752035618 2023-01-24 04:08:41.552194: step: 746/466, loss: 0.05075696110725403 2023-01-24 04:08:42.233654: step: 748/466, loss: 0.004184384364634752 2023-01-24 04:08:42.903165: step: 750/466, loss: 0.03211819753050804 2023-01-24 04:08:43.505581: step: 752/466, loss: 0.016765151172876358 2023-01-24 04:08:44.031175: step: 754/466, loss: 0.010934228077530861 2023-01-24 04:08:44.702657: step: 756/466, loss: 0.0008892195182852447 2023-01-24 04:08:45.364344: step: 758/466, loss: 0.020512528717517853 2023-01-24 04:08:46.122103: step: 760/466, loss: 0.03005942888557911 2023-01-24 04:08:46.719143: step: 762/466, loss: 0.035132911056280136 2023-01-24 04:08:47.342959: step: 764/466, loss: 0.014512010850012302 2023-01-24 04:08:48.021414: step: 766/466, loss: 0.022986402735114098 2023-01-24 04:08:48.653978: step: 768/466, loss: 0.00485311821103096 2023-01-24 04:08:49.247949: step: 770/466, loss: 0.03989977389574051 2023-01-24 04:08:49.905017: step: 772/466, loss: 0.04922888055443764 2023-01-24 04:08:50.477012: step: 774/466, loss: 0.0006349222967401147 2023-01-24 04:08:51.105889: step: 776/466, loss: 0.06173509359359741 2023-01-24 04:08:51.712683: step: 778/466, loss: 0.016244810074567795 2023-01-24 04:08:52.326533: step: 780/466, loss: 0.024921001866459846 2023-01-24 04:08:52.982391: step: 782/466, loss: 0.01212544646114111 2023-01-24 04:08:53.592032: step: 784/466, loss: 0.002041733590885997 2023-01-24 04:08:54.190776: step: 786/466, loss: 0.005243290215730667 2023-01-24 04:08:54.776996: step: 788/466, loss: 0.03087429516017437 2023-01-24 04:08:55.417005: step: 790/466, loss: 0.055014707148075104 2023-01-24 04:08:56.085623: step: 792/466, loss: 0.006934677716344595 2023-01-24 04:08:56.704800: step: 794/466, loss: 0.13290882110595703 2023-01-24 04:08:57.390948: step: 796/466, loss: 0.09976620972156525 2023-01-24 04:08:58.028818: step: 798/466, loss: 0.020965615287423134 2023-01-24 04:08:58.639091: step: 800/466, loss: 0.018352339044213295 2023-01-24 04:08:59.304331: step: 802/466, loss: 0.0030997898429632187 2023-01-24 04:08:59.898300: step: 804/466, loss: 0.005746449343860149 2023-01-24 04:09:00.620523: step: 806/466, loss: 0.013343226164579391 2023-01-24 04:09:01.297203: step: 808/466, loss: 0.1776353120803833 2023-01-24 04:09:01.972495: step: 810/466, loss: 0.05390021204948425 2023-01-24 04:09:02.641390: step: 812/466, loss: 0.0040868245996534824 2023-01-24 04:09:03.227652: step: 814/466, loss: 0.008829116821289062 2023-01-24 04:09:03.786184: step: 816/466, loss: 0.6879344582557678 2023-01-24 04:09:04.433068: step: 818/466, loss: 0.012122377753257751 2023-01-24 04:09:05.059612: step: 820/466, loss: 0.00031059340108186007 2023-01-24 04:09:05.762087: step: 822/466, loss: 0.15375883877277374 2023-01-24 04:09:06.371916: step: 824/466, loss: 0.005931135267019272 2023-01-24 04:09:06.982630: step: 826/466, loss: 0.00963269080966711 2023-01-24 04:09:07.593958: step: 828/466, loss: 0.006335984915494919 2023-01-24 04:09:08.183101: step: 830/466, loss: 0.02565760910511017 2023-01-24 04:09:08.765571: step: 832/466, loss: 0.019776877015829086 2023-01-24 04:09:09.379624: step: 834/466, loss: 0.013118351809680462 2023-01-24 04:09:10.058177: step: 836/466, loss: 0.2517741918563843 2023-01-24 04:09:10.701712: step: 838/466, loss: 0.019498251378536224 2023-01-24 04:09:11.324008: step: 840/466, loss: 0.02611498534679413 2023-01-24 04:09:11.931874: step: 842/466, loss: 0.011649729683995247 2023-01-24 04:09:12.544867: step: 844/466, loss: 0.03165111690759659 2023-01-24 04:09:13.134304: step: 846/466, loss: 0.01670054905116558 2023-01-24 04:09:13.735445: step: 848/466, loss: 0.04889087378978729 2023-01-24 04:09:14.363321: step: 850/466, loss: 0.1464008092880249 2023-01-24 04:09:15.053224: step: 852/466, loss: 0.01969340443611145 2023-01-24 04:09:15.646985: step: 854/466, loss: 0.05219658836722374 2023-01-24 04:09:16.222181: step: 856/466, loss: 0.0039998069405555725 2023-01-24 04:09:16.834084: step: 858/466, loss: 0.010624684393405914 2023-01-24 04:09:17.530757: step: 860/466, loss: 0.030279122292995453 2023-01-24 04:09:18.133021: step: 862/466, loss: 0.003146994626149535 2023-01-24 04:09:18.715611: step: 864/466, loss: 0.040422990918159485 2023-01-24 04:09:19.280040: step: 866/466, loss: 0.008086932823061943 2023-01-24 04:09:19.899645: step: 868/466, loss: 0.0012018276611343026 2023-01-24 04:09:20.493052: step: 870/466, loss: 0.0007978557841852307 2023-01-24 04:09:21.206317: step: 872/466, loss: 0.02200395241379738 2023-01-24 04:09:21.849964: step: 874/466, loss: 0.004426547791808844 2023-01-24 04:09:22.458506: step: 876/466, loss: 0.007263008039444685 2023-01-24 04:09:23.080065: step: 878/466, loss: 0.015457775443792343 2023-01-24 04:09:23.655933: step: 880/466, loss: 0.09172531962394714 2023-01-24 04:09:24.249004: step: 882/466, loss: 0.17304280400276184 2023-01-24 04:09:24.881654: step: 884/466, loss: 0.001824880950152874 2023-01-24 04:09:25.563922: step: 886/466, loss: 0.07621315866708755 2023-01-24 04:09:26.154143: step: 888/466, loss: 0.013939729891717434 2023-01-24 04:09:26.745782: step: 890/466, loss: 0.00634580384939909 2023-01-24 04:09:27.304338: step: 892/466, loss: 0.006481233984231949 2023-01-24 04:09:27.850678: step: 894/466, loss: 0.0010673175565898418 2023-01-24 04:09:28.485364: step: 896/466, loss: 0.02853173203766346 2023-01-24 04:09:29.105111: step: 898/466, loss: 0.04330425336956978 2023-01-24 04:09:29.728429: step: 900/466, loss: 0.011705813929438591 2023-01-24 04:09:30.348765: step: 902/466, loss: 0.0013577844947576523 2023-01-24 04:09:30.954469: step: 904/466, loss: 0.004012433812022209 2023-01-24 04:09:31.579318: step: 906/466, loss: 0.03765256330370903 2023-01-24 04:09:32.235794: step: 908/466, loss: 0.017649630084633827 2023-01-24 04:09:32.883017: step: 910/466, loss: 0.04798159748315811 2023-01-24 04:09:33.503409: step: 912/466, loss: 0.0020341812632977962 2023-01-24 04:09:34.108112: step: 914/466, loss: 0.0017377976328134537 2023-01-24 04:09:34.701208: step: 916/466, loss: 0.001674551866017282 2023-01-24 04:09:35.323085: step: 918/466, loss: 0.0009396459208801389 2023-01-24 04:09:35.951758: step: 920/466, loss: 0.010935609228909016 2023-01-24 04:09:36.596579: step: 922/466, loss: 0.040163006633520126 2023-01-24 04:09:37.182427: step: 924/466, loss: 0.029091281816363335 2023-01-24 04:09:37.830174: step: 926/466, loss: 0.014700681902468204 2023-01-24 04:09:38.507923: step: 928/466, loss: 0.02362331934273243 2023-01-24 04:09:39.097465: step: 930/466, loss: 0.0006369992042891681 2023-01-24 04:09:39.696827: step: 932/466, loss: 0.33109328150749207 ================================================== Loss: 0.052 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3582493693192432, 'r': 0.34057482737939443, 'f1': 0.3491885876049433}, 'combined': 0.25729685402469504, 'epoch': 30} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3746863989912854, 'r': 0.28582929986170197, 'f1': 0.32428101641598717}, 'combined': 0.20096288341272442, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32531454790589787, 'r': 0.32778372853516463, 'f1': 0.3265444705822907}, 'combined': 0.24061171516589838, 'epoch': 30} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.37133422300389807, 'r': 0.27850066725292355, 'f1': 0.31828647686048406}, 'combined': 0.21114053415497458, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3026315789473684, 'r': 0.25, 'f1': 0.27380952380952384}, 'combined': 0.13690476190476192, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 30} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31727794411177646, 'r': 0.30162476280834916, 'f1': 0.3092534046692607}, 'combined': 0.22787092975629736, 'epoch': 16} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3544730097534393, 'r': 0.2685790073900381, 'f1': 0.3056053314540968}, 'combined': 0.20272828918242067, 'epoch': 16} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 16} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:12:22.418610: step: 2/466, loss: 0.007168573793023825 2023-01-24 04:12:23.023824: step: 4/466, loss: 0.0020445086993277073 2023-01-24 04:12:23.651239: step: 6/466, loss: 0.006324405316263437 2023-01-24 04:12:24.198297: step: 8/466, loss: 0.003093923442065716 2023-01-24 04:12:25.014334: step: 10/466, loss: 0.07295311987400055 2023-01-24 04:12:25.645332: step: 12/466, loss: 0.033171072602272034 2023-01-24 04:12:26.278916: step: 14/466, loss: 0.018763521686196327 2023-01-24 04:12:26.887669: step: 16/466, loss: 0.012126329354941845 2023-01-24 04:12:27.537198: step: 18/466, loss: 0.032156869769096375 2023-01-24 04:12:28.186040: step: 20/466, loss: 0.03246133774518967 2023-01-24 04:12:28.841777: step: 22/466, loss: 0.009170664474368095 2023-01-24 04:12:29.461338: step: 24/466, loss: 0.02240617945790291 2023-01-24 04:12:30.087539: step: 26/466, loss: 0.018498243764042854 2023-01-24 04:12:30.668571: step: 28/466, loss: 0.004776320420205593 2023-01-24 04:12:31.212441: step: 30/466, loss: 0.004571563098579645 2023-01-24 04:12:31.855362: step: 32/466, loss: 0.0020251087844371796 2023-01-24 04:12:32.510430: step: 34/466, loss: 0.0005824713734909892 2023-01-24 04:12:33.142098: step: 36/466, loss: 0.025997484102845192 2023-01-24 04:12:33.743324: step: 38/466, loss: 0.005126075353473425 2023-01-24 04:12:34.363923: step: 40/466, loss: 0.0008517690584994853 2023-01-24 04:12:35.001179: step: 42/466, loss: 0.03736488148570061 2023-01-24 04:12:35.694971: step: 44/466, loss: 0.006267243530601263 2023-01-24 04:12:36.268037: step: 46/466, loss: 0.0033513489179313183 2023-01-24 04:12:36.888767: step: 48/466, loss: 0.2597365975379944 2023-01-24 04:12:37.530513: step: 50/466, loss: 0.01199096068739891 2023-01-24 04:12:38.095284: step: 52/466, loss: 0.0017128143226727843 2023-01-24 04:12:38.801679: step: 54/466, loss: 0.00920811016112566 2023-01-24 04:12:39.463473: step: 56/466, loss: 0.0018016818212345243 2023-01-24 04:12:40.057784: step: 58/466, loss: 0.006626426707953215 2023-01-24 04:12:40.633880: step: 60/466, loss: 0.0019828348886221647 2023-01-24 04:12:41.198892: step: 62/466, loss: 0.012216060422360897 2023-01-24 04:12:41.790274: step: 64/466, loss: 0.11005613207817078 2023-01-24 04:12:42.401651: step: 66/466, loss: 0.0032541663385927677 2023-01-24 04:12:42.942557: step: 68/466, loss: 0.011161950416862965 2023-01-24 04:12:43.524715: step: 70/466, loss: 0.019788235425949097 2023-01-24 04:12:44.154548: step: 72/466, loss: 0.007697759661823511 2023-01-24 04:12:44.747080: step: 74/466, loss: 0.008653736673295498 2023-01-24 04:12:45.320081: step: 76/466, loss: 0.040688205510377884 2023-01-24 04:12:45.923782: step: 78/466, loss: 0.015366827137768269 2023-01-24 04:12:46.594563: step: 80/466, loss: 0.001172990771010518 2023-01-24 04:12:47.230173: step: 82/466, loss: 0.0018485992914065719 2023-01-24 04:12:47.786406: step: 84/466, loss: 0.004098563455045223 2023-01-24 04:12:48.372852: step: 86/466, loss: 0.027816014364361763 2023-01-24 04:12:48.989856: step: 88/466, loss: 0.0030274942982941866 2023-01-24 04:12:49.771812: step: 90/466, loss: 0.03170846402645111 2023-01-24 04:12:50.337081: step: 92/466, loss: 0.0002961618884000927 2023-01-24 04:12:50.957770: step: 94/466, loss: 0.0015852575888857245 2023-01-24 04:12:51.623013: step: 96/466, loss: 0.02714916318655014 2023-01-24 04:12:52.217738: step: 98/466, loss: 0.005829785484820604 2023-01-24 04:12:52.899949: step: 100/466, loss: 0.005810820963233709 2023-01-24 04:12:53.469721: step: 102/466, loss: 0.0010619497625157237 2023-01-24 04:12:54.072314: step: 104/466, loss: 0.009592894464731216 2023-01-24 04:12:54.740419: step: 106/466, loss: 0.05438638851046562 2023-01-24 04:12:55.292090: step: 108/466, loss: 3.570752960513346e-05 2023-01-24 04:12:55.897599: step: 110/466, loss: 0.09015724807977676 2023-01-24 04:12:56.584036: step: 112/466, loss: 0.005148747004568577 2023-01-24 04:12:57.191472: step: 114/466, loss: 0.015584941953420639 2023-01-24 04:12:57.924651: step: 116/466, loss: 0.029465945437550545 2023-01-24 04:12:58.537360: step: 118/466, loss: 0.010357551276683807 2023-01-24 04:12:59.152658: step: 120/466, loss: 0.012574763968586922 2023-01-24 04:12:59.808165: step: 122/466, loss: 0.020364994183182716 2023-01-24 04:13:00.393702: step: 124/466, loss: 0.0037056943401694298 2023-01-24 04:13:01.028190: step: 126/466, loss: 0.0012162663042545319 2023-01-24 04:13:01.679533: step: 128/466, loss: 0.016808146610856056 2023-01-24 04:13:02.291314: step: 130/466, loss: 0.01613013446331024 2023-01-24 04:13:02.885329: step: 132/466, loss: 0.010568464174866676 2023-01-24 04:13:03.525234: step: 134/466, loss: 0.01669691875576973 2023-01-24 04:13:04.117076: step: 136/466, loss: 0.0014973332872614264 2023-01-24 04:13:04.751229: step: 138/466, loss: 0.002069574547931552 2023-01-24 04:13:05.389144: step: 140/466, loss: 0.04698267579078674 2023-01-24 04:13:06.060590: step: 142/466, loss: 0.05436408147215843 2023-01-24 04:13:06.659104: step: 144/466, loss: 0.03474747762084007 2023-01-24 04:13:07.254708: step: 146/466, loss: 0.009406505152583122 2023-01-24 04:13:07.825687: step: 148/466, loss: 0.00043175797327421606 2023-01-24 04:13:08.463007: step: 150/466, loss: 0.013636465184390545 2023-01-24 04:13:09.040667: step: 152/466, loss: 0.0016792012611404061 2023-01-24 04:13:09.606201: step: 154/466, loss: 0.029004333540797234 2023-01-24 04:13:10.220855: step: 156/466, loss: 0.005969460587948561 2023-01-24 04:13:10.870513: step: 158/466, loss: 0.24402426183223724 2023-01-24 04:13:11.487081: step: 160/466, loss: 0.017112595960497856 2023-01-24 04:13:12.129633: step: 162/466, loss: 0.020864853635430336 2023-01-24 04:13:12.729296: step: 164/466, loss: 0.008625946007668972 2023-01-24 04:13:13.356040: step: 166/466, loss: 0.041083239018917084 2023-01-24 04:13:13.949670: step: 168/466, loss: 0.011716339737176895 2023-01-24 04:13:14.585892: step: 170/466, loss: 0.07246832549571991 2023-01-24 04:13:15.194079: step: 172/466, loss: 5.223365224082954e-05 2023-01-24 04:13:15.852825: step: 174/466, loss: 0.07541394978761673 2023-01-24 04:13:16.537111: step: 176/466, loss: 0.02161840908229351 2023-01-24 04:13:17.126995: step: 178/466, loss: 0.005124109797179699 2023-01-24 04:13:17.811212: step: 180/466, loss: 0.0006222125375643373 2023-01-24 04:13:18.489129: step: 182/466, loss: 0.00390928378328681 2023-01-24 04:13:19.128533: step: 184/466, loss: 0.009930741041898727 2023-01-24 04:13:19.692073: step: 186/466, loss: 0.006320476066321135 2023-01-24 04:13:20.311814: step: 188/466, loss: 0.013346170075237751 2023-01-24 04:13:20.969705: step: 190/466, loss: 0.0171652901917696 2023-01-24 04:13:21.569823: step: 192/466, loss: 0.03007993847131729 2023-01-24 04:13:22.140167: step: 194/466, loss: 0.012615848332643509 2023-01-24 04:13:22.745099: step: 196/466, loss: 0.011521113105118275 2023-01-24 04:13:23.340626: step: 198/466, loss: 0.06034286320209503 2023-01-24 04:13:23.936408: step: 200/466, loss: 0.05058571696281433 2023-01-24 04:13:24.573833: step: 202/466, loss: 0.009598346427083015 2023-01-24 04:13:25.267687: step: 204/466, loss: 0.2493104636669159 2023-01-24 04:13:25.975228: step: 206/466, loss: 0.029918083921074867 2023-01-24 04:13:26.577802: step: 208/466, loss: 0.22628265619277954 2023-01-24 04:13:27.168833: step: 210/466, loss: 0.04101979732513428 2023-01-24 04:13:27.770261: step: 212/466, loss: 0.004658593330532312 2023-01-24 04:13:28.379941: step: 214/466, loss: 0.05677751824259758 2023-01-24 04:13:28.996361: step: 216/466, loss: 0.000909144408069551 2023-01-24 04:13:29.632114: step: 218/466, loss: 0.020573722198605537 2023-01-24 04:13:30.322122: step: 220/466, loss: 0.041357796639204025 2023-01-24 04:13:30.987479: step: 222/466, loss: 0.25173336267471313 2023-01-24 04:13:31.575244: step: 224/466, loss: 0.012942682951688766 2023-01-24 04:13:32.217327: step: 226/466, loss: 0.012703394517302513 2023-01-24 04:13:32.807705: step: 228/466, loss: 0.011449992656707764 2023-01-24 04:13:33.419047: step: 230/466, loss: 0.023750029504299164 2023-01-24 04:13:34.080516: step: 232/466, loss: 0.00366886961273849 2023-01-24 04:13:34.706336: step: 234/466, loss: 0.03426119312644005 2023-01-24 04:13:35.295972: step: 236/466, loss: 0.012813607230782509 2023-01-24 04:13:35.828055: step: 238/466, loss: 0.0055039008148014545 2023-01-24 04:13:36.402210: step: 240/466, loss: 0.0028869614470750093 2023-01-24 04:13:37.013101: step: 242/466, loss: 0.008536329492926598 2023-01-24 04:13:37.604672: step: 244/466, loss: 0.010767385363578796 2023-01-24 04:13:38.204548: step: 246/466, loss: 0.016502102836966515 2023-01-24 04:13:38.886742: step: 248/466, loss: 0.0004043432418256998 2023-01-24 04:13:39.496510: step: 250/466, loss: 0.041048161685466766 2023-01-24 04:13:40.102672: step: 252/466, loss: 0.049137476831674576 2023-01-24 04:13:40.690531: step: 254/466, loss: 0.004949237685650587 2023-01-24 04:13:41.252425: step: 256/466, loss: 0.04413600638508797 2023-01-24 04:13:41.860118: step: 258/466, loss: 0.005313973408192396 2023-01-24 04:13:42.452559: step: 260/466, loss: 0.004345383029431105 2023-01-24 04:13:43.005467: step: 262/466, loss: 0.009101023897528648 2023-01-24 04:13:43.646900: step: 264/466, loss: 0.03252504765987396 2023-01-24 04:13:44.241374: step: 266/466, loss: 0.010446806438267231 2023-01-24 04:13:44.846390: step: 268/466, loss: 0.024466492235660553 2023-01-24 04:13:45.476370: step: 270/466, loss: 0.002857351675629616 2023-01-24 04:13:46.118360: step: 272/466, loss: 0.0001232354115927592 2023-01-24 04:13:46.769386: step: 274/466, loss: 0.0036861319094896317 2023-01-24 04:13:47.389611: step: 276/466, loss: 0.00023983018763829023 2023-01-24 04:13:48.044254: step: 278/466, loss: 0.01054013054817915 2023-01-24 04:13:48.724160: step: 280/466, loss: 0.0075253816321492195 2023-01-24 04:13:49.380890: step: 282/466, loss: 0.022886738181114197 2023-01-24 04:13:49.924746: step: 284/466, loss: 0.01093977689743042 2023-01-24 04:13:50.549883: step: 286/466, loss: 0.01793520525097847 2023-01-24 04:13:51.177487: step: 288/466, loss: 0.0015383173013105989 2023-01-24 04:13:51.825432: step: 290/466, loss: 0.06609602272510529 2023-01-24 04:13:52.469302: step: 292/466, loss: 0.005888265557587147 2023-01-24 04:13:53.089578: step: 294/466, loss: 0.018412522971630096 2023-01-24 04:13:53.693696: step: 296/466, loss: 0.00010748588829301298 2023-01-24 04:13:54.286768: step: 298/466, loss: 0.03687933087348938 2023-01-24 04:13:54.900774: step: 300/466, loss: 0.00927209947258234 2023-01-24 04:13:55.458450: step: 302/466, loss: 0.04740048199892044 2023-01-24 04:13:56.098621: step: 304/466, loss: 0.013495231047272682 2023-01-24 04:13:56.757389: step: 306/466, loss: 0.06918410211801529 2023-01-24 04:13:57.394038: step: 308/466, loss: 0.036466117948293686 2023-01-24 04:13:58.108540: step: 310/466, loss: 0.003078033681958914 2023-01-24 04:13:58.685547: step: 312/466, loss: 0.09293632954359055 2023-01-24 04:13:59.356525: step: 314/466, loss: 0.04000889137387276 2023-01-24 04:13:59.972525: step: 316/466, loss: 0.03359391167759895 2023-01-24 04:14:00.567785: step: 318/466, loss: 0.0005374500178731978 2023-01-24 04:14:01.162418: step: 320/466, loss: 0.01053704135119915 2023-01-24 04:14:01.750253: step: 322/466, loss: 0.0017169741913676262 2023-01-24 04:14:02.341851: step: 324/466, loss: 0.002009422518312931 2023-01-24 04:14:02.980176: step: 326/466, loss: 0.01806122623383999 2023-01-24 04:14:03.588224: step: 328/466, loss: 0.021779784932732582 2023-01-24 04:14:04.167995: step: 330/466, loss: 0.004365967120975256 2023-01-24 04:14:04.766372: step: 332/466, loss: 0.0009254095493815839 2023-01-24 04:14:05.342285: step: 334/466, loss: 0.012203444726765156 2023-01-24 04:14:06.030036: step: 336/466, loss: 0.004250540863722563 2023-01-24 04:14:06.635145: step: 338/466, loss: 0.014696831814944744 2023-01-24 04:14:07.316990: step: 340/466, loss: 0.3365165591239929 2023-01-24 04:14:07.912402: step: 342/466, loss: 0.014637970365583897 2023-01-24 04:14:08.489558: step: 344/466, loss: 0.04531939700245857 2023-01-24 04:14:09.122273: step: 346/466, loss: 0.0061531332321465015 2023-01-24 04:14:09.745744: step: 348/466, loss: 0.3791632354259491 2023-01-24 04:14:10.369785: step: 350/466, loss: 0.0018494034884497523 2023-01-24 04:14:10.980220: step: 352/466, loss: 0.010231144726276398 2023-01-24 04:14:11.590982: step: 354/466, loss: 0.0032418647315353155 2023-01-24 04:14:12.208326: step: 356/466, loss: 0.0013531928416341543 2023-01-24 04:14:12.786180: step: 358/466, loss: 0.052366409450769424 2023-01-24 04:14:13.377681: step: 360/466, loss: 0.20318692922592163 2023-01-24 04:14:13.983201: step: 362/466, loss: 0.02761739492416382 2023-01-24 04:14:14.659092: step: 364/466, loss: 0.017266161739826202 2023-01-24 04:14:15.227117: step: 366/466, loss: 1.5719159841537476 2023-01-24 04:14:15.784325: step: 368/466, loss: 0.001832455163821578 2023-01-24 04:14:16.346823: step: 370/466, loss: 5.752670040237717e-05 2023-01-24 04:14:16.985324: step: 372/466, loss: 0.004226814955472946 2023-01-24 04:14:17.633415: step: 374/466, loss: 0.0006275619962252676 2023-01-24 04:14:18.222801: step: 376/466, loss: 0.2641673684120178 2023-01-24 04:14:18.840978: step: 378/466, loss: 0.015237389132380486 2023-01-24 04:14:19.410877: step: 380/466, loss: 0.0043543362990021706 2023-01-24 04:14:20.050881: step: 382/466, loss: 0.0103141525760293 2023-01-24 04:14:20.678708: step: 384/466, loss: 0.006467350292950869 2023-01-24 04:14:21.275517: step: 386/466, loss: 0.05341273173689842 2023-01-24 04:14:21.850418: step: 388/466, loss: 4.330767296778504e-06 2023-01-24 04:14:22.534009: step: 390/466, loss: 0.01330084539949894 2023-01-24 04:14:23.173763: step: 392/466, loss: 0.005651859100908041 2023-01-24 04:14:23.784662: step: 394/466, loss: 0.036984048783779144 2023-01-24 04:14:24.420876: step: 396/466, loss: 0.06965211778879166 2023-01-24 04:14:25.052756: step: 398/466, loss: 0.0013999653747305274 2023-01-24 04:14:25.667654: step: 400/466, loss: 0.0007004133658483624 2023-01-24 04:14:26.297316: step: 402/466, loss: 0.029358763247728348 2023-01-24 04:14:26.999384: step: 404/466, loss: 0.05330312252044678 2023-01-24 04:14:27.618543: step: 406/466, loss: 0.023303842172026634 2023-01-24 04:14:28.264383: step: 408/466, loss: 0.026277275756001472 2023-01-24 04:14:28.875143: step: 410/466, loss: 0.05698707327246666 2023-01-24 04:14:29.481763: step: 412/466, loss: 0.0056052375584840775 2023-01-24 04:14:30.150953: step: 414/466, loss: 0.13320933282375336 2023-01-24 04:14:30.757689: step: 416/466, loss: 0.011446727439761162 2023-01-24 04:14:31.381768: step: 418/466, loss: 0.03348606079816818 2023-01-24 04:14:32.000135: step: 420/466, loss: 0.0022945867385715246 2023-01-24 04:14:32.510718: step: 422/466, loss: 0.0809975266456604 2023-01-24 04:14:33.061067: step: 424/466, loss: 0.2196180522441864 2023-01-24 04:14:33.674418: step: 426/466, loss: 0.003923522774130106 2023-01-24 04:14:34.281586: step: 428/466, loss: 0.02230186201632023 2023-01-24 04:14:34.875245: step: 430/466, loss: 0.0052039786241948605 2023-01-24 04:14:35.501131: step: 432/466, loss: 0.03056887723505497 2023-01-24 04:14:36.115219: step: 434/466, loss: 0.008743995800614357 2023-01-24 04:14:36.718886: step: 436/466, loss: 0.021395454183220863 2023-01-24 04:14:37.347485: step: 438/466, loss: 0.04790028929710388 2023-01-24 04:14:37.968556: step: 440/466, loss: 0.006264548283070326 2023-01-24 04:14:38.599586: step: 442/466, loss: 0.038078151643276215 2023-01-24 04:14:39.271718: step: 444/466, loss: 0.0001275623362744227 2023-01-24 04:14:39.882220: step: 446/466, loss: 0.026751041412353516 2023-01-24 04:14:40.452032: step: 448/466, loss: 0.0038514710031449795 2023-01-24 04:14:41.005926: step: 450/466, loss: 0.02506757527589798 2023-01-24 04:14:41.642047: step: 452/466, loss: 0.042471084743738174 2023-01-24 04:14:42.242256: step: 454/466, loss: 0.03619232028722763 2023-01-24 04:14:42.889062: step: 456/466, loss: 0.005140738561749458 2023-01-24 04:14:43.500192: step: 458/466, loss: 0.02874329499900341 2023-01-24 04:14:44.107427: step: 460/466, loss: 0.0004289276257622987 2023-01-24 04:14:44.683819: step: 462/466, loss: 0.01744789257645607 2023-01-24 04:14:45.342709: step: 464/466, loss: 0.13683846592903137 2023-01-24 04:14:45.875796: step: 466/466, loss: 0.023846862837672234 2023-01-24 04:14:46.444420: step: 468/466, loss: 0.1801401525735855 2023-01-24 04:14:47.059342: step: 470/466, loss: 0.017461387440562248 2023-01-24 04:14:47.682393: step: 472/466, loss: 0.011122321709990501 2023-01-24 04:14:48.262964: step: 474/466, loss: 0.0270382072776556 2023-01-24 04:14:48.936913: step: 476/466, loss: 0.014480615966022015 2023-01-24 04:14:49.580839: step: 478/466, loss: 0.0036217127926647663 2023-01-24 04:14:50.213108: step: 480/466, loss: 0.00939874816685915 2023-01-24 04:14:50.894021: step: 482/466, loss: 0.004832264967262745 2023-01-24 04:14:51.537036: step: 484/466, loss: 0.0022632109466940165 2023-01-24 04:14:52.161718: step: 486/466, loss: 0.009170631878077984 2023-01-24 04:14:52.723251: step: 488/466, loss: 0.0702671930193901 2023-01-24 04:14:53.265529: step: 490/466, loss: 0.048669956624507904 2023-01-24 04:14:53.888399: step: 492/466, loss: 0.02539113350212574 2023-01-24 04:14:54.472501: step: 494/466, loss: 0.001394393271766603 2023-01-24 04:14:55.139457: step: 496/466, loss: 0.5365771651268005 2023-01-24 04:14:55.739994: step: 498/466, loss: 0.00293804076500237 2023-01-24 04:14:56.304680: step: 500/466, loss: 0.0036742223892360926 2023-01-24 04:14:56.853096: step: 502/466, loss: 0.03075113706290722 2023-01-24 04:14:57.468071: step: 504/466, loss: 0.002123782876878977 2023-01-24 04:14:58.107723: step: 506/466, loss: 0.03279469907283783 2023-01-24 04:14:58.709527: step: 508/466, loss: 0.0031929186079651117 2023-01-24 04:14:59.338223: step: 510/466, loss: 0.036908939480781555 2023-01-24 04:14:59.929373: step: 512/466, loss: 0.01312875933945179 2023-01-24 04:15:00.567910: step: 514/466, loss: 0.0018731298623606563 2023-01-24 04:15:01.262847: step: 516/466, loss: 0.01972200721502304 2023-01-24 04:15:01.916949: step: 518/466, loss: 0.004691218491643667 2023-01-24 04:15:02.582387: step: 520/466, loss: 0.0034749784972518682 2023-01-24 04:15:03.296428: step: 522/466, loss: 0.0007262931321747601 2023-01-24 04:15:03.950243: step: 524/466, loss: 0.004763578996062279 2023-01-24 04:15:04.596867: step: 526/466, loss: 0.0663839802145958 2023-01-24 04:15:05.205386: step: 528/466, loss: 0.0008546586614102125 2023-01-24 04:15:05.826972: step: 530/466, loss: 0.010287533514201641 2023-01-24 04:15:06.527931: step: 532/466, loss: 0.11529932171106339 2023-01-24 04:15:07.100108: step: 534/466, loss: 0.000512432015966624 2023-01-24 04:15:07.694775: step: 536/466, loss: 0.0006311875185929239 2023-01-24 04:15:08.264741: step: 538/466, loss: 0.009164192713797092 2023-01-24 04:15:08.953324: step: 540/466, loss: 0.00019224986317567527 2023-01-24 04:15:09.595031: step: 542/466, loss: 0.6070622801780701 2023-01-24 04:15:10.140431: step: 544/466, loss: 0.013346142135560513 2023-01-24 04:15:10.758197: step: 546/466, loss: 0.001861819182522595 2023-01-24 04:15:11.326679: step: 548/466, loss: 0.005893387831747532 2023-01-24 04:15:11.956169: step: 550/466, loss: 0.004697396419942379 2023-01-24 04:15:12.448554: step: 552/466, loss: 0.011865999549627304 2023-01-24 04:15:13.073147: step: 554/466, loss: 0.017121572047472 2023-01-24 04:15:13.776842: step: 556/466, loss: 0.016142327338457108 2023-01-24 04:15:14.427430: step: 558/466, loss: 0.035901207476854324 2023-01-24 04:15:14.994988: step: 560/466, loss: 0.013503274880349636 2023-01-24 04:15:15.596456: step: 562/466, loss: 0.0007424255018122494 2023-01-24 04:15:16.232362: step: 564/466, loss: 0.023608192801475525 2023-01-24 04:15:16.888765: step: 566/466, loss: 0.009967315010726452 2023-01-24 04:15:17.550677: step: 568/466, loss: 0.02642383985221386 2023-01-24 04:15:18.174945: step: 570/466, loss: 0.008085076697170734 2023-01-24 04:15:18.815637: step: 572/466, loss: 0.03311021625995636 2023-01-24 04:15:19.444092: step: 574/466, loss: 0.021686946973204613 2023-01-24 04:15:20.097259: step: 576/466, loss: 0.0004531649174168706 2023-01-24 04:15:20.756738: step: 578/466, loss: 0.002955336356535554 2023-01-24 04:15:21.394344: step: 580/466, loss: 0.06833190470933914 2023-01-24 04:15:22.065912: step: 582/466, loss: 0.00976971909403801 2023-01-24 04:15:22.641623: step: 584/466, loss: 0.0088993264362216 2023-01-24 04:15:23.259131: step: 586/466, loss: 0.031838033348321915 2023-01-24 04:15:23.933352: step: 588/466, loss: 0.00427598413079977 2023-01-24 04:15:24.545257: step: 590/466, loss: 0.0013505751267075539 2023-01-24 04:15:25.140461: step: 592/466, loss: 0.006010470911860466 2023-01-24 04:15:25.759660: step: 594/466, loss: 0.005053442902863026 2023-01-24 04:15:26.418556: step: 596/466, loss: 0.12235328555107117 2023-01-24 04:15:27.023748: step: 598/466, loss: 0.013947657309472561 2023-01-24 04:15:27.729053: step: 600/466, loss: 0.017335861921310425 2023-01-24 04:15:28.358768: step: 602/466, loss: 0.017842689529061317 2023-01-24 04:15:28.975464: step: 604/466, loss: 0.013325287960469723 2023-01-24 04:15:29.579442: step: 606/466, loss: 0.007661189418286085 2023-01-24 04:15:30.153662: step: 608/466, loss: 0.0013941368088126183 2023-01-24 04:15:30.846095: step: 610/466, loss: 0.04949084669351578 2023-01-24 04:15:31.451527: step: 612/466, loss: 0.02539418451488018 2023-01-24 04:15:32.049010: step: 614/466, loss: 0.0013708526967093349 2023-01-24 04:15:32.748942: step: 616/466, loss: 0.00775616941973567 2023-01-24 04:15:33.358768: step: 618/466, loss: 0.0025620737578719854 2023-01-24 04:15:33.964652: step: 620/466, loss: 0.008866401389241219 2023-01-24 04:15:34.561408: step: 622/466, loss: 0.013423405587673187 2023-01-24 04:15:35.097670: step: 624/466, loss: 0.000914953532628715 2023-01-24 04:15:35.775443: step: 626/466, loss: 0.00875373650342226 2023-01-24 04:15:36.397085: step: 628/466, loss: 0.2562370300292969 2023-01-24 04:15:36.999995: step: 630/466, loss: 0.003628920065239072 2023-01-24 04:15:37.583896: step: 632/466, loss: 0.0038765915669500828 2023-01-24 04:15:38.242023: step: 634/466, loss: 0.007976152002811432 2023-01-24 04:15:38.799368: step: 636/466, loss: 0.02190847136080265 2023-01-24 04:15:39.422930: step: 638/466, loss: 0.00013066553219687194 2023-01-24 04:15:40.084477: step: 640/466, loss: 0.0046429443173110485 2023-01-24 04:15:40.652689: step: 642/466, loss: 0.0017820323118939996 2023-01-24 04:15:41.263441: step: 644/466, loss: 0.016633784398436546 2023-01-24 04:15:41.891989: step: 646/466, loss: 0.010373415425419807 2023-01-24 04:15:42.429712: step: 648/466, loss: 0.007152173202484846 2023-01-24 04:15:43.037408: step: 650/466, loss: 0.0010419668396934867 2023-01-24 04:15:43.635588: step: 652/466, loss: 0.0006896069389767945 2023-01-24 04:15:44.280578: step: 654/466, loss: 0.0052822246216237545 2023-01-24 04:15:44.897074: step: 656/466, loss: 0.015896180644631386 2023-01-24 04:15:45.552881: step: 658/466, loss: 0.0004061829240527004 2023-01-24 04:15:46.264990: step: 660/466, loss: 0.006516371853649616 2023-01-24 04:15:46.953495: step: 662/466, loss: 0.0069544874131679535 2023-01-24 04:15:47.563178: step: 664/466, loss: 0.008382551372051239 2023-01-24 04:15:48.153923: step: 666/466, loss: 0.001753911143168807 2023-01-24 04:15:48.799558: step: 668/466, loss: 0.10311511904001236 2023-01-24 04:15:49.393835: step: 670/466, loss: 0.0050849937833845615 2023-01-24 04:15:50.036469: step: 672/466, loss: 0.03090570494532585 2023-01-24 04:15:50.680304: step: 674/466, loss: 0.016384674236178398 2023-01-24 04:15:51.296496: step: 676/466, loss: 0.029901500791311264 2023-01-24 04:15:51.853594: step: 678/466, loss: 0.018675347790122032 2023-01-24 04:15:52.489453: step: 680/466, loss: 0.00016065257659647614 2023-01-24 04:15:53.200229: step: 682/466, loss: 0.005745972506701946 2023-01-24 04:15:53.797046: step: 684/466, loss: 0.0024547441862523556 2023-01-24 04:15:54.335308: step: 686/466, loss: 0.002825072268024087 2023-01-24 04:15:54.957554: step: 688/466, loss: 0.0032656663097441196 2023-01-24 04:15:55.574175: step: 690/466, loss: 0.07884236425161362 2023-01-24 04:15:56.223697: step: 692/466, loss: 0.0067831301130354404 2023-01-24 04:15:56.808069: step: 694/466, loss: 0.0012500978773459792 2023-01-24 04:15:57.395799: step: 696/466, loss: 0.009684398770332336 2023-01-24 04:15:58.034486: step: 698/466, loss: 0.020377876237034798 2023-01-24 04:15:58.714731: step: 700/466, loss: 0.00035672361264005303 2023-01-24 04:15:59.403475: step: 702/466, loss: 0.0206731166690588 2023-01-24 04:16:00.001086: step: 704/466, loss: 0.035817913711071014 2023-01-24 04:16:00.595992: step: 706/466, loss: 0.002449051244184375 2023-01-24 04:16:01.207474: step: 708/466, loss: 0.01403097901493311 2023-01-24 04:16:01.805405: step: 710/466, loss: 0.00010961700172629207 2023-01-24 04:16:02.511417: step: 712/466, loss: 0.01196881290525198 2023-01-24 04:16:03.156342: step: 714/466, loss: 0.01933245360851288 2023-01-24 04:16:03.744821: step: 716/466, loss: 0.00016425353533122689 2023-01-24 04:16:04.311688: step: 718/466, loss: 0.006508949212729931 2023-01-24 04:16:04.875762: step: 720/466, loss: 0.016074998304247856 2023-01-24 04:16:05.504471: step: 722/466, loss: 0.007576656527817249 2023-01-24 04:16:06.132326: step: 724/466, loss: 0.004461620468646288 2023-01-24 04:16:06.786613: step: 726/466, loss: 0.22906804084777832 2023-01-24 04:16:07.269791: step: 728/466, loss: 6.266071432037279e-05 2023-01-24 04:16:07.871493: step: 730/466, loss: 0.00867387279868126 2023-01-24 04:16:08.421436: step: 732/466, loss: 0.00040576778701506555 2023-01-24 04:16:09.149488: step: 734/466, loss: 0.0005055777728557587 2023-01-24 04:16:09.735433: step: 736/466, loss: 0.010774249210953712 2023-01-24 04:16:10.473278: step: 738/466, loss: 0.013444541022181511 2023-01-24 04:16:11.167687: step: 740/466, loss: 0.021006938070058823 2023-01-24 04:16:11.802231: step: 742/466, loss: 0.01994216814637184 2023-01-24 04:16:12.446504: step: 744/466, loss: 0.0026697348803281784 2023-01-24 04:16:13.035813: step: 746/466, loss: 0.03117072954773903 2023-01-24 04:16:13.695318: step: 748/466, loss: 0.0825987383723259 2023-01-24 04:16:14.311537: step: 750/466, loss: 0.02095739170908928 2023-01-24 04:16:14.984141: step: 752/466, loss: 0.401014506816864 2023-01-24 04:16:15.552676: step: 754/466, loss: 0.008619406260550022 2023-01-24 04:16:16.125988: step: 756/466, loss: 0.003280237317085266 2023-01-24 04:16:16.738332: step: 758/466, loss: 0.055942706763744354 2023-01-24 04:16:17.306862: step: 760/466, loss: 0.0006331041804514825 2023-01-24 04:16:17.908218: step: 762/466, loss: 0.03507787734270096 2023-01-24 04:16:18.520542: step: 764/466, loss: 0.0024752917233854532 2023-01-24 04:16:19.089326: step: 766/466, loss: 0.008306448347866535 2023-01-24 04:16:19.689705: step: 768/466, loss: 0.005168801639229059 2023-01-24 04:16:20.300001: step: 770/466, loss: 0.005456330720335245 2023-01-24 04:16:20.867725: step: 772/466, loss: 0.00500397989526391 2023-01-24 04:16:21.449669: step: 774/466, loss: 0.0035422630608081818 2023-01-24 04:16:22.091175: step: 776/466, loss: 0.0029776778537780046 2023-01-24 04:16:22.610837: step: 778/466, loss: 0.00010884831863222644 2023-01-24 04:16:23.272483: step: 780/466, loss: 0.015602245926856995 2023-01-24 04:16:23.932945: step: 782/466, loss: 0.0006214018794707954 2023-01-24 04:16:24.608330: step: 784/466, loss: 0.003791040973737836 2023-01-24 04:16:25.228816: step: 786/466, loss: 0.007465250324457884 2023-01-24 04:16:25.850084: step: 788/466, loss: 0.028806166723370552 2023-01-24 04:16:26.493247: step: 790/466, loss: 0.005335021298378706 2023-01-24 04:16:27.054627: step: 792/466, loss: 0.004698272794485092 2023-01-24 04:16:27.671441: step: 794/466, loss: 0.02386847510933876 2023-01-24 04:16:28.280266: step: 796/466, loss: 0.020296594128012657 2023-01-24 04:16:28.878014: step: 798/466, loss: 0.01201885100454092 2023-01-24 04:16:29.531998: step: 800/466, loss: 0.0002638005535118282 2023-01-24 04:16:30.189920: step: 802/466, loss: 8.68512797751464e-05 2023-01-24 04:16:30.812204: step: 804/466, loss: 0.1226978451013565 2023-01-24 04:16:31.442113: step: 806/466, loss: 0.004920917097479105 2023-01-24 04:16:32.171318: step: 808/466, loss: 0.015866786241531372 2023-01-24 04:16:32.731489: step: 810/466, loss: 0.07115912437438965 2023-01-24 04:16:33.336459: step: 812/466, loss: 2.851075123544433e-06 2023-01-24 04:16:33.932964: step: 814/466, loss: 0.014639374800026417 2023-01-24 04:16:34.512178: step: 816/466, loss: 0.007592801470309496 2023-01-24 04:16:35.155375: step: 818/466, loss: 0.05241686850786209 2023-01-24 04:16:35.722633: step: 820/466, loss: 0.007755121681839228 2023-01-24 04:16:36.340018: step: 822/466, loss: 0.01800842583179474 2023-01-24 04:16:37.008981: step: 824/466, loss: 0.027888098731637 2023-01-24 04:16:37.616365: step: 826/466, loss: 0.09318346530199051 2023-01-24 04:16:38.206793: step: 828/466, loss: 0.06418915838003159 2023-01-24 04:16:38.782039: step: 830/466, loss: 0.0030594964046031237 2023-01-24 04:16:39.431144: step: 832/466, loss: 0.0027627507224678993 2023-01-24 04:16:40.028712: step: 834/466, loss: 9.011743532028049e-05 2023-01-24 04:16:40.592906: step: 836/466, loss: 0.005139882210642099 2023-01-24 04:16:41.245810: step: 838/466, loss: 0.014305722899734974 2023-01-24 04:16:41.933469: step: 840/466, loss: 0.00447474978864193 2023-01-24 04:16:42.553705: step: 842/466, loss: 0.022534940391778946 2023-01-24 04:16:43.120036: step: 844/466, loss: 0.003902808530256152 2023-01-24 04:16:43.702251: step: 846/466, loss: 0.06704127788543701 2023-01-24 04:16:44.342797: step: 848/466, loss: 0.0010130038717761636 2023-01-24 04:16:44.991472: step: 850/466, loss: 0.12758488953113556 2023-01-24 04:16:45.604723: step: 852/466, loss: 0.0028329056221991777 2023-01-24 04:16:46.278707: step: 854/466, loss: 0.02403268776834011 2023-01-24 04:16:46.844181: step: 856/466, loss: 0.27932578325271606 2023-01-24 04:16:47.477059: step: 858/466, loss: 0.015459458343684673 2023-01-24 04:16:48.235614: step: 860/466, loss: 0.005626055411994457 2023-01-24 04:16:48.845471: step: 862/466, loss: 0.10774286836385727 2023-01-24 04:16:49.467371: step: 864/466, loss: 0.005312466528266668 2023-01-24 04:16:50.103411: step: 866/466, loss: 0.016016367822885513 2023-01-24 04:16:50.711303: step: 868/466, loss: 0.08760938048362732 2023-01-24 04:16:51.386329: step: 870/466, loss: 0.0036086023319512606 2023-01-24 04:16:51.928787: step: 872/466, loss: 0.004177022725343704 2023-01-24 04:16:52.578652: step: 874/466, loss: 0.20971441268920898 2023-01-24 04:16:53.205609: step: 876/466, loss: 0.00036603506305254996 2023-01-24 04:16:53.784033: step: 878/466, loss: 0.003779512597247958 2023-01-24 04:16:54.370622: step: 880/466, loss: 0.006469182204455137 2023-01-24 04:16:54.957076: step: 882/466, loss: 0.03458406776189804 2023-01-24 04:16:55.568680: step: 884/466, loss: 0.0436847060918808 2023-01-24 04:16:56.222524: step: 886/466, loss: 0.0017953275237232447 2023-01-24 04:16:56.748216: step: 888/466, loss: 0.01645614765584469 2023-01-24 04:16:57.378592: step: 890/466, loss: 0.005720381624996662 2023-01-24 04:16:58.086508: step: 892/466, loss: 0.018691744655370712 2023-01-24 04:16:58.682232: step: 894/466, loss: 0.024546178057789803 2023-01-24 04:16:59.289618: step: 896/466, loss: 0.011327753774821758 2023-01-24 04:16:59.950797: step: 898/466, loss: 0.02763189561665058 2023-01-24 04:17:00.613177: step: 900/466, loss: 0.008905744180083275 2023-01-24 04:17:01.241348: step: 902/466, loss: 0.019306790083646774 2023-01-24 04:17:01.820504: step: 904/466, loss: 0.040599238127470016 2023-01-24 04:17:02.462420: step: 906/466, loss: 0.018303176388144493 2023-01-24 04:17:03.090555: step: 908/466, loss: 0.014783458784222603 2023-01-24 04:17:03.790351: step: 910/466, loss: 0.013032814487814903 2023-01-24 04:17:04.432477: step: 912/466, loss: 0.008820664137601852 2023-01-24 04:17:05.030600: step: 914/466, loss: 0.000625031825620681 2023-01-24 04:17:05.652826: step: 916/466, loss: 0.028586631640791893 2023-01-24 04:17:06.257466: step: 918/466, loss: 0.009524805471301079 2023-01-24 04:17:06.880025: step: 920/466, loss: 0.007127732969820499 2023-01-24 04:17:07.556950: step: 922/466, loss: 0.0053637209348380566 2023-01-24 04:17:08.189734: step: 924/466, loss: 0.005123194307088852 2023-01-24 04:17:08.790703: step: 926/466, loss: 0.0001634888758417219 2023-01-24 04:17:09.411547: step: 928/466, loss: 0.00564478849992156 2023-01-24 04:17:09.989633: step: 930/466, loss: 0.017921369522809982 2023-01-24 04:17:10.743940: step: 932/466, loss: 0.011737722903490067 ================================================== Loss: 0.032 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3592049872413266, 'r': 0.3189903871516904, 'f1': 0.33790539503304695}, 'combined': 0.2489829226559293, 'epoch': 31} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3519718781643123, 'r': 0.2818314046978443, 'f1': 0.31302055864788114}, 'combined': 0.19600352737764523, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3548577300807585, 'r': 0.3386972262440636, 'f1': 0.3465892004478088}, 'combined': 0.25538151611943805, 'epoch': 31} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.359593052133201, 'r': 0.29701283656809024, 'f1': 0.3253207266706292}, 'combined': 0.20160721089447442, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32324861099239804, 'r': 0.3201817361252975, 'f1': 0.3217078645148366}, 'combined': 0.23704790016882693, 'epoch': 31} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36854991917508884, 'r': 0.2943742585468896, 'f1': 0.32731227141992336}, 'combined': 0.2171279424270779, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3275862068965517, 'r': 0.2714285714285714, 'f1': 0.29687499999999994}, 'combined': 0.19791666666666663, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38461538461538464, 'r': 0.43478260869565216, 'f1': 0.40816326530612246}, 'combined': 0.20408163265306123, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 31} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32324861099239804, 'r': 0.3201817361252975, 'f1': 0.3217078645148366}, 'combined': 0.23704790016882693, 'epoch': 31} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36854991917508884, 'r': 0.2943742585468896, 'f1': 0.32731227141992336}, 'combined': 0.2171279424270779, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 31} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:19:53.730962: step: 2/466, loss: 0.023411225527524948 2023-01-24 04:19:54.331091: step: 4/466, loss: 0.007410215213894844 2023-01-24 04:19:54.932139: step: 6/466, loss: 0.009759167209267616 2023-01-24 04:19:55.561143: step: 8/466, loss: 0.016646098345518112 2023-01-24 04:19:56.233347: step: 10/466, loss: 0.0009101228788495064 2023-01-24 04:19:56.802944: step: 12/466, loss: 0.0028298806864768267 2023-01-24 04:19:57.337483: step: 14/466, loss: 0.006708510220050812 2023-01-24 04:19:57.922293: step: 16/466, loss: 0.013693740591406822 2023-01-24 04:19:58.488046: step: 18/466, loss: 0.05833829566836357 2023-01-24 04:19:59.106376: step: 20/466, loss: 0.0005727614625357091 2023-01-24 04:19:59.673476: step: 22/466, loss: 0.004258910194039345 2023-01-24 04:20:00.311424: step: 24/466, loss: 0.0123649463057518 2023-01-24 04:20:01.001966: step: 26/466, loss: 0.021816201508045197 2023-01-24 04:20:01.576286: step: 28/466, loss: 0.002437890972942114 2023-01-24 04:20:02.190488: step: 30/466, loss: 0.0004007563111372292 2023-01-24 04:20:02.755084: step: 32/466, loss: 0.05183832347393036 2023-01-24 04:20:03.363766: step: 34/466, loss: 0.0009992136619985104 2023-01-24 04:20:03.917533: step: 36/466, loss: 0.0018776083597913384 2023-01-24 04:20:04.549713: step: 38/466, loss: 0.0024277763441205025 2023-01-24 04:20:05.176058: step: 40/466, loss: 0.04051138833165169 2023-01-24 04:20:05.826745: step: 42/466, loss: 0.004881789442151785 2023-01-24 04:20:06.443652: step: 44/466, loss: 0.000656484451610595 2023-01-24 04:20:06.986720: step: 46/466, loss: 0.009599301964044571 2023-01-24 04:20:07.533793: step: 48/466, loss: 0.0040141381323337555 2023-01-24 04:20:08.128101: step: 50/466, loss: 0.009538087993860245 2023-01-24 04:20:08.705301: step: 52/466, loss: 0.00047708090278320014 2023-01-24 04:20:09.329474: step: 54/466, loss: 0.005349311046302319 2023-01-24 04:20:09.976174: step: 56/466, loss: 0.009912590496242046 2023-01-24 04:20:10.591843: step: 58/466, loss: 0.006544522475451231 2023-01-24 04:20:11.222205: step: 60/466, loss: 0.008314132690429688 2023-01-24 04:20:11.892273: step: 62/466, loss: 0.010748178698122501 2023-01-24 04:20:12.476792: step: 64/466, loss: 0.756777286529541 2023-01-24 04:20:13.097643: step: 66/466, loss: 0.0027388925664126873 2023-01-24 04:20:13.667681: step: 68/466, loss: 0.0002909097238443792 2023-01-24 04:20:14.251552: step: 70/466, loss: 0.00019457661255728453 2023-01-24 04:20:14.873017: step: 72/466, loss: 0.031159192323684692 2023-01-24 04:20:15.578033: step: 74/466, loss: 0.014231045730412006 2023-01-24 04:20:16.148517: step: 76/466, loss: 0.007948565296828747 2023-01-24 04:20:16.748168: step: 78/466, loss: 0.0001657801476540044 2023-01-24 04:20:17.363737: step: 80/466, loss: 0.3372848331928253 2023-01-24 04:20:18.026113: step: 82/466, loss: 0.03667658939957619 2023-01-24 04:20:18.698068: step: 84/466, loss: 0.018214678391814232 2023-01-24 04:20:19.320135: step: 86/466, loss: 0.04305468872189522 2023-01-24 04:20:19.935198: step: 88/466, loss: 0.0036728845443576574 2023-01-24 04:20:20.563774: step: 90/466, loss: 0.005807220935821533 2023-01-24 04:20:21.224061: step: 92/466, loss: 0.00411470839753747 2023-01-24 04:20:21.816428: step: 94/466, loss: 0.004546079318970442 2023-01-24 04:20:22.451626: step: 96/466, loss: 0.01811668649315834 2023-01-24 04:20:23.154093: step: 98/466, loss: 0.02663789875805378 2023-01-24 04:20:23.780210: step: 100/466, loss: 0.01967105083167553 2023-01-24 04:20:24.386815: step: 102/466, loss: 0.14476454257965088 2023-01-24 04:20:25.014643: step: 104/466, loss: 0.04371005669236183 2023-01-24 04:20:25.666408: step: 106/466, loss: 0.0020042855758219957 2023-01-24 04:20:26.260540: step: 108/466, loss: 0.0014833958121016622 2023-01-24 04:20:26.892487: step: 110/466, loss: 0.005307415965944529 2023-01-24 04:20:27.499477: step: 112/466, loss: 0.0075561366975307465 2023-01-24 04:20:28.165900: step: 114/466, loss: 0.011789039708673954 2023-01-24 04:20:28.790515: step: 116/466, loss: 0.002293430967256427 2023-01-24 04:20:29.345581: step: 118/466, loss: 0.11512932181358337 2023-01-24 04:20:30.028058: step: 120/466, loss: 6.02415093453601e-05 2023-01-24 04:20:30.665487: step: 122/466, loss: 0.03975168988108635 2023-01-24 04:20:31.230257: step: 124/466, loss: 0.009585501626133919 2023-01-24 04:20:31.823659: step: 126/466, loss: 0.004253715742379427 2023-01-24 04:20:32.514420: step: 128/466, loss: 0.014150316826999187 2023-01-24 04:20:33.100371: step: 130/466, loss: 0.020391590893268585 2023-01-24 04:20:33.767630: step: 132/466, loss: 0.03351735696196556 2023-01-24 04:20:34.474787: step: 134/466, loss: 0.030212484300136566 2023-01-24 04:20:35.088303: step: 136/466, loss: 0.011778630316257477 2023-01-24 04:20:35.717058: step: 138/466, loss: 0.21124117076396942 2023-01-24 04:20:36.375468: step: 140/466, loss: 0.011526920832693577 2023-01-24 04:20:37.034842: step: 142/466, loss: 0.027538979426026344 2023-01-24 04:20:37.638637: step: 144/466, loss: 0.06910871714353561 2023-01-24 04:20:38.289665: step: 146/466, loss: 0.06594354659318924 2023-01-24 04:20:38.930056: step: 148/466, loss: 0.12538748979568481 2023-01-24 04:20:39.547885: step: 150/466, loss: 0.0019299472915008664 2023-01-24 04:20:40.157234: step: 152/466, loss: 0.07830004394054413 2023-01-24 04:20:40.766567: step: 154/466, loss: 0.007638944312930107 2023-01-24 04:20:41.415902: step: 156/466, loss: 0.009758553467690945 2023-01-24 04:20:42.029096: step: 158/466, loss: 8.209812949644402e-05 2023-01-24 04:20:42.606020: step: 160/466, loss: 0.016384704038500786 2023-01-24 04:20:43.195007: step: 162/466, loss: 0.000600729079451412 2023-01-24 04:20:43.856171: step: 164/466, loss: 0.9745030403137207 2023-01-24 04:20:44.448015: step: 166/466, loss: 0.025582611560821533 2023-01-24 04:20:45.080114: step: 168/466, loss: 0.013880142942070961 2023-01-24 04:20:45.717382: step: 170/466, loss: 0.008554169908165932 2023-01-24 04:20:46.495332: step: 172/466, loss: 0.0011577691184356809 2023-01-24 04:20:47.115968: step: 174/466, loss: 0.02661309763789177 2023-01-24 04:20:47.679009: step: 176/466, loss: 0.0473182387650013 2023-01-24 04:20:48.274437: step: 178/466, loss: 0.00834281463176012 2023-01-24 04:20:48.874064: step: 180/466, loss: 0.007583326194435358 2023-01-24 04:20:49.502893: step: 182/466, loss: 0.002567940391600132 2023-01-24 04:20:50.107805: step: 184/466, loss: 0.028543440625071526 2023-01-24 04:20:50.716614: step: 186/466, loss: 0.04543311521410942 2023-01-24 04:20:51.290404: step: 188/466, loss: 0.002355037024244666 2023-01-24 04:20:51.955469: step: 190/466, loss: 0.006026502698659897 2023-01-24 04:20:52.647678: step: 192/466, loss: 0.01346490066498518 2023-01-24 04:20:53.256944: step: 194/466, loss: 0.01777638867497444 2023-01-24 04:20:53.875877: step: 196/466, loss: 0.008044625632464886 2023-01-24 04:20:54.523368: step: 198/466, loss: 0.0005668763187713921 2023-01-24 04:20:55.254995: step: 200/466, loss: 0.028658686205744743 2023-01-24 04:20:55.939699: step: 202/466, loss: 0.6971175670623779 2023-01-24 04:20:56.555549: step: 204/466, loss: 0.0012721081729978323 2023-01-24 04:20:57.160459: step: 206/466, loss: 0.0016366386553272605 2023-01-24 04:20:57.745826: step: 208/466, loss: 0.000262612069491297 2023-01-24 04:20:58.367025: step: 210/466, loss: 0.028013983741402626 2023-01-24 04:20:58.999912: step: 212/466, loss: 0.08432400226593018 2023-01-24 04:20:59.588214: step: 214/466, loss: 0.014807991683483124 2023-01-24 04:21:00.241017: step: 216/466, loss: 0.0009584611980244517 2023-01-24 04:21:00.889301: step: 218/466, loss: 0.003678521839901805 2023-01-24 04:21:01.543086: step: 220/466, loss: 0.012200010009109974 2023-01-24 04:21:02.160407: step: 222/466, loss: 0.08298763632774353 2023-01-24 04:21:02.758663: step: 224/466, loss: 0.0012888513738289475 2023-01-24 04:21:03.430481: step: 226/466, loss: 0.0013545186957344413 2023-01-24 04:21:04.081308: step: 228/466, loss: 0.0001419303735019639 2023-01-24 04:21:04.656610: step: 230/466, loss: 0.0009365221485495567 2023-01-24 04:21:05.266203: step: 232/466, loss: 0.005224217195063829 2023-01-24 04:21:05.896160: step: 234/466, loss: 0.0006510214298032224 2023-01-24 04:21:06.722479: step: 236/466, loss: 0.05415847525000572 2023-01-24 04:21:07.358794: step: 238/466, loss: 0.027244968339800835 2023-01-24 04:21:07.926892: step: 240/466, loss: 0.3291933536529541 2023-01-24 04:21:08.502021: step: 242/466, loss: 0.048413824290037155 2023-01-24 04:21:09.073804: step: 244/466, loss: 0.003729290096089244 2023-01-24 04:21:09.635388: step: 246/466, loss: 0.0262068472802639 2023-01-24 04:21:10.284695: step: 248/466, loss: 0.010029599070549011 2023-01-24 04:21:10.869992: step: 250/466, loss: 0.020009851083159447 2023-01-24 04:21:11.506284: step: 252/466, loss: 0.027754565700888634 2023-01-24 04:21:12.130233: step: 254/466, loss: 0.00015698251081630588 2023-01-24 04:21:12.717222: step: 256/466, loss: 0.00034971226705238223 2023-01-24 04:21:13.366561: step: 258/466, loss: 0.0029135001823306084 2023-01-24 04:21:14.103664: step: 260/466, loss: 0.10238341242074966 2023-01-24 04:21:14.737082: step: 262/466, loss: 0.00717756524682045 2023-01-24 04:21:15.340575: step: 264/466, loss: 0.056868311017751694 2023-01-24 04:21:15.944912: step: 266/466, loss: 0.0049666459672153 2023-01-24 04:21:16.547832: step: 268/466, loss: 0.04420678690075874 2023-01-24 04:21:17.220426: step: 270/466, loss: 0.07952933758497238 2023-01-24 04:21:17.871438: step: 272/466, loss: 0.004162727855145931 2023-01-24 04:21:18.551050: step: 274/466, loss: 0.0005441865650936961 2023-01-24 04:21:19.172686: step: 276/466, loss: 0.15364809334278107 2023-01-24 04:21:19.782872: step: 278/466, loss: 0.01183517649769783 2023-01-24 04:21:20.442631: step: 280/466, loss: 0.08058245480060577 2023-01-24 04:21:21.018462: step: 282/466, loss: 0.0024886305909603834 2023-01-24 04:21:21.730069: step: 284/466, loss: 0.055408552289009094 2023-01-24 04:21:22.333585: step: 286/466, loss: 0.037237122654914856 2023-01-24 04:21:22.961812: step: 288/466, loss: 0.013606084510684013 2023-01-24 04:21:23.530589: step: 290/466, loss: 0.003307209350168705 2023-01-24 04:21:24.188092: step: 292/466, loss: 0.02808493748307228 2023-01-24 04:21:24.832340: step: 294/466, loss: 0.46002358198165894 2023-01-24 04:21:25.436206: step: 296/466, loss: 0.2787669003009796 2023-01-24 04:21:26.093242: step: 298/466, loss: 0.012026851065456867 2023-01-24 04:21:26.673874: step: 300/466, loss: 0.03737563267350197 2023-01-24 04:21:27.289816: step: 302/466, loss: 0.008616953156888485 2023-01-24 04:21:27.908490: step: 304/466, loss: 0.046242013573646545 2023-01-24 04:21:28.450296: step: 306/466, loss: 0.00045969485654495656 2023-01-24 04:21:29.021691: step: 308/466, loss: 0.1616874784231186 2023-01-24 04:21:29.685971: step: 310/466, loss: 0.016417542472481728 2023-01-24 04:21:30.347596: step: 312/466, loss: 0.395427942276001 2023-01-24 04:21:30.941646: step: 314/466, loss: 0.03880275413393974 2023-01-24 04:21:31.658432: step: 316/466, loss: 0.009924361482262611 2023-01-24 04:21:32.350038: step: 318/466, loss: 0.03377104178071022 2023-01-24 04:21:33.012336: step: 320/466, loss: 0.005184852983802557 2023-01-24 04:21:33.630416: step: 322/466, loss: 0.0021988372318446636 2023-01-24 04:21:34.255221: step: 324/466, loss: 0.00976119190454483 2023-01-24 04:21:34.901282: step: 326/466, loss: 0.002342578722164035 2023-01-24 04:21:35.542479: step: 328/466, loss: 0.0007447432144545019 2023-01-24 04:21:36.172763: step: 330/466, loss: 0.04353374242782593 2023-01-24 04:21:36.784686: step: 332/466, loss: 0.0004478727059904486 2023-01-24 04:21:37.452350: step: 334/466, loss: 0.01395033672451973 2023-01-24 04:21:38.125970: step: 336/466, loss: 0.024939075112342834 2023-01-24 04:21:38.717367: step: 338/466, loss: 0.02792339213192463 2023-01-24 04:21:39.321694: step: 340/466, loss: 0.000779581954702735 2023-01-24 04:21:39.944657: step: 342/466, loss: 0.01966101862490177 2023-01-24 04:21:40.625705: step: 344/466, loss: 0.01172749325633049 2023-01-24 04:21:41.196761: step: 346/466, loss: 0.011904709972441196 2023-01-24 04:21:41.815893: step: 348/466, loss: 0.5545079112052917 2023-01-24 04:21:42.426720: step: 350/466, loss: 0.008093088865280151 2023-01-24 04:21:43.103007: step: 352/466, loss: 0.032002806663513184 2023-01-24 04:21:43.768195: step: 354/466, loss: 0.00299689918756485 2023-01-24 04:21:44.355895: step: 356/466, loss: 0.001496439566835761 2023-01-24 04:21:44.987302: step: 358/466, loss: 0.004775097128003836 2023-01-24 04:21:45.620522: step: 360/466, loss: 0.006835389882326126 2023-01-24 04:21:46.212699: step: 362/466, loss: 0.004290407057851553 2023-01-24 04:21:46.787100: step: 364/466, loss: 0.0022695898078382015 2023-01-24 04:21:47.404286: step: 366/466, loss: 0.022341115400195122 2023-01-24 04:21:48.020055: step: 368/466, loss: 0.003539501456543803 2023-01-24 04:21:48.677291: step: 370/466, loss: 0.028464237228035927 2023-01-24 04:21:49.286748: step: 372/466, loss: 0.021883945912122726 2023-01-24 04:21:49.948956: step: 374/466, loss: 0.08412665128707886 2023-01-24 04:21:50.543546: step: 376/466, loss: 0.056779466569423676 2023-01-24 04:21:51.221679: step: 378/466, loss: 0.0023293436970561743 2023-01-24 04:21:51.805461: step: 380/466, loss: 0.004519928712397814 2023-01-24 04:21:52.449497: step: 382/466, loss: 0.015636395663022995 2023-01-24 04:21:53.026123: step: 384/466, loss: 0.0055314707569777966 2023-01-24 04:21:53.649278: step: 386/466, loss: 0.2097301185131073 2023-01-24 04:21:54.253992: step: 388/466, loss: 0.07120423018932343 2023-01-24 04:21:54.967894: step: 390/466, loss: 0.0455934964120388 2023-01-24 04:21:55.558560: step: 392/466, loss: 0.006745305843651295 2023-01-24 04:21:56.132677: step: 394/466, loss: 0.014581529423594475 2023-01-24 04:21:56.787076: step: 396/466, loss: 0.011852283962070942 2023-01-24 04:21:57.434583: step: 398/466, loss: 0.007304691709578037 2023-01-24 04:21:58.121073: step: 400/466, loss: 0.11655885726213455 2023-01-24 04:21:58.737837: step: 402/466, loss: 0.018392568454146385 2023-01-24 04:21:59.346662: step: 404/466, loss: 0.01346831675618887 2023-01-24 04:21:59.875459: step: 406/466, loss: 0.02412579208612442 2023-01-24 04:22:00.553623: step: 408/466, loss: 0.030196748673915863 2023-01-24 04:22:01.134106: step: 410/466, loss: 0.022959111258387566 2023-01-24 04:22:01.735761: step: 412/466, loss: 0.04549776017665863 2023-01-24 04:22:02.325527: step: 414/466, loss: 0.0074821715243160725 2023-01-24 04:22:02.963910: step: 416/466, loss: 0.03170736879110336 2023-01-24 04:22:03.608877: step: 418/466, loss: 1.2644948959350586 2023-01-24 04:22:04.198861: step: 420/466, loss: 0.0130178052932024 2023-01-24 04:22:05.062471: step: 422/466, loss: 0.3240489959716797 2023-01-24 04:22:05.748301: step: 424/466, loss: 0.03748927637934685 2023-01-24 04:22:06.337518: step: 426/466, loss: 0.0007072651060298085 2023-01-24 04:22:06.877040: step: 428/466, loss: 0.00021171310800127685 2023-01-24 04:22:07.528373: step: 430/466, loss: 0.1810675859451294 2023-01-24 04:22:08.139795: step: 432/466, loss: 0.00011003683903254569 2023-01-24 04:22:08.742776: step: 434/466, loss: 0.07037109136581421 2023-01-24 04:22:09.343835: step: 436/466, loss: 0.00043664249824360013 2023-01-24 04:22:09.985263: step: 438/466, loss: 0.016499321907758713 2023-01-24 04:22:10.571665: step: 440/466, loss: 0.00046534600551240146 2023-01-24 04:22:11.109342: step: 442/466, loss: 5.056872760178521e-05 2023-01-24 04:22:11.744491: step: 444/466, loss: 0.0027938506100326777 2023-01-24 04:22:12.438336: step: 446/466, loss: 0.005168906878679991 2023-01-24 04:22:13.003434: step: 448/466, loss: 0.039607737213373184 2023-01-24 04:22:13.645253: step: 450/466, loss: 0.027140803635120392 2023-01-24 04:22:14.287551: step: 452/466, loss: 0.0048378705978393555 2023-01-24 04:22:14.899436: step: 454/466, loss: 0.021167725324630737 2023-01-24 04:22:15.526216: step: 456/466, loss: 0.05374099314212799 2023-01-24 04:22:16.178012: step: 458/466, loss: 0.0035808503162115812 2023-01-24 04:22:16.750129: step: 460/466, loss: 0.004817279055714607 2023-01-24 04:22:17.353502: step: 462/466, loss: 0.8396977782249451 2023-01-24 04:22:17.934826: step: 464/466, loss: 0.003646058263257146 2023-01-24 04:22:18.576162: step: 466/466, loss: 0.016296282410621643 2023-01-24 04:22:19.166722: step: 468/466, loss: 0.060796670615673065 2023-01-24 04:22:19.773693: step: 470/466, loss: 0.05054939538240433 2023-01-24 04:22:20.371618: step: 472/466, loss: 0.02450472302734852 2023-01-24 04:22:20.956083: step: 474/466, loss: 0.0007114399340935051 2023-01-24 04:22:21.556546: step: 476/466, loss: 0.0573386587202549 2023-01-24 04:22:22.174336: step: 478/466, loss: 0.004851747769862413 2023-01-24 04:22:22.759342: step: 480/466, loss: 0.038977328687906265 2023-01-24 04:22:23.363686: step: 482/466, loss: 0.006838818080723286 2023-01-24 04:22:23.935608: step: 484/466, loss: 0.053884100168943405 2023-01-24 04:22:24.475502: step: 486/466, loss: 0.003641329472884536 2023-01-24 04:22:25.094518: step: 488/466, loss: 0.036911703646183014 2023-01-24 04:22:25.723514: step: 490/466, loss: 0.010725787840783596 2023-01-24 04:22:26.372704: step: 492/466, loss: 0.005133012309670448 2023-01-24 04:22:26.981264: step: 494/466, loss: 0.0011553947115316987 2023-01-24 04:22:27.604296: step: 496/466, loss: 0.013483748771250248 2023-01-24 04:22:28.206072: step: 498/466, loss: 0.08920584619045258 2023-01-24 04:22:28.802184: step: 500/466, loss: 0.26528024673461914 2023-01-24 04:22:29.396505: step: 502/466, loss: 0.0017552432836964726 2023-01-24 04:22:30.049511: step: 504/466, loss: 0.0041707539930939674 2023-01-24 04:22:30.621897: step: 506/466, loss: 0.0010291712824255228 2023-01-24 04:22:31.251343: step: 508/466, loss: 0.003643125295639038 2023-01-24 04:22:31.869070: step: 510/466, loss: 0.011014739982783794 2023-01-24 04:22:32.484710: step: 512/466, loss: 0.02263011783361435 2023-01-24 04:22:33.148040: step: 514/466, loss: 0.017728324979543686 2023-01-24 04:22:33.771180: step: 516/466, loss: 0.0007419459288939834 2023-01-24 04:22:34.364540: step: 518/466, loss: 0.002441921504214406 2023-01-24 04:22:34.986915: step: 520/466, loss: 0.0023182809818536043 2023-01-24 04:22:35.585973: step: 522/466, loss: 0.00185644649900496 2023-01-24 04:22:36.220810: step: 524/466, loss: 0.000569177616853267 2023-01-24 04:22:36.901171: step: 526/466, loss: 0.004187730140984058 2023-01-24 04:22:37.506772: step: 528/466, loss: 0.01688574068248272 2023-01-24 04:22:38.001449: step: 530/466, loss: 0.0020011102315038443 2023-01-24 04:22:38.586705: step: 532/466, loss: 0.0068510305136442184 2023-01-24 04:22:39.256955: step: 534/466, loss: 0.017126111313700676 2023-01-24 04:22:39.890766: step: 536/466, loss: 0.06983166933059692 2023-01-24 04:22:40.537463: step: 538/466, loss: 0.0038557236548513174 2023-01-24 04:22:41.139499: step: 540/466, loss: 0.009185834787786007 2023-01-24 04:22:41.756363: step: 542/466, loss: 0.0028127918485552073 2023-01-24 04:22:42.451671: step: 544/466, loss: 0.000843394489493221 2023-01-24 04:22:43.072957: step: 546/466, loss: 0.003618141869083047 2023-01-24 04:22:43.727161: step: 548/466, loss: 9.706970740808174e-05 2023-01-24 04:22:44.359592: step: 550/466, loss: 0.0065374658443033695 2023-01-24 04:22:44.946019: step: 552/466, loss: 0.00029544203425757587 2023-01-24 04:22:45.549248: step: 554/466, loss: 0.018424056470394135 2023-01-24 04:22:46.160362: step: 556/466, loss: 0.005046153906732798 2023-01-24 04:22:46.742522: step: 558/466, loss: 0.01957661285996437 2023-01-24 04:22:47.429231: step: 560/466, loss: 0.04268014803528786 2023-01-24 04:22:48.039612: step: 562/466, loss: 0.00758602237328887 2023-01-24 04:22:48.592782: step: 564/466, loss: 0.0011110709747299552 2023-01-24 04:22:49.207316: step: 566/466, loss: 0.0041378033347427845 2023-01-24 04:22:49.759520: step: 568/466, loss: 0.00987055990844965 2023-01-24 04:22:50.372806: step: 570/466, loss: 0.008387669920921326 2023-01-24 04:22:50.992216: step: 572/466, loss: 0.024874651804566383 2023-01-24 04:22:51.602796: step: 574/466, loss: 0.020754938945174217 2023-01-24 04:22:52.253012: step: 576/466, loss: 0.0061580706387758255 2023-01-24 04:22:52.866067: step: 578/466, loss: 0.008794519118964672 2023-01-24 04:22:53.531848: step: 580/466, loss: 0.003570317290723324 2023-01-24 04:22:54.190471: step: 582/466, loss: 0.010545005090534687 2023-01-24 04:22:54.837997: step: 584/466, loss: 0.01019436027854681 2023-01-24 04:22:55.479638: step: 586/466, loss: 0.006862320005893707 2023-01-24 04:22:56.027563: step: 588/466, loss: 0.009396102279424667 2023-01-24 04:22:56.639408: step: 590/466, loss: 0.011004658415913582 2023-01-24 04:22:57.283185: step: 592/466, loss: 0.008732697926461697 2023-01-24 04:22:57.902477: step: 594/466, loss: 0.0007735695689916611 2023-01-24 04:22:58.459204: step: 596/466, loss: 0.006525528617203236 2023-01-24 04:22:59.063991: step: 598/466, loss: 0.0038592349737882614 2023-01-24 04:22:59.724682: step: 600/466, loss: 0.0046826316975057125 2023-01-24 04:23:00.333925: step: 602/466, loss: 0.024678422138094902 2023-01-24 04:23:00.936365: step: 604/466, loss: 0.0025267021264880896 2023-01-24 04:23:01.510391: step: 606/466, loss: 0.024762844666838646 2023-01-24 04:23:02.127307: step: 608/466, loss: 0.016276901587843895 2023-01-24 04:23:02.779764: step: 610/466, loss: 0.017654333263635635 2023-01-24 04:23:03.402292: step: 612/466, loss: 0.26210862398147583 2023-01-24 04:23:03.998991: step: 614/466, loss: 0.01292844582349062 2023-01-24 04:23:04.542152: step: 616/466, loss: 0.0008593133534304798 2023-01-24 04:23:05.103489: step: 618/466, loss: 0.010108716785907745 2023-01-24 04:23:05.696437: step: 620/466, loss: 0.012292332015931606 2023-01-24 04:23:06.250827: step: 622/466, loss: 0.01431113202124834 2023-01-24 04:23:06.862100: step: 624/466, loss: 0.037744227796792984 2023-01-24 04:23:07.463095: step: 626/466, loss: 1.0760530233383179 2023-01-24 04:23:08.127793: step: 628/466, loss: 0.011397158727049828 2023-01-24 04:23:08.757894: step: 630/466, loss: 0.042986877262592316 2023-01-24 04:23:09.368743: step: 632/466, loss: 0.04665377736091614 2023-01-24 04:23:09.928082: step: 634/466, loss: 0.0028689559549093246 2023-01-24 04:23:10.558092: step: 636/466, loss: 0.010561549104750156 2023-01-24 04:23:11.209358: step: 638/466, loss: 0.00509530259296298 2023-01-24 04:23:11.871300: step: 640/466, loss: 0.0013705334858968854 2023-01-24 04:23:12.451212: step: 642/466, loss: 0.04495071619749069 2023-01-24 04:23:13.090459: step: 644/466, loss: 0.0006146457162685692 2023-01-24 04:23:13.755533: step: 646/466, loss: 0.16979992389678955 2023-01-24 04:23:14.400381: step: 648/466, loss: 0.05898624658584595 2023-01-24 04:23:15.026724: step: 650/466, loss: 0.00024182444030884653 2023-01-24 04:23:15.683254: step: 652/466, loss: 0.01646382175385952 2023-01-24 04:23:16.281189: step: 654/466, loss: 0.2729729115962982 2023-01-24 04:23:16.909734: step: 656/466, loss: 0.43773236870765686 2023-01-24 04:23:17.519850: step: 658/466, loss: 0.06977491825819016 2023-01-24 04:23:18.153473: step: 660/466, loss: 0.011786994524300098 2023-01-24 04:23:18.797096: step: 662/466, loss: 0.01343371532857418 2023-01-24 04:23:19.420969: step: 664/466, loss: 0.004439284559339285 2023-01-24 04:23:20.034590: step: 666/466, loss: 0.014500009827315807 2023-01-24 04:23:20.698864: step: 668/466, loss: 0.008671484887599945 2023-01-24 04:23:21.263485: step: 670/466, loss: 0.0009760663961060345 2023-01-24 04:23:21.876711: step: 672/466, loss: 0.4349953234195709 2023-01-24 04:23:22.508732: step: 674/466, loss: 0.0054158661514520645 2023-01-24 04:23:23.121074: step: 676/466, loss: 0.014271006919443607 2023-01-24 04:23:23.727621: step: 678/466, loss: 0.012837562710046768 2023-01-24 04:23:24.323677: step: 680/466, loss: 1.0666906833648682 2023-01-24 04:23:24.973632: step: 682/466, loss: 0.01808995008468628 2023-01-24 04:23:25.582956: step: 684/466, loss: 0.17323540151119232 2023-01-24 04:23:26.226314: step: 686/466, loss: 0.0014327450189739466 2023-01-24 04:23:26.852306: step: 688/466, loss: 0.003553766990080476 2023-01-24 04:23:27.531302: step: 690/466, loss: 0.026842013001441956 2023-01-24 04:23:28.148506: step: 692/466, loss: 0.031177956610918045 2023-01-24 04:23:28.688550: step: 694/466, loss: 0.0005657792207784951 2023-01-24 04:23:29.324560: step: 696/466, loss: 0.05083481967449188 2023-01-24 04:23:29.918016: step: 698/466, loss: 0.09655691683292389 2023-01-24 04:23:30.511018: step: 700/466, loss: 0.03542042151093483 2023-01-24 04:23:31.131140: step: 702/466, loss: 0.0032076865900307894 2023-01-24 04:23:31.790616: step: 704/466, loss: 0.005588217172771692 2023-01-24 04:23:32.488212: step: 706/466, loss: 0.03496195748448372 2023-01-24 04:23:33.174630: step: 708/466, loss: 0.016073670238256454 2023-01-24 04:23:33.889193: step: 710/466, loss: 0.0002725913655012846 2023-01-24 04:23:34.484432: step: 712/466, loss: 0.0025168531574308872 2023-01-24 04:23:35.090392: step: 714/466, loss: 0.024312369525432587 2023-01-24 04:23:35.795999: step: 716/466, loss: 0.012014505453407764 2023-01-24 04:23:36.407317: step: 718/466, loss: 0.0019410356180742383 2023-01-24 04:23:37.004132: step: 720/466, loss: 0.22631646692752838 2023-01-24 04:23:37.610714: step: 722/466, loss: 0.02366473525762558 2023-01-24 04:23:38.219421: step: 724/466, loss: 0.019449405372142792 2023-01-24 04:23:38.818687: step: 726/466, loss: 0.6188353896141052 2023-01-24 04:23:39.427674: step: 728/466, loss: 0.0034205520059913397 2023-01-24 04:23:40.051089: step: 730/466, loss: 0.10207657516002655 2023-01-24 04:23:40.690961: step: 732/466, loss: 0.03011954389512539 2023-01-24 04:23:41.310753: step: 734/466, loss: 0.0017117736861109734 2023-01-24 04:23:41.988469: step: 736/466, loss: 0.0110403997823596 2023-01-24 04:23:42.660504: step: 738/466, loss: 0.012234913185238838 2023-01-24 04:23:43.268200: step: 740/466, loss: 0.0024288101121783257 2023-01-24 04:23:43.906875: step: 742/466, loss: 0.0033122319728136063 2023-01-24 04:23:44.534581: step: 744/466, loss: 0.002562866546213627 2023-01-24 04:23:45.142935: step: 746/466, loss: 0.22490684688091278 2023-01-24 04:23:45.713552: step: 748/466, loss: 0.07354757189750671 2023-01-24 04:23:46.365445: step: 750/466, loss: 0.008838036097586155 2023-01-24 04:23:47.011666: step: 752/466, loss: 0.0469263531267643 2023-01-24 04:23:47.746047: step: 754/466, loss: 0.008982508443295956 2023-01-24 04:23:48.341708: step: 756/466, loss: 0.0004972777096554637 2023-01-24 04:23:48.987638: step: 758/466, loss: 0.004215892869979143 2023-01-24 04:23:49.560181: step: 760/466, loss: 8.985040039988235e-05 2023-01-24 04:23:50.069258: step: 762/466, loss: 0.0019088794942945242 2023-01-24 04:23:50.703538: step: 764/466, loss: 0.01623927243053913 2023-01-24 04:23:51.287445: step: 766/466, loss: 0.0019317037658765912 2023-01-24 04:23:51.852148: step: 768/466, loss: 0.02090388536453247 2023-01-24 04:23:52.423315: step: 770/466, loss: 0.0030517231207340956 2023-01-24 04:23:52.998664: step: 772/466, loss: 0.001847755047492683 2023-01-24 04:23:53.701843: step: 774/466, loss: 0.0017306302906945348 2023-01-24 04:23:54.326114: step: 776/466, loss: 0.0033409115858376026 2023-01-24 04:23:54.901161: step: 778/466, loss: 0.017179185524582863 2023-01-24 04:23:55.541145: step: 780/466, loss: 0.004775240086019039 2023-01-24 04:23:56.189932: step: 782/466, loss: 0.0027563865296542645 2023-01-24 04:23:56.836428: step: 784/466, loss: 0.021313194185495377 2023-01-24 04:23:57.472070: step: 786/466, loss: 0.06419835239648819 2023-01-24 04:23:58.049593: step: 788/466, loss: 0.00428836839273572 2023-01-24 04:23:58.592187: step: 790/466, loss: 0.01132860966026783 2023-01-24 04:23:59.250773: step: 792/466, loss: 0.03586093336343765 2023-01-24 04:23:59.888141: step: 794/466, loss: 0.015640152618288994 2023-01-24 04:24:00.482433: step: 796/466, loss: 0.010531782172620296 2023-01-24 04:24:01.167576: step: 798/466, loss: 0.000776784960180521 2023-01-24 04:24:01.742625: step: 800/466, loss: 0.0022169083822518587 2023-01-24 04:24:02.405316: step: 802/466, loss: 0.004453351721167564 2023-01-24 04:24:03.023809: step: 804/466, loss: 0.013527227565646172 2023-01-24 04:24:03.704575: step: 806/466, loss: 0.0005594379035755992 2023-01-24 04:24:04.265732: step: 808/466, loss: 0.0028750738129019737 2023-01-24 04:24:04.857353: step: 810/466, loss: 0.0008166292682290077 2023-01-24 04:24:05.509966: step: 812/466, loss: 0.023417195305228233 2023-01-24 04:24:06.124941: step: 814/466, loss: 0.18981100618839264 2023-01-24 04:24:06.740019: step: 816/466, loss: 0.027999037876725197 2023-01-24 04:24:07.277939: step: 818/466, loss: 0.014820974320173264 2023-01-24 04:24:07.906247: step: 820/466, loss: 0.03201407566666603 2023-01-24 04:24:08.603859: step: 822/466, loss: 0.6232259273529053 2023-01-24 04:24:09.172437: step: 824/466, loss: 0.020096469670534134 2023-01-24 04:24:09.798464: step: 826/466, loss: 0.018436051905155182 2023-01-24 04:24:10.390682: step: 828/466, loss: 0.0007196977967396379 2023-01-24 04:24:11.024224: step: 830/466, loss: 0.03440719470381737 2023-01-24 04:24:11.605664: step: 832/466, loss: 0.005299053154885769 2023-01-24 04:24:12.219862: step: 834/466, loss: 0.0396905280649662 2023-01-24 04:24:12.865596: step: 836/466, loss: 0.006178751587867737 2023-01-24 04:24:13.439793: step: 838/466, loss: 0.002416969044134021 2023-01-24 04:24:14.047037: step: 840/466, loss: 0.0021591908298432827 2023-01-24 04:24:14.735855: step: 842/466, loss: 0.06498021632432938 2023-01-24 04:24:15.362518: step: 844/466, loss: 0.381192684173584 2023-01-24 04:24:15.965076: step: 846/466, loss: 0.002702759811654687 2023-01-24 04:24:16.540515: step: 848/466, loss: 0.2226199060678482 2023-01-24 04:24:17.171092: step: 850/466, loss: 0.006855641026049852 2023-01-24 04:24:17.840026: step: 852/466, loss: 0.004108700435608625 2023-01-24 04:24:18.435462: step: 854/466, loss: 0.02524707093834877 2023-01-24 04:24:19.021289: step: 856/466, loss: 0.002686083549633622 2023-01-24 04:24:19.665521: step: 858/466, loss: 0.007875490933656693 2023-01-24 04:24:20.330812: step: 860/466, loss: 0.048078540712594986 2023-01-24 04:24:21.006971: step: 862/466, loss: 0.012722281739115715 2023-01-24 04:24:21.618155: step: 864/466, loss: 0.0064017209224402905 2023-01-24 04:24:22.192083: step: 866/466, loss: 0.021623631939291954 2023-01-24 04:24:22.801016: step: 868/466, loss: 0.08398287743330002 2023-01-24 04:24:23.484674: step: 870/466, loss: 0.15791849792003632 2023-01-24 04:24:24.179999: step: 872/466, loss: 0.009869234636425972 2023-01-24 04:24:24.820107: step: 874/466, loss: 0.055564120411872864 2023-01-24 04:24:25.353363: step: 876/466, loss: 0.022275876253843307 2023-01-24 04:24:26.024086: step: 878/466, loss: 0.012389284558594227 2023-01-24 04:24:26.680643: step: 880/466, loss: 0.039143890142440796 2023-01-24 04:24:27.259008: step: 882/466, loss: 0.001133722485974431 2023-01-24 04:24:27.846570: step: 884/466, loss: 0.0068652331829071045 2023-01-24 04:24:28.385061: step: 886/466, loss: 0.008525632321834564 2023-01-24 04:24:29.050293: step: 888/466, loss: 0.12470296770334244 2023-01-24 04:24:29.702249: step: 890/466, loss: 0.06432314217090607 2023-01-24 04:24:30.295624: step: 892/466, loss: 0.0015465306350961328 2023-01-24 04:24:31.020651: step: 894/466, loss: 0.022779835388064384 2023-01-24 04:24:31.617606: step: 896/466, loss: 0.05004104599356651 2023-01-24 04:24:32.184967: step: 898/466, loss: 0.3507273197174072 2023-01-24 04:24:32.762818: step: 900/466, loss: 0.32218262553215027 2023-01-24 04:24:33.428799: step: 902/466, loss: 0.016275178641080856 2023-01-24 04:24:34.019942: step: 904/466, loss: 0.0206745695322752 2023-01-24 04:24:34.684545: step: 906/466, loss: 0.0029978167731314898 2023-01-24 04:24:35.336099: step: 908/466, loss: 0.009281977079808712 2023-01-24 04:24:35.989837: step: 910/466, loss: 0.022028448060154915 2023-01-24 04:24:36.608151: step: 912/466, loss: 0.02624843269586563 2023-01-24 04:24:37.238350: step: 914/466, loss: 0.007913791574537754 2023-01-24 04:24:37.875162: step: 916/466, loss: 0.15027648210525513 2023-01-24 04:24:38.559017: step: 918/466, loss: 0.10869559645652771 2023-01-24 04:24:39.157606: step: 920/466, loss: 0.0009679266950115561 2023-01-24 04:24:39.692585: step: 922/466, loss: 0.012594206258654594 2023-01-24 04:24:40.449852: step: 924/466, loss: 0.0006866551120765507 2023-01-24 04:24:41.081943: step: 926/466, loss: 0.003792055416852236 2023-01-24 04:24:41.706626: step: 928/466, loss: 0.05514007434248924 2023-01-24 04:24:42.329304: step: 930/466, loss: 0.03771127015352249 2023-01-24 04:24:42.954865: step: 932/466, loss: 0.002431466244161129 ================================================== Loss: 0.051 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35798240938166315, 'r': 0.3185839658444023, 'f1': 0.33713604417670684}, 'combined': 0.24841603255125766, 'epoch': 32} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34798719708819886, 'r': 0.2763797233191471, 'f1': 0.3080771966173189}, 'combined': 0.19290815115290066, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3385108481262327, 'r': 0.3256641366223909, 'f1': 0.331963249516441}, 'combined': 0.24460449964369338, 'epoch': 32} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3536516388612572, 'r': 0.29141150386311176, 'f1': 0.31952889290482717}, 'combined': 0.1980179054621464, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3120724710338681, 'r': 0.3221393249381864, 'f1': 0.31702600232011996}, 'combined': 0.23359810697271996, 'epoch': 32} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3459394077638552, 'r': 0.27968904241070497, 'f1': 0.3093064634851652}, 'combined': 0.20518349557926802, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3620689655172414, 'r': 0.3, 'f1': 0.328125}, 'combined': 0.21875, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32954545454545453, 'r': 0.31521739130434784, 'f1': 0.3222222222222222}, 'combined': 0.1611111111111111, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38461538461538464, 'r': 0.1724137931034483, 'f1': 0.23809523809523808}, 'combined': 0.15873015873015872, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32324861099239804, 'r': 0.3201817361252975, 'f1': 0.3217078645148366}, 'combined': 0.23704790016882693, 'epoch': 31} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36854991917508884, 'r': 0.2943742585468896, 'f1': 0.32731227141992336}, 'combined': 0.2171279424270779, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 31} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:27:16.283158: step: 2/466, loss: 0.011471913196146488 2023-01-24 04:27:16.903008: step: 4/466, loss: 0.003329317085444927 2023-01-24 04:27:17.515755: step: 6/466, loss: 0.05382179468870163 2023-01-24 04:27:18.051987: step: 8/466, loss: 0.03933628648519516 2023-01-24 04:27:18.667311: step: 10/466, loss: 0.0022515642922371626 2023-01-24 04:27:19.376740: step: 12/466, loss: 0.005104720126837492 2023-01-24 04:27:20.003679: step: 14/466, loss: 0.003365864045917988 2023-01-24 04:27:20.688783: step: 16/466, loss: 0.0024858699180185795 2023-01-24 04:27:21.431240: step: 18/466, loss: 0.005585548467934132 2023-01-24 04:27:22.065036: step: 20/466, loss: 0.023618703708052635 2023-01-24 04:27:22.698070: step: 22/466, loss: 0.05313389003276825 2023-01-24 04:27:23.255905: step: 24/466, loss: 0.016346216201782227 2023-01-24 04:27:23.880448: step: 26/466, loss: 0.016453605145215988 2023-01-24 04:27:24.499974: step: 28/466, loss: 0.018995080143213272 2023-01-24 04:27:25.120432: step: 30/466, loss: 0.022241417318582535 2023-01-24 04:27:25.747051: step: 32/466, loss: 0.012925678864121437 2023-01-24 04:27:26.387963: step: 34/466, loss: 0.0032370146363973618 2023-01-24 04:27:27.040615: step: 36/466, loss: 0.010440031997859478 2023-01-24 04:27:27.698625: step: 38/466, loss: 0.0012315127532929182 2023-01-24 04:27:28.316561: step: 40/466, loss: 0.02326072007417679 2023-01-24 04:27:28.958602: step: 42/466, loss: 0.00437350245192647 2023-01-24 04:27:29.525230: step: 44/466, loss: 0.027784300968050957 2023-01-24 04:27:30.150434: step: 46/466, loss: 5.960246562608518e-05 2023-01-24 04:27:30.743605: step: 48/466, loss: 0.46270838379859924 2023-01-24 04:27:31.367078: step: 50/466, loss: 0.31255415081977844 2023-01-24 04:27:32.002978: step: 52/466, loss: 0.02952492982149124 2023-01-24 04:27:32.736402: step: 54/466, loss: 0.004272154998034239 2023-01-24 04:27:33.319878: step: 56/466, loss: 0.0911722257733345 2023-01-24 04:27:33.975784: step: 58/466, loss: 0.006820019334554672 2023-01-24 04:27:34.637934: step: 60/466, loss: 0.03626310080289841 2023-01-24 04:27:35.201492: step: 62/466, loss: 0.012094401754438877 2023-01-24 04:27:35.850682: step: 64/466, loss: 0.02697291038930416 2023-01-24 04:27:36.427376: step: 66/466, loss: 0.0008113411604426801 2023-01-24 04:27:37.066682: step: 68/466, loss: 1.1584266424179077 2023-01-24 04:27:37.730141: step: 70/466, loss: 0.01978868432343006 2023-01-24 04:27:38.314915: step: 72/466, loss: 0.05368809401988983 2023-01-24 04:27:38.926983: step: 74/466, loss: 0.0010436509037390351 2023-01-24 04:27:39.537843: step: 76/466, loss: 0.006107044406235218 2023-01-24 04:27:40.166871: step: 78/466, loss: 0.022841036319732666 2023-01-24 04:27:40.766310: step: 80/466, loss: 0.0010020930785685778 2023-01-24 04:27:41.431024: step: 82/466, loss: 0.025901824235916138 2023-01-24 04:27:42.043793: step: 84/466, loss: 0.008335251361131668 2023-01-24 04:27:42.637749: step: 86/466, loss: 0.032207243144512177 2023-01-24 04:27:43.264765: step: 88/466, loss: 0.018256762996315956 2023-01-24 04:27:43.841343: step: 90/466, loss: 0.0008741633500903845 2023-01-24 04:27:44.447771: step: 92/466, loss: 0.005068059545010328 2023-01-24 04:27:45.083891: step: 94/466, loss: 0.2405049204826355 2023-01-24 04:27:45.654346: step: 96/466, loss: 0.004374948795884848 2023-01-24 04:27:46.226673: step: 98/466, loss: 0.0037068433593958616 2023-01-24 04:27:46.895299: step: 100/466, loss: 0.003929052967578173 2023-01-24 04:27:47.498056: step: 102/466, loss: 0.02032223902642727 2023-01-24 04:27:48.067578: step: 104/466, loss: 0.0019619495142251253 2023-01-24 04:27:48.702850: step: 106/466, loss: 0.008959521539509296 2023-01-24 04:27:49.331455: step: 108/466, loss: 0.38862621784210205 2023-01-24 04:27:49.977312: step: 110/466, loss: 0.016267826780676842 2023-01-24 04:27:50.621519: step: 112/466, loss: 0.02662738785147667 2023-01-24 04:27:51.216828: step: 114/466, loss: 0.04652956500649452 2023-01-24 04:27:51.850021: step: 116/466, loss: 0.018068883568048477 2023-01-24 04:27:52.436318: step: 118/466, loss: 0.017216501757502556 2023-01-24 04:27:53.035382: step: 120/466, loss: 0.00020992594363633543 2023-01-24 04:27:53.703905: step: 122/466, loss: 0.0007761769229546189 2023-01-24 04:27:54.241293: step: 124/466, loss: 0.02506084553897381 2023-01-24 04:27:54.811481: step: 126/466, loss: 0.004943248815834522 2023-01-24 04:27:55.468822: step: 128/466, loss: 0.0730307549238205 2023-01-24 04:27:56.033372: step: 130/466, loss: 0.012557188980281353 2023-01-24 04:27:56.627690: step: 132/466, loss: 0.0061327372677624226 2023-01-24 04:27:57.258910: step: 134/466, loss: 0.020070943981409073 2023-01-24 04:27:57.939696: step: 136/466, loss: 0.0011945957085117698 2023-01-24 04:27:58.544794: step: 138/466, loss: 0.006863201502710581 2023-01-24 04:27:59.162055: step: 140/466, loss: 0.000985067104920745 2023-01-24 04:27:59.793553: step: 142/466, loss: 3.5155653953552246 2023-01-24 04:28:00.389697: step: 144/466, loss: 0.007382845506072044 2023-01-24 04:28:00.996012: step: 146/466, loss: 0.0032322753686457872 2023-01-24 04:28:01.671085: step: 148/466, loss: 0.006009763106703758 2023-01-24 04:28:02.297095: step: 150/466, loss: 0.00329978228546679 2023-01-24 04:28:02.901238: step: 152/466, loss: 0.00011540047853486612 2023-01-24 04:28:03.542388: step: 154/466, loss: 0.031016090884804726 2023-01-24 04:28:04.150320: step: 156/466, loss: 0.018580617383122444 2023-01-24 04:28:04.797250: step: 158/466, loss: 0.011402607895433903 2023-01-24 04:28:05.369023: step: 160/466, loss: 0.020523978397250175 2023-01-24 04:28:05.972836: step: 162/466, loss: 0.05876913666725159 2023-01-24 04:28:06.617453: step: 164/466, loss: 0.0005150781362317502 2023-01-24 04:28:07.230403: step: 166/466, loss: 0.0001547386054880917 2023-01-24 04:28:07.859157: step: 168/466, loss: 0.07472080737352371 2023-01-24 04:28:08.519237: step: 170/466, loss: 0.12995728850364685 2023-01-24 04:28:09.131450: step: 172/466, loss: 0.005580767057836056 2023-01-24 04:28:09.782069: step: 174/466, loss: 0.018097488209605217 2023-01-24 04:28:10.367713: step: 176/466, loss: 0.014467814937233925 2023-01-24 04:28:10.997925: step: 178/466, loss: 0.06239444389939308 2023-01-24 04:28:11.531782: step: 180/466, loss: 0.013972893357276917 2023-01-24 04:28:12.151519: step: 182/466, loss: 0.21559743583202362 2023-01-24 04:28:12.801801: step: 184/466, loss: 0.004221683833748102 2023-01-24 04:28:13.433553: step: 186/466, loss: 0.008235514163970947 2023-01-24 04:28:14.045751: step: 188/466, loss: 0.00017162322183139622 2023-01-24 04:28:14.712985: step: 190/466, loss: 0.0033084298484027386 2023-01-24 04:28:15.275527: step: 192/466, loss: 0.00020260861492715776 2023-01-24 04:28:15.969292: step: 194/466, loss: 0.013346048071980476 2023-01-24 04:28:16.559447: step: 196/466, loss: 0.1149318590760231 2023-01-24 04:28:17.161678: step: 198/466, loss: 0.009522732347249985 2023-01-24 04:28:17.780163: step: 200/466, loss: 0.01283420529216528 2023-01-24 04:28:18.316360: step: 202/466, loss: 0.08689934760332108 2023-01-24 04:28:18.979074: step: 204/466, loss: 0.0014092724304646254 2023-01-24 04:28:19.592636: step: 206/466, loss: 0.026221109554171562 2023-01-24 04:28:20.220599: step: 208/466, loss: 0.002638742793351412 2023-01-24 04:28:20.868229: step: 210/466, loss: 0.01947478950023651 2023-01-24 04:28:21.500723: step: 212/466, loss: 0.00884014181792736 2023-01-24 04:28:22.173238: step: 214/466, loss: 0.007349574007093906 2023-01-24 04:28:22.829624: step: 216/466, loss: 0.07528423517942429 2023-01-24 04:28:23.406671: step: 218/466, loss: 0.008929373696446419 2023-01-24 04:28:24.010123: step: 220/466, loss: 0.0001904977107187733 2023-01-24 04:28:24.546899: step: 222/466, loss: 0.0016717190155759454 2023-01-24 04:28:25.275283: step: 224/466, loss: 0.006900259293615818 2023-01-24 04:28:25.831373: step: 226/466, loss: 0.12913194298744202 2023-01-24 04:28:26.428401: step: 228/466, loss: 0.0014772651484236121 2023-01-24 04:28:27.049193: step: 230/466, loss: 0.028150325641036034 2023-01-24 04:28:27.672318: step: 232/466, loss: 0.000608698173891753 2023-01-24 04:28:28.274012: step: 234/466, loss: 0.03875589743256569 2023-01-24 04:28:28.913921: step: 236/466, loss: 0.00619150884449482 2023-01-24 04:28:29.432789: step: 238/466, loss: 0.013868349604308605 2023-01-24 04:28:30.056260: step: 240/466, loss: 0.002042794832959771 2023-01-24 04:28:30.627851: step: 242/466, loss: 0.00555034726858139 2023-01-24 04:28:31.191372: step: 244/466, loss: 0.09385880827903748 2023-01-24 04:28:31.822235: step: 246/466, loss: 0.0006551188416779041 2023-01-24 04:28:32.425811: step: 248/466, loss: 0.0023250433150678873 2023-01-24 04:28:33.034444: step: 250/466, loss: 0.03568408265709877 2023-01-24 04:28:33.688776: step: 252/466, loss: 0.005665675271302462 2023-01-24 04:28:34.300008: step: 254/466, loss: 0.01324258279055357 2023-01-24 04:28:34.867899: step: 256/466, loss: 0.0017058087978512049 2023-01-24 04:28:35.461623: step: 258/466, loss: 0.00028060312615707517 2023-01-24 04:28:36.125860: step: 260/466, loss: 0.0016577666392549872 2023-01-24 04:28:36.774664: step: 262/466, loss: 0.0002143417514162138 2023-01-24 04:28:37.410164: step: 264/466, loss: 0.008948958478868008 2023-01-24 04:28:38.110777: step: 266/466, loss: 0.024027816951274872 2023-01-24 04:28:38.761796: step: 268/466, loss: 0.0371001698076725 2023-01-24 04:28:39.369590: step: 270/466, loss: 0.002744163852185011 2023-01-24 04:28:39.999227: step: 272/466, loss: 0.012039094232022762 2023-01-24 04:28:40.624951: step: 274/466, loss: 0.00039196471334435046 2023-01-24 04:28:41.286365: step: 276/466, loss: 0.0013024767395108938 2023-01-24 04:28:41.970877: step: 278/466, loss: 0.5007848143577576 2023-01-24 04:28:42.614363: step: 280/466, loss: 0.019194647669792175 2023-01-24 04:28:43.234888: step: 282/466, loss: 0.0012372337514534593 2023-01-24 04:28:43.853258: step: 284/466, loss: 0.00508892722427845 2023-01-24 04:28:44.422077: step: 286/466, loss: 0.0002391472808085382 2023-01-24 04:28:45.096247: step: 288/466, loss: 0.010844860225915909 2023-01-24 04:28:45.736243: step: 290/466, loss: 0.006039358675479889 2023-01-24 04:28:46.333610: step: 292/466, loss: 0.012529624626040459 2023-01-24 04:28:46.983979: step: 294/466, loss: 0.09980753809213638 2023-01-24 04:28:47.643030: step: 296/466, loss: 0.0014879453228786588 2023-01-24 04:28:48.242613: step: 298/466, loss: 0.010294638574123383 2023-01-24 04:28:48.974491: step: 300/466, loss: 0.1290862262248993 2023-01-24 04:28:49.631944: step: 302/466, loss: 0.07933500409126282 2023-01-24 04:28:50.275586: step: 304/466, loss: 0.37784236669540405 2023-01-24 04:28:50.876099: step: 306/466, loss: 0.01543118804693222 2023-01-24 04:28:51.477807: step: 308/466, loss: 0.0007847215165384114 2023-01-24 04:28:52.104457: step: 310/466, loss: 0.0033282111398875713 2023-01-24 04:28:52.697463: step: 312/466, loss: 0.0855177491903305 2023-01-24 04:28:53.341403: step: 314/466, loss: 0.000362559367204085 2023-01-24 04:28:53.907957: step: 316/466, loss: 0.00028692680643871427 2023-01-24 04:28:54.528910: step: 318/466, loss: 0.0013479833723977208 2023-01-24 04:28:55.056363: step: 320/466, loss: 0.0003531108086463064 2023-01-24 04:28:55.658304: step: 322/466, loss: 0.0015235940227285028 2023-01-24 04:28:56.193677: step: 324/466, loss: 0.0012759566307067871 2023-01-24 04:28:56.842378: step: 326/466, loss: 0.003484574379399419 2023-01-24 04:28:57.395017: step: 328/466, loss: 0.01791924424469471 2023-01-24 04:28:57.970305: step: 330/466, loss: 0.0018407482421025634 2023-01-24 04:28:58.591751: step: 332/466, loss: 0.001974991988390684 2023-01-24 04:28:59.251850: step: 334/466, loss: 0.016238750889897346 2023-01-24 04:28:59.772387: step: 336/466, loss: 0.007455571088939905 2023-01-24 04:29:00.473498: step: 338/466, loss: 0.024535367265343666 2023-01-24 04:29:01.056388: step: 340/466, loss: 0.051678985357284546 2023-01-24 04:29:01.631951: step: 342/466, loss: 0.03882472589612007 2023-01-24 04:29:02.232176: step: 344/466, loss: 0.01984047144651413 2023-01-24 04:29:02.908159: step: 346/466, loss: 0.013271857053041458 2023-01-24 04:29:03.465138: step: 348/466, loss: 0.004043884109705687 2023-01-24 04:29:04.039036: step: 350/466, loss: 0.011178984306752682 2023-01-24 04:29:04.678721: step: 352/466, loss: 0.06485337018966675 2023-01-24 04:29:05.318381: step: 354/466, loss: 0.005821486935019493 2023-01-24 04:29:05.905477: step: 356/466, loss: 0.0008058268576860428 2023-01-24 04:29:06.485889: step: 358/466, loss: 0.0013918459881097078 2023-01-24 04:29:07.130986: step: 360/466, loss: 0.0012359642423689365 2023-01-24 04:29:07.751175: step: 362/466, loss: 0.003919335547834635 2023-01-24 04:29:08.338945: step: 364/466, loss: 0.005641720723360777 2023-01-24 04:29:08.972961: step: 366/466, loss: 0.017113283276557922 2023-01-24 04:29:09.584458: step: 368/466, loss: 0.0023856135085225105 2023-01-24 04:29:10.193507: step: 370/466, loss: 0.00031498647877015173 2023-01-24 04:29:10.873493: step: 372/466, loss: 0.016652140766382217 2023-01-24 04:29:11.541387: step: 374/466, loss: 0.0076950243674218655 2023-01-24 04:29:12.156013: step: 376/466, loss: 0.04501022771000862 2023-01-24 04:29:12.783982: step: 378/466, loss: 0.005576370283961296 2023-01-24 04:29:13.452192: step: 380/466, loss: 0.0011929406318813562 2023-01-24 04:29:14.055495: step: 382/466, loss: 0.05035883188247681 2023-01-24 04:29:14.732519: step: 384/466, loss: 0.019984649494290352 2023-01-24 04:29:15.366335: step: 386/466, loss: 0.01815779134631157 2023-01-24 04:29:15.974430: step: 388/466, loss: 0.003168065333738923 2023-01-24 04:29:16.638869: step: 390/466, loss: 0.011367907747626305 2023-01-24 04:29:17.288243: step: 392/466, loss: 0.010168864391744137 2023-01-24 04:29:17.858864: step: 394/466, loss: 0.004494980908930302 2023-01-24 04:29:18.445248: step: 396/466, loss: 0.08662625402212143 2023-01-24 04:29:19.052806: step: 398/466, loss: 0.0009130419930443168 2023-01-24 04:29:19.637425: step: 400/466, loss: 0.034484755247831345 2023-01-24 04:29:20.236994: step: 402/466, loss: 0.010733877308666706 2023-01-24 04:29:20.859074: step: 404/466, loss: 8.5583480540663e-05 2023-01-24 04:29:21.508317: step: 406/466, loss: 0.0028839707374572754 2023-01-24 04:29:22.144100: step: 408/466, loss: 0.009808849543333054 2023-01-24 04:29:22.744903: step: 410/466, loss: 0.04710133746266365 2023-01-24 04:29:23.356532: step: 412/466, loss: 0.009870425797998905 2023-01-24 04:29:23.941124: step: 414/466, loss: 0.003070020116865635 2023-01-24 04:29:24.550904: step: 416/466, loss: 0.006086159497499466 2023-01-24 04:29:25.198584: step: 418/466, loss: 0.010665013454854488 2023-01-24 04:29:25.769754: step: 420/466, loss: 0.0019336992409080267 2023-01-24 04:29:26.404110: step: 422/466, loss: 0.017348945140838623 2023-01-24 04:29:27.072169: step: 424/466, loss: 0.03703954070806503 2023-01-24 04:29:27.660572: step: 426/466, loss: 0.0038241290021687746 2023-01-24 04:29:28.288907: step: 428/466, loss: 0.00012386307935230434 2023-01-24 04:29:28.875735: step: 430/466, loss: 0.009493958204984665 2023-01-24 04:29:29.465149: step: 432/466, loss: 0.035253990441560745 2023-01-24 04:29:30.116192: step: 434/466, loss: 0.04366471245884895 2023-01-24 04:29:30.681073: step: 436/466, loss: 0.008260741829872131 2023-01-24 04:29:31.305826: step: 438/466, loss: 0.05740272253751755 2023-01-24 04:29:31.962496: step: 440/466, loss: 0.007057404611259699 2023-01-24 04:29:32.566300: step: 442/466, loss: 0.017819110304117203 2023-01-24 04:29:33.141615: step: 444/466, loss: 0.005557534750550985 2023-01-24 04:29:33.736947: step: 446/466, loss: 0.0004817653098143637 2023-01-24 04:29:34.327313: step: 448/466, loss: 0.032076407223939896 2023-01-24 04:29:34.959773: step: 450/466, loss: 1.6345652341842651 2023-01-24 04:29:35.621352: step: 452/466, loss: 0.019103243947029114 2023-01-24 04:29:36.200523: step: 454/466, loss: 0.0009005839237943292 2023-01-24 04:29:36.780804: step: 456/466, loss: 2.51334895438049e-05 2023-01-24 04:29:37.398231: step: 458/466, loss: 0.003000382101163268 2023-01-24 04:29:38.059748: step: 460/466, loss: 0.03360116109251976 2023-01-24 04:29:38.734032: step: 462/466, loss: 0.036374256014823914 2023-01-24 04:29:39.362604: step: 464/466, loss: 0.4118907153606415 2023-01-24 04:29:39.919454: step: 466/466, loss: 0.0010197700466960669 2023-01-24 04:29:40.504069: step: 468/466, loss: 0.11220012605190277 2023-01-24 04:29:41.122906: step: 470/466, loss: 0.01524397637695074 2023-01-24 04:29:41.687875: step: 472/466, loss: 0.008090157993137836 2023-01-24 04:29:42.397199: step: 474/466, loss: 0.009427536278963089 2023-01-24 04:29:43.064240: step: 476/466, loss: 0.0007979574147611856 2023-01-24 04:29:43.708223: step: 478/466, loss: 0.010843045078217983 2023-01-24 04:29:44.300539: step: 480/466, loss: 0.13042069971561432 2023-01-24 04:29:44.901802: step: 482/466, loss: 0.002141157165169716 2023-01-24 04:29:45.477415: step: 484/466, loss: 0.005161554552614689 2023-01-24 04:29:46.127422: step: 486/466, loss: 0.0012545384233817458 2023-01-24 04:29:46.765150: step: 488/466, loss: 0.005698261316865683 2023-01-24 04:29:47.390797: step: 490/466, loss: 0.005855304189026356 2023-01-24 04:29:47.967461: step: 492/466, loss: 0.0001480466453358531 2023-01-24 04:29:48.600251: step: 494/466, loss: 0.011437050998210907 2023-01-24 04:29:49.154768: step: 496/466, loss: 0.06595665216445923 2023-01-24 04:29:49.720874: step: 498/466, loss: 0.0012464667670428753 2023-01-24 04:29:50.284028: step: 500/466, loss: 0.0026049893349409103 2023-01-24 04:29:50.923953: step: 502/466, loss: 0.010979488492012024 2023-01-24 04:29:51.569379: step: 504/466, loss: 0.40448689460754395 2023-01-24 04:29:52.202751: step: 506/466, loss: 0.002377287019044161 2023-01-24 04:29:52.849884: step: 508/466, loss: 0.034350961446762085 2023-01-24 04:29:53.463033: step: 510/466, loss: 3.383839066373184e-05 2023-01-24 04:29:54.048098: step: 512/466, loss: 0.03541983291506767 2023-01-24 04:29:54.704748: step: 514/466, loss: 0.0029128689784556627 2023-01-24 04:29:55.314992: step: 516/466, loss: 0.0017191277584061027 2023-01-24 04:29:55.961792: step: 518/466, loss: 0.004536142572760582 2023-01-24 04:29:56.566362: step: 520/466, loss: 0.022383177652955055 2023-01-24 04:29:57.126756: step: 522/466, loss: 0.0029085720889270306 2023-01-24 04:29:57.758282: step: 524/466, loss: 0.11277299374341965 2023-01-24 04:29:58.325997: step: 526/466, loss: 0.00013020861661061645 2023-01-24 04:29:58.914416: step: 528/466, loss: 0.0013358322903513908 2023-01-24 04:29:59.523740: step: 530/466, loss: 0.006719015073031187 2023-01-24 04:30:00.151441: step: 532/466, loss: 0.02594134397804737 2023-01-24 04:30:00.752614: step: 534/466, loss: 0.0010921203065663576 2023-01-24 04:30:01.417029: step: 536/466, loss: 0.1330503523349762 2023-01-24 04:30:02.030012: step: 538/466, loss: 0.061880066990852356 2023-01-24 04:30:02.671274: step: 540/466, loss: 0.0036926132161170244 2023-01-24 04:30:03.341428: step: 542/466, loss: 0.0010837906738743186 2023-01-24 04:30:03.962128: step: 544/466, loss: 0.009028531610965729 2023-01-24 04:30:04.643307: step: 546/466, loss: 0.0003460723673924804 2023-01-24 04:30:05.267408: step: 548/466, loss: 0.00035746858338825405 2023-01-24 04:30:05.883081: step: 550/466, loss: 0.20476196706295013 2023-01-24 04:30:06.433648: step: 552/466, loss: 0.008912076242268085 2023-01-24 04:30:07.041500: step: 554/466, loss: 0.005757046397775412 2023-01-24 04:30:07.627802: step: 556/466, loss: 0.020501768216490746 2023-01-24 04:30:08.360041: step: 558/466, loss: 0.10718557238578796 2023-01-24 04:30:09.010001: step: 560/466, loss: 0.004823639523237944 2023-01-24 04:30:09.648406: step: 562/466, loss: 0.04855097457766533 2023-01-24 04:30:10.274663: step: 564/466, loss: 0.04375689476728439 2023-01-24 04:30:10.862172: step: 566/466, loss: 0.00026579920086078346 2023-01-24 04:30:11.510956: step: 568/466, loss: 0.00195736694149673 2023-01-24 04:30:12.200130: step: 570/466, loss: 0.011081330478191376 2023-01-24 04:30:12.948554: step: 572/466, loss: 0.06517162173986435 2023-01-24 04:30:13.580908: step: 574/466, loss: 0.013517320156097412 2023-01-24 04:30:14.166318: step: 576/466, loss: 0.018146933987736702 2023-01-24 04:30:14.752348: step: 578/466, loss: 0.004572906531393528 2023-01-24 04:30:15.410572: step: 580/466, loss: 0.0022504201624542475 2023-01-24 04:30:16.002639: step: 582/466, loss: 0.0004263647715561092 2023-01-24 04:30:16.645649: step: 584/466, loss: 0.005549941677600145 2023-01-24 04:30:17.271866: step: 586/466, loss: 0.010425898246467113 2023-01-24 04:30:17.888648: step: 588/466, loss: 0.014032966457307339 2023-01-24 04:30:18.640125: step: 590/466, loss: 0.7224438786506653 2023-01-24 04:30:19.213143: step: 592/466, loss: 0.009805294685065746 2023-01-24 04:30:19.805281: step: 594/466, loss: 0.004754491616040468 2023-01-24 04:30:20.374973: step: 596/466, loss: 0.00016097823390737176 2023-01-24 04:30:20.937828: step: 598/466, loss: 0.010585645213723183 2023-01-24 04:30:21.558495: step: 600/466, loss: 0.018932558596134186 2023-01-24 04:30:22.148393: step: 602/466, loss: 0.016921402886509895 2023-01-24 04:30:22.766986: step: 604/466, loss: 0.06691353023052216 2023-01-24 04:30:23.348091: step: 606/466, loss: 0.026497000828385353 2023-01-24 04:30:23.898829: step: 608/466, loss: 0.056519560515880585 2023-01-24 04:30:24.531867: step: 610/466, loss: 0.0020213983952999115 2023-01-24 04:30:25.127603: step: 612/466, loss: 0.021570589393377304 2023-01-24 04:30:25.718794: step: 614/466, loss: 0.04084527865052223 2023-01-24 04:30:26.382627: step: 616/466, loss: 0.05565088242292404 2023-01-24 04:30:27.009489: step: 618/466, loss: 0.0020173382945358753 2023-01-24 04:30:27.599067: step: 620/466, loss: 0.029354479163885117 2023-01-24 04:30:28.271470: step: 622/466, loss: 0.005398329347372055 2023-01-24 04:30:28.893628: step: 624/466, loss: 0.09820173680782318 2023-01-24 04:30:29.490332: step: 626/466, loss: 0.03491608053445816 2023-01-24 04:30:30.115356: step: 628/466, loss: 0.054343245923519135 2023-01-24 04:30:30.738621: step: 630/466, loss: 0.00017145861056633294 2023-01-24 04:30:31.359126: step: 632/466, loss: 0.012736142612993717 2023-01-24 04:30:32.103773: step: 634/466, loss: 0.13187287747859955 2023-01-24 04:30:32.668007: step: 636/466, loss: 0.0011665790807455778 2023-01-24 04:30:33.329077: step: 638/466, loss: 0.004454441834241152 2023-01-24 04:30:33.911990: step: 640/466, loss: 0.0033450776245445013 2023-01-24 04:30:34.690228: step: 642/466, loss: 0.014408610761165619 2023-01-24 04:30:35.329644: step: 644/466, loss: 0.0001343740150332451 2023-01-24 04:30:36.001808: step: 646/466, loss: 0.018189314752817154 2023-01-24 04:30:36.599905: step: 648/466, loss: 0.0004681172431446612 2023-01-24 04:30:37.216203: step: 650/466, loss: 0.016073819249868393 2023-01-24 04:30:37.845307: step: 652/466, loss: 0.0035687494091689587 2023-01-24 04:30:38.455068: step: 654/466, loss: 0.09453099966049194 2023-01-24 04:30:39.194078: step: 656/466, loss: 0.003925571218132973 2023-01-24 04:30:39.843045: step: 658/466, loss: 0.03393247351050377 2023-01-24 04:30:40.534030: step: 660/466, loss: 0.07539302855730057 2023-01-24 04:30:41.165669: step: 662/466, loss: 0.015005357563495636 2023-01-24 04:30:41.796405: step: 664/466, loss: 0.013056541793048382 2023-01-24 04:30:42.497751: step: 666/466, loss: 0.11358191072940826 2023-01-24 04:30:43.098418: step: 668/466, loss: 0.4263495206832886 2023-01-24 04:30:43.732228: step: 670/466, loss: 0.012658163905143738 2023-01-24 04:30:44.306390: step: 672/466, loss: 0.006221231073141098 2023-01-24 04:30:45.022311: step: 674/466, loss: 0.044858161360025406 2023-01-24 04:30:45.626180: step: 676/466, loss: 0.0036089112982153893 2023-01-24 04:30:46.261705: step: 678/466, loss: 0.0018083082977682352 2023-01-24 04:30:47.558520: step: 680/466, loss: 0.0073106917552649975 2023-01-24 04:30:48.209468: step: 682/466, loss: 0.4257279634475708 2023-01-24 04:30:48.847044: step: 684/466, loss: 0.01332678273320198 2023-01-24 04:30:49.421621: step: 686/466, loss: 0.029636632651090622 2023-01-24 04:30:50.010280: step: 688/466, loss: 0.3315723240375519 2023-01-24 04:30:50.580649: step: 690/466, loss: 0.001709525240585208 2023-01-24 04:30:51.177970: step: 692/466, loss: 0.0014177437406033278 2023-01-24 04:30:51.795097: step: 694/466, loss: 0.0018577645532786846 2023-01-24 04:30:52.388332: step: 696/466, loss: 0.0016431428957730532 2023-01-24 04:30:53.026210: step: 698/466, loss: 0.0236639566719532 2023-01-24 04:30:53.621552: step: 700/466, loss: 0.015035852789878845 2023-01-24 04:30:54.250358: step: 702/466, loss: 0.06029500067234039 2023-01-24 04:30:54.880691: step: 704/466, loss: 0.06643743067979813 2023-01-24 04:30:55.501072: step: 706/466, loss: 0.02140481024980545 2023-01-24 04:30:56.105740: step: 708/466, loss: 0.015438289381563663 2023-01-24 04:30:56.698081: step: 710/466, loss: 0.04029211401939392 2023-01-24 04:30:57.362284: step: 712/466, loss: 0.00407075323164463 2023-01-24 04:30:57.971802: step: 714/466, loss: 0.00291248201392591 2023-01-24 04:30:58.674275: step: 716/466, loss: 0.0033066130708903074 2023-01-24 04:30:59.252196: step: 718/466, loss: 3.5524841223377734e-05 2023-01-24 04:30:59.892353: step: 720/466, loss: 0.0008308955584652722 2023-01-24 04:31:00.455855: step: 722/466, loss: 0.011513054370880127 2023-01-24 04:31:01.114372: step: 724/466, loss: 0.0005869634333066642 2023-01-24 04:31:01.694644: step: 726/466, loss: 0.0005612410604953766 2023-01-24 04:31:02.370391: step: 728/466, loss: 0.47174203395843506 2023-01-24 04:31:03.022203: step: 730/466, loss: 0.03442941606044769 2023-01-24 04:31:03.651759: step: 732/466, loss: 0.008386259898543358 2023-01-24 04:31:04.290859: step: 734/466, loss: 0.0007885558879934251 2023-01-24 04:31:04.818280: step: 736/466, loss: 0.004477986600250006 2023-01-24 04:31:05.435028: step: 738/466, loss: 0.03391421213746071 2023-01-24 04:31:06.110613: step: 740/466, loss: 0.043160002678632736 2023-01-24 04:31:06.713393: step: 742/466, loss: 1.754863977432251 2023-01-24 04:31:07.293599: step: 744/466, loss: 0.009212441742420197 2023-01-24 04:31:07.907601: step: 746/466, loss: 0.03072165511548519 2023-01-24 04:31:08.466573: step: 748/466, loss: 0.018118523061275482 2023-01-24 04:31:09.083908: step: 750/466, loss: 0.02915901690721512 2023-01-24 04:31:09.633769: step: 752/466, loss: 0.016097718849778175 2023-01-24 04:31:10.236744: step: 754/466, loss: 0.000931809947360307 2023-01-24 04:31:10.874354: step: 756/466, loss: 0.013095092959702015 2023-01-24 04:31:11.502468: step: 758/466, loss: 0.0061671980656683445 2023-01-24 04:31:12.165646: step: 760/466, loss: 0.09173333644866943 2023-01-24 04:31:12.727871: step: 762/466, loss: 0.001967896241694689 2023-01-24 04:31:13.400190: step: 764/466, loss: 0.04345665127038956 2023-01-24 04:31:13.998405: step: 766/466, loss: 0.03235586732625961 2023-01-24 04:31:14.673365: step: 768/466, loss: 0.016252994537353516 2023-01-24 04:31:15.193569: step: 770/466, loss: 0.005299590528011322 2023-01-24 04:31:15.784462: step: 772/466, loss: 0.013432987034320831 2023-01-24 04:31:16.344157: step: 774/466, loss: 0.009076020680367947 2023-01-24 04:31:16.930703: step: 776/466, loss: 0.005608452018350363 2023-01-24 04:31:17.572931: step: 778/466, loss: 0.009004445746541023 2023-01-24 04:31:18.178680: step: 780/466, loss: 0.04556947201490402 2023-01-24 04:31:18.775287: step: 782/466, loss: 0.004479180555790663 2023-01-24 04:31:19.375691: step: 784/466, loss: 2.405329942703247 2023-01-24 04:31:19.978757: step: 786/466, loss: 0.0009086414938792586 2023-01-24 04:31:20.583917: step: 788/466, loss: 0.0023360440973192453 2023-01-24 04:31:21.204957: step: 790/466, loss: 0.0355655737221241 2023-01-24 04:31:21.831902: step: 792/466, loss: 0.19567205011844635 2023-01-24 04:31:22.481681: step: 794/466, loss: 0.02821464277803898 2023-01-24 04:31:23.082225: step: 796/466, loss: 0.09833505749702454 2023-01-24 04:31:23.713808: step: 798/466, loss: 0.008969747461378574 2023-01-24 04:31:24.338045: step: 800/466, loss: 0.03653215989470482 2023-01-24 04:31:24.902857: step: 802/466, loss: 0.008475563488900661 2023-01-24 04:31:25.524951: step: 804/466, loss: 0.019438931718468666 2023-01-24 04:31:26.165769: step: 806/466, loss: 0.003730077063664794 2023-01-24 04:31:26.782669: step: 808/466, loss: 0.00568796182051301 2023-01-24 04:31:27.418642: step: 810/466, loss: 0.017519226297736168 2023-01-24 04:31:28.166413: step: 812/466, loss: 0.09852321445941925 2023-01-24 04:31:28.773083: step: 814/466, loss: 0.0025821272283792496 2023-01-24 04:31:29.388273: step: 816/466, loss: 0.006342741195112467 2023-01-24 04:31:29.991828: step: 818/466, loss: 0.003560805693268776 2023-01-24 04:31:30.600363: step: 820/466, loss: 0.004143399652093649 2023-01-24 04:31:31.140612: step: 822/466, loss: 0.0027600382454693317 2023-01-24 04:31:31.712640: step: 824/466, loss: 0.41122865676879883 2023-01-24 04:31:32.350714: step: 826/466, loss: 0.03294564038515091 2023-01-24 04:31:32.918591: step: 828/466, loss: 0.030116839334368706 2023-01-24 04:31:33.560695: step: 830/466, loss: 0.44258734583854675 2023-01-24 04:31:34.209043: step: 832/466, loss: 0.02711624652147293 2023-01-24 04:31:34.821143: step: 834/466, loss: 0.04400736093521118 2023-01-24 04:31:35.436340: step: 836/466, loss: 0.005828135181218386 2023-01-24 04:31:36.043041: step: 838/466, loss: 0.037157755345106125 2023-01-24 04:31:36.613599: step: 840/466, loss: 0.3639180064201355 2023-01-24 04:31:37.175955: step: 842/466, loss: 0.00658583827316761 2023-01-24 04:31:37.786961: step: 844/466, loss: 0.005545522086322308 2023-01-24 04:31:38.309969: step: 846/466, loss: 0.004206244368106127 2023-01-24 04:31:38.930534: step: 848/466, loss: 0.007470234762877226 2023-01-24 04:31:39.564456: step: 850/466, loss: 0.0072822291404008865 2023-01-24 04:31:40.191689: step: 852/466, loss: 0.04417445510625839 2023-01-24 04:31:40.764048: step: 854/466, loss: 0.032138094305992126 2023-01-24 04:31:41.368622: step: 856/466, loss: 0.00297050760127604 2023-01-24 04:31:41.995355: step: 858/466, loss: 0.0018105015624314547 2023-01-24 04:31:42.552518: step: 860/466, loss: 0.012027028016746044 2023-01-24 04:31:43.166546: step: 862/466, loss: 0.002343433443456888 2023-01-24 04:31:43.762631: step: 864/466, loss: 0.03817830607295036 2023-01-24 04:31:44.358351: step: 866/466, loss: 0.01945595256984234 2023-01-24 04:31:44.980212: step: 868/466, loss: 0.00023947417503222823 2023-01-24 04:31:45.590110: step: 870/466, loss: 0.009762587957084179 2023-01-24 04:31:46.191399: step: 872/466, loss: 0.019771510735154152 2023-01-24 04:31:46.768021: step: 874/466, loss: 0.027143366634845734 2023-01-24 04:31:47.374957: step: 876/466, loss: 0.01708270236849785 2023-01-24 04:31:48.124260: step: 878/466, loss: 0.014565062709152699 2023-01-24 04:31:48.739962: step: 880/466, loss: 4.5406017306959257e-05 2023-01-24 04:31:49.366912: step: 882/466, loss: 0.03155679255723953 2023-01-24 04:31:50.009738: step: 884/466, loss: 0.03435644507408142 2023-01-24 04:31:50.633217: step: 886/466, loss: 0.0007988469442352653 2023-01-24 04:31:51.265483: step: 888/466, loss: 0.05017571896314621 2023-01-24 04:31:51.894394: step: 890/466, loss: 0.1087830513715744 2023-01-24 04:31:52.494862: step: 892/466, loss: 0.013522031717002392 2023-01-24 04:31:53.058561: step: 894/466, loss: 0.000354521325789392 2023-01-24 04:31:53.711960: step: 896/466, loss: 0.003120807697996497 2023-01-24 04:31:54.339983: step: 898/466, loss: 0.00011694846034515649 2023-01-24 04:31:54.891449: step: 900/466, loss: 0.00047570542665198445 2023-01-24 04:31:55.550495: step: 902/466, loss: 0.0005460705142468214 2023-01-24 04:31:56.213582: step: 904/466, loss: 0.003993664868175983 2023-01-24 04:31:56.814021: step: 906/466, loss: 0.0007893638103269041 2023-01-24 04:31:57.427443: step: 908/466, loss: 0.00033020504633896053 2023-01-24 04:31:57.993692: step: 910/466, loss: 0.003642172319814563 2023-01-24 04:31:58.622321: step: 912/466, loss: 0.12140356004238129 2023-01-24 04:31:59.280925: step: 914/466, loss: 1.5962437391281128 2023-01-24 04:31:59.918928: step: 916/466, loss: 0.020260266959667206 2023-01-24 04:32:00.623705: step: 918/466, loss: 0.08730872720479965 2023-01-24 04:32:01.248042: step: 920/466, loss: 0.0024628147948533297 2023-01-24 04:32:01.797614: step: 922/466, loss: 0.01657303422689438 2023-01-24 04:32:02.431179: step: 924/466, loss: 0.2514936923980713 2023-01-24 04:32:03.038111: step: 926/466, loss: 0.03157556802034378 2023-01-24 04:32:03.649352: step: 928/466, loss: 0.00018364607240073383 2023-01-24 04:32:04.352585: step: 930/466, loss: 0.14305895566940308 2023-01-24 04:32:05.003377: step: 932/466, loss: 0.002802376402541995 ================================================== Loss: 0.061 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3509111701845246, 'r': 0.320947218271235, 'f1': 0.33526101888789067}, 'combined': 0.2470344349700247, 'epoch': 33} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34966413882330644, 'r': 0.28029884527855675, 'f1': 0.31116258199591534}, 'combined': 0.19484012143669469, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32899624025297114, 'r': 0.32525055250815554, 'f1': 0.3271126739919809}, 'combined': 0.24103039136251223, 'epoch': 33} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.35248924470757215, 'r': 0.28599984746580087, 'f1': 0.315782591920386}, 'combined': 0.19569625414784483, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3026522299212688, 'r': 0.3147123757056078, 'f1': 0.30856450604066105}, 'combined': 0.22736332024048708, 'epoch': 33} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3546308024226751, 'r': 0.28991949068881745, 'f1': 0.31902671589713805}, 'combined': 0.21163158381295297, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39814814814814814, 'r': 0.30714285714285716, 'f1': 0.3467741935483871}, 'combined': 0.23118279569892475, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.39285714285714285, 'r': 0.358695652173913, 'f1': 0.375}, 'combined': 0.1875, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28846153846153844, 'r': 0.12931034482758622, 'f1': 0.1785714285714286}, 'combined': 0.11904761904761907, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32324861099239804, 'r': 0.3201817361252975, 'f1': 0.3217078645148366}, 'combined': 0.23704790016882693, 'epoch': 31} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36854991917508884, 'r': 0.2943742585468896, 'f1': 0.32731227141992336}, 'combined': 0.2171279424270779, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 31} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:34:38.205505: step: 2/466, loss: 0.00044776161666959524 2023-01-24 04:34:38.839853: step: 4/466, loss: 0.06525201350450516 2023-01-24 04:34:39.449677: step: 6/466, loss: 0.05276694893836975 2023-01-24 04:34:40.031404: step: 8/466, loss: 0.01285181287676096 2023-01-24 04:34:40.588575: step: 10/466, loss: 0.0017702908953651786 2023-01-24 04:34:41.184691: step: 12/466, loss: 0.012179172597825527 2023-01-24 04:34:41.830904: step: 14/466, loss: 0.009365064091980457 2023-01-24 04:34:42.626058: step: 16/466, loss: 0.017210684716701508 2023-01-24 04:34:43.246931: step: 18/466, loss: 0.010137687437236309 2023-01-24 04:34:43.884865: step: 20/466, loss: 0.038191381841897964 2023-01-24 04:34:44.487207: step: 22/466, loss: 0.0006398653495125473 2023-01-24 04:34:45.136882: step: 24/466, loss: 0.010410054586827755 2023-01-24 04:34:45.777163: step: 26/466, loss: 0.023390959948301315 2023-01-24 04:34:46.442328: step: 28/466, loss: 0.00010038419713964686 2023-01-24 04:34:47.114375: step: 30/466, loss: 0.038161225616931915 2023-01-24 04:34:47.739447: step: 32/466, loss: 0.009207690134644508 2023-01-24 04:34:48.362046: step: 34/466, loss: 0.014065796509385109 2023-01-24 04:34:48.960757: step: 36/466, loss: 0.007660723756998777 2023-01-24 04:34:49.624440: step: 38/466, loss: 0.11670602858066559 2023-01-24 04:34:50.220854: step: 40/466, loss: 0.022145750001072884 2023-01-24 04:34:50.810213: step: 42/466, loss: 0.01542850024998188 2023-01-24 04:34:51.403592: step: 44/466, loss: 0.0018281365046277642 2023-01-24 04:34:52.024193: step: 46/466, loss: 0.033811137080192566 2023-01-24 04:34:52.598888: step: 48/466, loss: 0.0026900717057287693 2023-01-24 04:34:53.205295: step: 50/466, loss: 0.01676148548722267 2023-01-24 04:34:53.770076: step: 52/466, loss: 0.009092275984585285 2023-01-24 04:34:54.404783: step: 54/466, loss: 0.0014002284733578563 2023-01-24 04:34:55.062297: step: 56/466, loss: 0.0008105834713205695 2023-01-24 04:34:55.682979: step: 58/466, loss: 0.015190726146101952 2023-01-24 04:34:56.258567: step: 60/466, loss: 0.0076024350710213184 2023-01-24 04:34:56.994461: step: 62/466, loss: 0.017200469970703125 2023-01-24 04:34:57.559152: step: 64/466, loss: 0.003627775004133582 2023-01-24 04:34:58.168244: step: 66/466, loss: 0.0030399509705603123 2023-01-24 04:34:58.826606: step: 68/466, loss: 0.013998578302562237 2023-01-24 04:34:59.379625: step: 70/466, loss: 0.006730527617037296 2023-01-24 04:34:59.993425: step: 72/466, loss: 0.00045602815225720406 2023-01-24 04:35:00.616994: step: 74/466, loss: 0.01984693855047226 2023-01-24 04:35:01.221345: step: 76/466, loss: 0.008410582318902016 2023-01-24 04:35:01.859202: step: 78/466, loss: 0.004264121875166893 2023-01-24 04:35:02.469390: step: 80/466, loss: 0.011182108893990517 2023-01-24 04:35:03.071290: step: 82/466, loss: 0.0007857290329411626 2023-01-24 04:35:03.651840: step: 84/466, loss: 0.14946375787258148 2023-01-24 04:35:04.357481: step: 86/466, loss: 0.1568223088979721 2023-01-24 04:35:04.960985: step: 88/466, loss: 0.005522684194147587 2023-01-24 04:35:05.593410: step: 90/466, loss: 0.2518271207809448 2023-01-24 04:35:06.308395: step: 92/466, loss: 0.0041334908455610275 2023-01-24 04:35:06.874016: step: 94/466, loss: 0.04081486538052559 2023-01-24 04:35:07.481068: step: 96/466, loss: 0.015616985969245434 2023-01-24 04:35:08.106463: step: 98/466, loss: 0.008200753480196 2023-01-24 04:35:08.675788: step: 100/466, loss: 0.00557843828573823 2023-01-24 04:35:09.434353: step: 102/466, loss: 0.022209985181689262 2023-01-24 04:35:10.035281: step: 104/466, loss: 0.0023639919236302376 2023-01-24 04:35:10.637963: step: 106/466, loss: 0.11503303796052933 2023-01-24 04:35:11.174263: step: 108/466, loss: 0.0006290609599091113 2023-01-24 04:35:11.825654: step: 110/466, loss: 0.0006383599247783422 2023-01-24 04:35:12.464508: step: 112/466, loss: 0.008348631672561169 2023-01-24 04:35:13.065008: step: 114/466, loss: 0.004403615370392799 2023-01-24 04:35:13.622500: step: 116/466, loss: 0.008891992270946503 2023-01-24 04:35:14.300277: step: 118/466, loss: 0.010444930754601955 2023-01-24 04:35:14.883371: step: 120/466, loss: 0.002010609256103635 2023-01-24 04:35:15.498289: step: 122/466, loss: 0.0006103226915001869 2023-01-24 04:35:16.059758: step: 124/466, loss: 0.003745818277820945 2023-01-24 04:35:16.632580: step: 126/466, loss: 0.014827505685389042 2023-01-24 04:35:17.254990: step: 128/466, loss: 0.013629971072077751 2023-01-24 04:35:17.796178: step: 130/466, loss: 0.0016906384844332933 2023-01-24 04:35:18.413116: step: 132/466, loss: 0.006619714200496674 2023-01-24 04:35:19.075348: step: 134/466, loss: 0.008903798647224903 2023-01-24 04:35:19.674769: step: 136/466, loss: 0.007137033622711897 2023-01-24 04:35:20.250071: step: 138/466, loss: 0.07491079717874527 2023-01-24 04:35:20.897196: step: 140/466, loss: 0.0043428814969956875 2023-01-24 04:35:21.528494: step: 142/466, loss: 0.0007345814374275506 2023-01-24 04:35:22.150536: step: 144/466, loss: 0.019647398963570595 2023-01-24 04:35:22.822573: step: 146/466, loss: 0.023039327934384346 2023-01-24 04:35:23.473740: step: 148/466, loss: 0.004542264621704817 2023-01-24 04:35:24.105095: step: 150/466, loss: 0.01884089969098568 2023-01-24 04:35:24.697167: step: 152/466, loss: 0.001451627118512988 2023-01-24 04:35:25.290736: step: 154/466, loss: 0.007424298208206892 2023-01-24 04:35:25.919426: step: 156/466, loss: 0.0035362206399440765 2023-01-24 04:35:26.525166: step: 158/466, loss: 0.00559675507247448 2023-01-24 04:35:27.148425: step: 160/466, loss: 0.0046802363358438015 2023-01-24 04:35:27.790713: step: 162/466, loss: 0.07901641726493835 2023-01-24 04:35:28.406975: step: 164/466, loss: 0.005561268888413906 2023-01-24 04:35:29.066707: step: 166/466, loss: 0.008733381517231464 2023-01-24 04:35:29.694887: step: 168/466, loss: 0.008319716900587082 2023-01-24 04:35:30.330982: step: 170/466, loss: 0.024022787809371948 2023-01-24 04:35:31.002180: step: 172/466, loss: 0.008073654025793076 2023-01-24 04:35:31.604537: step: 174/466, loss: 3.298505180282518e-05 2023-01-24 04:35:32.227388: step: 176/466, loss: 0.016322093084454536 2023-01-24 04:35:32.861292: step: 178/466, loss: 0.6524455547332764 2023-01-24 04:35:33.446624: step: 180/466, loss: 0.003264949657022953 2023-01-24 04:35:34.079987: step: 182/466, loss: 0.00570281594991684 2023-01-24 04:35:34.655857: step: 184/466, loss: 0.04283526912331581 2023-01-24 04:35:35.254029: step: 186/466, loss: 0.00351996929384768 2023-01-24 04:35:35.786831: step: 188/466, loss: 8.80212610354647e-05 2023-01-24 04:35:36.390799: step: 190/466, loss: 0.03641581907868385 2023-01-24 04:35:37.044191: step: 192/466, loss: 0.002388633554801345 2023-01-24 04:35:37.676913: step: 194/466, loss: 0.03432387113571167 2023-01-24 04:35:38.327303: step: 196/466, loss: 0.004611233714967966 2023-01-24 04:35:38.987164: step: 198/466, loss: 0.03649257868528366 2023-01-24 04:35:39.541351: step: 200/466, loss: 0.0010074052261188626 2023-01-24 04:35:40.221757: step: 202/466, loss: 0.0019159098155796528 2023-01-24 04:35:40.809098: step: 204/466, loss: 0.0011125894961878657 2023-01-24 04:35:41.421900: step: 206/466, loss: 0.019255148246884346 2023-01-24 04:35:42.075945: step: 208/466, loss: 0.00870990939438343 2023-01-24 04:35:42.644354: step: 210/466, loss: 0.0029466452542692423 2023-01-24 04:35:43.200638: step: 212/466, loss: 0.01073588989675045 2023-01-24 04:35:43.870613: step: 214/466, loss: 0.0026622465811669827 2023-01-24 04:35:44.480359: step: 216/466, loss: 0.009074680507183075 2023-01-24 04:35:45.138268: step: 218/466, loss: 0.010591430589556694 2023-01-24 04:35:45.715305: step: 220/466, loss: 0.0010342072928324342 2023-01-24 04:35:46.299615: step: 222/466, loss: 0.001506170374341309 2023-01-24 04:35:46.890609: step: 224/466, loss: 0.07824140787124634 2023-01-24 04:35:47.495404: step: 226/466, loss: 0.10094006359577179 2023-01-24 04:35:48.041723: step: 228/466, loss: 0.00027557910652831197 2023-01-24 04:35:48.666245: step: 230/466, loss: 0.002696025650948286 2023-01-24 04:35:49.271534: step: 232/466, loss: 0.0011498354142531753 2023-01-24 04:35:49.848002: step: 234/466, loss: 0.007175063714385033 2023-01-24 04:35:50.466538: step: 236/466, loss: 0.010354211553931236 2023-01-24 04:35:51.090219: step: 238/466, loss: 0.07728850841522217 2023-01-24 04:35:51.715571: step: 240/466, loss: 0.00037491199327632785 2023-01-24 04:35:52.351590: step: 242/466, loss: 0.012333719991147518 2023-01-24 04:35:52.932282: step: 244/466, loss: 0.038081903010606766 2023-01-24 04:35:53.522563: step: 246/466, loss: 8.588692435296252e-05 2023-01-24 04:35:54.083750: step: 248/466, loss: 0.030551902949810028 2023-01-24 04:35:54.670952: step: 250/466, loss: 0.008057349361479282 2023-01-24 04:35:55.274088: step: 252/466, loss: 0.006906131748110056 2023-01-24 04:35:55.911627: step: 254/466, loss: 0.011321410536766052 2023-01-24 04:35:56.536356: step: 256/466, loss: 0.0034377777483314276 2023-01-24 04:35:57.221647: step: 258/466, loss: 9.264093387173489e-05 2023-01-24 04:35:57.850052: step: 260/466, loss: 0.016924438998103142 2023-01-24 04:35:58.463026: step: 262/466, loss: 0.00021807632583659142 2023-01-24 04:35:59.071132: step: 264/466, loss: 0.01791452243924141 2023-01-24 04:35:59.704918: step: 266/466, loss: 0.01496839802712202 2023-01-24 04:36:00.367215: step: 268/466, loss: 0.07914392650127411 2023-01-24 04:36:00.992066: step: 270/466, loss: 0.020682817324995995 2023-01-24 04:36:01.576613: step: 272/466, loss: 0.014893428422510624 2023-01-24 04:36:02.127464: step: 274/466, loss: 0.004114802926778793 2023-01-24 04:36:02.712190: step: 276/466, loss: 0.025077002122998238 2023-01-24 04:36:03.375527: step: 278/466, loss: 0.03476715832948685 2023-01-24 04:36:03.928994: step: 280/466, loss: 0.04227663204073906 2023-01-24 04:36:04.520918: step: 282/466, loss: 3.218621714040637e-06 2023-01-24 04:36:05.112627: step: 284/466, loss: 0.001750446972437203 2023-01-24 04:36:05.772032: step: 286/466, loss: 0.0005400181398727 2023-01-24 04:36:06.409271: step: 288/466, loss: 0.001205334672704339 2023-01-24 04:36:07.067004: step: 290/466, loss: 0.7622677683830261 2023-01-24 04:36:07.656933: step: 292/466, loss: 0.0026643334422260523 2023-01-24 04:36:08.254402: step: 294/466, loss: 0.00021289987489581108 2023-01-24 04:36:08.892494: step: 296/466, loss: 0.017237041145563126 2023-01-24 04:36:09.537822: step: 298/466, loss: 0.002139111515134573 2023-01-24 04:36:10.189453: step: 300/466, loss: 0.0018975037382915616 2023-01-24 04:36:10.836708: step: 302/466, loss: 0.0015305919805541635 2023-01-24 04:36:11.552982: step: 304/466, loss: 0.019873235374689102 2023-01-24 04:36:12.290294: step: 306/466, loss: 0.007630509324371815 2023-01-24 04:36:12.902375: step: 308/466, loss: 0.00576387532055378 2023-01-24 04:36:13.476306: step: 310/466, loss: 0.015104739926755428 2023-01-24 04:36:14.112599: step: 312/466, loss: 0.015948601067066193 2023-01-24 04:36:14.765229: step: 314/466, loss: 0.03411102294921875 2023-01-24 04:36:15.372667: step: 316/466, loss: 0.005596490111202002 2023-01-24 04:36:15.972103: step: 318/466, loss: 2.726545608311426e-05 2023-01-24 04:36:16.590753: step: 320/466, loss: 0.011570059694349766 2023-01-24 04:36:17.299930: step: 322/466, loss: 0.0064280214719474316 2023-01-24 04:36:17.862597: step: 324/466, loss: 0.002828769152984023 2023-01-24 04:36:18.446851: step: 326/466, loss: 0.001282182289287448 2023-01-24 04:36:19.124511: step: 328/466, loss: 0.03785112127661705 2023-01-24 04:36:19.773436: step: 330/466, loss: 0.0014699080493301153 2023-01-24 04:36:20.388517: step: 332/466, loss: 0.3079739809036255 2023-01-24 04:36:20.992737: step: 334/466, loss: 0.005796042736619711 2023-01-24 04:36:21.669397: step: 336/466, loss: 0.001293532201088965 2023-01-24 04:36:22.306957: step: 338/466, loss: 0.005367397330701351 2023-01-24 04:36:23.052637: step: 340/466, loss: 0.03847994655370712 2023-01-24 04:36:23.662194: step: 342/466, loss: 0.04027722403407097 2023-01-24 04:36:24.265604: step: 344/466, loss: 0.017038552090525627 2023-01-24 04:36:24.961933: step: 346/466, loss: 0.0006917880964465439 2023-01-24 04:36:25.579174: step: 348/466, loss: 0.00024127642973326147 2023-01-24 04:36:26.264293: step: 350/466, loss: 0.4475272595882416 2023-01-24 04:36:26.856833: step: 352/466, loss: 0.000668540655169636 2023-01-24 04:36:27.531681: step: 354/466, loss: 0.000448493636213243 2023-01-24 04:36:28.089953: step: 356/466, loss: 0.0029617866966873407 2023-01-24 04:36:28.709068: step: 358/466, loss: 0.006570684257894754 2023-01-24 04:36:29.366547: step: 360/466, loss: 0.006790623534470797 2023-01-24 04:36:29.950785: step: 362/466, loss: 0.0037583005614578724 2023-01-24 04:36:30.508808: step: 364/466, loss: 0.00491564953699708 2023-01-24 04:36:31.156145: step: 366/466, loss: 0.04236968606710434 2023-01-24 04:36:31.745578: step: 368/466, loss: 0.005421468988060951 2023-01-24 04:36:32.386852: step: 370/466, loss: 0.03661723434925079 2023-01-24 04:36:33.002045: step: 372/466, loss: 0.003153814934194088 2023-01-24 04:36:33.651762: step: 374/466, loss: 0.010920863598585129 2023-01-24 04:36:34.311394: step: 376/466, loss: 0.06718513369560242 2023-01-24 04:36:34.829091: step: 378/466, loss: 0.011719964444637299 2023-01-24 04:36:35.364589: step: 380/466, loss: 0.0022813091054558754 2023-01-24 04:36:35.991085: step: 382/466, loss: 0.003214395372197032 2023-01-24 04:36:36.606968: step: 384/466, loss: 0.0005004985723644495 2023-01-24 04:36:37.197420: step: 386/466, loss: 0.011618012562394142 2023-01-24 04:36:37.878399: step: 388/466, loss: 0.0026870244182646275 2023-01-24 04:36:38.503339: step: 390/466, loss: 0.0003367691533640027 2023-01-24 04:36:39.083013: step: 392/466, loss: 0.012099171988666058 2023-01-24 04:36:39.683259: step: 394/466, loss: 0.0068061258643865585 2023-01-24 04:36:40.253505: step: 396/466, loss: 0.0007426588563248515 2023-01-24 04:36:40.901973: step: 398/466, loss: 0.03194461017847061 2023-01-24 04:36:41.526688: step: 400/466, loss: 0.06825070083141327 2023-01-24 04:36:42.157809: step: 402/466, loss: 0.0003770602634176612 2023-01-24 04:36:42.819114: step: 404/466, loss: 1.5576121807098389 2023-01-24 04:36:43.473249: step: 406/466, loss: 0.017388686537742615 2023-01-24 04:36:44.109247: step: 408/466, loss: 0.001316699432209134 2023-01-24 04:36:44.735428: step: 410/466, loss: 0.004049208946526051 2023-01-24 04:36:45.344125: step: 412/466, loss: 0.03801628574728966 2023-01-24 04:36:45.940711: step: 414/466, loss: 0.0003594731679186225 2023-01-24 04:36:46.645973: step: 416/466, loss: 0.04546977952122688 2023-01-24 04:36:47.253823: step: 418/466, loss: 0.004481267649680376 2023-01-24 04:36:47.817102: step: 420/466, loss: 9.061475793714635e-06 2023-01-24 04:36:48.487488: step: 422/466, loss: 0.1406223475933075 2023-01-24 04:36:49.039566: step: 424/466, loss: 0.0019254398066550493 2023-01-24 04:36:49.688159: step: 426/466, loss: 0.007295049726963043 2023-01-24 04:36:50.204045: step: 428/466, loss: 0.002065313747152686 2023-01-24 04:36:50.791312: step: 430/466, loss: 0.5076283812522888 2023-01-24 04:36:51.443744: step: 432/466, loss: 0.0019482868956401944 2023-01-24 04:36:52.032617: step: 434/466, loss: 0.0073101273737847805 2023-01-24 04:36:52.664695: step: 436/466, loss: 0.027784865349531174 2023-01-24 04:36:53.279782: step: 438/466, loss: 0.03103061206638813 2023-01-24 04:36:53.866437: step: 440/466, loss: 0.14775855839252472 2023-01-24 04:36:54.424375: step: 442/466, loss: 0.004521707072854042 2023-01-24 04:36:55.049800: step: 444/466, loss: 0.14504075050354004 2023-01-24 04:36:55.687736: step: 446/466, loss: 0.005193950608372688 2023-01-24 04:36:56.329935: step: 448/466, loss: 0.041242219507694244 2023-01-24 04:36:56.956869: step: 450/466, loss: 0.00013268241309560835 2023-01-24 04:36:57.555667: step: 452/466, loss: 3.4981661883648485e-05 2023-01-24 04:36:58.203998: step: 454/466, loss: 0.01182777713984251 2023-01-24 04:36:58.762451: step: 456/466, loss: 0.03321721404790878 2023-01-24 04:36:59.343549: step: 458/466, loss: 0.0015951005043461919 2023-01-24 04:36:59.915146: step: 460/466, loss: 0.02566281519830227 2023-01-24 04:37:00.498416: step: 462/466, loss: 0.12666575610637665 2023-01-24 04:37:01.095375: step: 464/466, loss: 0.000737195776309818 2023-01-24 04:37:01.745277: step: 466/466, loss: 0.0006672508898191154 2023-01-24 04:37:02.341734: step: 468/466, loss: 0.002969736233353615 2023-01-24 04:37:03.003814: step: 470/466, loss: 0.0104988943785429 2023-01-24 04:37:03.624505: step: 472/466, loss: 0.08003918826580048 2023-01-24 04:37:04.324005: step: 474/466, loss: 0.0009593369322828948 2023-01-24 04:37:04.964193: step: 476/466, loss: 0.0014240005984902382 2023-01-24 04:37:05.613317: step: 478/466, loss: 0.02312459796667099 2023-01-24 04:37:06.265317: step: 480/466, loss: 0.3419071137905121 2023-01-24 04:37:06.906161: step: 482/466, loss: 0.07372163236141205 2023-01-24 04:37:07.460254: step: 484/466, loss: 0.0016719779232516885 2023-01-24 04:37:08.172566: step: 486/466, loss: 0.0037703728303313255 2023-01-24 04:37:08.851845: step: 488/466, loss: 0.006523465272039175 2023-01-24 04:37:09.469996: step: 490/466, loss: 0.11450313776731491 2023-01-24 04:37:10.016422: step: 492/466, loss: 0.004408995155245066 2023-01-24 04:37:10.704471: step: 494/466, loss: 0.012378161773085594 2023-01-24 04:37:11.288319: step: 496/466, loss: 0.0012394912773743272 2023-01-24 04:37:11.878587: step: 498/466, loss: 0.025968529284000397 2023-01-24 04:37:12.481885: step: 500/466, loss: 0.0003646584809757769 2023-01-24 04:37:13.074494: step: 502/466, loss: 0.009832086972892284 2023-01-24 04:37:13.675871: step: 504/466, loss: 0.01419033482670784 2023-01-24 04:37:14.207778: step: 506/466, loss: 0.006501009687781334 2023-01-24 04:37:14.847088: step: 508/466, loss: 0.014940740540623665 2023-01-24 04:37:15.491771: step: 510/466, loss: 0.0006022718735039234 2023-01-24 04:37:16.142741: step: 512/466, loss: 0.002012605080381036 2023-01-24 04:37:16.777447: step: 514/466, loss: 0.3332501947879791 2023-01-24 04:37:17.382153: step: 516/466, loss: 0.003292681649327278 2023-01-24 04:37:17.976360: step: 518/466, loss: 0.0005466737202368677 2023-01-24 04:37:18.610279: step: 520/466, loss: 0.015075303614139557 2023-01-24 04:37:19.249480: step: 522/466, loss: 0.053734585642814636 2023-01-24 04:37:19.960965: step: 524/466, loss: 0.24074675142765045 2023-01-24 04:37:20.619719: step: 526/466, loss: 0.0006083545158617198 2023-01-24 04:37:21.233613: step: 528/466, loss: 0.1092829778790474 2023-01-24 04:37:21.806867: step: 530/466, loss: 0.0009274989715777338 2023-01-24 04:37:22.429000: step: 532/466, loss: 0.009749316610395908 2023-01-24 04:37:23.057910: step: 534/466, loss: 0.0010659914696589112 2023-01-24 04:37:23.656725: step: 536/466, loss: 0.02175002545118332 2023-01-24 04:37:24.218236: step: 538/466, loss: 0.09114664047956467 2023-01-24 04:37:24.892778: step: 540/466, loss: 0.00017733403365127742 2023-01-24 04:37:25.487681: step: 542/466, loss: 0.009102762676775455 2023-01-24 04:37:26.158708: step: 544/466, loss: 0.03955816477537155 2023-01-24 04:37:26.813208: step: 546/466, loss: 0.010862162336707115 2023-01-24 04:37:27.443367: step: 548/466, loss: 0.007654275745153427 2023-01-24 04:37:28.048492: step: 550/466, loss: 0.006650466471910477 2023-01-24 04:37:28.686215: step: 552/466, loss: 0.02752058580517769 2023-01-24 04:37:29.329747: step: 554/466, loss: 0.32176074385643005 2023-01-24 04:37:29.949614: step: 556/466, loss: 0.0178492683917284 2023-01-24 04:37:30.530200: step: 558/466, loss: 0.0002495641529094428 2023-01-24 04:37:31.155401: step: 560/466, loss: 0.0211823508143425 2023-01-24 04:37:31.737933: step: 562/466, loss: 0.0006840778514742851 2023-01-24 04:37:32.324130: step: 564/466, loss: 0.07020992785692215 2023-01-24 04:37:32.941488: step: 566/466, loss: 0.0003941252361983061 2023-01-24 04:37:33.587152: step: 568/466, loss: 0.002252056263387203 2023-01-24 04:37:34.167033: step: 570/466, loss: 0.0008177889394573867 2023-01-24 04:37:34.767187: step: 572/466, loss: 0.017822712659835815 2023-01-24 04:37:35.486162: step: 574/466, loss: 0.0007715900428593159 2023-01-24 04:37:36.080428: step: 576/466, loss: 0.0024609831161797047 2023-01-24 04:37:36.694948: step: 578/466, loss: 0.0806882232427597 2023-01-24 04:37:37.259627: step: 580/466, loss: 0.01499209739267826 2023-01-24 04:37:37.849426: step: 582/466, loss: 0.007676340639591217 2023-01-24 04:37:38.483832: step: 584/466, loss: 0.020366515964269638 2023-01-24 04:37:39.210440: step: 586/466, loss: 0.003409266471862793 2023-01-24 04:37:39.836633: step: 588/466, loss: 0.036261629313230515 2023-01-24 04:37:40.461695: step: 590/466, loss: 0.017820268869400024 2023-01-24 04:37:41.060252: step: 592/466, loss: 1.446390569981304e-06 2023-01-24 04:37:41.656552: step: 594/466, loss: 0.01295681856572628 2023-01-24 04:37:42.357783: step: 596/466, loss: 0.011897056363523006 2023-01-24 04:37:42.911043: step: 598/466, loss: 0.11405541002750397 2023-01-24 04:37:43.577304: step: 600/466, loss: 0.0030295539181679487 2023-01-24 04:37:44.280198: step: 602/466, loss: 0.4915716350078583 2023-01-24 04:37:44.872211: step: 604/466, loss: 0.006803831551223993 2023-01-24 04:37:45.483892: step: 606/466, loss: 0.002284247661009431 2023-01-24 04:37:46.100339: step: 608/466, loss: 0.019854342564940453 2023-01-24 04:37:46.654666: step: 610/466, loss: 6.362621206790209e-05 2023-01-24 04:37:47.337910: step: 612/466, loss: 0.002294174861162901 2023-01-24 04:37:47.881111: step: 614/466, loss: 0.0015062671154737473 2023-01-24 04:37:48.509223: step: 616/466, loss: 0.008126691915094852 2023-01-24 04:37:49.149631: step: 618/466, loss: 0.05395924299955368 2023-01-24 04:37:49.739328: step: 620/466, loss: 0.007703367620706558 2023-01-24 04:37:50.323655: step: 622/466, loss: 0.019608866423368454 2023-01-24 04:37:50.929910: step: 624/466, loss: 0.000382233556592837 2023-01-24 04:37:51.540861: step: 626/466, loss: 0.030930420383810997 2023-01-24 04:37:52.211347: step: 628/466, loss: 0.0010697138495743275 2023-01-24 04:37:52.821798: step: 630/466, loss: 0.000281863467535004 2023-01-24 04:37:53.460218: step: 632/466, loss: 0.028093401342630386 2023-01-24 04:37:54.083205: step: 634/466, loss: 0.050495248287916183 2023-01-24 04:37:54.762453: step: 636/466, loss: 0.053377680480480194 2023-01-24 04:37:55.351523: step: 638/466, loss: 0.05928385630249977 2023-01-24 04:37:55.990128: step: 640/466, loss: 0.007710906211286783 2023-01-24 04:37:56.611033: step: 642/466, loss: 0.05826025456190109 2023-01-24 04:37:57.234753: step: 644/466, loss: 0.0020928012672811747 2023-01-24 04:37:57.860681: step: 646/466, loss: 0.005672491621226072 2023-01-24 04:37:58.441667: step: 648/466, loss: 0.002800034824758768 2023-01-24 04:37:59.058212: step: 650/466, loss: 0.01343744620680809 2023-01-24 04:37:59.608047: step: 652/466, loss: 0.010437189601361752 2023-01-24 04:38:00.201722: step: 654/466, loss: 0.03881608322262764 2023-01-24 04:38:00.797469: step: 656/466, loss: 0.021717675030231476 2023-01-24 04:38:01.394473: step: 658/466, loss: 0.003933850675821304 2023-01-24 04:38:02.010346: step: 660/466, loss: 0.002849757904186845 2023-01-24 04:38:02.645324: step: 662/466, loss: 0.0028018089942634106 2023-01-24 04:38:03.206798: step: 664/466, loss: 0.3200233280658722 2023-01-24 04:38:03.813223: step: 666/466, loss: 0.004376672208309174 2023-01-24 04:38:04.470908: step: 668/466, loss: 0.008337419480085373 2023-01-24 04:38:05.079608: step: 670/466, loss: 0.06835175305604935 2023-01-24 04:38:05.653620: step: 672/466, loss: 0.002413801848888397 2023-01-24 04:38:06.298612: step: 674/466, loss: 0.010170252993702888 2023-01-24 04:38:06.934890: step: 676/466, loss: 0.035940125584602356 2023-01-24 04:38:07.564602: step: 678/466, loss: 0.0006002828013151884 2023-01-24 04:38:08.293904: step: 680/466, loss: 0.056261248886585236 2023-01-24 04:38:08.903452: step: 682/466, loss: 0.003328244900330901 2023-01-24 04:38:09.536093: step: 684/466, loss: 0.18556299805641174 2023-01-24 04:38:10.140445: step: 686/466, loss: 0.0006311447941698134 2023-01-24 04:38:10.834384: step: 688/466, loss: 0.10427706688642502 2023-01-24 04:38:11.478806: step: 690/466, loss: 0.010392026044428349 2023-01-24 04:38:12.118690: step: 692/466, loss: 0.03382410481572151 2023-01-24 04:38:12.692109: step: 694/466, loss: 4.42578129877802e-05 2023-01-24 04:38:13.273766: step: 696/466, loss: 0.0063894083723425865 2023-01-24 04:38:13.947695: step: 698/466, loss: 0.008691796101629734 2023-01-24 04:38:14.554809: step: 700/466, loss: 0.024152379482984543 2023-01-24 04:38:15.203579: step: 702/466, loss: 0.0006300556706264615 2023-01-24 04:38:15.798907: step: 704/466, loss: 0.012121733278036118 2023-01-24 04:38:16.438644: step: 706/466, loss: 0.03641531616449356 2023-01-24 04:38:16.975352: step: 708/466, loss: 0.002240638481453061 2023-01-24 04:38:17.681704: step: 710/466, loss: 0.0114654041826725 2023-01-24 04:38:18.299201: step: 712/466, loss: 0.000776631582994014 2023-01-24 04:38:18.840296: step: 714/466, loss: 0.002134573645889759 2023-01-24 04:38:19.476508: step: 716/466, loss: 0.06929346174001694 2023-01-24 04:38:20.314208: step: 718/466, loss: 0.1391819715499878 2023-01-24 04:38:20.917744: step: 720/466, loss: 0.003451647236943245 2023-01-24 04:38:21.544580: step: 722/466, loss: 0.0002969567140098661 2023-01-24 04:38:22.176625: step: 724/466, loss: 0.019914161413908005 2023-01-24 04:38:22.764996: step: 726/466, loss: 0.007525104563683271 2023-01-24 04:38:23.371793: step: 728/466, loss: 0.07898574322462082 2023-01-24 04:38:23.972867: step: 730/466, loss: 0.0006913370452821255 2023-01-24 04:38:24.619273: step: 732/466, loss: 0.00435532396659255 2023-01-24 04:38:25.231161: step: 734/466, loss: 0.0007360997842624784 2023-01-24 04:38:25.892340: step: 736/466, loss: 0.008413765579462051 2023-01-24 04:38:26.478716: step: 738/466, loss: 0.0005035250214859843 2023-01-24 04:38:27.098263: step: 740/466, loss: 0.04341207072138786 2023-01-24 04:38:27.756233: step: 742/466, loss: 0.005954446271061897 2023-01-24 04:38:28.420922: step: 744/466, loss: 0.06513150781393051 2023-01-24 04:38:29.073976: step: 746/466, loss: 0.0178234726190567 2023-01-24 04:38:29.638596: step: 748/466, loss: 0.00010724266758188605 2023-01-24 04:38:30.233187: step: 750/466, loss: 0.03690279647707939 2023-01-24 04:38:30.912246: step: 752/466, loss: 0.0014993331860750914 2023-01-24 04:38:31.566113: step: 754/466, loss: 0.005447831004858017 2023-01-24 04:38:32.145936: step: 756/466, loss: 0.012812536209821701 2023-01-24 04:38:32.725523: step: 758/466, loss: 0.04598311334848404 2023-01-24 04:38:33.390644: step: 760/466, loss: 0.0032629230991005898 2023-01-24 04:38:34.026464: step: 762/466, loss: 0.0031235897913575172 2023-01-24 04:38:34.629124: step: 764/466, loss: 0.03207147866487503 2023-01-24 04:38:35.256231: step: 766/466, loss: 3.29094473272562e-05 2023-01-24 04:38:35.883731: step: 768/466, loss: 0.004162793979048729 2023-01-24 04:38:36.505798: step: 770/466, loss: 0.0203489288687706 2023-01-24 04:38:37.078081: step: 772/466, loss: 0.02295498736202717 2023-01-24 04:38:37.691164: step: 774/466, loss: 0.004239893052726984 2023-01-24 04:38:38.319260: step: 776/466, loss: 0.010488924570381641 2023-01-24 04:38:38.920930: step: 778/466, loss: 0.0013633416965603828 2023-01-24 04:38:39.559944: step: 780/466, loss: 0.004727499093860388 2023-01-24 04:38:40.135039: step: 782/466, loss: 0.02201392501592636 2023-01-24 04:38:40.732977: step: 784/466, loss: 0.14519967138767242 2023-01-24 04:38:41.361378: step: 786/466, loss: 0.0003717355721164495 2023-01-24 04:38:41.983828: step: 788/466, loss: 0.0064461724832654 2023-01-24 04:38:42.614581: step: 790/466, loss: 0.019439751282334328 2023-01-24 04:38:43.216064: step: 792/466, loss: 0.04224832355976105 2023-01-24 04:38:43.815703: step: 794/466, loss: 0.02717813104391098 2023-01-24 04:38:44.421822: step: 796/466, loss: 0.002636479679495096 2023-01-24 04:38:45.032813: step: 798/466, loss: 0.010049636475741863 2023-01-24 04:38:45.639523: step: 800/466, loss: 0.04448629170656204 2023-01-24 04:38:46.253831: step: 802/466, loss: 0.023999234661459923 2023-01-24 04:38:46.882414: step: 804/466, loss: 0.0019117995398119092 2023-01-24 04:38:47.503279: step: 806/466, loss: 0.053400829434394836 2023-01-24 04:38:48.147069: step: 808/466, loss: 0.0008438857039436698 2023-01-24 04:38:48.781494: step: 810/466, loss: 0.0007542734383605421 2023-01-24 04:38:49.368902: step: 812/466, loss: 0.0002328195550944656 2023-01-24 04:38:49.967378: step: 814/466, loss: 0.015046609565615654 2023-01-24 04:38:50.674304: step: 816/466, loss: 0.33053380250930786 2023-01-24 04:38:51.268804: step: 818/466, loss: 0.0062762959860265255 2023-01-24 04:38:51.878205: step: 820/466, loss: 0.0019862898625433445 2023-01-24 04:38:52.514131: step: 822/466, loss: 0.015598426572978497 2023-01-24 04:38:53.149734: step: 824/466, loss: 0.015153339132666588 2023-01-24 04:38:53.776595: step: 826/466, loss: 0.011167055927217007 2023-01-24 04:38:54.422707: step: 828/466, loss: 0.016412854194641113 2023-01-24 04:38:54.945803: step: 830/466, loss: 0.8337557911872864 2023-01-24 04:38:55.631671: step: 832/466, loss: 0.006227242294698954 2023-01-24 04:38:56.272403: step: 834/466, loss: 0.07353105396032333 2023-01-24 04:38:56.899035: step: 836/466, loss: 0.005039968993514776 2023-01-24 04:38:57.520148: step: 838/466, loss: 0.0008421080419793725 2023-01-24 04:38:58.133284: step: 840/466, loss: 0.0037266360595822334 2023-01-24 04:38:58.708657: step: 842/466, loss: 0.06846221536397934 2023-01-24 04:38:59.347246: step: 844/466, loss: 1.583168159413617e-05 2023-01-24 04:38:59.977077: step: 846/466, loss: 0.017023740336298943 2023-01-24 04:39:00.624987: step: 848/466, loss: 0.009995073080062866 2023-01-24 04:39:01.212903: step: 850/466, loss: 0.10446056723594666 2023-01-24 04:39:01.844150: step: 852/466, loss: 0.1504172831773758 2023-01-24 04:39:02.444047: step: 854/466, loss: 0.0077516851015388966 2023-01-24 04:39:03.090607: step: 856/466, loss: 0.022404974326491356 2023-01-24 04:39:03.696187: step: 858/466, loss: 0.013088853098452091 2023-01-24 04:39:04.285375: step: 860/466, loss: 0.030527258291840553 2023-01-24 04:39:04.893989: step: 862/466, loss: 0.14083844423294067 2023-01-24 04:39:05.490079: step: 864/466, loss: 0.0005255054566077888 2023-01-24 04:39:06.112973: step: 866/466, loss: 0.00027508364291861653 2023-01-24 04:39:06.750454: step: 868/466, loss: 0.7037966251373291 2023-01-24 04:39:07.345917: step: 870/466, loss: 0.0019437418086454272 2023-01-24 04:39:07.974594: step: 872/466, loss: 0.0004972312599420547 2023-01-24 04:39:08.561550: step: 874/466, loss: 0.026913179084658623 2023-01-24 04:39:09.154272: step: 876/466, loss: 0.5285601019859314 2023-01-24 04:39:09.683296: step: 878/466, loss: 0.0006900342414155602 2023-01-24 04:39:10.370513: step: 880/466, loss: 0.006860875058919191 2023-01-24 04:39:11.046173: step: 882/466, loss: 0.02796688862144947 2023-01-24 04:39:11.637809: step: 884/466, loss: 0.02239563688635826 2023-01-24 04:39:12.238801: step: 886/466, loss: 0.0003230464062653482 2023-01-24 04:39:12.858252: step: 888/466, loss: 0.003808454377576709 2023-01-24 04:39:13.485571: step: 890/466, loss: 0.011603298597037792 2023-01-24 04:39:14.074911: step: 892/466, loss: 0.001959197921678424 2023-01-24 04:39:14.679448: step: 894/466, loss: 4.1510764276608825e-05 2023-01-24 04:39:15.335914: step: 896/466, loss: 0.018717242404818535 2023-01-24 04:39:15.957282: step: 898/466, loss: 0.0030486127361655235 2023-01-24 04:39:16.480007: step: 900/466, loss: 0.02749359793961048 2023-01-24 04:39:17.048832: step: 902/466, loss: 0.0013783852336928248 2023-01-24 04:39:17.650387: step: 904/466, loss: 0.13980571925640106 2023-01-24 04:39:18.252460: step: 906/466, loss: 0.03152558207511902 2023-01-24 04:39:18.853817: step: 908/466, loss: 0.021568333730101585 2023-01-24 04:39:19.456985: step: 910/466, loss: 0.007910650223493576 2023-01-24 04:39:20.099629: step: 912/466, loss: 0.00212032999843359 2023-01-24 04:39:20.751591: step: 914/466, loss: 0.001687599578872323 2023-01-24 04:39:21.352067: step: 916/466, loss: 0.00031835469417274 2023-01-24 04:39:21.969225: step: 918/466, loss: 0.006139654200524092 2023-01-24 04:39:22.545171: step: 920/466, loss: 0.22900621592998505 2023-01-24 04:39:23.216788: step: 922/466, loss: 0.03427125886082649 2023-01-24 04:39:23.898140: step: 924/466, loss: 0.00580116780474782 2023-01-24 04:39:24.474147: step: 926/466, loss: 0.009941783733665943 2023-01-24 04:39:25.062805: step: 928/466, loss: 0.0014622800517827272 2023-01-24 04:39:25.692427: step: 930/466, loss: 0.0010867106029763818 2023-01-24 04:39:26.331430: step: 932/466, loss: 0.07350685447454453 ================================================== Loss: 0.038 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3612106737418063, 'r': 0.3248839835931996, 'f1': 0.3420856330741582}, 'combined': 0.25206309805464283, 'epoch': 34} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35234501916245337, 'r': 0.28238481680167743, 'f1': 0.31350939580787435}, 'combined': 0.19630962167408958, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3547978640867963, 'r': 0.34469925315453454, 'f1': 0.34967566200662115}, 'combined': 0.25765575095224713, 'epoch': 34} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3644579411881094, 'r': 0.2963687770852767, 'f1': 0.32690553012492446}, 'combined': 0.20258934261262923, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32325133893443037, 'r': 0.32079781833530757, 'f1': 0.3220199052622992}, 'combined': 0.23727782493011518, 'epoch': 34} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3607223213294642, 'r': 0.2851237860553579, 'f1': 0.3184985178236944}, 'combined': 0.21128119499195572, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39285714285714285, 'r': 0.3142857142857143, 'f1': 0.34920634920634924}, 'combined': 0.23280423280423282, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.358695652173913, 'f1': 0.3666666666666666}, 'combined': 0.1833333333333333, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32324861099239804, 'r': 0.3201817361252975, 'f1': 0.3217078645148366}, 'combined': 0.23704790016882693, 'epoch': 31} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36854991917508884, 'r': 0.2943742585468896, 'f1': 0.32731227141992336}, 'combined': 0.2171279424270779, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 31} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:41:59.706224: step: 2/466, loss: 0.19672130048274994 2023-01-24 04:42:00.371048: step: 4/466, loss: 0.00019182507821824402 2023-01-24 04:42:00.957651: step: 6/466, loss: 0.0061749680899083614 2023-01-24 04:42:01.602007: step: 8/466, loss: 0.0005687942611984909 2023-01-24 04:42:02.262477: step: 10/466, loss: 0.0019311527721583843 2023-01-24 04:42:02.887667: step: 12/466, loss: 0.2713383436203003 2023-01-24 04:42:03.504664: step: 14/466, loss: 0.001256522722542286 2023-01-24 04:42:04.094191: step: 16/466, loss: 0.0015868941554799676 2023-01-24 04:42:04.639717: step: 18/466, loss: 0.003078905399888754 2023-01-24 04:42:05.334619: step: 20/466, loss: 0.007138105109333992 2023-01-24 04:42:05.922571: step: 22/466, loss: 0.03378899395465851 2023-01-24 04:42:06.623432: step: 24/466, loss: 0.057214029133319855 2023-01-24 04:42:07.255830: step: 26/466, loss: 0.020001688972115517 2023-01-24 04:42:07.884913: step: 28/466, loss: 0.0021218005567789078 2023-01-24 04:42:08.503819: step: 30/466, loss: 0.00015065399929881096 2023-01-24 04:42:09.139115: step: 32/466, loss: 0.0314030721783638 2023-01-24 04:42:09.726239: step: 34/466, loss: 0.006553353741765022 2023-01-24 04:42:10.329634: step: 36/466, loss: 0.0008050925098359585 2023-01-24 04:42:10.992208: step: 38/466, loss: 0.026836374774575233 2023-01-24 04:42:11.549536: step: 40/466, loss: 0.0006224775570444763 2023-01-24 04:42:12.114420: step: 42/466, loss: 0.0006816504755988717 2023-01-24 04:42:12.712650: step: 44/466, loss: 0.0022528169211000204 2023-01-24 04:42:13.315435: step: 46/466, loss: 9.378709364682436e-05 2023-01-24 04:42:13.876358: step: 48/466, loss: 0.0031867213547229767 2023-01-24 04:42:14.433669: step: 50/466, loss: 0.0002920925326179713 2023-01-24 04:42:15.090456: step: 52/466, loss: 0.010656697675585747 2023-01-24 04:42:15.645610: step: 54/466, loss: 0.0034762206487357616 2023-01-24 04:42:16.222333: step: 56/466, loss: 4.362952313385904e-05 2023-01-24 04:42:16.883305: step: 58/466, loss: 0.007797377184033394 2023-01-24 04:42:17.517880: step: 60/466, loss: 0.013550630770623684 2023-01-24 04:42:18.116291: step: 62/466, loss: 0.0037805496249347925 2023-01-24 04:42:18.750641: step: 64/466, loss: 0.00014615154941566288 2023-01-24 04:42:19.414626: step: 66/466, loss: 0.004896939266473055 2023-01-24 04:42:20.077315: step: 68/466, loss: 0.005394264589995146 2023-01-24 04:42:20.736419: step: 70/466, loss: 0.0030124287586659193 2023-01-24 04:42:21.355680: step: 72/466, loss: 0.06591653823852539 2023-01-24 04:42:21.964427: step: 74/466, loss: 0.00654204934835434 2023-01-24 04:42:22.538893: step: 76/466, loss: 0.010498001240193844 2023-01-24 04:42:23.271489: step: 78/466, loss: 0.03141288831830025 2023-01-24 04:42:23.876764: step: 80/466, loss: 0.19972841441631317 2023-01-24 04:42:24.503567: step: 82/466, loss: 0.04782477393746376 2023-01-24 04:42:25.100149: step: 84/466, loss: 0.015525314025580883 2023-01-24 04:42:25.718568: step: 86/466, loss: 0.015194829553365707 2023-01-24 04:42:26.267788: step: 88/466, loss: 0.0035464188549667597 2023-01-24 04:42:26.846430: step: 90/466, loss: 0.018129082396626472 2023-01-24 04:42:27.467936: step: 92/466, loss: 0.007922499440610409 2023-01-24 04:42:28.028449: step: 94/466, loss: 0.0006496994174085557 2023-01-24 04:42:28.635252: step: 96/466, loss: 0.020360559225082397 2023-01-24 04:42:29.220181: step: 98/466, loss: 0.0008911642362363636 2023-01-24 04:42:29.863260: step: 100/466, loss: 0.013872869312763214 2023-01-24 04:42:30.490671: step: 102/466, loss: 0.0041520181111991405 2023-01-24 04:42:31.138364: step: 104/466, loss: 0.00037203289684839547 2023-01-24 04:42:31.770499: step: 106/466, loss: 0.027455279603600502 2023-01-24 04:42:32.390286: step: 108/466, loss: 0.015023048967123032 2023-01-24 04:42:33.035070: step: 110/466, loss: 0.0091076185926795 2023-01-24 04:42:33.666719: step: 112/466, loss: 0.0025266988668590784 2023-01-24 04:42:34.246783: step: 114/466, loss: 0.005472118966281414 2023-01-24 04:42:34.857923: step: 116/466, loss: 0.0005205310299061239 2023-01-24 04:42:35.537561: step: 118/466, loss: 0.0006418328848667443 2023-01-24 04:42:36.170644: step: 120/466, loss: 0.006029690615832806 2023-01-24 04:42:36.756098: step: 122/466, loss: 0.0007082900847308338 2023-01-24 04:42:37.382638: step: 124/466, loss: 0.014394117519259453 2023-01-24 04:42:37.948253: step: 126/466, loss: 0.02503759041428566 2023-01-24 04:42:38.536524: step: 128/466, loss: 0.00828527007251978 2023-01-24 04:42:39.185348: step: 130/466, loss: 0.0002908979950007051 2023-01-24 04:42:39.725339: step: 132/466, loss: 0.0011940551921725273 2023-01-24 04:42:40.426949: step: 134/466, loss: 0.0036164771299809217 2023-01-24 04:42:41.083875: step: 136/466, loss: 0.043024443089962006 2023-01-24 04:42:41.645599: step: 138/466, loss: 0.006661163177341223 2023-01-24 04:42:42.255288: step: 140/466, loss: 0.11081206053495407 2023-01-24 04:42:42.840065: step: 142/466, loss: 0.0023321532644331455 2023-01-24 04:42:43.447963: step: 144/466, loss: 0.014660653658211231 2023-01-24 04:42:43.990793: step: 146/466, loss: 0.000896903220564127 2023-01-24 04:42:44.567508: step: 148/466, loss: 0.006206926889717579 2023-01-24 04:42:45.161500: step: 150/466, loss: 0.00997986551374197 2023-01-24 04:42:45.778509: step: 152/466, loss: 0.019994856789708138 2023-01-24 04:42:46.429200: step: 154/466, loss: 0.0007062026998028159 2023-01-24 04:42:47.019634: step: 156/466, loss: 0.0196046382188797 2023-01-24 04:42:47.630252: step: 158/466, loss: 0.06580505520105362 2023-01-24 04:42:48.267636: step: 160/466, loss: 0.0017799704801291227 2023-01-24 04:42:48.925266: step: 162/466, loss: 0.019087480381131172 2023-01-24 04:42:49.468426: step: 164/466, loss: 0.002953437389805913 2023-01-24 04:42:50.151347: step: 166/466, loss: 0.7682852149009705 2023-01-24 04:42:50.857756: step: 168/466, loss: 0.00706633273512125 2023-01-24 04:42:51.475822: step: 170/466, loss: 0.0007466856040991843 2023-01-24 04:42:52.071934: step: 172/466, loss: 0.0010011651320382953 2023-01-24 04:42:52.654103: step: 174/466, loss: 0.01231431309133768 2023-01-24 04:42:53.286014: step: 176/466, loss: 0.0023385584354400635 2023-01-24 04:42:53.874185: step: 178/466, loss: 0.0020545772276818752 2023-01-24 04:42:54.445543: step: 180/466, loss: 0.034714844077825546 2023-01-24 04:42:55.077586: step: 182/466, loss: 0.002392650581896305 2023-01-24 04:42:55.691389: step: 184/466, loss: 0.00034955190494656563 2023-01-24 04:42:56.317572: step: 186/466, loss: 0.0005167002091184258 2023-01-24 04:42:56.919240: step: 188/466, loss: 0.003416383871808648 2023-01-24 04:42:57.591976: step: 190/466, loss: 0.008075198158621788 2023-01-24 04:42:58.176482: step: 192/466, loss: 0.02795613929629326 2023-01-24 04:42:58.809264: step: 194/466, loss: 0.0013707567704841495 2023-01-24 04:42:59.444494: step: 196/466, loss: 0.004438911098986864 2023-01-24 04:42:59.964018: step: 198/466, loss: 0.00027351867174729705 2023-01-24 04:43:00.590563: step: 200/466, loss: 0.05398409068584442 2023-01-24 04:43:01.177229: step: 202/466, loss: 0.04659602418541908 2023-01-24 04:43:01.799410: step: 204/466, loss: 0.012363685294985771 2023-01-24 04:43:02.415698: step: 206/466, loss: 0.0038786998484283686 2023-01-24 04:43:02.999954: step: 208/466, loss: 0.14861993491649628 2023-01-24 04:43:03.628692: step: 210/466, loss: 0.001692389720119536 2023-01-24 04:43:04.314645: step: 212/466, loss: 0.034790534526109695 2023-01-24 04:43:04.893910: step: 214/466, loss: 0.021161269396543503 2023-01-24 04:43:05.483854: step: 216/466, loss: 0.04754173010587692 2023-01-24 04:43:06.176116: step: 218/466, loss: 0.01908714324235916 2023-01-24 04:43:06.803539: step: 220/466, loss: 0.02536274492740631 2023-01-24 04:43:07.404748: step: 222/466, loss: 0.007284355349838734 2023-01-24 04:43:08.066315: step: 224/466, loss: 0.002910307142883539 2023-01-24 04:43:08.691963: step: 226/466, loss: 0.04186934605240822 2023-01-24 04:43:09.337611: step: 228/466, loss: 0.0024629044346511364 2023-01-24 04:43:09.983405: step: 230/466, loss: 1.4538025856018066 2023-01-24 04:43:10.630792: step: 232/466, loss: 0.06269078701734543 2023-01-24 04:43:11.226104: step: 234/466, loss: 0.03725360706448555 2023-01-24 04:43:11.833587: step: 236/466, loss: 0.0020332932472229004 2023-01-24 04:43:12.469204: step: 238/466, loss: 0.0004961374215781689 2023-01-24 04:43:13.043885: step: 240/466, loss: 0.0014273915439844131 2023-01-24 04:43:13.703460: step: 242/466, loss: 0.005457594990730286 2023-01-24 04:43:14.260952: step: 244/466, loss: 0.004949708469212055 2023-01-24 04:43:14.884654: step: 246/466, loss: 0.0017929062014445662 2023-01-24 04:43:15.516570: step: 248/466, loss: 0.016505340114235878 2023-01-24 04:43:16.112659: step: 250/466, loss: 0.006642746273428202 2023-01-24 04:43:16.785979: step: 252/466, loss: 0.039198290556669235 2023-01-24 04:43:17.472832: step: 254/466, loss: 0.03569181263446808 2023-01-24 04:43:18.090106: step: 256/466, loss: 0.07561182975769043 2023-01-24 04:43:18.733580: step: 258/466, loss: 0.006765153259038925 2023-01-24 04:43:19.386150: step: 260/466, loss: 0.009034424088895321 2023-01-24 04:43:20.012502: step: 262/466, loss: 0.0216000284999609 2023-01-24 04:43:20.626364: step: 264/466, loss: 0.013857035897672176 2023-01-24 04:43:21.317078: step: 266/466, loss: 0.012999437749385834 2023-01-24 04:43:21.964557: step: 268/466, loss: 2.2698400020599365 2023-01-24 04:43:22.697550: step: 270/466, loss: 0.011805834248661995 2023-01-24 04:43:23.292745: step: 272/466, loss: 0.005838778801262379 2023-01-24 04:43:23.866146: step: 274/466, loss: 0.005848190281540155 2023-01-24 04:43:24.410674: step: 276/466, loss: 0.002828894881531596 2023-01-24 04:43:25.055882: step: 278/466, loss: 0.032090093940496445 2023-01-24 04:43:25.696338: step: 280/466, loss: 0.006285602226853371 2023-01-24 04:43:26.273799: step: 282/466, loss: 0.004960036836564541 2023-01-24 04:43:26.871387: step: 284/466, loss: 0.001981942215934396 2023-01-24 04:43:27.488308: step: 286/466, loss: 0.001998018939048052 2023-01-24 04:43:28.143953: step: 288/466, loss: 0.06795407086610794 2023-01-24 04:43:28.775360: step: 290/466, loss: 0.0028585109394043684 2023-01-24 04:43:29.356635: step: 292/466, loss: 0.008331945165991783 2023-01-24 04:43:29.904245: step: 294/466, loss: 0.06210816279053688 2023-01-24 04:43:30.533541: step: 296/466, loss: 0.0009348626481369138 2023-01-24 04:43:31.143552: step: 298/466, loss: 0.06100241839885712 2023-01-24 04:43:31.766289: step: 300/466, loss: 0.011931007727980614 2023-01-24 04:43:32.405491: step: 302/466, loss: 0.022689325734972954 2023-01-24 04:43:33.054448: step: 304/466, loss: 0.0016204646090045571 2023-01-24 04:43:33.647108: step: 306/466, loss: 0.031040312722325325 2023-01-24 04:43:34.251844: step: 308/466, loss: 0.030522683635354042 2023-01-24 04:43:34.862264: step: 310/466, loss: 0.0019973935559391975 2023-01-24 04:43:35.456696: step: 312/466, loss: 0.0004837153246626258 2023-01-24 04:43:36.117025: step: 314/466, loss: 0.0034056873992085457 2023-01-24 04:43:36.684239: step: 316/466, loss: 0.004625469446182251 2023-01-24 04:43:37.365410: step: 318/466, loss: 0.006939669139683247 2023-01-24 04:43:37.946469: step: 320/466, loss: 0.00302899070084095 2023-01-24 04:43:38.652506: step: 322/466, loss: 0.0508137084543705 2023-01-24 04:43:39.300152: step: 324/466, loss: 0.008497731760144234 2023-01-24 04:43:39.970354: step: 326/466, loss: 0.03148103505373001 2023-01-24 04:43:40.574544: step: 328/466, loss: 0.10339414328336716 2023-01-24 04:43:41.204040: step: 330/466, loss: 0.08141098916530609 2023-01-24 04:43:41.797692: step: 332/466, loss: 1.042614530888386e-05 2023-01-24 04:43:42.436788: step: 334/466, loss: 0.16875165700912476 2023-01-24 04:43:43.056092: step: 336/466, loss: 0.04247257113456726 2023-01-24 04:43:43.714202: step: 338/466, loss: 0.0014910728204995394 2023-01-24 04:43:44.312587: step: 340/466, loss: 0.0002597893762867898 2023-01-24 04:43:44.908455: step: 342/466, loss: 0.003451672848314047 2023-01-24 04:43:45.518640: step: 344/466, loss: 0.029710598289966583 2023-01-24 04:43:46.194719: step: 346/466, loss: 0.007508716080337763 2023-01-24 04:43:46.807802: step: 348/466, loss: 0.0005733236903324723 2023-01-24 04:43:47.385179: step: 350/466, loss: 0.12284275889396667 2023-01-24 04:43:47.970921: step: 352/466, loss: 0.49797219038009644 2023-01-24 04:43:48.574711: step: 354/466, loss: 0.011442849412560463 2023-01-24 04:43:49.160526: step: 356/466, loss: 0.04998866468667984 2023-01-24 04:43:49.709920: step: 358/466, loss: 0.15562476217746735 2023-01-24 04:43:50.343330: step: 360/466, loss: 0.002112923888489604 2023-01-24 04:43:50.892884: step: 362/466, loss: 0.011992626823484898 2023-01-24 04:43:51.512404: step: 364/466, loss: 0.018306145444512367 2023-01-24 04:43:52.094110: step: 366/466, loss: 0.018336445093154907 2023-01-24 04:43:52.767914: step: 368/466, loss: 0.046762946993112564 2023-01-24 04:43:53.354840: step: 370/466, loss: 0.020381107926368713 2023-01-24 04:43:53.970767: step: 372/466, loss: 0.02027146890759468 2023-01-24 04:43:54.615148: step: 374/466, loss: 0.021069901064038277 2023-01-24 04:43:55.301181: step: 376/466, loss: 0.022868456318974495 2023-01-24 04:43:55.923432: step: 378/466, loss: 0.0026336428709328175 2023-01-24 04:43:56.490997: step: 380/466, loss: 0.023216865956783295 2023-01-24 04:43:57.094360: step: 382/466, loss: 0.00418890081346035 2023-01-24 04:43:57.814450: step: 384/466, loss: 0.04266766458749771 2023-01-24 04:43:58.462704: step: 386/466, loss: 0.0019366155611351132 2023-01-24 04:43:59.075227: step: 388/466, loss: 0.15027840435504913 2023-01-24 04:43:59.650005: step: 390/466, loss: 0.004704800900071859 2023-01-24 04:44:00.241075: step: 392/466, loss: 0.004052850417792797 2023-01-24 04:44:00.959355: step: 394/466, loss: 0.000787840923294425 2023-01-24 04:44:01.546277: step: 396/466, loss: 0.005768901668488979 2023-01-24 04:44:02.194278: step: 398/466, loss: 0.009184678085148335 2023-01-24 04:44:02.822206: step: 400/466, loss: 0.004484910052269697 2023-01-24 04:44:03.411801: step: 402/466, loss: 0.0012350109172984958 2023-01-24 04:44:04.107832: step: 404/466, loss: 0.003340089926496148 2023-01-24 04:44:04.718801: step: 406/466, loss: 0.01358309667557478 2023-01-24 04:44:05.329914: step: 408/466, loss: 0.07484845072031021 2023-01-24 04:44:05.971058: step: 410/466, loss: 0.0009002909064292908 2023-01-24 04:44:06.660249: step: 412/466, loss: 0.025044383481144905 2023-01-24 04:44:07.307132: step: 414/466, loss: 0.001945171388797462 2023-01-24 04:44:07.897431: step: 416/466, loss: 0.0009749350720085204 2023-01-24 04:44:08.547333: step: 418/466, loss: 0.05633292719721794 2023-01-24 04:44:09.166648: step: 420/466, loss: 0.08718458563089371 2023-01-24 04:44:09.812991: step: 422/466, loss: 0.005591679364442825 2023-01-24 04:44:10.429897: step: 424/466, loss: 0.0003500099410302937 2023-01-24 04:44:11.025890: step: 426/466, loss: 0.010241672396659851 2023-01-24 04:44:11.709297: step: 428/466, loss: 0.016269562765955925 2023-01-24 04:44:12.354260: step: 430/466, loss: 0.025463281199336052 2023-01-24 04:44:12.986829: step: 432/466, loss: 0.0015716906636953354 2023-01-24 04:44:13.583896: step: 434/466, loss: 0.00032375584123656154 2023-01-24 04:44:14.262994: step: 436/466, loss: 0.01443261094391346 2023-01-24 04:44:14.835570: step: 438/466, loss: 0.012558290734887123 2023-01-24 04:44:15.548507: step: 440/466, loss: 0.0001916693290695548 2023-01-24 04:44:16.123577: step: 442/466, loss: 0.017497636377811432 2023-01-24 04:44:16.721584: step: 444/466, loss: 0.004431367386132479 2023-01-24 04:44:17.352359: step: 446/466, loss: 0.005800188984721899 2023-01-24 04:44:18.009862: step: 448/466, loss: 0.00022565516701433808 2023-01-24 04:44:18.646881: step: 450/466, loss: 0.00861687958240509 2023-01-24 04:44:19.435359: step: 452/466, loss: 0.00256314966827631 2023-01-24 04:44:20.044892: step: 454/466, loss: 0.0067755659110844135 2023-01-24 04:44:20.771591: step: 456/466, loss: 0.008343460038304329 2023-01-24 04:44:21.426655: step: 458/466, loss: 0.8243575692176819 2023-01-24 04:44:22.026276: step: 460/466, loss: 0.0017933855997398496 2023-01-24 04:44:22.673968: step: 462/466, loss: 0.042824890464544296 2023-01-24 04:44:23.315195: step: 464/466, loss: 0.010092045180499554 2023-01-24 04:44:23.913562: step: 466/466, loss: 0.012770496308803558 2023-01-24 04:44:24.528952: step: 468/466, loss: 0.0638820230960846 2023-01-24 04:44:25.135138: step: 470/466, loss: 0.0006324428832158446 2023-01-24 04:44:25.765466: step: 472/466, loss: 0.016825968399643898 2023-01-24 04:44:26.400183: step: 474/466, loss: 0.0032879516948014498 2023-01-24 04:44:27.014145: step: 476/466, loss: 0.004367944318801165 2023-01-24 04:44:27.606640: step: 478/466, loss: 0.0025375245604664087 2023-01-24 04:44:28.178781: step: 480/466, loss: 0.0017064290586858988 2023-01-24 04:44:28.826148: step: 482/466, loss: 0.06843334436416626 2023-01-24 04:44:29.452966: step: 484/466, loss: 0.35727813839912415 2023-01-24 04:44:30.015383: step: 486/466, loss: 0.0025273372884839773 2023-01-24 04:44:30.585825: step: 488/466, loss: 0.0005819305079057813 2023-01-24 04:44:31.268180: step: 490/466, loss: 0.044151756912469864 2023-01-24 04:44:31.872397: step: 492/466, loss: 0.009603282436728477 2023-01-24 04:44:32.466776: step: 494/466, loss: 0.0023218526039272547 2023-01-24 04:44:33.126841: step: 496/466, loss: 0.10366561263799667 2023-01-24 04:44:33.729170: step: 498/466, loss: 0.02948777936398983 2023-01-24 04:44:34.398567: step: 500/466, loss: 0.0006938776350580156 2023-01-24 04:44:35.039563: step: 502/466, loss: 0.04879922792315483 2023-01-24 04:44:35.651200: step: 504/466, loss: 0.004724626895040274 2023-01-24 04:44:36.250537: step: 506/466, loss: 0.00025952394935302436 2023-01-24 04:44:36.823012: step: 508/466, loss: 0.002397992415353656 2023-01-24 04:44:37.419732: step: 510/466, loss: 0.00031070475233718753 2023-01-24 04:44:37.984898: step: 512/466, loss: 0.001031419262290001 2023-01-24 04:44:38.631988: step: 514/466, loss: 4.788067235494964e-05 2023-01-24 04:44:39.243517: step: 516/466, loss: 0.002394371200352907 2023-01-24 04:44:39.916071: step: 518/466, loss: 0.007762254681438208 2023-01-24 04:44:40.488860: step: 520/466, loss: 0.015812717378139496 2023-01-24 04:44:41.102328: step: 522/466, loss: 0.005003506317734718 2023-01-24 04:44:41.672540: step: 524/466, loss: 0.0646287053823471 2023-01-24 04:44:42.288235: step: 526/466, loss: 0.0006683768006041646 2023-01-24 04:44:42.943336: step: 528/466, loss: 0.010825795121490955 2023-01-24 04:44:43.535144: step: 530/466, loss: 0.05152782052755356 2023-01-24 04:44:44.075376: step: 532/466, loss: 0.00973795261234045 2023-01-24 04:44:44.658113: step: 534/466, loss: 0.001491101342253387 2023-01-24 04:44:45.305172: step: 536/466, loss: 0.022614777088165283 2023-01-24 04:44:45.885738: step: 538/466, loss: 0.0004979691002517939 2023-01-24 04:44:46.583528: step: 540/466, loss: 0.008996004238724709 2023-01-24 04:44:47.148055: step: 542/466, loss: 0.047437675297260284 2023-01-24 04:44:47.750985: step: 544/466, loss: 0.021595099940896034 2023-01-24 04:44:48.399481: step: 546/466, loss: 0.03466839715838432 2023-01-24 04:44:48.973092: step: 548/466, loss: 0.00020024352124892175 2023-01-24 04:44:49.587402: step: 550/466, loss: 0.03670458868145943 2023-01-24 04:44:50.169650: step: 552/466, loss: 0.022422349080443382 2023-01-24 04:44:50.798305: step: 554/466, loss: 0.4138838052749634 2023-01-24 04:44:51.443960: step: 556/466, loss: 0.002338852733373642 2023-01-24 04:44:52.119634: step: 558/466, loss: 0.010339035652577877 2023-01-24 04:44:52.844898: step: 560/466, loss: 0.008293172344565392 2023-01-24 04:44:53.494548: step: 562/466, loss: 0.0033398624509572983 2023-01-24 04:44:54.104293: step: 564/466, loss: 0.0009204355301335454 2023-01-24 04:44:54.766704: step: 566/466, loss: 0.014306855387985706 2023-01-24 04:44:55.391715: step: 568/466, loss: 0.0131671829149127 2023-01-24 04:44:55.985392: step: 570/466, loss: 0.011301815509796143 2023-01-24 04:44:56.586399: step: 572/466, loss: 0.04796997085213661 2023-01-24 04:44:57.174260: step: 574/466, loss: 0.005415166728198528 2023-01-24 04:44:57.801315: step: 576/466, loss: 0.006941391620784998 2023-01-24 04:44:58.419152: step: 578/466, loss: 0.11789073795080185 2023-01-24 04:44:58.959263: step: 580/466, loss: 0.08147279918193817 2023-01-24 04:44:59.610707: step: 582/466, loss: 0.047601763159036636 2023-01-24 04:45:00.273678: step: 584/466, loss: 0.07497742027044296 2023-01-24 04:45:00.818074: step: 586/466, loss: 0.0024831874761730433 2023-01-24 04:45:01.424886: step: 588/466, loss: 0.0021917091216892004 2023-01-24 04:45:02.002383: step: 590/466, loss: 0.0030568272341042757 2023-01-24 04:45:02.722607: step: 592/466, loss: 0.010162237100303173 2023-01-24 04:45:03.303543: step: 594/466, loss: 0.001877710223197937 2023-01-24 04:45:03.909485: step: 596/466, loss: 0.007134966552257538 2023-01-24 04:45:04.426799: step: 598/466, loss: 7.020118209766224e-05 2023-01-24 04:45:05.064455: step: 600/466, loss: 0.008285176940262318 2023-01-24 04:45:05.684886: step: 602/466, loss: 0.06338031589984894 2023-01-24 04:45:06.297808: step: 604/466, loss: 0.0649886354804039 2023-01-24 04:45:06.886163: step: 606/466, loss: 0.02187025547027588 2023-01-24 04:45:07.547764: step: 608/466, loss: 0.08032085746526718 2023-01-24 04:45:08.171820: step: 610/466, loss: 0.03648293390870094 2023-01-24 04:45:08.740399: step: 612/466, loss: 0.010144883766770363 2023-01-24 04:45:09.384586: step: 614/466, loss: 0.0015597706660628319 2023-01-24 04:45:10.076231: step: 616/466, loss: 0.013114754110574722 2023-01-24 04:45:10.635887: step: 618/466, loss: 0.009712484665215015 2023-01-24 04:45:11.241157: step: 620/466, loss: 0.0004671521601267159 2023-01-24 04:45:11.907493: step: 622/466, loss: 0.019531484693288803 2023-01-24 04:45:12.528397: step: 624/466, loss: 0.00597155699506402 2023-01-24 04:45:13.135866: step: 626/466, loss: 0.0009442660957574844 2023-01-24 04:45:13.699365: step: 628/466, loss: 0.013763475231826305 2023-01-24 04:45:14.295800: step: 630/466, loss: 0.02130374312400818 2023-01-24 04:45:14.878773: step: 632/466, loss: 9.01765379239805e-05 2023-01-24 04:45:15.467218: step: 634/466, loss: 0.008411786518990993 2023-01-24 04:45:16.066988: step: 636/466, loss: 0.0007759033469483256 2023-01-24 04:45:16.664629: step: 638/466, loss: 0.011445640586316586 2023-01-24 04:45:17.246783: step: 640/466, loss: 0.004646544344723225 2023-01-24 04:45:17.877070: step: 642/466, loss: 0.006833262275904417 2023-01-24 04:45:18.519281: step: 644/466, loss: 0.1428021341562271 2023-01-24 04:45:19.122562: step: 646/466, loss: 0.003483706619590521 2023-01-24 04:45:19.721093: step: 648/466, loss: 0.0012693015160039067 2023-01-24 04:45:20.361497: step: 650/466, loss: 0.0013372853863984346 2023-01-24 04:45:20.937396: step: 652/466, loss: 0.004944841843098402 2023-01-24 04:45:21.556119: step: 654/466, loss: 0.012301434762775898 2023-01-24 04:45:22.198164: step: 656/466, loss: 0.006135048344731331 2023-01-24 04:45:22.792385: step: 658/466, loss: 0.011722376570105553 2023-01-24 04:45:23.384782: step: 660/466, loss: 0.004831454250961542 2023-01-24 04:45:23.952899: step: 662/466, loss: 0.0010106010595336556 2023-01-24 04:45:24.569279: step: 664/466, loss: 0.0006785733858123422 2023-01-24 04:45:25.230913: step: 666/466, loss: 0.0036645540967583656 2023-01-24 04:45:25.780097: step: 668/466, loss: 0.354581743478775 2023-01-24 04:45:26.425283: step: 670/466, loss: 0.0022827736102044582 2023-01-24 04:45:27.063886: step: 672/466, loss: 0.0003675426996778697 2023-01-24 04:45:27.729247: step: 674/466, loss: 0.032468073070049286 2023-01-24 04:45:28.312780: step: 676/466, loss: 0.0008371906587854028 2023-01-24 04:45:28.901278: step: 678/466, loss: 0.009145176969468594 2023-01-24 04:45:29.465923: step: 680/466, loss: 0.007412533741444349 2023-01-24 04:45:30.118257: step: 682/466, loss: 0.021561123430728912 2023-01-24 04:45:30.785635: step: 684/466, loss: 0.00114986184053123 2023-01-24 04:45:31.395845: step: 686/466, loss: 0.12480369210243225 2023-01-24 04:45:31.996076: step: 688/466, loss: 0.055875711143016815 2023-01-24 04:45:32.636997: step: 690/466, loss: 0.0023874372709542513 2023-01-24 04:45:33.245352: step: 692/466, loss: 0.003263173159211874 2023-01-24 04:45:33.827435: step: 694/466, loss: 0.003564089071005583 2023-01-24 04:45:34.455594: step: 696/466, loss: 0.9798381924629211 2023-01-24 04:45:35.027300: step: 698/466, loss: 0.026384569704532623 2023-01-24 04:45:35.606193: step: 700/466, loss: 0.009991944767534733 2023-01-24 04:45:36.337447: step: 702/466, loss: 0.013687239028513432 2023-01-24 04:45:36.966974: step: 704/466, loss: 0.013689437881112099 2023-01-24 04:45:37.648565: step: 706/466, loss: 0.010718021541833878 2023-01-24 04:45:38.280831: step: 708/466, loss: 0.37827393412590027 2023-01-24 04:45:38.867923: step: 710/466, loss: 0.13925020396709442 2023-01-24 04:45:39.570606: step: 712/466, loss: 0.007778448984026909 2023-01-24 04:45:40.161572: step: 714/466, loss: 0.019935665652155876 2023-01-24 04:45:40.808386: step: 716/466, loss: 0.002576232887804508 2023-01-24 04:45:41.414303: step: 718/466, loss: 0.05597037822008133 2023-01-24 04:45:42.046934: step: 720/466, loss: 0.005296092014759779 2023-01-24 04:45:42.581837: step: 722/466, loss: 5.4547977924812585e-05 2023-01-24 04:45:43.225385: step: 724/466, loss: 0.05543103441596031 2023-01-24 04:45:43.788479: step: 726/466, loss: 0.00011633874964900315 2023-01-24 04:45:44.394707: step: 728/466, loss: 0.0037250141613185406 2023-01-24 04:45:45.002154: step: 730/466, loss: 0.0020853602327406406 2023-01-24 04:45:45.696786: step: 732/466, loss: 0.016643157228827477 2023-01-24 04:45:46.325744: step: 734/466, loss: 0.09759864211082458 2023-01-24 04:45:46.903794: step: 736/466, loss: 0.03504372388124466 2023-01-24 04:45:47.556649: step: 738/466, loss: 0.0022636796347796917 2023-01-24 04:45:48.144988: step: 740/466, loss: 0.009098563343286514 2023-01-24 04:45:48.730754: step: 742/466, loss: 0.01605973392724991 2023-01-24 04:45:49.307058: step: 744/466, loss: 0.013869882561266422 2023-01-24 04:45:49.890582: step: 746/466, loss: 0.00021724813268519938 2023-01-24 04:45:50.491521: step: 748/466, loss: 0.000851553282700479 2023-01-24 04:45:51.075961: step: 750/466, loss: 0.005408648867160082 2023-01-24 04:45:51.712068: step: 752/466, loss: 0.018733207136392593 2023-01-24 04:45:52.358108: step: 754/466, loss: 0.006612854544073343 2023-01-24 04:45:52.953589: step: 756/466, loss: 0.002440654905512929 2023-01-24 04:45:53.582030: step: 758/466, loss: 0.02385859191417694 2023-01-24 04:45:54.192425: step: 760/466, loss: 0.00998731143772602 2023-01-24 04:45:54.775335: step: 762/466, loss: 0.02541702799499035 2023-01-24 04:45:55.304563: step: 764/466, loss: 0.0004247078322805464 2023-01-24 04:45:55.870327: step: 766/466, loss: 0.00015261516091413796 2023-01-24 04:45:56.449358: step: 768/466, loss: 0.004938450176268816 2023-01-24 04:45:57.028209: step: 770/466, loss: 0.012439640238881111 2023-01-24 04:45:57.663253: step: 772/466, loss: 0.058742985129356384 2023-01-24 04:45:58.225743: step: 774/466, loss: 0.022723093628883362 2023-01-24 04:45:58.846856: step: 776/466, loss: 0.006965222768485546 2023-01-24 04:45:59.474053: step: 778/466, loss: 0.0020023758988827467 2023-01-24 04:46:00.124558: step: 780/466, loss: 0.020294897258281708 2023-01-24 04:46:00.681290: step: 782/466, loss: 0.08900012075901031 2023-01-24 04:46:01.349585: step: 784/466, loss: 0.04531180113554001 2023-01-24 04:46:01.898072: step: 786/466, loss: 0.001975729363039136 2023-01-24 04:46:02.521287: step: 788/466, loss: 0.010713978670537472 2023-01-24 04:46:03.140076: step: 790/466, loss: 0.02182890474796295 2023-01-24 04:46:03.820314: step: 792/466, loss: 0.0789024755358696 2023-01-24 04:46:04.416601: step: 794/466, loss: 0.003523700637742877 2023-01-24 04:46:05.013837: step: 796/466, loss: 1.0092992852150928e-05 2023-01-24 04:46:05.638981: step: 798/466, loss: 0.03954795002937317 2023-01-24 04:46:06.231984: step: 800/466, loss: 0.0508272759616375 2023-01-24 04:46:06.841484: step: 802/466, loss: 0.014859997667372227 2023-01-24 04:46:07.511242: step: 804/466, loss: 0.07760845124721527 2023-01-24 04:46:08.171175: step: 806/466, loss: 0.005476176273077726 2023-01-24 04:46:08.781304: step: 808/466, loss: 0.0022972505539655685 2023-01-24 04:46:09.416111: step: 810/466, loss: 0.0020878976210951805 2023-01-24 04:46:10.118603: step: 812/466, loss: 0.03605556860566139 2023-01-24 04:46:10.772304: step: 814/466, loss: 0.03797121345996857 2023-01-24 04:46:11.361802: step: 816/466, loss: 0.004127658903598785 2023-01-24 04:46:11.992431: step: 818/466, loss: 0.002642194740474224 2023-01-24 04:46:12.540977: step: 820/466, loss: 0.003927435725927353 2023-01-24 04:46:13.143116: step: 822/466, loss: 0.002834962448105216 2023-01-24 04:46:13.681711: step: 824/466, loss: 0.014993560500442982 2023-01-24 04:46:14.354270: step: 826/466, loss: 0.019240783527493477 2023-01-24 04:46:14.980000: step: 828/466, loss: 0.010984636843204498 2023-01-24 04:46:15.573961: step: 830/466, loss: 0.004568424075841904 2023-01-24 04:46:16.170266: step: 832/466, loss: 0.03935897722840309 2023-01-24 04:46:16.719863: step: 834/466, loss: 0.0003054741828236729 2023-01-24 04:46:17.271195: step: 836/466, loss: 0.037199921905994415 2023-01-24 04:46:17.920381: step: 838/466, loss: 0.010629761032760143 2023-01-24 04:46:18.495504: step: 840/466, loss: 0.0027806272264569998 2023-01-24 04:46:19.111969: step: 842/466, loss: 0.0029716147109866142 2023-01-24 04:46:19.721558: step: 844/466, loss: 0.0017348077381029725 2023-01-24 04:46:20.348984: step: 846/466, loss: 0.011939690448343754 2023-01-24 04:46:20.962197: step: 848/466, loss: 0.0006046024500392377 2023-01-24 04:46:21.528843: step: 850/466, loss: 0.01060577854514122 2023-01-24 04:46:22.140442: step: 852/466, loss: 0.0018012551590800285 2023-01-24 04:46:22.755466: step: 854/466, loss: 0.02042742818593979 2023-01-24 04:46:23.442410: step: 856/466, loss: 0.026546571403741837 2023-01-24 04:46:23.998888: step: 858/466, loss: 0.007592841051518917 2023-01-24 04:46:24.581540: step: 860/466, loss: 0.1809961050748825 2023-01-24 04:46:25.273572: step: 862/466, loss: 0.00013021420454606414 2023-01-24 04:46:25.939452: step: 864/466, loss: 0.0031625409610569477 2023-01-24 04:46:26.605291: step: 866/466, loss: 0.017291951924562454 2023-01-24 04:46:27.234821: step: 868/466, loss: 0.017253616824746132 2023-01-24 04:46:27.964067: step: 870/466, loss: 0.014447838999330997 2023-01-24 04:46:28.596115: step: 872/466, loss: 0.010009496472775936 2023-01-24 04:46:29.179731: step: 874/466, loss: 0.032422348856925964 2023-01-24 04:46:29.796372: step: 876/466, loss: 0.004229803569614887 2023-01-24 04:46:30.431814: step: 878/466, loss: 0.04559878259897232 2023-01-24 04:46:31.114357: step: 880/466, loss: 0.026000287383794785 2023-01-24 04:46:31.730980: step: 882/466, loss: 0.0010667172027751803 2023-01-24 04:46:32.336136: step: 884/466, loss: 0.00041076657362282276 2023-01-24 04:46:33.011384: step: 886/466, loss: 0.032993000000715256 2023-01-24 04:46:33.685374: step: 888/466, loss: 0.026025397703051567 2023-01-24 04:46:34.283569: step: 890/466, loss: 0.014603899791836739 2023-01-24 04:46:34.940569: step: 892/466, loss: 0.016040993854403496 2023-01-24 04:46:35.557240: step: 894/466, loss: 0.012828697450459003 2023-01-24 04:46:36.155275: step: 896/466, loss: 0.0034813571255654097 2023-01-24 04:46:36.707012: step: 898/466, loss: 0.0012004021555185318 2023-01-24 04:46:37.308942: step: 900/466, loss: 0.0019947951659560204 2023-01-24 04:46:37.909076: step: 902/466, loss: 0.005977644119411707 2023-01-24 04:46:38.480856: step: 904/466, loss: 0.004979805089533329 2023-01-24 04:46:39.179566: step: 906/466, loss: 0.23743489384651184 2023-01-24 04:46:39.768769: step: 908/466, loss: 0.0009202069486491382 2023-01-24 04:46:40.377998: step: 910/466, loss: 0.004790265113115311 2023-01-24 04:46:40.980486: step: 912/466, loss: 6.339549145195633e-05 2023-01-24 04:46:41.623693: step: 914/466, loss: 0.017467763274908066 2023-01-24 04:46:42.214779: step: 916/466, loss: 0.0016730381175875664 2023-01-24 04:46:42.803171: step: 918/466, loss: 0.0036454566288739443 2023-01-24 04:46:43.389060: step: 920/466, loss: 0.047419484704732895 2023-01-24 04:46:43.942328: step: 922/466, loss: 0.005076316650956869 2023-01-24 04:46:44.554863: step: 924/466, loss: 0.010990198701620102 2023-01-24 04:46:45.213957: step: 926/466, loss: 0.016945889219641685 2023-01-24 04:46:45.833246: step: 928/466, loss: 3.716916762641631e-05 2023-01-24 04:46:46.389912: step: 930/466, loss: 0.0017149192281067371 2023-01-24 04:46:47.069426: step: 932/466, loss: 0.005673174746334553 ================================================== Loss: 0.038 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3575304689910522, 'r': 0.31411120899972894, 'f1': 0.3344173881673882}, 'combined': 0.24641281233386497, 'epoch': 35} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3485843907554434, 'r': 0.27842706301314746, 'f1': 0.30958071833273193}, 'combined': 0.19384960867563591, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3543515420424789, 'r': 0.33014536459745186, 'f1': 0.3418204462531575}, 'combined': 0.2518676972391687, 'epoch': 35} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3626003082768433, 'r': 0.29453093632595573, 'f1': 0.32504011698123403}, 'combined': 0.20143331193203234, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3137226540190211, 'r': 0.311341457404076, 'f1': 0.3125275200989487}, 'combined': 0.23028343586238323, 'epoch': 35} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3682847133322911, 'r': 0.2884397890326074, 'f1': 0.32350845639219494}, 'combined': 0.2146046195869016, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29310344827586204, 'r': 0.24285714285714285, 'f1': 0.265625}, 'combined': 0.17708333333333331, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.358695652173913, 'f1': 0.3666666666666666}, 'combined': 0.1833333333333333, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.1724137931034483, 'f1': 0.22727272727272724}, 'combined': 0.1515151515151515, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32324861099239804, 'r': 0.3201817361252975, 'f1': 0.3217078645148366}, 'combined': 0.23704790016882693, 'epoch': 31} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36854991917508884, 'r': 0.2943742585468896, 'f1': 0.32731227141992336}, 'combined': 0.2171279424270779, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 31} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:49:21.002248: step: 2/466, loss: 0.0028844948392361403 2023-01-24 04:49:21.608287: step: 4/466, loss: 0.004477688577026129 2023-01-24 04:49:22.230700: step: 6/466, loss: 0.0003000227443408221 2023-01-24 04:49:22.874385: step: 8/466, loss: 1.3290955394040793e-05 2023-01-24 04:49:23.557587: step: 10/466, loss: 0.12150408327579498 2023-01-24 04:49:24.254777: step: 12/466, loss: 0.0013362518511712551 2023-01-24 04:49:24.909681: step: 14/466, loss: 0.005980302579700947 2023-01-24 04:49:25.496046: step: 16/466, loss: 0.00017088482854887843 2023-01-24 04:49:26.105961: step: 18/466, loss: 0.0010265213204547763 2023-01-24 04:49:26.701503: step: 20/466, loss: 0.0037992754951119423 2023-01-24 04:49:27.302374: step: 22/466, loss: 0.0003503831976559013 2023-01-24 04:49:27.951330: step: 24/466, loss: 0.009516526944935322 2023-01-24 04:49:28.541118: step: 26/466, loss: 0.0005678251036442816 2023-01-24 04:49:29.157446: step: 28/466, loss: 0.0016911040293052793 2023-01-24 04:49:29.725466: step: 30/466, loss: 0.010018992237746716 2023-01-24 04:49:30.305984: step: 32/466, loss: 0.002980363555252552 2023-01-24 04:49:30.912887: step: 34/466, loss: 0.0024216691963374615 2023-01-24 04:49:31.553367: step: 36/466, loss: 0.020891092717647552 2023-01-24 04:49:32.212200: step: 38/466, loss: 0.010172278620302677 2023-01-24 04:49:32.795961: step: 40/466, loss: 0.03021528571844101 2023-01-24 04:49:33.418102: step: 42/466, loss: 0.0006058058934286237 2023-01-24 04:49:34.015562: step: 44/466, loss: 2.0629704522434622e-05 2023-01-24 04:49:34.651926: step: 46/466, loss: 0.0018936379346996546 2023-01-24 04:49:35.295658: step: 48/466, loss: 0.04245248809456825 2023-01-24 04:49:35.908310: step: 50/466, loss: 0.022075532004237175 2023-01-24 04:49:36.575189: step: 52/466, loss: 0.009132004342973232 2023-01-24 04:49:37.250732: step: 54/466, loss: 0.003254002658650279 2023-01-24 04:49:37.819114: step: 56/466, loss: 0.002954368479549885 2023-01-24 04:49:38.386616: step: 58/466, loss: 0.009963125921785831 2023-01-24 04:49:39.005684: step: 60/466, loss: 0.001487000030465424 2023-01-24 04:49:39.600584: step: 62/466, loss: 0.00010598334483802319 2023-01-24 04:49:40.292293: step: 64/466, loss: 0.007556512951850891 2023-01-24 04:49:40.919466: step: 66/466, loss: 0.002061608247458935 2023-01-24 04:49:41.502350: step: 68/466, loss: 0.06095759943127632 2023-01-24 04:49:42.176432: step: 70/466, loss: 0.03266202658414841 2023-01-24 04:49:42.761403: step: 72/466, loss: 0.03090989775955677 2023-01-24 04:49:43.406095: step: 74/466, loss: 0.0016189554007723927 2023-01-24 04:49:43.990316: step: 76/466, loss: 0.059345971792936325 2023-01-24 04:49:44.621179: step: 78/466, loss: 0.0012508289655670524 2023-01-24 04:49:45.237392: step: 80/466, loss: 0.0017027511494234204 2023-01-24 04:49:45.858625: step: 82/466, loss: 0.08864498883485794 2023-01-24 04:49:46.559882: step: 84/466, loss: 0.008869364857673645 2023-01-24 04:49:47.234108: step: 86/466, loss: 0.1268760859966278 2023-01-24 04:49:47.846418: step: 88/466, loss: 0.004266493953764439 2023-01-24 04:49:48.469074: step: 90/466, loss: 0.0001659767294768244 2023-01-24 04:49:49.131254: step: 92/466, loss: 0.0091606630012393 2023-01-24 04:49:49.777929: step: 94/466, loss: 0.011976691894233227 2023-01-24 04:49:50.300933: step: 96/466, loss: 0.0005975335370749235 2023-01-24 04:49:50.944381: step: 98/466, loss: 0.012866249307990074 2023-01-24 04:49:51.512229: step: 100/466, loss: 0.00027132805553264916 2023-01-24 04:49:52.110521: step: 102/466, loss: 0.003607760416343808 2023-01-24 04:49:52.679957: step: 104/466, loss: 0.004815015941858292 2023-01-24 04:49:53.244439: step: 106/466, loss: 9.031585796037689e-05 2023-01-24 04:49:53.839900: step: 108/466, loss: 0.002620894927531481 2023-01-24 04:49:54.406126: step: 110/466, loss: 0.017637314274907112 2023-01-24 04:49:54.994534: step: 112/466, loss: 0.0327749028801918 2023-01-24 04:49:55.590526: step: 114/466, loss: 0.05157430097460747 2023-01-24 04:49:56.187917: step: 116/466, loss: 0.01633082889020443 2023-01-24 04:49:56.892677: step: 118/466, loss: 0.00734519399702549 2023-01-24 04:49:57.513749: step: 120/466, loss: 0.43789926171302795 2023-01-24 04:49:58.138862: step: 122/466, loss: 0.005928925704210997 2023-01-24 04:49:58.752836: step: 124/466, loss: 0.0004282771551515907 2023-01-24 04:49:59.318182: step: 126/466, loss: 0.011000387370586395 2023-01-24 04:49:59.894103: step: 128/466, loss: 0.005932093132287264 2023-01-24 04:50:00.592954: step: 130/466, loss: 0.4482037425041199 2023-01-24 04:50:01.239888: step: 132/466, loss: 0.01929701678454876 2023-01-24 04:50:01.908076: step: 134/466, loss: 0.047041188925504684 2023-01-24 04:50:02.515142: step: 136/466, loss: 0.0006318792584352195 2023-01-24 04:50:03.140956: step: 138/466, loss: 0.0014515618095174432 2023-01-24 04:50:03.802137: step: 140/466, loss: 0.0021152058616280556 2023-01-24 04:50:04.409328: step: 142/466, loss: 0.0003849035711027682 2023-01-24 04:50:05.050465: step: 144/466, loss: 0.026079481467604637 2023-01-24 04:50:05.680161: step: 146/466, loss: 0.013263884000480175 2023-01-24 04:50:06.277369: step: 148/466, loss: 0.030426165089011192 2023-01-24 04:50:06.922022: step: 150/466, loss: 0.0001940604270203039 2023-01-24 04:50:07.529453: step: 152/466, loss: 0.009219626896083355 2023-01-24 04:50:08.089942: step: 154/466, loss: 0.00032124348217621446 2023-01-24 04:50:08.720090: step: 156/466, loss: 0.011430817656219006 2023-01-24 04:50:09.423762: step: 158/466, loss: 0.11611834913492203 2023-01-24 04:50:10.054600: step: 160/466, loss: 0.000801683112513274 2023-01-24 04:50:10.747111: step: 162/466, loss: 0.0005692451959475875 2023-01-24 04:50:11.338473: step: 164/466, loss: 0.00034218060318380594 2023-01-24 04:50:11.978104: step: 166/466, loss: 0.005028598476201296 2023-01-24 04:50:12.583610: step: 168/466, loss: 0.0063055288046598434 2023-01-24 04:50:13.245504: step: 170/466, loss: 0.006019635125994682 2023-01-24 04:50:13.913852: step: 172/466, loss: 0.00036249557160772383 2023-01-24 04:50:14.536262: step: 174/466, loss: 0.003946827258914709 2023-01-24 04:50:15.227342: step: 176/466, loss: 0.004646969959139824 2023-01-24 04:50:15.902068: step: 178/466, loss: 0.01717056892812252 2023-01-24 04:50:16.502075: step: 180/466, loss: 0.008211605250835419 2023-01-24 04:50:17.255349: step: 182/466, loss: 0.0012590755941346288 2023-01-24 04:50:17.850677: step: 184/466, loss: 0.00900521595031023 2023-01-24 04:50:18.423978: step: 186/466, loss: 0.013704388402402401 2023-01-24 04:50:19.021960: step: 188/466, loss: 0.1032930538058281 2023-01-24 04:50:19.585839: step: 190/466, loss: 0.0007142267422750592 2023-01-24 04:50:20.194503: step: 192/466, loss: 0.001518684788607061 2023-01-24 04:50:20.768303: step: 194/466, loss: 0.041061513125896454 2023-01-24 04:50:21.345163: step: 196/466, loss: 0.00041243646410293877 2023-01-24 04:50:21.963964: step: 198/466, loss: 0.14034345746040344 2023-01-24 04:50:22.581875: step: 200/466, loss: 0.0006125233485363424 2023-01-24 04:50:23.210154: step: 202/466, loss: 0.007131355348974466 2023-01-24 04:50:23.831361: step: 204/466, loss: 0.0033776569180190563 2023-01-24 04:50:24.457382: step: 206/466, loss: 0.1322488784790039 2023-01-24 04:50:25.109347: step: 208/466, loss: 0.00819089449942112 2023-01-24 04:50:25.792845: step: 210/466, loss: 0.04165108874440193 2023-01-24 04:50:26.392134: step: 212/466, loss: 0.00016025331569835544 2023-01-24 04:50:27.051866: step: 214/466, loss: 0.0014197358395904303 2023-01-24 04:50:27.691806: step: 216/466, loss: 0.013069494627416134 2023-01-24 04:50:28.313101: step: 218/466, loss: 0.05171070992946625 2023-01-24 04:50:28.917889: step: 220/466, loss: 0.00040108899702318013 2023-01-24 04:50:29.608991: step: 222/466, loss: 0.0028013801202178 2023-01-24 04:50:30.209311: step: 224/466, loss: 0.0014411675510928035 2023-01-24 04:50:30.858757: step: 226/466, loss: 0.01950146071612835 2023-01-24 04:50:31.461685: step: 228/466, loss: 0.0007650554762221873 2023-01-24 04:50:32.144185: step: 230/466, loss: 0.022157352417707443 2023-01-24 04:50:32.752197: step: 232/466, loss: 0.3453446626663208 2023-01-24 04:50:33.431656: step: 234/466, loss: 0.013007968664169312 2023-01-24 04:50:34.072546: step: 236/466, loss: 0.009503094479441643 2023-01-24 04:50:34.716011: step: 238/466, loss: 0.0013482423964887857 2023-01-24 04:50:35.405431: step: 240/466, loss: 0.0011200150474905968 2023-01-24 04:50:36.005433: step: 242/466, loss: 0.0030352682806551456 2023-01-24 04:50:36.622755: step: 244/466, loss: 0.012318750843405724 2023-01-24 04:50:37.220786: step: 246/466, loss: 0.0367109552025795 2023-01-24 04:50:37.794550: step: 248/466, loss: 2.467216290824581e-05 2023-01-24 04:50:38.417292: step: 250/466, loss: 0.017559640109539032 2023-01-24 04:50:39.202317: step: 252/466, loss: 0.006461227312684059 2023-01-24 04:50:39.781739: step: 254/466, loss: 0.020218346267938614 2023-01-24 04:50:40.553238: step: 256/466, loss: 0.025049136951565742 2023-01-24 04:50:41.150398: step: 258/466, loss: 0.0018531163223087788 2023-01-24 04:50:41.767910: step: 260/466, loss: 0.0019890430849045515 2023-01-24 04:50:42.407644: step: 262/466, loss: 0.02820025011897087 2023-01-24 04:50:43.059269: step: 264/466, loss: 0.03085833229124546 2023-01-24 04:50:43.645464: step: 266/466, loss: 0.02268262393772602 2023-01-24 04:50:44.278143: step: 268/466, loss: 0.013150263577699661 2023-01-24 04:50:44.903242: step: 270/466, loss: 0.004210530314594507 2023-01-24 04:50:45.558804: step: 272/466, loss: 0.011209763586521149 2023-01-24 04:50:46.180690: step: 274/466, loss: 0.0016858786111697555 2023-01-24 04:50:46.805131: step: 276/466, loss: 0.0133918896317482 2023-01-24 04:50:47.408258: step: 278/466, loss: 0.022778602316975594 2023-01-24 04:50:48.053378: step: 280/466, loss: 0.0009834776865318418 2023-01-24 04:50:48.575661: step: 282/466, loss: 0.006443210877478123 2023-01-24 04:50:49.195593: step: 284/466, loss: 0.00044808301026932895 2023-01-24 04:50:49.750648: step: 286/466, loss: 0.001966314623132348 2023-01-24 04:50:50.370149: step: 288/466, loss: 0.032862693071365356 2023-01-24 04:50:50.958210: step: 290/466, loss: 0.002843247726559639 2023-01-24 04:50:51.520446: step: 292/466, loss: 0.00019092507136519998 2023-01-24 04:50:52.116323: step: 294/466, loss: 0.0997622087597847 2023-01-24 04:50:52.698036: step: 296/466, loss: 0.04326612129807472 2023-01-24 04:50:53.287126: step: 298/466, loss: 4.8781112127471715e-05 2023-01-24 04:50:53.875240: step: 300/466, loss: 0.0007119726506061852 2023-01-24 04:50:54.518717: step: 302/466, loss: 0.013141287490725517 2023-01-24 04:50:55.122890: step: 304/466, loss: 0.0002665870124474168 2023-01-24 04:50:55.696479: step: 306/466, loss: 0.006385975982993841 2023-01-24 04:50:56.274591: step: 308/466, loss: 0.02065122500061989 2023-01-24 04:50:56.930535: step: 310/466, loss: 0.6212195754051208 2023-01-24 04:50:57.661051: step: 312/466, loss: 0.03992288187146187 2023-01-24 04:50:58.282818: step: 314/466, loss: 0.0023537592496722937 2023-01-24 04:50:58.873653: step: 316/466, loss: 0.0005659500602632761 2023-01-24 04:50:59.451934: step: 318/466, loss: 0.06017843261361122 2023-01-24 04:51:00.118148: step: 320/466, loss: 0.007353016175329685 2023-01-24 04:51:00.747177: step: 322/466, loss: 0.00731989461928606 2023-01-24 04:51:01.328970: step: 324/466, loss: 0.0017221017042174935 2023-01-24 04:51:01.933101: step: 326/466, loss: 0.014298143796622753 2023-01-24 04:51:02.583120: step: 328/466, loss: 0.0008118084515444934 2023-01-24 04:51:03.228592: step: 330/466, loss: 0.0007438024040311575 2023-01-24 04:51:03.836058: step: 332/466, loss: 0.0015993744600564241 2023-01-24 04:51:04.452509: step: 334/466, loss: 0.0005670600803568959 2023-01-24 04:51:05.101807: step: 336/466, loss: 0.000510596320964396 2023-01-24 04:51:05.812362: step: 338/466, loss: 0.04210871458053589 2023-01-24 04:51:06.481925: step: 340/466, loss: 0.004334737546741962 2023-01-24 04:51:07.118469: step: 342/466, loss: 0.025569146499037743 2023-01-24 04:51:07.747412: step: 344/466, loss: 0.0036312583833932877 2023-01-24 04:51:08.396736: step: 346/466, loss: 0.050589319318532944 2023-01-24 04:51:09.022617: step: 348/466, loss: 0.006575300358235836 2023-01-24 04:51:09.555542: step: 350/466, loss: 0.00393358338624239 2023-01-24 04:51:10.153043: step: 352/466, loss: 0.004188355058431625 2023-01-24 04:51:10.775010: step: 354/466, loss: 0.022143129259347916 2023-01-24 04:51:11.376628: step: 356/466, loss: 0.011643332429230213 2023-01-24 04:51:12.058103: step: 358/466, loss: 0.015739183872938156 2023-01-24 04:51:12.683497: step: 360/466, loss: 0.006921074818819761 2023-01-24 04:51:13.390577: step: 362/466, loss: 0.021753234788775444 2023-01-24 04:51:14.013392: step: 364/466, loss: 0.010252373293042183 2023-01-24 04:51:14.596444: step: 366/466, loss: 0.004911156836897135 2023-01-24 04:51:15.229630: step: 368/466, loss: 0.0020704721100628376 2023-01-24 04:51:15.755169: step: 370/466, loss: 0.01156570389866829 2023-01-24 04:51:16.323074: step: 372/466, loss: 0.010733842849731445 2023-01-24 04:51:16.934187: step: 374/466, loss: 0.47845223546028137 2023-01-24 04:51:17.524001: step: 376/466, loss: 0.002075807424262166 2023-01-24 04:51:18.199149: step: 378/466, loss: 0.15949863195419312 2023-01-24 04:51:18.868905: step: 380/466, loss: 0.004357450176030397 2023-01-24 04:51:19.538542: step: 382/466, loss: 0.002608703449368477 2023-01-24 04:51:20.128188: step: 384/466, loss: 0.00025477929739281535 2023-01-24 04:51:20.730421: step: 386/466, loss: 0.014991780743002892 2023-01-24 04:51:21.335937: step: 388/466, loss: 0.020069321617484093 2023-01-24 04:51:21.979576: step: 390/466, loss: 0.014262584038078785 2023-01-24 04:51:22.601935: step: 392/466, loss: 0.05203903093934059 2023-01-24 04:51:23.258035: step: 394/466, loss: 0.11401324719190598 2023-01-24 04:51:23.875141: step: 396/466, loss: 0.039628949016332626 2023-01-24 04:51:24.525682: step: 398/466, loss: 6.076023055356927e-05 2023-01-24 04:51:25.112336: step: 400/466, loss: 0.01817982643842697 2023-01-24 04:51:25.726314: step: 402/466, loss: 0.0074476925656199455 2023-01-24 04:51:26.310255: step: 404/466, loss: 0.0016515926690772176 2023-01-24 04:51:26.900206: step: 406/466, loss: 0.05185672640800476 2023-01-24 04:51:27.495457: step: 408/466, loss: 0.010113752447068691 2023-01-24 04:51:28.090371: step: 410/466, loss: 0.00014719483442604542 2023-01-24 04:51:28.687538: step: 412/466, loss: 0.0003996268496848643 2023-01-24 04:51:29.367130: step: 414/466, loss: 0.009516970254480839 2023-01-24 04:51:29.985651: step: 416/466, loss: 0.07272463291883469 2023-01-24 04:51:30.588732: step: 418/466, loss: 0.023695437237620354 2023-01-24 04:51:31.147138: step: 420/466, loss: 0.01535420399159193 2023-01-24 04:51:31.754567: step: 422/466, loss: 0.000404239195631817 2023-01-24 04:51:32.322736: step: 424/466, loss: 0.0014723282074555755 2023-01-24 04:51:32.915636: step: 426/466, loss: 4.463467121240683e-05 2023-01-24 04:51:33.566218: step: 428/466, loss: 0.009595575742423534 2023-01-24 04:51:34.194009: step: 430/466, loss: 0.006680773105472326 2023-01-24 04:51:34.778337: step: 432/466, loss: 0.001256832154467702 2023-01-24 04:51:35.410798: step: 434/466, loss: 0.06298192590475082 2023-01-24 04:51:36.080694: step: 436/466, loss: 0.008738339878618717 2023-01-24 04:51:36.787463: step: 438/466, loss: 0.013840259052813053 2023-01-24 04:51:37.457158: step: 440/466, loss: 0.0010939267231151462 2023-01-24 04:51:38.041462: step: 442/466, loss: 0.007236401084810495 2023-01-24 04:51:38.625922: step: 444/466, loss: 0.015179070644080639 2023-01-24 04:51:39.203147: step: 446/466, loss: 0.0005235313437879086 2023-01-24 04:51:39.771411: step: 448/466, loss: 0.0018311061430722475 2023-01-24 04:51:40.350422: step: 450/466, loss: 0.003520967671647668 2023-01-24 04:51:40.918252: step: 452/466, loss: 0.002411621157079935 2023-01-24 04:51:41.547664: step: 454/466, loss: 7.351519161602482e-05 2023-01-24 04:51:42.200044: step: 456/466, loss: 0.025556499138474464 2023-01-24 04:51:42.901910: step: 458/466, loss: 0.06181247904896736 2023-01-24 04:51:43.531261: step: 460/466, loss: 0.0001501685765106231 2023-01-24 04:51:44.129626: step: 462/466, loss: 0.00013221567496657372 2023-01-24 04:51:44.683403: step: 464/466, loss: 8.678200538270175e-05 2023-01-24 04:51:45.269230: step: 466/466, loss: 0.002673679729923606 2023-01-24 04:51:45.834918: step: 468/466, loss: 0.024993278086185455 2023-01-24 04:51:46.450227: step: 470/466, loss: 0.004244156647473574 2023-01-24 04:51:47.154103: step: 472/466, loss: 0.0019127581035718322 2023-01-24 04:51:47.713173: step: 474/466, loss: 5.943119504081551e-06 2023-01-24 04:51:48.302912: step: 476/466, loss: 0.04415856674313545 2023-01-24 04:51:48.942861: step: 478/466, loss: 0.008284655399620533 2023-01-24 04:51:49.511480: step: 480/466, loss: 0.01660160906612873 2023-01-24 04:51:50.187183: step: 482/466, loss: 0.006573822349309921 2023-01-24 04:51:50.828335: step: 484/466, loss: 0.7001836895942688 2023-01-24 04:51:51.431691: step: 486/466, loss: 0.003216114128008485 2023-01-24 04:51:52.089190: step: 488/466, loss: 6.530083192046732e-05 2023-01-24 04:51:52.654098: step: 490/466, loss: 0.0002036297373706475 2023-01-24 04:51:53.246212: step: 492/466, loss: 0.0004934872849844396 2023-01-24 04:51:53.874226: step: 494/466, loss: 0.011126363649964333 2023-01-24 04:51:54.468872: step: 496/466, loss: 0.0009767083683982491 2023-01-24 04:51:55.076514: step: 498/466, loss: 0.0007676812238059938 2023-01-24 04:51:55.726303: step: 500/466, loss: 0.03731703385710716 2023-01-24 04:51:56.330743: step: 502/466, loss: 0.009170123375952244 2023-01-24 04:51:56.966632: step: 504/466, loss: 0.0324895866215229 2023-01-24 04:51:57.550203: step: 506/466, loss: 0.0021355722565203905 2023-01-24 04:51:58.190789: step: 508/466, loss: 0.005301428027451038 2023-01-24 04:51:58.917301: step: 510/466, loss: 0.015567069873213768 2023-01-24 04:51:59.492289: step: 512/466, loss: 0.0011757559841498733 2023-01-24 04:52:00.194099: step: 514/466, loss: 0.00753001356497407 2023-01-24 04:52:00.779582: step: 516/466, loss: 0.00019986425468232483 2023-01-24 04:52:01.432549: step: 518/466, loss: 0.02854587510228157 2023-01-24 04:52:02.075611: step: 520/466, loss: 0.001624485943466425 2023-01-24 04:52:02.689094: step: 522/466, loss: 0.07426968216896057 2023-01-24 04:52:03.274807: step: 524/466, loss: 0.05831073224544525 2023-01-24 04:52:03.883289: step: 526/466, loss: 0.025548964738845825 2023-01-24 04:52:04.554038: step: 528/466, loss: 0.055001456290483475 2023-01-24 04:52:05.196844: step: 530/466, loss: 0.001785986009053886 2023-01-24 04:52:05.781535: step: 532/466, loss: 0.009283631108701229 2023-01-24 04:52:06.470431: step: 534/466, loss: 0.006106048356741667 2023-01-24 04:52:06.998218: step: 536/466, loss: 0.00019283413712400943 2023-01-24 04:52:07.607767: step: 538/466, loss: 0.055529121309518814 2023-01-24 04:52:08.241440: step: 540/466, loss: 0.05571475997567177 2023-01-24 04:52:08.887880: step: 542/466, loss: 0.035128843039274216 2023-01-24 04:52:09.513010: step: 544/466, loss: 0.00015004878514446318 2023-01-24 04:52:10.125666: step: 546/466, loss: 0.01245911605656147 2023-01-24 04:52:10.738638: step: 548/466, loss: 0.0021256059408187866 2023-01-24 04:52:11.327571: step: 550/466, loss: 0.00042605085764080286 2023-01-24 04:52:11.927140: step: 552/466, loss: 0.00026078903465531766 2023-01-24 04:52:12.527145: step: 554/466, loss: 0.006998375058174133 2023-01-24 04:52:13.093429: step: 556/466, loss: 0.025624988600611687 2023-01-24 04:52:13.786023: step: 558/466, loss: 0.055525388568639755 2023-01-24 04:52:14.491580: step: 560/466, loss: 0.04241339489817619 2023-01-24 04:52:15.113509: step: 562/466, loss: 0.000571149168536067 2023-01-24 04:52:15.817640: step: 564/466, loss: 0.003444262547418475 2023-01-24 04:52:16.455942: step: 566/466, loss: 4.600992724590469e-06 2023-01-24 04:52:17.042423: step: 568/466, loss: 0.032708849757909775 2023-01-24 04:52:17.631621: step: 570/466, loss: 0.038914408534765244 2023-01-24 04:52:18.205954: step: 572/466, loss: 0.010325398296117783 2023-01-24 04:52:18.846433: step: 574/466, loss: 0.01143960002809763 2023-01-24 04:52:19.469444: step: 576/466, loss: 0.030573971569538116 2023-01-24 04:52:20.059531: step: 578/466, loss: 0.018395937979221344 2023-01-24 04:52:20.707672: step: 580/466, loss: 0.030351920053362846 2023-01-24 04:52:21.361401: step: 582/466, loss: 0.008664535358548164 2023-01-24 04:52:22.027528: step: 584/466, loss: 0.009360837750136852 2023-01-24 04:52:22.658763: step: 586/466, loss: 0.0016620747046545148 2023-01-24 04:52:23.327615: step: 588/466, loss: 0.030544668436050415 2023-01-24 04:52:23.897398: step: 590/466, loss: 0.006064895074814558 2023-01-24 04:52:24.527472: step: 592/466, loss: 0.009367172606289387 2023-01-24 04:52:25.327105: step: 594/466, loss: 0.005620704032480717 2023-01-24 04:52:25.933439: step: 596/466, loss: 0.20808552205562592 2023-01-24 04:52:26.512693: step: 598/466, loss: 0.00129466294310987 2023-01-24 04:52:27.110057: step: 600/466, loss: 0.00414486788213253 2023-01-24 04:52:27.798245: step: 602/466, loss: 0.0697261318564415 2023-01-24 04:52:28.403273: step: 604/466, loss: 0.0014271108666434884 2023-01-24 04:52:29.002617: step: 606/466, loss: 0.0017695369897410274 2023-01-24 04:52:29.561516: step: 608/466, loss: 0.00022417650325223804 2023-01-24 04:52:30.167225: step: 610/466, loss: 0.005572855472564697 2023-01-24 04:52:30.795707: step: 612/466, loss: 0.009449760429561138 2023-01-24 04:52:31.436150: step: 614/466, loss: 0.010749047622084618 2023-01-24 04:52:32.005314: step: 616/466, loss: 0.04724499210715294 2023-01-24 04:52:32.712034: step: 618/466, loss: 0.0005491743795573711 2023-01-24 04:52:33.316708: step: 620/466, loss: 0.01939254254102707 2023-01-24 04:52:33.926120: step: 622/466, loss: 0.010335844941437244 2023-01-24 04:52:34.512429: step: 624/466, loss: 0.02881159819662571 2023-01-24 04:52:35.095542: step: 626/466, loss: 0.002635092008858919 2023-01-24 04:52:35.717557: step: 628/466, loss: 0.02328282594680786 2023-01-24 04:52:36.322530: step: 630/466, loss: 0.009212059900164604 2023-01-24 04:52:36.874516: step: 632/466, loss: 0.00025175249902531505 2023-01-24 04:52:37.527449: step: 634/466, loss: 0.00782365258783102 2023-01-24 04:52:38.083278: step: 636/466, loss: 0.015573399141430855 2023-01-24 04:52:38.729594: step: 638/466, loss: 0.001913038082420826 2023-01-24 04:52:39.329328: step: 640/466, loss: 1.5752522813272662e-05 2023-01-24 04:52:39.931499: step: 642/466, loss: 0.007720749359577894 2023-01-24 04:52:40.593200: step: 644/466, loss: 8.122907456709072e-05 2023-01-24 04:52:41.171488: step: 646/466, loss: 0.0023835143074393272 2023-01-24 04:52:41.769350: step: 648/466, loss: 0.0002880166284739971 2023-01-24 04:52:42.397538: step: 650/466, loss: 0.013356372714042664 2023-01-24 04:52:43.026527: step: 652/466, loss: 0.04252351447939873 2023-01-24 04:52:43.669325: step: 654/466, loss: 0.0007380394963547587 2023-01-24 04:52:44.307159: step: 656/466, loss: 0.029463758692145348 2023-01-24 04:52:44.891869: step: 658/466, loss: 7.94878214946948e-05 2023-01-24 04:52:45.580182: step: 660/466, loss: 0.006463555619120598 2023-01-24 04:52:46.152149: step: 662/466, loss: 5.085931843495928e-05 2023-01-24 04:52:46.804652: step: 664/466, loss: 0.0656449943780899 2023-01-24 04:52:47.399315: step: 666/466, loss: 7.274935342138633e-05 2023-01-24 04:52:47.984342: step: 668/466, loss: 0.0009882092708721757 2023-01-24 04:52:48.624442: step: 670/466, loss: 0.008753130212426186 2023-01-24 04:52:49.222554: step: 672/466, loss: 0.025208454579114914 2023-01-24 04:52:49.777288: step: 674/466, loss: 0.00046038933214731514 2023-01-24 04:52:50.351264: step: 676/466, loss: 0.04523847997188568 2023-01-24 04:52:51.026748: step: 678/466, loss: 0.04012119024991989 2023-01-24 04:52:51.603847: step: 680/466, loss: 1.3588703950517811e-05 2023-01-24 04:52:52.215982: step: 682/466, loss: 0.004497538786381483 2023-01-24 04:52:52.889689: step: 684/466, loss: 0.0072309295646846294 2023-01-24 04:52:53.533207: step: 686/466, loss: 0.000592927448451519 2023-01-24 04:52:54.143451: step: 688/466, loss: 0.0049162693321704865 2023-01-24 04:52:54.773318: step: 690/466, loss: 0.03505634889006615 2023-01-24 04:52:55.373199: step: 692/466, loss: 0.004243789240717888 2023-01-24 04:52:56.008006: step: 694/466, loss: 0.0038029218558222055 2023-01-24 04:52:56.629881: step: 696/466, loss: 0.3729206919670105 2023-01-24 04:52:57.245217: step: 698/466, loss: 0.002357631688937545 2023-01-24 04:52:57.830543: step: 700/466, loss: 0.005631424952298403 2023-01-24 04:52:58.452457: step: 702/466, loss: 0.001534263021312654 2023-01-24 04:52:59.066078: step: 704/466, loss: 0.027184147387742996 2023-01-24 04:52:59.621611: step: 706/466, loss: 0.00972423143684864 2023-01-24 04:53:00.190702: step: 708/466, loss: 0.0018614258151501417 2023-01-24 04:53:00.835645: step: 710/466, loss: 0.014886019751429558 2023-01-24 04:53:01.450752: step: 712/466, loss: 0.0006989394314587116 2023-01-24 04:53:02.044183: step: 714/466, loss: 0.08167250454425812 2023-01-24 04:53:02.652221: step: 716/466, loss: 0.005497378297150135 2023-01-24 04:53:03.304401: step: 718/466, loss: 0.021845893934369087 2023-01-24 04:53:03.926216: step: 720/466, loss: 0.0031495019793510437 2023-01-24 04:53:04.539705: step: 722/466, loss: 0.003444172441959381 2023-01-24 04:53:05.239962: step: 724/466, loss: 0.10092519968748093 2023-01-24 04:53:05.828941: step: 726/466, loss: 0.018645109608769417 2023-01-24 04:53:06.428771: step: 728/466, loss: 0.010404352098703384 2023-01-24 04:53:07.130354: step: 730/466, loss: 0.11366147547960281 2023-01-24 04:53:07.828720: step: 732/466, loss: 0.009912433102726936 2023-01-24 04:53:08.432953: step: 734/466, loss: 0.0349004864692688 2023-01-24 04:53:09.044825: step: 736/466, loss: 0.001815163530409336 2023-01-24 04:53:09.629195: step: 738/466, loss: 0.004219917114824057 2023-01-24 04:53:10.250056: step: 740/466, loss: 0.08490953594446182 2023-01-24 04:53:10.837700: step: 742/466, loss: 0.0011273091658949852 2023-01-24 04:53:11.463550: step: 744/466, loss: 0.004046041984111071 2023-01-24 04:53:12.107343: step: 746/466, loss: 0.0011817996855825186 2023-01-24 04:53:12.718683: step: 748/466, loss: 0.01741914264857769 2023-01-24 04:53:13.359795: step: 750/466, loss: 5.113750012242235e-05 2023-01-24 04:53:13.954472: step: 752/466, loss: 0.0561121366918087 2023-01-24 04:53:14.526562: step: 754/466, loss: 0.0012292331084609032 2023-01-24 04:53:15.129643: step: 756/466, loss: 0.0034764278680086136 2023-01-24 04:53:15.741199: step: 758/466, loss: 0.0012881318107247353 2023-01-24 04:53:16.455472: step: 760/466, loss: 0.0566745400428772 2023-01-24 04:53:17.094024: step: 762/466, loss: 0.014587437734007835 2023-01-24 04:53:17.716250: step: 764/466, loss: 0.005054814741015434 2023-01-24 04:53:18.275938: step: 766/466, loss: 0.0002718236646614969 2023-01-24 04:53:18.942490: step: 768/466, loss: 9.154299914371222e-05 2023-01-24 04:53:19.561005: step: 770/466, loss: 0.014392387121915817 2023-01-24 04:53:20.223773: step: 772/466, loss: 0.03583939000964165 2023-01-24 04:53:20.842848: step: 774/466, loss: 0.0008179117576219141 2023-01-24 04:53:21.502643: step: 776/466, loss: 0.0015706622507423162 2023-01-24 04:53:22.112971: step: 778/466, loss: 0.0004586191789712757 2023-01-24 04:53:22.689735: step: 780/466, loss: 0.0033738790079951286 2023-01-24 04:53:23.369145: step: 782/466, loss: 0.00022474599245470017 2023-01-24 04:53:23.960027: step: 784/466, loss: 0.04620259627699852 2023-01-24 04:53:24.545502: step: 786/466, loss: 0.0006574672879651189 2023-01-24 04:53:25.207058: step: 788/466, loss: 0.007191939279437065 2023-01-24 04:53:25.846328: step: 790/466, loss: 0.12468365579843521 2023-01-24 04:53:26.421551: step: 792/466, loss: 0.08092932403087616 2023-01-24 04:53:27.012725: step: 794/466, loss: 0.13907642662525177 2023-01-24 04:53:27.637016: step: 796/466, loss: 0.06445979326963425 2023-01-24 04:53:28.248387: step: 798/466, loss: 0.00036164215998724103 2023-01-24 04:53:28.886674: step: 800/466, loss: 0.0062723662704229355 2023-01-24 04:53:29.521457: step: 802/466, loss: 0.21664755046367645 2023-01-24 04:53:30.112419: step: 804/466, loss: 0.00553386053070426 2023-01-24 04:53:30.724941: step: 806/466, loss: 0.004248048644512892 2023-01-24 04:53:31.343891: step: 808/466, loss: 0.00211940030567348 2023-01-24 04:53:31.958542: step: 810/466, loss: 0.049605898559093475 2023-01-24 04:53:32.541048: step: 812/466, loss: 0.917293906211853 2023-01-24 04:53:33.111599: step: 814/466, loss: 0.0008797876071184874 2023-01-24 04:53:33.749569: step: 816/466, loss: 0.02780606597661972 2023-01-24 04:53:34.376463: step: 818/466, loss: 0.06368755549192429 2023-01-24 04:53:35.042121: step: 820/466, loss: 0.03188211843371391 2023-01-24 04:53:35.759787: step: 822/466, loss: 0.02889896184206009 2023-01-24 04:53:36.325054: step: 824/466, loss: 0.020797166973352432 2023-01-24 04:53:36.926994: step: 826/466, loss: 0.0011842402163892984 2023-01-24 04:53:37.536704: step: 828/466, loss: 0.005489708855748177 2023-01-24 04:53:38.155224: step: 830/466, loss: 0.019772449508309364 2023-01-24 04:53:38.810464: step: 832/466, loss: 0.03640015423297882 2023-01-24 04:53:39.409603: step: 834/466, loss: 0.0011282063787803054 2023-01-24 04:53:40.085447: step: 836/466, loss: 0.0023520293179899454 2023-01-24 04:53:40.723223: step: 838/466, loss: 0.020443731918931007 2023-01-24 04:53:41.268806: step: 840/466, loss: 0.000996671849861741 2023-01-24 04:53:41.931452: step: 842/466, loss: 0.02468843385577202 2023-01-24 04:53:42.674193: step: 844/466, loss: 0.0001617139350855723 2023-01-24 04:53:43.299763: step: 846/466, loss: 0.010490602813661098 2023-01-24 04:53:43.888523: step: 848/466, loss: 0.000732666056137532 2023-01-24 04:53:44.445563: step: 850/466, loss: 0.02549736574292183 2023-01-24 04:53:45.052927: step: 852/466, loss: 0.06734530627727509 2023-01-24 04:53:45.675211: step: 854/466, loss: 0.015717370435595512 2023-01-24 04:53:46.325398: step: 856/466, loss: 0.05792534723877907 2023-01-24 04:53:46.905031: step: 858/466, loss: 0.000274793739663437 2023-01-24 04:53:47.487798: step: 860/466, loss: 0.001243875245563686 2023-01-24 04:53:48.139758: step: 862/466, loss: 0.0016297302208840847 2023-01-24 04:53:48.732961: step: 864/466, loss: 0.017735572531819344 2023-01-24 04:53:49.343422: step: 866/466, loss: 0.00045983216841705143 2023-01-24 04:53:49.970555: step: 868/466, loss: 0.0008117897668853402 2023-01-24 04:53:50.575556: step: 870/466, loss: 0.005976210348308086 2023-01-24 04:53:51.173970: step: 872/466, loss: 0.034743718802928925 2023-01-24 04:53:51.819567: step: 874/466, loss: 0.002306940034031868 2023-01-24 04:53:52.397222: step: 876/466, loss: 0.004866317845880985 2023-01-24 04:53:52.995333: step: 878/466, loss: 0.005829141475260258 2023-01-24 04:53:53.598578: step: 880/466, loss: 5.3960906370775774e-05 2023-01-24 04:53:54.158511: step: 882/466, loss: 0.03501942381262779 2023-01-24 04:53:54.790679: step: 884/466, loss: 0.00935305655002594 2023-01-24 04:53:55.379644: step: 886/466, loss: 0.001111685298383236 2023-01-24 04:53:56.031657: step: 888/466, loss: 0.009631190448999405 2023-01-24 04:53:56.582434: step: 890/466, loss: 0.00010575946362223476 2023-01-24 04:53:57.181514: step: 892/466, loss: 0.0009285346022807062 2023-01-24 04:53:57.777023: step: 894/466, loss: 0.3155493438243866 2023-01-24 04:53:58.329144: step: 896/466, loss: 0.0023564333096146584 2023-01-24 04:53:58.944037: step: 898/466, loss: 0.011746902018785477 2023-01-24 04:53:59.574616: step: 900/466, loss: 0.003073877189308405 2023-01-24 04:54:00.170075: step: 902/466, loss: 0.07641258835792542 2023-01-24 04:54:00.797759: step: 904/466, loss: 0.015829866752028465 2023-01-24 04:54:01.412540: step: 906/466, loss: 0.007194718345999718 2023-01-24 04:54:02.050218: step: 908/466, loss: 0.0010927760740742087 2023-01-24 04:54:02.634421: step: 910/466, loss: 0.00888837780803442 2023-01-24 04:54:03.251659: step: 912/466, loss: 0.12945696711540222 2023-01-24 04:54:03.918821: step: 914/466, loss: 0.002949284855276346 2023-01-24 04:54:04.521039: step: 916/466, loss: 0.01856701448559761 2023-01-24 04:54:05.144958: step: 918/466, loss: 0.010092062875628471 2023-01-24 04:54:05.731909: step: 920/466, loss: 0.0028126072138547897 2023-01-24 04:54:06.373469: step: 922/466, loss: 0.012266576290130615 2023-01-24 04:54:07.003706: step: 924/466, loss: 0.00044847832759842277 2023-01-24 04:54:07.596275: step: 926/466, loss: 0.012883850373327732 2023-01-24 04:54:08.196988: step: 928/466, loss: 0.056210801005363464 2023-01-24 04:54:08.764257: step: 930/466, loss: 0.05528941750526428 2023-01-24 04:54:09.416293: step: 932/466, loss: 0.008800864219665527 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35170971257101036, 'r': 0.3223449547851954, 'f1': 0.3363877052906891}, 'combined': 0.24786462495103406, 'epoch': 36} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.34368273176544856, 'r': 0.289351676608309, 'f1': 0.3141856851450692}, 'combined': 0.1967330925674733, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34224124002317874, 'r': 0.33055178590474, 'f1': 0.3362949636521197}, 'combined': 0.24779628900682502, 'epoch': 36} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3555200693861196, 'r': 0.30450229769623416, 'f1': 0.3280394223115483}, 'combined': 0.20329203636208626, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30252119282391077, 'r': 0.3076875889062925, 'f1': 0.3050825199503596}, 'combined': 0.22479764627921234, 'epoch': 36} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3503533654294524, 'r': 0.29301705517381865, 'f1': 0.31913034271023477}, 'combined': 0.21170032635233396, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27439024390243905, 'r': 0.32142857142857145, 'f1': 0.2960526315789474}, 'combined': 0.19736842105263158, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34375, 'r': 0.358695652173913, 'f1': 0.351063829787234}, 'combined': 0.175531914893617, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.13793103448275862, 'f1': 0.17777777777777778}, 'combined': 0.11851851851851852, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32324861099239804, 'r': 0.3201817361252975, 'f1': 0.3217078645148366}, 'combined': 0.23704790016882693, 'epoch': 31} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36854991917508884, 'r': 0.2943742585468896, 'f1': 0.32731227141992336}, 'combined': 0.2171279424270779, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 31} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:56:42.916846: step: 2/466, loss: 0.011019522324204445 2023-01-24 04:56:43.509459: step: 4/466, loss: 0.010127645917236805 2023-01-24 04:56:44.142270: step: 6/466, loss: 0.0007951930165290833 2023-01-24 04:56:44.780124: step: 8/466, loss: 0.01275271363556385 2023-01-24 04:56:45.340645: step: 10/466, loss: 0.0066956644877791405 2023-01-24 04:56:45.946593: step: 12/466, loss: 0.06437095999717712 2023-01-24 04:56:46.528631: step: 14/466, loss: 0.05077988654375076 2023-01-24 04:56:47.108390: step: 16/466, loss: 0.04438474774360657 2023-01-24 04:56:47.702233: step: 18/466, loss: 0.001817838754504919 2023-01-24 04:56:48.272518: step: 20/466, loss: 0.009348759427666664 2023-01-24 04:56:48.939392: step: 22/466, loss: 0.006619395688176155 2023-01-24 04:56:49.513193: step: 24/466, loss: 0.00030899926787242293 2023-01-24 04:56:50.148967: step: 26/466, loss: 0.003811010392382741 2023-01-24 04:56:50.851591: step: 28/466, loss: 0.05893457308411598 2023-01-24 04:56:51.425305: step: 30/466, loss: 0.014599674381315708 2023-01-24 04:56:52.090736: step: 32/466, loss: 0.4832135736942291 2023-01-24 04:56:52.634134: step: 34/466, loss: 0.0015102779725566506 2023-01-24 04:56:53.207216: step: 36/466, loss: 0.00190758949611336 2023-01-24 04:56:53.825630: step: 38/466, loss: 0.030581766739487648 2023-01-24 04:56:54.463836: step: 40/466, loss: 0.011711365543305874 2023-01-24 04:56:55.033490: step: 42/466, loss: 0.0027332899626344442 2023-01-24 04:56:55.617389: step: 44/466, loss: 0.0058179195038974285 2023-01-24 04:56:56.190718: step: 46/466, loss: 0.03327443450689316 2023-01-24 04:56:56.911297: step: 48/466, loss: 0.005963337607681751 2023-01-24 04:56:57.526312: step: 50/466, loss: 0.00114289834164083 2023-01-24 04:56:58.189397: step: 52/466, loss: 0.0360976941883564 2023-01-24 04:56:58.860189: step: 54/466, loss: 0.0014520614640787244 2023-01-24 04:56:59.482465: step: 56/466, loss: 0.008371063508093357 2023-01-24 04:56:59.999333: step: 58/466, loss: 0.0008297368767671287 2023-01-24 04:57:00.594506: step: 60/466, loss: 0.0015597924357280135 2023-01-24 04:57:01.208235: step: 62/466, loss: 0.0013233617646619678 2023-01-24 04:57:01.919587: step: 64/466, loss: 0.017565947026014328 2023-01-24 04:57:02.586279: step: 66/466, loss: 0.017326395958662033 2023-01-24 04:57:03.171295: step: 68/466, loss: 0.013437674380838871 2023-01-24 04:57:03.836387: step: 70/466, loss: 0.0070917424745857716 2023-01-24 04:57:04.433724: step: 72/466, loss: 0.07519196718931198 2023-01-24 04:57:05.102062: step: 74/466, loss: 0.0027385542634874582 2023-01-24 04:57:05.756343: step: 76/466, loss: 0.021933214738965034 2023-01-24 04:57:06.442194: step: 78/466, loss: 0.01939769834280014 2023-01-24 04:57:07.076289: step: 80/466, loss: 0.02084903046488762 2023-01-24 04:57:07.656743: step: 82/466, loss: 0.0013701777206733823 2023-01-24 04:57:08.311099: step: 84/466, loss: 0.02175498567521572 2023-01-24 04:57:08.986833: step: 86/466, loss: 0.01719932071864605 2023-01-24 04:57:09.577028: step: 88/466, loss: 0.0009665418183431029 2023-01-24 04:57:10.162479: step: 90/466, loss: 0.0013758244458585978 2023-01-24 04:57:10.778738: step: 92/466, loss: 0.02238643541932106 2023-01-24 04:57:11.452408: step: 94/466, loss: 0.0037119691260159016 2023-01-24 04:57:12.113754: step: 96/466, loss: 0.006917356047779322 2023-01-24 04:57:12.709271: step: 98/466, loss: 0.03723670542240143 2023-01-24 04:57:13.295493: step: 100/466, loss: 0.0020379177294671535 2023-01-24 04:57:14.004698: step: 102/466, loss: 0.020293638110160828 2023-01-24 04:57:14.619897: step: 104/466, loss: 0.12235407531261444 2023-01-24 04:57:15.244615: step: 106/466, loss: 0.033439889550209045 2023-01-24 04:57:15.848924: step: 108/466, loss: 0.001259556389413774 2023-01-24 04:57:16.445287: step: 110/466, loss: 0.0005565416067838669 2023-01-24 04:57:17.200097: step: 112/466, loss: 0.047064412385225296 2023-01-24 04:57:17.892931: step: 114/466, loss: 0.02563040517270565 2023-01-24 04:57:18.531191: step: 116/466, loss: 0.09876726567745209 2023-01-24 04:57:19.119927: step: 118/466, loss: 0.001955243293195963 2023-01-24 04:57:19.694890: step: 120/466, loss: 0.009090467356145382 2023-01-24 04:57:20.299345: step: 122/466, loss: 8.463918493362144e-05 2023-01-24 04:57:20.962998: step: 124/466, loss: 0.010689635761082172 2023-01-24 04:57:21.641978: step: 126/466, loss: 0.049118880182504654 2023-01-24 04:57:22.312378: step: 128/466, loss: 0.06984400749206543 2023-01-24 04:57:23.005174: step: 130/466, loss: 0.0016691607888787985 2023-01-24 04:57:23.656914: step: 132/466, loss: 0.0015183526556938887 2023-01-24 04:57:24.231296: step: 134/466, loss: 0.00012503209291025996 2023-01-24 04:57:24.886194: step: 136/466, loss: 0.15007728338241577 2023-01-24 04:57:25.486703: step: 138/466, loss: 0.01478609349578619 2023-01-24 04:57:26.099809: step: 140/466, loss: 0.00026948051527142525 2023-01-24 04:57:26.722748: step: 142/466, loss: 0.015629736706614494 2023-01-24 04:57:27.331248: step: 144/466, loss: 0.005343706347048283 2023-01-24 04:57:27.979927: step: 146/466, loss: 0.000775020569562912 2023-01-24 04:57:28.579207: step: 148/466, loss: 0.0014441086677834392 2023-01-24 04:57:29.202820: step: 150/466, loss: 0.019887277856469154 2023-01-24 04:57:29.750476: step: 152/466, loss: 0.005369046237319708 2023-01-24 04:57:30.403121: step: 154/466, loss: 0.02766246907413006 2023-01-24 04:57:31.026505: step: 156/466, loss: 0.0004748774226754904 2023-01-24 04:57:31.649795: step: 158/466, loss: 0.028046274557709694 2023-01-24 04:57:32.262799: step: 160/466, loss: 0.003485351335257292 2023-01-24 04:57:32.841413: step: 162/466, loss: 0.00030218041501939297 2023-01-24 04:57:33.511142: step: 164/466, loss: 0.00042470174957998097 2023-01-24 04:57:34.095251: step: 166/466, loss: 0.04308995604515076 2023-01-24 04:57:34.804770: step: 168/466, loss: 0.009484400041401386 2023-01-24 04:57:35.395993: step: 170/466, loss: 0.0004260554851498455 2023-01-24 04:57:35.966861: step: 172/466, loss: 0.009698664769530296 2023-01-24 04:57:36.590300: step: 174/466, loss: 0.0035012245643883944 2023-01-24 04:57:37.275092: step: 176/466, loss: 0.01406619232147932 2023-01-24 04:57:37.844138: step: 178/466, loss: 0.0095423748716712 2023-01-24 04:57:38.444643: step: 180/466, loss: 0.0014280936447903514 2023-01-24 04:57:39.012813: step: 182/466, loss: 0.001411979435943067 2023-01-24 04:57:39.619363: step: 184/466, loss: 3.411167926969938e-05 2023-01-24 04:57:40.215738: step: 186/466, loss: 0.0007187300943769515 2023-01-24 04:57:40.853546: step: 188/466, loss: 0.007806873414665461 2023-01-24 04:57:41.466984: step: 190/466, loss: 0.013449162244796753 2023-01-24 04:57:42.143222: step: 192/466, loss: 0.0009487065253779292 2023-01-24 04:57:42.803406: step: 194/466, loss: 0.022043446078896523 2023-01-24 04:57:43.391316: step: 196/466, loss: 0.002760909032076597 2023-01-24 04:57:43.981944: step: 198/466, loss: 0.0002739167248364538 2023-01-24 04:57:44.612214: step: 200/466, loss: 0.0047027189284563065 2023-01-24 04:57:45.167219: step: 202/466, loss: 0.03824473172426224 2023-01-24 04:57:45.759515: step: 204/466, loss: 0.003228937741369009 2023-01-24 04:57:46.428031: step: 206/466, loss: 0.0475756898522377 2023-01-24 04:57:46.995485: step: 208/466, loss: 0.007542956620454788 2023-01-24 04:57:47.625280: step: 210/466, loss: 0.01231348980218172 2023-01-24 04:57:48.193682: step: 212/466, loss: 0.018550366163253784 2023-01-24 04:57:48.829701: step: 214/466, loss: 0.0036698374897241592 2023-01-24 04:57:49.501432: step: 216/466, loss: 0.033354442566633224 2023-01-24 04:57:50.125367: step: 218/466, loss: 6.645784014835954e-05 2023-01-24 04:57:50.759202: step: 220/466, loss: 0.020804526284337044 2023-01-24 04:57:51.435709: step: 222/466, loss: 0.01046005729585886 2023-01-24 04:57:52.081501: step: 224/466, loss: 0.0008722355705685914 2023-01-24 04:57:52.661065: step: 226/466, loss: 0.018724089488387108 2023-01-24 04:57:53.277657: step: 228/466, loss: 0.016693826764822006 2023-01-24 04:57:53.947215: step: 230/466, loss: 0.0009288593428209424 2023-01-24 04:57:54.547289: step: 232/466, loss: 0.0007292639347724617 2023-01-24 04:57:55.122834: step: 234/466, loss: 0.0008308116812258959 2023-01-24 04:57:55.709782: step: 236/466, loss: 0.1941482573747635 2023-01-24 04:57:56.320905: step: 238/466, loss: 0.0003392081707715988 2023-01-24 04:57:56.987264: step: 240/466, loss: 0.010306607000529766 2023-01-24 04:57:57.701026: step: 242/466, loss: 0.0009510318632237613 2023-01-24 04:57:58.271089: step: 244/466, loss: 0.009845195338129997 2023-01-24 04:57:58.918886: step: 246/466, loss: 0.006684688851237297 2023-01-24 04:57:59.566823: step: 248/466, loss: 0.01415190938860178 2023-01-24 04:58:00.248526: step: 250/466, loss: 0.0021747788414359093 2023-01-24 04:58:00.879415: step: 252/466, loss: 0.10681437700986862 2023-01-24 04:58:01.502058: step: 254/466, loss: 0.003000908065587282 2023-01-24 04:58:02.122558: step: 256/466, loss: 0.02510448358952999 2023-01-24 04:58:02.791695: step: 258/466, loss: 0.015247062779963017 2023-01-24 04:58:03.473224: step: 260/466, loss: 0.04318120703101158 2023-01-24 04:58:04.109826: step: 262/466, loss: 0.0001173671189462766 2023-01-24 04:58:04.736774: step: 264/466, loss: 0.0623827762901783 2023-01-24 04:58:05.360044: step: 266/466, loss: 0.0010009552352130413 2023-01-24 04:58:06.019331: step: 268/466, loss: 0.03292268142104149 2023-01-24 04:58:06.679369: step: 270/466, loss: 0.16150562465190887 2023-01-24 04:58:07.271240: step: 272/466, loss: 0.09163973480463028 2023-01-24 04:58:07.896163: step: 274/466, loss: 0.01140571665018797 2023-01-24 04:58:08.589971: step: 276/466, loss: 0.0015015015378594398 2023-01-24 04:58:09.167278: step: 278/466, loss: 2.4360746465390548e-05 2023-01-24 04:58:09.759507: step: 280/466, loss: 0.005441330373287201 2023-01-24 04:58:10.315062: step: 282/466, loss: 0.034476932138204575 2023-01-24 04:58:11.034317: step: 284/466, loss: 0.014575645327568054 2023-01-24 04:58:11.608374: step: 286/466, loss: 0.0428362712264061 2023-01-24 04:58:12.214610: step: 288/466, loss: 1.2249766768945847e-05 2023-01-24 04:58:12.810848: step: 290/466, loss: 0.0012035742402076721 2023-01-24 04:58:13.436185: step: 292/466, loss: 0.03102664276957512 2023-01-24 04:58:14.028023: step: 294/466, loss: 0.006781739182770252 2023-01-24 04:58:14.648505: step: 296/466, loss: 0.0010324480244889855 2023-01-24 04:58:15.321294: step: 298/466, loss: 0.009565313346683979 2023-01-24 04:58:15.933890: step: 300/466, loss: 0.012004152871668339 2023-01-24 04:58:16.597292: step: 302/466, loss: 0.0013970371801406145 2023-01-24 04:58:17.215310: step: 304/466, loss: 0.001083197072148323 2023-01-24 04:58:17.853821: step: 306/466, loss: 0.0037915578577667475 2023-01-24 04:58:18.477240: step: 308/466, loss: 0.0009314219933003187 2023-01-24 04:58:19.074806: step: 310/466, loss: 0.1204252764582634 2023-01-24 04:58:19.757665: step: 312/466, loss: 0.006242868024855852 2023-01-24 04:58:20.399707: step: 314/466, loss: 0.00015747385623399168 2023-01-24 04:58:21.039704: step: 316/466, loss: 0.0005098479450680315 2023-01-24 04:58:21.627292: step: 318/466, loss: 0.012559473514556885 2023-01-24 04:58:22.308164: step: 320/466, loss: 0.0019534605089575052 2023-01-24 04:58:22.948337: step: 322/466, loss: 0.0011656004935503006 2023-01-24 04:58:23.559555: step: 324/466, loss: 0.005038025323301554 2023-01-24 04:58:24.137424: step: 326/466, loss: 0.011869724839925766 2023-01-24 04:58:24.787241: step: 328/466, loss: 0.03267310932278633 2023-01-24 04:58:25.369828: step: 330/466, loss: 0.0036926791071891785 2023-01-24 04:58:25.949126: step: 332/466, loss: 0.029556231573224068 2023-01-24 04:58:26.536291: step: 334/466, loss: 0.010744617320597172 2023-01-24 04:58:27.138902: step: 336/466, loss: 0.0029276181012392044 2023-01-24 04:58:27.797237: step: 338/466, loss: 0.04652019962668419 2023-01-24 04:58:28.392354: step: 340/466, loss: 0.0017216216074302793 2023-01-24 04:58:29.065521: step: 342/466, loss: 1.7856009435490705e-05 2023-01-24 04:58:29.738501: step: 344/466, loss: 0.003602897049859166 2023-01-24 04:58:30.305885: step: 346/466, loss: 0.008376662619411945 2023-01-24 04:58:30.901824: step: 348/466, loss: 0.05960991233587265 2023-01-24 04:58:31.542824: step: 350/466, loss: 0.0006441866280511022 2023-01-24 04:58:32.154268: step: 352/466, loss: 0.0007531185983680189 2023-01-24 04:58:32.777872: step: 354/466, loss: 0.004106097389012575 2023-01-24 04:58:33.405664: step: 356/466, loss: 0.005356463138014078 2023-01-24 04:58:34.006022: step: 358/466, loss: 0.0008049256284721196 2023-01-24 04:58:34.594825: step: 360/466, loss: 0.029184794053435326 2023-01-24 04:58:35.225901: step: 362/466, loss: 0.007187245413661003 2023-01-24 04:58:35.922926: step: 364/466, loss: 0.002978815231472254 2023-01-24 04:58:36.518096: step: 366/466, loss: 0.01251143403351307 2023-01-24 04:58:37.132707: step: 368/466, loss: 0.018451590090990067 2023-01-24 04:58:37.728722: step: 370/466, loss: 0.0023503785487264395 2023-01-24 04:58:38.313787: step: 372/466, loss: 0.0005784441018477082 2023-01-24 04:58:38.929310: step: 374/466, loss: 0.00456195417791605 2023-01-24 04:58:39.612163: step: 376/466, loss: 0.003032136242836714 2023-01-24 04:58:40.130862: step: 378/466, loss: 0.0005832591559737921 2023-01-24 04:58:40.749479: step: 380/466, loss: 0.03324274718761444 2023-01-24 04:58:41.343072: step: 382/466, loss: 0.0004480695934034884 2023-01-24 04:58:41.991832: step: 384/466, loss: 0.0007840922335162759 2023-01-24 04:58:42.558664: step: 386/466, loss: 0.004485865589231253 2023-01-24 04:58:43.171390: step: 388/466, loss: 0.0025899396277964115 2023-01-24 04:58:43.875592: step: 390/466, loss: 0.01471379678696394 2023-01-24 04:58:44.456306: step: 392/466, loss: 0.04674030467867851 2023-01-24 04:58:45.014600: step: 394/466, loss: 0.0004218451213091612 2023-01-24 04:58:45.594160: step: 396/466, loss: 0.0010176504729315639 2023-01-24 04:58:46.161571: step: 398/466, loss: 0.01860765554010868 2023-01-24 04:58:46.859387: step: 400/466, loss: 0.006526969838887453 2023-01-24 04:58:47.533281: step: 402/466, loss: 0.016039496287703514 2023-01-24 04:58:48.161521: step: 404/466, loss: 0.00013114621106069535 2023-01-24 04:58:48.709495: step: 406/466, loss: 0.003917271737009287 2023-01-24 04:58:49.422878: step: 408/466, loss: 0.826931893825531 2023-01-24 04:58:50.110167: step: 410/466, loss: 0.011356177739799023 2023-01-24 04:58:50.701690: step: 412/466, loss: 0.07401003688573837 2023-01-24 04:58:51.253392: step: 414/466, loss: 0.015200392343103886 2023-01-24 04:58:51.903346: step: 416/466, loss: 0.0029499316588044167 2023-01-24 04:58:52.548544: step: 418/466, loss: 0.03140866756439209 2023-01-24 04:58:53.210863: step: 420/466, loss: 7.102837844286114e-05 2023-01-24 04:58:53.805174: step: 422/466, loss: 0.04962131008505821 2023-01-24 04:58:54.384351: step: 424/466, loss: 0.01557162031531334 2023-01-24 04:58:54.998651: step: 426/466, loss: 0.020801618695259094 2023-01-24 04:58:55.570990: step: 428/466, loss: 0.00034787176991812885 2023-01-24 04:58:56.169347: step: 430/466, loss: 0.001129518961533904 2023-01-24 04:58:56.712943: step: 432/466, loss: 0.00616582902148366 2023-01-24 04:58:57.315277: step: 434/466, loss: 0.055054135620594025 2023-01-24 04:58:57.921703: step: 436/466, loss: 0.00042150524677708745 2023-01-24 04:58:58.558435: step: 438/466, loss: 0.0036059392150491476 2023-01-24 04:58:59.149677: step: 440/466, loss: 0.00019929753034375608 2023-01-24 04:58:59.721091: step: 442/466, loss: 0.006033843848854303 2023-01-24 04:59:00.381354: step: 444/466, loss: 3.503536936477758e-05 2023-01-24 04:59:00.953796: step: 446/466, loss: 0.00111696170642972 2023-01-24 04:59:01.562290: step: 448/466, loss: 0.022110553458333015 2023-01-24 04:59:02.159836: step: 450/466, loss: 0.00023391265131067485 2023-01-24 04:59:02.813183: step: 452/466, loss: 0.02570257894694805 2023-01-24 04:59:03.436337: step: 454/466, loss: 0.08661607652902603 2023-01-24 04:59:04.072651: step: 456/466, loss: 4.477473703445867e-05 2023-01-24 04:59:04.661094: step: 458/466, loss: 0.002013164572417736 2023-01-24 04:59:05.246550: step: 460/466, loss: 0.004852669779211283 2023-01-24 04:59:05.820312: step: 462/466, loss: 0.0012019069399684668 2023-01-24 04:59:06.551294: step: 464/466, loss: 0.009157421998679638 2023-01-24 04:59:07.128508: step: 466/466, loss: 0.2808040380477905 2023-01-24 04:59:07.724235: step: 468/466, loss: 0.003522509476169944 2023-01-24 04:59:08.385674: step: 470/466, loss: 0.016518112272024155 2023-01-24 04:59:09.029821: step: 472/466, loss: 0.0008490536129102111 2023-01-24 04:59:09.717055: step: 474/466, loss: 0.02368561364710331 2023-01-24 04:59:10.417699: step: 476/466, loss: 0.002999295713379979 2023-01-24 04:59:11.086607: step: 478/466, loss: 0.7123193740844727 2023-01-24 04:59:11.717247: step: 480/466, loss: 0.04895234480500221 2023-01-24 04:59:12.304903: step: 482/466, loss: 0.00048576542758382857 2023-01-24 04:59:12.892907: step: 484/466, loss: 0.021133853122591972 2023-01-24 04:59:13.491688: step: 486/466, loss: 0.0035724611952900887 2023-01-24 04:59:14.114982: step: 488/466, loss: 0.003965089563280344 2023-01-24 04:59:14.777270: step: 490/466, loss: 0.0005103085422888398 2023-01-24 04:59:15.465593: step: 492/466, loss: 0.002834130311384797 2023-01-24 04:59:16.120847: step: 494/466, loss: 0.10848957300186157 2023-01-24 04:59:16.671650: step: 496/466, loss: 0.0019518728367984295 2023-01-24 04:59:17.239141: step: 498/466, loss: 0.09013953059911728 2023-01-24 04:59:17.881476: step: 500/466, loss: 0.002601301297545433 2023-01-24 04:59:18.537275: step: 502/466, loss: 3.179002305842005e-05 2023-01-24 04:59:19.142984: step: 504/466, loss: 0.030233832076191902 2023-01-24 04:59:19.757619: step: 506/466, loss: 0.00021096097771078348 2023-01-24 04:59:20.421690: step: 508/466, loss: 0.006912588141858578 2023-01-24 04:59:20.940176: step: 510/466, loss: 0.06605498492717743 2023-01-24 04:59:21.482891: step: 512/466, loss: 0.0012793607311323285 2023-01-24 04:59:22.197499: step: 514/466, loss: 0.01516756508499384 2023-01-24 04:59:22.778704: step: 516/466, loss: 0.005258440971374512 2023-01-24 04:59:23.372050: step: 518/466, loss: 0.031163925305008888 2023-01-24 04:59:23.989515: step: 520/466, loss: 0.047974832355976105 2023-01-24 04:59:24.593830: step: 522/466, loss: 0.005257150158286095 2023-01-24 04:59:25.154716: step: 524/466, loss: 0.007114611566066742 2023-01-24 04:59:25.738436: step: 526/466, loss: 0.014129843562841415 2023-01-24 04:59:26.364792: step: 528/466, loss: 0.05840504541993141 2023-01-24 04:59:26.913794: step: 530/466, loss: 0.000509614881593734 2023-01-24 04:59:27.501192: step: 532/466, loss: 0.005425469484180212 2023-01-24 04:59:28.055648: step: 534/466, loss: 0.0015449508791789412 2023-01-24 04:59:28.880249: step: 536/466, loss: 0.001625897828489542 2023-01-24 04:59:29.544467: step: 538/466, loss: 0.006065103225409985 2023-01-24 04:59:30.109562: step: 540/466, loss: 8.518200047546998e-05 2023-01-24 04:59:30.696685: step: 542/466, loss: 0.0006184268859215081 2023-01-24 04:59:31.341478: step: 544/466, loss: 0.030829263851046562 2023-01-24 04:59:32.063067: step: 546/466, loss: 0.07033118605613708 2023-01-24 04:59:32.689406: step: 548/466, loss: 0.014811261557042599 2023-01-24 04:59:33.316889: step: 550/466, loss: 0.07217790931463242 2023-01-24 04:59:33.889324: step: 552/466, loss: 0.00011854747572215274 2023-01-24 04:59:34.518462: step: 554/466, loss: 0.02122669667005539 2023-01-24 04:59:35.129555: step: 556/466, loss: 0.01861976832151413 2023-01-24 04:59:35.762497: step: 558/466, loss: 0.0003908296348527074 2023-01-24 04:59:36.352270: step: 560/466, loss: 0.1115989238023758 2023-01-24 04:59:36.952128: step: 562/466, loss: 0.1496153175830841 2023-01-24 04:59:37.560018: step: 564/466, loss: 0.004777440335601568 2023-01-24 04:59:38.161778: step: 566/466, loss: 0.007495975121855736 2023-01-24 04:59:38.808623: step: 568/466, loss: 0.029308144003152847 2023-01-24 04:59:39.468903: step: 570/466, loss: 0.004541173577308655 2023-01-24 04:59:40.018628: step: 572/466, loss: 0.004064922221004963 2023-01-24 04:59:40.614751: step: 574/466, loss: 0.001175748067907989 2023-01-24 04:59:41.318096: step: 576/466, loss: 0.0008852386381477118 2023-01-24 04:59:41.932542: step: 578/466, loss: 0.04126737266778946 2023-01-24 04:59:42.552886: step: 580/466, loss: 0.0035846279934048653 2023-01-24 04:59:43.235098: step: 582/466, loss: 0.2606767416000366 2023-01-24 04:59:43.837236: step: 584/466, loss: 0.0002340715000173077 2023-01-24 04:59:44.469454: step: 586/466, loss: 0.004101741127669811 2023-01-24 04:59:45.103945: step: 588/466, loss: 0.008464156650006771 2023-01-24 04:59:45.630355: step: 590/466, loss: 0.001931003644131124 2023-01-24 04:59:46.243131: step: 592/466, loss: 0.003803492523729801 2023-01-24 04:59:46.858955: step: 594/466, loss: 0.002909739501774311 2023-01-24 04:59:47.541071: step: 596/466, loss: 0.007848568260669708 2023-01-24 04:59:48.194198: step: 598/466, loss: 0.001027796184644103 2023-01-24 04:59:48.780737: step: 600/466, loss: 0.0002775905013550073 2023-01-24 04:59:49.379425: step: 602/466, loss: 0.002364953514188528 2023-01-24 04:59:50.007823: step: 604/466, loss: 0.008006014861166477 2023-01-24 04:59:50.625014: step: 606/466, loss: 0.014175712130963802 2023-01-24 04:59:51.305614: step: 608/466, loss: 0.03752944618463516 2023-01-24 04:59:51.891437: step: 610/466, loss: 0.02041816897690296 2023-01-24 04:59:52.504267: step: 612/466, loss: 0.018955394625663757 2023-01-24 04:59:53.094015: step: 614/466, loss: 0.004180672578513622 2023-01-24 04:59:53.729319: step: 616/466, loss: 0.007526230067014694 2023-01-24 04:59:54.284145: step: 618/466, loss: 0.013544775545597076 2023-01-24 04:59:54.907314: step: 620/466, loss: 0.0005253756535239518 2023-01-24 04:59:55.513996: step: 622/466, loss: 0.00041710224468261003 2023-01-24 04:59:56.083255: step: 624/466, loss: 0.0052870395593345165 2023-01-24 04:59:56.710586: step: 626/466, loss: 0.13957083225250244 2023-01-24 04:59:57.318779: step: 628/466, loss: 0.0020317896269261837 2023-01-24 04:59:58.057576: step: 630/466, loss: 0.004546259995549917 2023-01-24 04:59:58.639304: step: 632/466, loss: 0.006747208535671234 2023-01-24 04:59:59.238379: step: 634/466, loss: 0.00037356914253905416 2023-01-24 04:59:59.812610: step: 636/466, loss: 0.049915898591279984 2023-01-24 05:00:00.436877: step: 638/466, loss: 0.02618241123855114 2023-01-24 05:00:01.019596: step: 640/466, loss: 0.00983812939375639 2023-01-24 05:00:01.632620: step: 642/466, loss: 0.0003110206453129649 2023-01-24 05:00:02.235038: step: 644/466, loss: 0.017514945939183235 2023-01-24 05:00:02.831053: step: 646/466, loss: 0.015234136953949928 2023-01-24 05:00:03.438246: step: 648/466, loss: 0.016556590795516968 2023-01-24 05:00:04.050085: step: 650/466, loss: 0.0021507632918655872 2023-01-24 05:00:04.677516: step: 652/466, loss: 0.0331457257270813 2023-01-24 05:00:05.336988: step: 654/466, loss: 7.748071220703423e-05 2023-01-24 05:00:05.938904: step: 656/466, loss: 0.005226131994277239 2023-01-24 05:00:06.518028: step: 658/466, loss: 0.01206152606755495 2023-01-24 05:00:07.116913: step: 660/466, loss: 0.009711089543998241 2023-01-24 05:00:07.706112: step: 662/466, loss: 0.0005779156344942749 2023-01-24 05:00:08.313436: step: 664/466, loss: 0.05312402918934822 2023-01-24 05:00:08.949380: step: 666/466, loss: 0.0019117280608043075 2023-01-24 05:00:09.512981: step: 668/466, loss: 0.01204315759241581 2023-01-24 05:00:10.150389: step: 670/466, loss: 0.0220714770257473 2023-01-24 05:00:10.772810: step: 672/466, loss: 0.0005926095182076097 2023-01-24 05:00:11.345622: step: 674/466, loss: 0.005150275304913521 2023-01-24 05:00:11.975967: step: 676/466, loss: 0.00197270093485713 2023-01-24 05:00:12.574911: step: 678/466, loss: 0.020318780094385147 2023-01-24 05:00:13.179051: step: 680/466, loss: 0.11400794237852097 2023-01-24 05:00:13.745389: step: 682/466, loss: 0.00041246210457757115 2023-01-24 05:00:14.327639: step: 684/466, loss: 0.01166682131588459 2023-01-24 05:00:14.955458: step: 686/466, loss: 0.004297053907066584 2023-01-24 05:00:15.609767: step: 688/466, loss: 0.031819894909858704 2023-01-24 05:00:16.279434: step: 690/466, loss: 0.0027910592034459114 2023-01-24 05:00:16.824995: step: 692/466, loss: 0.020834175869822502 2023-01-24 05:00:17.405810: step: 694/466, loss: 0.008798377588391304 2023-01-24 05:00:18.026061: step: 696/466, loss: 0.023229394108057022 2023-01-24 05:00:18.654855: step: 698/466, loss: 0.1519005447626114 2023-01-24 05:00:19.242925: step: 700/466, loss: 0.0005471892072819173 2023-01-24 05:00:19.819854: step: 702/466, loss: 0.0010586096905171871 2023-01-24 05:00:20.476706: step: 704/466, loss: 0.013087703846395016 2023-01-24 05:00:21.017687: step: 706/466, loss: 0.013915506191551685 2023-01-24 05:00:21.642868: step: 708/466, loss: 0.0024498875718563795 2023-01-24 05:00:22.230139: step: 710/466, loss: 0.002289401600137353 2023-01-24 05:00:22.876787: step: 712/466, loss: 0.007158982567489147 2023-01-24 05:00:23.492268: step: 714/466, loss: 0.03404374420642853 2023-01-24 05:00:24.126751: step: 716/466, loss: 0.007108825258910656 2023-01-24 05:00:24.701160: step: 718/466, loss: 0.010414959862828255 2023-01-24 05:00:25.318556: step: 720/466, loss: 0.0224582739174366 2023-01-24 05:00:25.886225: step: 722/466, loss: 0.0011145860189571977 2023-01-24 05:00:26.430519: step: 724/466, loss: 0.0005832858732901514 2023-01-24 05:00:27.061962: step: 726/466, loss: 0.010251102037727833 2023-01-24 05:00:27.734209: step: 728/466, loss: 0.009993134997785091 2023-01-24 05:00:28.349861: step: 730/466, loss: 0.000616840086877346 2023-01-24 05:00:28.949912: step: 732/466, loss: 0.006950164679437876 2023-01-24 05:00:29.567683: step: 734/466, loss: 0.013368867337703705 2023-01-24 05:00:30.137281: step: 736/466, loss: 0.005068691913038492 2023-01-24 05:00:30.833716: step: 738/466, loss: 0.0001408860698575154 2023-01-24 05:00:31.468654: step: 740/466, loss: 0.003125702030956745 2023-01-24 05:00:32.066866: step: 742/466, loss: 0.000734903325792402 2023-01-24 05:00:32.755267: step: 744/466, loss: 0.03700494393706322 2023-01-24 05:00:33.405325: step: 746/466, loss: 0.0028334090020507574 2023-01-24 05:00:34.086615: step: 748/466, loss: 0.005490779411047697 2023-01-24 05:00:34.694594: step: 750/466, loss: 0.0191445704549551 2023-01-24 05:00:35.271161: step: 752/466, loss: 0.0021933047100901604 2023-01-24 05:00:35.881723: step: 754/466, loss: 0.06623340398073196 2023-01-24 05:00:36.543899: step: 756/466, loss: 1.1274349689483643 2023-01-24 05:00:37.227880: step: 758/466, loss: 0.00706103490665555 2023-01-24 05:00:37.789381: step: 760/466, loss: 0.00011960876145167276 2023-01-24 05:00:38.428505: step: 762/466, loss: 0.012318236753344536 2023-01-24 05:00:39.040912: step: 764/466, loss: 0.2550526559352875 2023-01-24 05:00:39.676135: step: 766/466, loss: 8.795595931587741e-05 2023-01-24 05:00:40.227643: step: 768/466, loss: 0.00039759004721418023 2023-01-24 05:00:40.834383: step: 770/466, loss: 9.025069448398426e-05 2023-01-24 05:00:41.437367: step: 772/466, loss: 8.084368164418265e-05 2023-01-24 05:00:42.004854: step: 774/466, loss: 0.007387692574411631 2023-01-24 05:00:42.637549: step: 776/466, loss: 0.0002266254450660199 2023-01-24 05:00:43.240290: step: 778/466, loss: 0.0748785138130188 2023-01-24 05:00:43.813167: step: 780/466, loss: 0.010603842325508595 2023-01-24 05:00:44.418346: step: 782/466, loss: 0.03986622393131256 2023-01-24 05:00:45.027741: step: 784/466, loss: 0.0015694086905568838 2023-01-24 05:00:45.671932: step: 786/466, loss: 0.002429869258776307 2023-01-24 05:00:46.365329: step: 788/466, loss: 0.017495468258857727 2023-01-24 05:00:46.991035: step: 790/466, loss: 0.008161415345966816 2023-01-24 05:00:47.621943: step: 792/466, loss: 0.004917403683066368 2023-01-24 05:00:48.199097: step: 794/466, loss: 0.01133313961327076 2023-01-24 05:00:48.948901: step: 796/466, loss: 0.002892041113227606 2023-01-24 05:00:49.516812: step: 798/466, loss: 0.00031576791661791503 2023-01-24 05:00:50.111162: step: 800/466, loss: 0.00044341786997392774 2023-01-24 05:00:50.724174: step: 802/466, loss: 0.004508330021053553 2023-01-24 05:00:51.365154: step: 804/466, loss: 0.05905351787805557 2023-01-24 05:00:51.961574: step: 806/466, loss: 0.14383766055107117 2023-01-24 05:00:52.591014: step: 808/466, loss: 0.04062948748469353 2023-01-24 05:00:53.243506: step: 810/466, loss: 0.011387858539819717 2023-01-24 05:00:53.883263: step: 812/466, loss: 0.004854270722717047 2023-01-24 05:00:54.587372: step: 814/466, loss: 0.0001678572007222101 2023-01-24 05:00:55.144244: step: 816/466, loss: 0.002151502761989832 2023-01-24 05:00:55.780327: step: 818/466, loss: 4.258429180481471e-05 2023-01-24 05:00:56.375558: step: 820/466, loss: 0.1596289724111557 2023-01-24 05:00:56.965708: step: 822/466, loss: 0.02567148394882679 2023-01-24 05:00:57.587374: step: 824/466, loss: 0.027583090588450432 2023-01-24 05:00:58.189114: step: 826/466, loss: 0.030280442908406258 2023-01-24 05:00:58.768142: step: 828/466, loss: 0.0019176894566044211 2023-01-24 05:00:59.389512: step: 830/466, loss: 0.019382530823349953 2023-01-24 05:01:00.028756: step: 832/466, loss: 0.06708116829395294 2023-01-24 05:01:00.618296: step: 834/466, loss: 0.002640919527038932 2023-01-24 05:01:01.187137: step: 836/466, loss: 0.007880817167460918 2023-01-24 05:01:01.838065: step: 838/466, loss: 0.06637432426214218 2023-01-24 05:01:02.489241: step: 840/466, loss: 0.016892267391085625 2023-01-24 05:01:03.039838: step: 842/466, loss: 5.45619914191775e-05 2023-01-24 05:01:03.601555: step: 844/466, loss: 0.0007705793250352144 2023-01-24 05:01:04.177022: step: 846/466, loss: 0.0003873187815770507 2023-01-24 05:01:04.760423: step: 848/466, loss: 0.0012421660358086228 2023-01-24 05:01:05.347674: step: 850/466, loss: 0.0006693482282571495 2023-01-24 05:01:05.923966: step: 852/466, loss: 0.0021642204374074936 2023-01-24 05:01:06.558027: step: 854/466, loss: 0.00042549276258796453 2023-01-24 05:01:07.194561: step: 856/466, loss: 0.0043931263498961926 2023-01-24 05:01:07.807909: step: 858/466, loss: 0.0002430929453112185 2023-01-24 05:01:08.485369: step: 860/466, loss: 0.00892496295273304 2023-01-24 05:01:09.131360: step: 862/466, loss: 0.02985687181353569 2023-01-24 05:01:09.749778: step: 864/466, loss: 0.001226606429554522 2023-01-24 05:01:10.333014: step: 866/466, loss: 0.0005107595934532583 2023-01-24 05:01:10.891919: step: 868/466, loss: 0.1805555671453476 2023-01-24 05:01:11.605537: step: 870/466, loss: 0.011593032628297806 2023-01-24 05:01:12.214660: step: 872/466, loss: 0.0036725210957229137 2023-01-24 05:01:12.835915: step: 874/466, loss: 0.0036835698410868645 2023-01-24 05:01:13.438937: step: 876/466, loss: 0.003917853347957134 2023-01-24 05:01:13.990141: step: 878/466, loss: 0.0009317730437032878 2023-01-24 05:01:14.636842: step: 880/466, loss: 0.008433650247752666 2023-01-24 05:01:15.235276: step: 882/466, loss: 0.0009968248195946217 2023-01-24 05:01:15.755781: step: 884/466, loss: 0.10775865614414215 2023-01-24 05:01:16.341946: step: 886/466, loss: 0.0008649419178254902 2023-01-24 05:01:17.029945: step: 888/466, loss: 0.0063049462623894215 2023-01-24 05:01:17.667769: step: 890/466, loss: 0.0001419674081262201 2023-01-24 05:01:18.288442: step: 892/466, loss: 9.720688103698194e-05 2023-01-24 05:01:18.984734: step: 894/466, loss: 0.009378932416439056 2023-01-24 05:01:19.627932: step: 896/466, loss: 0.21360918879508972 2023-01-24 05:01:20.206513: step: 898/466, loss: 0.0008198576397262514 2023-01-24 05:01:20.808938: step: 900/466, loss: 3.818368350039236e-05 2023-01-24 05:01:21.432422: step: 902/466, loss: 0.002505519660189748 2023-01-24 05:01:22.088681: step: 904/466, loss: 0.050724759697914124 2023-01-24 05:01:22.687775: step: 906/466, loss: 0.03639456257224083 2023-01-24 05:01:23.310279: step: 908/466, loss: 1.6460932493209839 2023-01-24 05:01:23.987216: step: 910/466, loss: 0.04788459837436676 2023-01-24 05:01:24.649808: step: 912/466, loss: 0.008800013922154903 2023-01-24 05:01:25.323860: step: 914/466, loss: 0.0014374033780768514 2023-01-24 05:01:25.944319: step: 916/466, loss: 0.008077157661318779 2023-01-24 05:01:26.568557: step: 918/466, loss: 0.09034464508295059 2023-01-24 05:01:27.208587: step: 920/466, loss: 0.008930986747145653 2023-01-24 05:01:27.832196: step: 922/466, loss: 0.006056064274162054 2023-01-24 05:01:28.530451: step: 924/466, loss: 0.00033832492772489786 2023-01-24 05:01:29.157258: step: 926/466, loss: 0.0006647157715633512 2023-01-24 05:01:29.727252: step: 928/466, loss: 0.0029178534168750048 2023-01-24 05:01:30.356552: step: 930/466, loss: 0.0412764772772789 2023-01-24 05:01:30.958079: step: 932/466, loss: 0.02187582291662693 ================================================== Loss: 0.030 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36123831227340547, 'r': 0.307772300210169, 'f1': 0.3323688569892604}, 'combined': 0.24490336830787607, 'epoch': 37} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3592187340176659, 'r': 0.2649602564711007, 'f1': 0.30497241767145894}, 'combined': 0.1909640372335304, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34967442327856035, 'r': 0.31384440647202855, 'f1': 0.33079200442151807}, 'combined': 0.2437414769421712, 'epoch': 37} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3714854381711742, 'r': 0.27760825162972225, 'f1': 0.31775820537782257}, 'combined': 0.19692057798062243, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3177957777952905, 'r': 0.30091099263728643, 'f1': 0.30912298853771925}, 'combined': 0.22777483365937207, 'epoch': 37} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.35851277978150814, 'r': 0.2662127416263773, 'f1': 0.30554432864738174}, 'combined': 0.20268782197400573, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4076086956521739, 'r': 0.26785714285714285, 'f1': 0.3232758620689655}, 'combined': 0.21551724137931033, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3375, 'r': 0.29347826086956524, 'f1': 0.313953488372093}, 'combined': 0.1569767441860465, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.20689655172413793, 'f1': 0.2727272727272727}, 'combined': 0.1818181818181818, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32324861099239804, 'r': 0.3201817361252975, 'f1': 0.3217078645148366}, 'combined': 0.23704790016882693, 'epoch': 31} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36854991917508884, 'r': 0.2943742585468896, 'f1': 0.32731227141992336}, 'combined': 0.2171279424270779, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 31} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:04:04.349946: step: 2/466, loss: 0.022437838837504387 2023-01-24 05:04:04.963901: step: 4/466, loss: 0.13200338184833527 2023-01-24 05:04:05.578764: step: 6/466, loss: 0.010720699094235897 2023-01-24 05:04:06.202949: step: 8/466, loss: 0.0032551377080380917 2023-01-24 05:04:06.810753: step: 10/466, loss: 0.0005900732357986271 2023-01-24 05:04:07.479002: step: 12/466, loss: 0.003145418828353286 2023-01-24 05:04:08.131116: step: 14/466, loss: 0.0015280803199857473 2023-01-24 05:04:08.724778: step: 16/466, loss: 0.01795143447816372 2023-01-24 05:04:09.378495: step: 18/466, loss: 0.004306386690586805 2023-01-24 05:04:10.030096: step: 20/466, loss: 0.07706570625305176 2023-01-24 05:04:10.654388: step: 22/466, loss: 0.005568062420934439 2023-01-24 05:04:11.239496: step: 24/466, loss: 0.00024192110868170857 2023-01-24 05:04:11.899057: step: 26/466, loss: 0.0006435669492930174 2023-01-24 05:04:12.514293: step: 28/466, loss: 0.006629817187786102 2023-01-24 05:04:13.073176: step: 30/466, loss: 4.5981073526490945e-06 2023-01-24 05:04:13.613061: step: 32/466, loss: 0.00011412185267545283 2023-01-24 05:04:14.242064: step: 34/466, loss: 0.002991331508383155 2023-01-24 05:04:14.871027: step: 36/466, loss: 0.03248078003525734 2023-01-24 05:04:15.595016: step: 38/466, loss: 0.9280694127082825 2023-01-24 05:04:16.283794: step: 40/466, loss: 0.18692031502723694 2023-01-24 05:04:16.865163: step: 42/466, loss: 0.005263492465019226 2023-01-24 05:04:17.532908: step: 44/466, loss: 0.002784058451652527 2023-01-24 05:04:18.254475: step: 46/466, loss: 0.023738479241728783 2023-01-24 05:04:18.862452: step: 48/466, loss: 0.0004294861573725939 2023-01-24 05:04:19.555106: step: 50/466, loss: 0.0001612896448932588 2023-01-24 05:04:20.195636: step: 52/466, loss: 0.013227762654423714 2023-01-24 05:04:20.793152: step: 54/466, loss: 0.0059240576811134815 2023-01-24 05:04:21.399561: step: 56/466, loss: 0.007520228624343872 2023-01-24 05:04:22.003372: step: 58/466, loss: 0.0002002977125812322 2023-01-24 05:04:22.665493: step: 60/466, loss: 0.010102560743689537 2023-01-24 05:04:23.298701: step: 62/466, loss: 0.0009014192037284374 2023-01-24 05:04:23.918807: step: 64/466, loss: 0.005567123647779226 2023-01-24 05:04:24.514386: step: 66/466, loss: 0.00010532901796977967 2023-01-24 05:04:25.188774: step: 68/466, loss: 0.0026699909940361977 2023-01-24 05:04:25.899568: step: 70/466, loss: 0.00039817256038077176 2023-01-24 05:04:26.536100: step: 72/466, loss: 0.011175579391419888 2023-01-24 05:04:27.213038: step: 74/466, loss: 0.030114492401480675 2023-01-24 05:04:27.804129: step: 76/466, loss: 0.1101958379149437 2023-01-24 05:04:28.363711: step: 78/466, loss: 0.005225814413279295 2023-01-24 05:04:28.993037: step: 80/466, loss: 0.0245907511562109 2023-01-24 05:04:29.669732: step: 82/466, loss: 0.08617166429758072 2023-01-24 05:04:30.264787: step: 84/466, loss: 0.004091610666364431 2023-01-24 05:04:30.911945: step: 86/466, loss: 0.3025188148021698 2023-01-24 05:04:31.523424: step: 88/466, loss: 0.0014836537884548306 2023-01-24 05:04:32.112302: step: 90/466, loss: 0.0003159225743729621 2023-01-24 05:04:32.743622: step: 92/466, loss: 0.001323531847447157 2023-01-24 05:04:33.320996: step: 94/466, loss: 0.000104486447526142 2023-01-24 05:04:33.884346: step: 96/466, loss: 0.07179936021566391 2023-01-24 05:04:34.491537: step: 98/466, loss: 0.005564398597925901 2023-01-24 05:04:35.129104: step: 100/466, loss: 0.008363309316337109 2023-01-24 05:04:35.751738: step: 102/466, loss: 0.00012066560884704813 2023-01-24 05:04:36.363603: step: 104/466, loss: 0.004050121642649174 2023-01-24 05:04:36.965239: step: 106/466, loss: 0.0018115936545655131 2023-01-24 05:04:37.587864: step: 108/466, loss: 0.009684362448751926 2023-01-24 05:04:38.195008: step: 110/466, loss: 0.0004107225395273417 2023-01-24 05:04:38.833604: step: 112/466, loss: 0.15530163049697876 2023-01-24 05:04:39.448136: step: 114/466, loss: 0.004585616756230593 2023-01-24 05:04:40.056365: step: 116/466, loss: 0.002014612779021263 2023-01-24 05:04:40.645552: step: 118/466, loss: 0.015289516188204288 2023-01-24 05:04:41.237060: step: 120/466, loss: 0.07194361090660095 2023-01-24 05:04:41.908959: step: 122/466, loss: 0.002642291598021984 2023-01-24 05:04:42.516262: step: 124/466, loss: 0.08866244554519653 2023-01-24 05:04:43.190939: step: 126/466, loss: 0.010965188033878803 2023-01-24 05:04:43.849218: step: 128/466, loss: 0.000287467148154974 2023-01-24 05:04:44.488141: step: 130/466, loss: 0.014270431362092495 2023-01-24 05:04:45.160095: step: 132/466, loss: 0.0008079251856543124 2023-01-24 05:04:45.778340: step: 134/466, loss: 0.002872289391234517 2023-01-24 05:04:46.399141: step: 136/466, loss: 0.005854931194335222 2023-01-24 05:04:47.008803: step: 138/466, loss: 0.016251269727945328 2023-01-24 05:04:47.602738: step: 140/466, loss: 0.05686492472887039 2023-01-24 05:04:48.184526: step: 142/466, loss: 0.0037327632308006287 2023-01-24 05:04:48.781644: step: 144/466, loss: 0.0008345603127963841 2023-01-24 05:04:49.370132: step: 146/466, loss: 4.008270479971543e-05 2023-01-24 05:04:50.003924: step: 148/466, loss: 0.00392695888876915 2023-01-24 05:04:50.630592: step: 150/466, loss: 0.030992215499281883 2023-01-24 05:04:51.262981: step: 152/466, loss: 0.02886170893907547 2023-01-24 05:04:51.855119: step: 154/466, loss: 0.0024528333451598883 2023-01-24 05:04:52.490438: step: 156/466, loss: 0.00909585040062666 2023-01-24 05:04:53.124585: step: 158/466, loss: 0.031976863741874695 2023-01-24 05:04:53.765477: step: 160/466, loss: 0.009366384707391262 2023-01-24 05:04:54.386374: step: 162/466, loss: 0.000469852500827983 2023-01-24 05:04:55.040463: step: 164/466, loss: 0.2418440282344818 2023-01-24 05:04:55.696837: step: 166/466, loss: 0.0012514491099864244 2023-01-24 05:04:56.373994: step: 168/466, loss: 0.03002353198826313 2023-01-24 05:04:56.987085: step: 170/466, loss: 0.0030836581718176603 2023-01-24 05:04:57.625239: step: 172/466, loss: 0.011341028846800327 2023-01-24 05:04:58.232941: step: 174/466, loss: 0.007660390343517065 2023-01-24 05:04:58.830709: step: 176/466, loss: 0.0029395518358796835 2023-01-24 05:04:59.460160: step: 178/466, loss: 0.004267183598130941 2023-01-24 05:05:00.055058: step: 180/466, loss: 0.00800000037997961 2023-01-24 05:05:00.657442: step: 182/466, loss: 0.8297370672225952 2023-01-24 05:05:01.343705: step: 184/466, loss: 0.006819949019700289 2023-01-24 05:05:01.974090: step: 186/466, loss: 0.05917951837182045 2023-01-24 05:05:02.663830: step: 188/466, loss: 0.008328716270625591 2023-01-24 05:05:03.240363: step: 190/466, loss: 5.350938954507001e-05 2023-01-24 05:05:03.882557: step: 192/466, loss: 0.0059956153854727745 2023-01-24 05:05:04.466723: step: 194/466, loss: 0.009970581158995628 2023-01-24 05:05:05.123453: step: 196/466, loss: 0.004255726467818022 2023-01-24 05:05:05.715451: step: 198/466, loss: 0.0013649130705744028 2023-01-24 05:05:06.356276: step: 200/466, loss: 0.011420375667512417 2023-01-24 05:05:06.927816: step: 202/466, loss: 0.010235724039375782 2023-01-24 05:05:07.544921: step: 204/466, loss: 0.007816760800778866 2023-01-24 05:05:08.146872: step: 206/466, loss: 0.037151530385017395 2023-01-24 05:05:08.761634: step: 208/466, loss: 0.02781081013381481 2023-01-24 05:05:09.400332: step: 210/466, loss: 0.0002925058361142874 2023-01-24 05:05:10.001733: step: 212/466, loss: 0.001422711880877614 2023-01-24 05:05:10.584585: step: 214/466, loss: 0.0009883105522021651 2023-01-24 05:05:11.178981: step: 216/466, loss: 0.007165312301367521 2023-01-24 05:05:11.735341: step: 218/466, loss: 0.03599708154797554 2023-01-24 05:05:12.350779: step: 220/466, loss: 0.0014106587041169405 2023-01-24 05:05:12.953788: step: 222/466, loss: 0.042335882782936096 2023-01-24 05:05:13.552974: step: 224/466, loss: 0.019867349416017532 2023-01-24 05:05:14.076832: step: 226/466, loss: 0.0019416243303567171 2023-01-24 05:05:14.646500: step: 228/466, loss: 0.08412843197584152 2023-01-24 05:05:15.296147: step: 230/466, loss: 0.001664413372054696 2023-01-24 05:05:15.848146: step: 232/466, loss: 0.000471496838144958 2023-01-24 05:05:16.595664: step: 234/466, loss: 0.007500917185097933 2023-01-24 05:05:17.297519: step: 236/466, loss: 0.02185356430709362 2023-01-24 05:05:17.911307: step: 238/466, loss: 0.00652121240273118 2023-01-24 05:05:18.514971: step: 240/466, loss: 0.008082284592092037 2023-01-24 05:05:19.133542: step: 242/466, loss: 0.002958297962322831 2023-01-24 05:05:19.837915: step: 244/466, loss: 0.01800958812236786 2023-01-24 05:05:20.442645: step: 246/466, loss: 0.0011644359910860658 2023-01-24 05:05:21.081371: step: 248/466, loss: 0.025905592367053032 2023-01-24 05:05:21.734155: step: 250/466, loss: 0.0024262622464448214 2023-01-24 05:05:22.382454: step: 252/466, loss: 0.01273428462445736 2023-01-24 05:05:22.982274: step: 254/466, loss: 0.001079851994290948 2023-01-24 05:05:23.613370: step: 256/466, loss: 0.0021350423339754343 2023-01-24 05:05:24.212582: step: 258/466, loss: 3.1468382076127455e-05 2023-01-24 05:05:24.829867: step: 260/466, loss: 0.00010797793220262975 2023-01-24 05:05:25.385399: step: 262/466, loss: 0.0028548038098961115 2023-01-24 05:05:25.963625: step: 264/466, loss: 0.0009370913612656295 2023-01-24 05:05:26.645356: step: 266/466, loss: 0.003106689313426614 2023-01-24 05:05:27.182446: step: 268/466, loss: 0.010445700958371162 2023-01-24 05:05:27.817791: step: 270/466, loss: 7.891649875091389e-05 2023-01-24 05:05:28.453171: step: 272/466, loss: 0.0008442797116003931 2023-01-24 05:05:29.053837: step: 274/466, loss: 0.028718503192067146 2023-01-24 05:05:29.719976: step: 276/466, loss: 0.00021276797633618116 2023-01-24 05:05:30.306960: step: 278/466, loss: 0.003402187954634428 2023-01-24 05:05:30.906232: step: 280/466, loss: 0.010248236358165741 2023-01-24 05:05:31.516195: step: 282/466, loss: 0.015227698720991611 2023-01-24 05:05:32.141993: step: 284/466, loss: 0.03913683444261551 2023-01-24 05:05:32.732142: step: 286/466, loss: 7.576395091746235e-06 2023-01-24 05:05:33.399549: step: 288/466, loss: 0.0048559121787548065 2023-01-24 05:05:34.135801: step: 290/466, loss: 0.010100842453539371 2023-01-24 05:05:34.718997: step: 292/466, loss: 0.002126762643456459 2023-01-24 05:05:35.257304: step: 294/466, loss: 0.00041677968692965806 2023-01-24 05:05:35.841141: step: 296/466, loss: 0.0007044681697152555 2023-01-24 05:05:36.414106: step: 298/466, loss: 0.0008290159748867154 2023-01-24 05:05:37.006681: step: 300/466, loss: 0.0004512334999162704 2023-01-24 05:05:37.626861: step: 302/466, loss: 0.039500828832387924 2023-01-24 05:05:38.203491: step: 304/466, loss: 0.0011578116100281477 2023-01-24 05:05:38.836320: step: 306/466, loss: 0.023430608212947845 2023-01-24 05:05:39.424924: step: 308/466, loss: 0.021048884838819504 2023-01-24 05:05:40.093819: step: 310/466, loss: 0.008742013014853 2023-01-24 05:05:40.609031: step: 312/466, loss: 1.865769263531547e-05 2023-01-24 05:05:41.265040: step: 314/466, loss: 5.350162973627448e-05 2023-01-24 05:05:41.923294: step: 316/466, loss: 0.08590547740459442 2023-01-24 05:05:42.573383: step: 318/466, loss: 0.00015754085325170308 2023-01-24 05:05:43.141552: step: 320/466, loss: 0.0016338627319782972 2023-01-24 05:05:43.719498: step: 322/466, loss: 0.0017493648920208216 2023-01-24 05:05:44.292795: step: 324/466, loss: 0.00036035527591593564 2023-01-24 05:05:44.967137: step: 326/466, loss: 0.027754299342632294 2023-01-24 05:05:45.651069: step: 328/466, loss: 0.00023671459348406643 2023-01-24 05:05:46.292630: step: 330/466, loss: 0.014487960375845432 2023-01-24 05:05:47.017330: step: 332/466, loss: 0.024945516139268875 2023-01-24 05:05:47.600492: step: 334/466, loss: 0.017370715737342834 2023-01-24 05:05:48.208746: step: 336/466, loss: 0.021477192640304565 2023-01-24 05:05:48.801507: step: 338/466, loss: 0.0010344891343265772 2023-01-24 05:05:49.381242: step: 340/466, loss: 0.0019837208092212677 2023-01-24 05:05:49.952411: step: 342/466, loss: 0.0013008707901462913 2023-01-24 05:05:50.458511: step: 344/466, loss: 0.012516515329480171 2023-01-24 05:05:51.103949: step: 346/466, loss: 0.031781699508428574 2023-01-24 05:05:51.864179: step: 348/466, loss: 0.017205674201250076 2023-01-24 05:05:52.473425: step: 350/466, loss: 0.0004225615703035146 2023-01-24 05:05:53.127313: step: 352/466, loss: 0.0017902185209095478 2023-01-24 05:05:53.742452: step: 354/466, loss: 0.0006667530396953225 2023-01-24 05:05:54.413023: step: 356/466, loss: 13.148250579833984 2023-01-24 05:05:55.012545: step: 358/466, loss: 6.158139876788482e-05 2023-01-24 05:05:55.653731: step: 360/466, loss: 0.01335648912936449 2023-01-24 05:05:56.204515: step: 362/466, loss: 0.00912449136376381 2023-01-24 05:05:56.783330: step: 364/466, loss: 0.002331480849534273 2023-01-24 05:05:57.356263: step: 366/466, loss: 5.07688382640481e-05 2023-01-24 05:05:57.968564: step: 368/466, loss: 0.022180695086717606 2023-01-24 05:05:58.581702: step: 370/466, loss: 0.0009626256069168448 2023-01-24 05:05:59.167966: step: 372/466, loss: 0.000288007955532521 2023-01-24 05:05:59.772503: step: 374/466, loss: 0.011116331443190575 2023-01-24 05:06:00.370409: step: 376/466, loss: 0.0007267410983331501 2023-01-24 05:06:00.980880: step: 378/466, loss: 0.03145395219326019 2023-01-24 05:06:01.602614: step: 380/466, loss: 0.008209649473428726 2023-01-24 05:06:02.219632: step: 382/466, loss: 0.0030028829351067543 2023-01-24 05:06:02.838085: step: 384/466, loss: 4.2137100535910577e-05 2023-01-24 05:06:03.493939: step: 386/466, loss: 0.0022868614178150892 2023-01-24 05:06:04.131962: step: 388/466, loss: 0.02006855420768261 2023-01-24 05:06:04.763332: step: 390/466, loss: 0.0006233835592865944 2023-01-24 05:06:05.358122: step: 392/466, loss: 0.001457541948184371 2023-01-24 05:06:06.025095: step: 394/466, loss: 0.016138330101966858 2023-01-24 05:06:06.710089: step: 396/466, loss: 0.011141076683998108 2023-01-24 05:06:07.246786: step: 398/466, loss: 0.00016360492736566812 2023-01-24 05:06:07.897040: step: 400/466, loss: 0.0023589630145579576 2023-01-24 05:06:08.499088: step: 402/466, loss: 0.3350490927696228 2023-01-24 05:06:09.112188: step: 404/466, loss: 0.02268916554749012 2023-01-24 05:06:09.794679: step: 406/466, loss: 0.025465872138738632 2023-01-24 05:06:10.436755: step: 408/466, loss: 0.02911735139787197 2023-01-24 05:06:11.087168: step: 410/466, loss: 0.003883585799485445 2023-01-24 05:06:11.676311: step: 412/466, loss: 0.014216568320989609 2023-01-24 05:06:12.235623: step: 414/466, loss: 0.018321098759770393 2023-01-24 05:06:12.823450: step: 416/466, loss: 1.0013217433879618e-05 2023-01-24 05:06:13.416588: step: 418/466, loss: 0.00023530589533038437 2023-01-24 05:06:14.041661: step: 420/466, loss: 0.009018952026963234 2023-01-24 05:06:14.624778: step: 422/466, loss: 0.00021789773018099368 2023-01-24 05:06:15.284374: step: 424/466, loss: 0.009294179268181324 2023-01-24 05:06:15.926378: step: 426/466, loss: 0.0018578199669718742 2023-01-24 05:06:16.527603: step: 428/466, loss: 0.08332467079162598 2023-01-24 05:06:17.205116: step: 430/466, loss: 0.008198990486562252 2023-01-24 05:06:17.775247: step: 432/466, loss: 0.0760422796010971 2023-01-24 05:06:18.395695: step: 434/466, loss: 0.048082102090120316 2023-01-24 05:06:19.000656: step: 436/466, loss: 0.7173665165901184 2023-01-24 05:06:19.602117: step: 438/466, loss: 0.0007875600131228566 2023-01-24 05:06:20.252073: step: 440/466, loss: 0.00019213580526411533 2023-01-24 05:06:20.875146: step: 442/466, loss: 0.0004895920865237713 2023-01-24 05:06:21.492088: step: 444/466, loss: 0.027286848053336143 2023-01-24 05:06:22.110055: step: 446/466, loss: 0.0006382520077750087 2023-01-24 05:06:22.725498: step: 448/466, loss: 0.00019963916565757245 2023-01-24 05:06:23.326625: step: 450/466, loss: 0.0013467309763655066 2023-01-24 05:06:23.987309: step: 452/466, loss: 0.0013148612342774868 2023-01-24 05:06:24.594436: step: 454/466, loss: 0.0014030374586582184 2023-01-24 05:06:25.274992: step: 456/466, loss: 0.029512371867895126 2023-01-24 05:06:25.886842: step: 458/466, loss: 0.0027600282337516546 2023-01-24 05:06:26.466123: step: 460/466, loss: 0.01813333109021187 2023-01-24 05:06:27.127939: step: 462/466, loss: 0.018591925501823425 2023-01-24 05:06:27.711830: step: 464/466, loss: 0.0026494336780160666 2023-01-24 05:06:28.329182: step: 466/466, loss: 0.016628867015242577 2023-01-24 05:06:28.955062: step: 468/466, loss: 0.003137952880933881 2023-01-24 05:06:29.649158: step: 470/466, loss: 9.976519584655762 2023-01-24 05:06:30.211169: step: 472/466, loss: 8.773814624873921e-05 2023-01-24 05:06:30.833897: step: 474/466, loss: 0.007623251993209124 2023-01-24 05:06:31.468300: step: 476/466, loss: 0.029763251543045044 2023-01-24 05:06:32.023382: step: 478/466, loss: 0.00034636148484423757 2023-01-24 05:06:32.689850: step: 480/466, loss: 0.14045073091983795 2023-01-24 05:06:33.246090: step: 482/466, loss: 0.10622703284025192 2023-01-24 05:06:33.831631: step: 484/466, loss: 0.0007836679578758776 2023-01-24 05:06:34.463558: step: 486/466, loss: 0.00019406896899454296 2023-01-24 05:06:35.065654: step: 488/466, loss: 0.003651735605672002 2023-01-24 05:06:35.695144: step: 490/466, loss: 0.0036884131841361523 2023-01-24 05:06:36.414654: step: 492/466, loss: 0.027452562004327774 2023-01-24 05:06:37.031711: step: 494/466, loss: 0.012056098319590092 2023-01-24 05:06:37.593411: step: 496/466, loss: 0.001362653449177742 2023-01-24 05:06:38.193404: step: 498/466, loss: 0.0002041674597421661 2023-01-24 05:06:38.837994: step: 500/466, loss: 0.0017715252470225096 2023-01-24 05:06:39.402999: step: 502/466, loss: 0.0030959725845605135 2023-01-24 05:06:40.010283: step: 504/466, loss: 0.011061202734708786 2023-01-24 05:06:40.614674: step: 506/466, loss: 0.06606190651655197 2023-01-24 05:06:41.286671: step: 508/466, loss: 0.09801173955202103 2023-01-24 05:06:41.917232: step: 510/466, loss: 0.0014111229684203863 2023-01-24 05:06:42.510457: step: 512/466, loss: 0.0019478622125461698 2023-01-24 05:06:43.116231: step: 514/466, loss: 0.010642904788255692 2023-01-24 05:06:43.645714: step: 516/466, loss: 0.00015110296953935176 2023-01-24 05:06:44.224584: step: 518/466, loss: 0.003046165220439434 2023-01-24 05:06:44.829571: step: 520/466, loss: 0.05332741141319275 2023-01-24 05:06:45.455715: step: 522/466, loss: 9.049453365150839e-05 2023-01-24 05:06:46.031273: step: 524/466, loss: 0.005797157529741526 2023-01-24 05:06:46.654177: step: 526/466, loss: 0.0008684933418408036 2023-01-24 05:06:47.273168: step: 528/466, loss: 0.030726298689842224 2023-01-24 05:06:47.882503: step: 530/466, loss: 0.0005216370918788016 2023-01-24 05:06:48.487483: step: 532/466, loss: 0.033588308840990067 2023-01-24 05:06:49.052839: step: 534/466, loss: 0.0958075225353241 2023-01-24 05:06:49.674924: step: 536/466, loss: 0.0002483285206835717 2023-01-24 05:06:50.259109: step: 538/466, loss: 0.0002913588541559875 2023-01-24 05:06:50.920758: step: 540/466, loss: 0.0007436099695041776 2023-01-24 05:06:51.581155: step: 542/466, loss: 6.0492820921353996e-05 2023-01-24 05:06:52.103302: step: 544/466, loss: 0.0014556294772773981 2023-01-24 05:06:52.769628: step: 546/466, loss: 0.08864249289035797 2023-01-24 05:06:53.410042: step: 548/466, loss: 0.02043880894780159 2023-01-24 05:06:54.087912: step: 550/466, loss: 0.012917747721076012 2023-01-24 05:06:54.674295: step: 552/466, loss: 0.028537839651107788 2023-01-24 05:06:55.324326: step: 554/466, loss: 3.2078983167593833e-06 2023-01-24 05:06:55.926822: step: 556/466, loss: 0.00022550432186108083 2023-01-24 05:06:56.530657: step: 558/466, loss: 0.00040336415986530483 2023-01-24 05:06:57.192210: step: 560/466, loss: 0.012805212289094925 2023-01-24 05:06:57.795988: step: 562/466, loss: 0.02135901339352131 2023-01-24 05:06:58.409941: step: 564/466, loss: 0.0027558517176657915 2023-01-24 05:06:59.005820: step: 566/466, loss: 0.031806498765945435 2023-01-24 05:06:59.678971: step: 568/466, loss: 0.003004643600434065 2023-01-24 05:07:00.280072: step: 570/466, loss: 0.0016120121581479907 2023-01-24 05:07:00.913234: step: 572/466, loss: 0.007913384586572647 2023-01-24 05:07:01.537647: step: 574/466, loss: 0.013110068626701832 2023-01-24 05:07:02.138672: step: 576/466, loss: 5.770703410235001e-06 2023-01-24 05:07:02.733744: step: 578/466, loss: 0.0206610057502985 2023-01-24 05:07:03.314918: step: 580/466, loss: 3.0003804567968473e-05 2023-01-24 05:07:03.888277: step: 582/466, loss: 7.978229405125603e-05 2023-01-24 05:07:04.552136: step: 584/466, loss: 0.00429700268432498 2023-01-24 05:07:05.204752: step: 586/466, loss: 0.0014660474844276905 2023-01-24 05:07:05.821299: step: 588/466, loss: 0.0036870657932013273 2023-01-24 05:07:06.373756: step: 590/466, loss: 0.003413487458601594 2023-01-24 05:07:06.992790: step: 592/466, loss: 0.001022740500047803 2023-01-24 05:07:07.643070: step: 594/466, loss: 0.011163215152919292 2023-01-24 05:07:08.209392: step: 596/466, loss: 0.009377296082675457 2023-01-24 05:07:08.804874: step: 598/466, loss: 0.012516893446445465 2023-01-24 05:07:09.428438: step: 600/466, loss: 0.061928533017635345 2023-01-24 05:07:10.038136: step: 602/466, loss: 0.004551253281533718 2023-01-24 05:07:10.730741: step: 604/466, loss: 0.0005189354415051639 2023-01-24 05:07:11.310871: step: 606/466, loss: 0.010122747160494328 2023-01-24 05:07:11.977119: step: 608/466, loss: 0.013840803876519203 2023-01-24 05:07:12.612477: step: 610/466, loss: 0.038683075457811356 2023-01-24 05:07:13.238048: step: 612/466, loss: 0.01938362419605255 2023-01-24 05:07:13.877566: step: 614/466, loss: 0.03968636319041252 2023-01-24 05:07:14.545063: step: 616/466, loss: 0.0006703808903694153 2023-01-24 05:07:15.121313: step: 618/466, loss: 0.007403464522212744 2023-01-24 05:07:15.737607: step: 620/466, loss: 0.053251270204782486 2023-01-24 05:07:16.359124: step: 622/466, loss: 0.006112065631896257 2023-01-24 05:07:16.915719: step: 624/466, loss: 0.00026970345061272383 2023-01-24 05:07:17.573343: step: 626/466, loss: 0.00943650584667921 2023-01-24 05:07:18.185640: step: 628/466, loss: 0.03737393021583557 2023-01-24 05:07:18.830252: step: 630/466, loss: 0.0004983880207873881 2023-01-24 05:07:19.512318: step: 632/466, loss: 0.05362514406442642 2023-01-24 05:07:20.138771: step: 634/466, loss: 0.0756533220410347 2023-01-24 05:07:20.780205: step: 636/466, loss: 0.08298175781965256 2023-01-24 05:07:21.371135: step: 638/466, loss: 0.001337780966423452 2023-01-24 05:07:22.042723: step: 640/466, loss: 0.006372489035129547 2023-01-24 05:07:22.749402: step: 642/466, loss: 0.1665182262659073 2023-01-24 05:07:23.422933: step: 644/466, loss: 0.0029575438238680363 2023-01-24 05:07:24.007035: step: 646/466, loss: 0.003067981917411089 2023-01-24 05:07:24.642029: step: 648/466, loss: 0.0019569708965718746 2023-01-24 05:07:25.240328: step: 650/466, loss: 0.0004455255693756044 2023-01-24 05:07:25.856689: step: 652/466, loss: 0.005703385919332504 2023-01-24 05:07:26.557993: step: 654/466, loss: 0.040502529591321945 2023-01-24 05:07:27.178953: step: 656/466, loss: 0.00125522306188941 2023-01-24 05:07:27.815847: step: 658/466, loss: 0.08380692452192307 2023-01-24 05:07:28.523323: step: 660/466, loss: 0.0005227712099440396 2023-01-24 05:07:29.135311: step: 662/466, loss: 0.01361200213432312 2023-01-24 05:07:29.732124: step: 664/466, loss: 2.1812933482578956e-05 2023-01-24 05:07:30.350595: step: 666/466, loss: 0.019716404378414154 2023-01-24 05:07:30.990624: step: 668/466, loss: 0.0013545232359319925 2023-01-24 05:07:31.604841: step: 670/466, loss: 0.15497149527072906 2023-01-24 05:07:32.172489: step: 672/466, loss: 0.01695387251675129 2023-01-24 05:07:32.729404: step: 674/466, loss: 0.005211781244724989 2023-01-24 05:07:33.318607: step: 676/466, loss: 0.005118411034345627 2023-01-24 05:07:33.893440: step: 678/466, loss: 0.00019001048349309713 2023-01-24 05:07:34.476656: step: 680/466, loss: 0.0051450724713504314 2023-01-24 05:07:35.102182: step: 682/466, loss: 0.000495147833134979 2023-01-24 05:07:35.794960: step: 684/466, loss: 0.00042439333628863096 2023-01-24 05:07:36.410805: step: 686/466, loss: 0.013371428474783897 2023-01-24 05:07:37.030335: step: 688/466, loss: 0.29739809036254883 2023-01-24 05:07:37.619392: step: 690/466, loss: 0.0012508954387158155 2023-01-24 05:07:38.198047: step: 692/466, loss: 0.00010261051647830755 2023-01-24 05:07:38.888864: step: 694/466, loss: 0.0014332940336316824 2023-01-24 05:07:39.561791: step: 696/466, loss: 0.0008747635874897242 2023-01-24 05:07:40.094906: step: 698/466, loss: 0.011899001896381378 2023-01-24 05:07:40.701409: step: 700/466, loss: 0.0002116097166435793 2023-01-24 05:07:41.330041: step: 702/466, loss: 0.00435160705819726 2023-01-24 05:07:41.984981: step: 704/466, loss: 0.010911312885582447 2023-01-24 05:07:42.636549: step: 706/466, loss: 0.01379432063549757 2023-01-24 05:07:43.271228: step: 708/466, loss: 0.02665085159242153 2023-01-24 05:07:43.886167: step: 710/466, loss: 0.0011860569939017296 2023-01-24 05:07:44.489109: step: 712/466, loss: 0.0071630934253335 2023-01-24 05:07:45.107754: step: 714/466, loss: 7.17996881576255e-05 2023-01-24 05:07:45.766944: step: 716/466, loss: 0.0008964896551333368 2023-01-24 05:07:46.334609: step: 718/466, loss: 0.0026297878939658403 2023-01-24 05:07:46.908297: step: 720/466, loss: 0.025944450870156288 2023-01-24 05:07:47.478846: step: 722/466, loss: 0.005806812085211277 2023-01-24 05:07:48.106059: step: 724/466, loss: 0.00132958241738379 2023-01-24 05:07:48.717937: step: 726/466, loss: 8.125565364025533e-05 2023-01-24 05:07:49.343686: step: 728/466, loss: 0.010557837784290314 2023-01-24 05:07:50.065020: step: 730/466, loss: 0.011784940026700497 2023-01-24 05:07:50.706508: step: 732/466, loss: 0.01866106316447258 2023-01-24 05:07:51.311695: step: 734/466, loss: 6.802675488870591e-05 2023-01-24 05:07:51.891166: step: 736/466, loss: 0.018818223848938942 2023-01-24 05:07:52.606756: step: 738/466, loss: 0.03583343327045441 2023-01-24 05:07:53.188234: step: 740/466, loss: 0.04375249519944191 2023-01-24 05:07:53.878046: step: 742/466, loss: 0.019149398431181908 2023-01-24 05:07:54.510281: step: 744/466, loss: 0.0026311357505619526 2023-01-24 05:07:55.188378: step: 746/466, loss: 0.00022247993911150843 2023-01-24 05:07:55.837330: step: 748/466, loss: 0.003206611378118396 2023-01-24 05:07:56.437776: step: 750/466, loss: 0.019325412809848785 2023-01-24 05:07:57.039968: step: 752/466, loss: 0.007997137494385242 2023-01-24 05:07:57.654084: step: 754/466, loss: 0.039681576192379 2023-01-24 05:07:58.274578: step: 756/466, loss: 0.00011258641461608931 2023-01-24 05:07:58.877885: step: 758/466, loss: 0.0005655647837556899 2023-01-24 05:07:59.499801: step: 760/466, loss: 0.019853683188557625 2023-01-24 05:08:00.059658: step: 762/466, loss: 0.09221357107162476 2023-01-24 05:08:00.665154: step: 764/466, loss: 0.00041412244900129735 2023-01-24 05:08:01.321092: step: 766/466, loss: 5.017004241381073e-06 2023-01-24 05:08:01.899656: step: 768/466, loss: 0.00016973994206637144 2023-01-24 05:08:02.546911: step: 770/466, loss: 0.325126588344574 2023-01-24 05:08:03.124355: step: 772/466, loss: 0.02632707543671131 2023-01-24 05:08:03.718519: step: 774/466, loss: 0.037189681082963943 2023-01-24 05:08:04.345398: step: 776/466, loss: 0.001395417028106749 2023-01-24 05:08:04.968240: step: 778/466, loss: 0.014405585825443268 2023-01-24 05:08:05.612061: step: 780/466, loss: 0.0018268510466441512 2023-01-24 05:08:06.225001: step: 782/466, loss: 0.00023554843210149556 2023-01-24 05:08:06.797121: step: 784/466, loss: 0.00029686972266063094 2023-01-24 05:08:07.448605: step: 786/466, loss: 0.0013889693655073643 2023-01-24 05:08:08.006183: step: 788/466, loss: 0.0017082897247746587 2023-01-24 05:08:08.605197: step: 790/466, loss: 0.00015379107207991183 2023-01-24 05:08:09.212950: step: 792/466, loss: 0.011510002426803112 2023-01-24 05:08:09.801471: step: 794/466, loss: 0.0012235584435984492 2023-01-24 05:08:10.372369: step: 796/466, loss: 0.0002206399367423728 2023-01-24 05:08:10.940053: step: 798/466, loss: 0.016461864113807678 2023-01-24 05:08:11.558431: step: 800/466, loss: 0.00021372491028159857 2023-01-24 05:08:12.209615: step: 802/466, loss: 0.006961922161281109 2023-01-24 05:08:12.797794: step: 804/466, loss: 0.0021296602208167315 2023-01-24 05:08:13.443811: step: 806/466, loss: 0.004430140368640423 2023-01-24 05:08:14.044894: step: 808/466, loss: 0.01939069665968418 2023-01-24 05:08:14.735382: step: 810/466, loss: 0.018148865550756454 2023-01-24 05:08:15.416114: step: 812/466, loss: 0.032092414796352386 2023-01-24 05:08:16.009903: step: 814/466, loss: 0.002228677272796631 2023-01-24 05:08:16.614293: step: 816/466, loss: 0.46651652455329895 2023-01-24 05:08:17.269450: step: 818/466, loss: 0.0010596377542242408 2023-01-24 05:08:17.946265: step: 820/466, loss: 0.03519681096076965 2023-01-24 05:08:18.569469: step: 822/466, loss: 0.02435610629618168 2023-01-24 05:08:19.105550: step: 824/466, loss: 0.018063798546791077 2023-01-24 05:08:19.701977: step: 826/466, loss: 0.025036821141839027 2023-01-24 05:08:20.316542: step: 828/466, loss: 0.00565796485170722 2023-01-24 05:08:20.974576: step: 830/466, loss: 0.00033403924317099154 2023-01-24 05:08:21.578780: step: 832/466, loss: 0.035529982298612595 2023-01-24 05:08:22.170655: step: 834/466, loss: 0.0027162355836480856 2023-01-24 05:08:22.849911: step: 836/466, loss: 0.28506195545196533 2023-01-24 05:08:23.436718: step: 838/466, loss: 0.003587544895708561 2023-01-24 05:08:24.026069: step: 840/466, loss: 0.0032926108688116074 2023-01-24 05:08:24.712298: step: 842/466, loss: 0.00020436853810679168 2023-01-24 05:08:25.300558: step: 844/466, loss: 0.015589582733809948 2023-01-24 05:08:25.935611: step: 846/466, loss: 0.011255361139774323 2023-01-24 05:08:26.568528: step: 848/466, loss: 0.0009275318589061499 2023-01-24 05:08:27.157413: step: 850/466, loss: 0.010922861285507679 2023-01-24 05:08:27.748784: step: 852/466, loss: 0.0019173540640622377 2023-01-24 05:08:28.451125: step: 854/466, loss: 0.015206730924546719 2023-01-24 05:08:29.046358: step: 856/466, loss: 0.006905407179147005 2023-01-24 05:08:29.618000: step: 858/466, loss: 8.463582344120368e-05 2023-01-24 05:08:30.241703: step: 860/466, loss: 0.008326876908540726 2023-01-24 05:08:30.845196: step: 862/466, loss: 0.01070437952876091 2023-01-24 05:08:31.500366: step: 864/466, loss: 0.023237278684973717 2023-01-24 05:08:32.073502: step: 866/466, loss: 0.001569541753269732 2023-01-24 05:08:32.636898: step: 868/466, loss: 0.6829010844230652 2023-01-24 05:08:33.220190: step: 870/466, loss: 0.024112524464726448 2023-01-24 05:08:33.881437: step: 872/466, loss: 0.04400830343365669 2023-01-24 05:08:34.525590: step: 874/466, loss: 0.0033177779987454414 2023-01-24 05:08:35.073213: step: 876/466, loss: 3.7316167436074466e-05 2023-01-24 05:08:35.673133: step: 878/466, loss: 0.014675674960017204 2023-01-24 05:08:36.270837: step: 880/466, loss: 0.0008311232668347657 2023-01-24 05:08:36.916277: step: 882/466, loss: 0.0009520520106889307 2023-01-24 05:08:37.556733: step: 884/466, loss: 0.02135886438190937 2023-01-24 05:08:38.210559: step: 886/466, loss: 0.0956830233335495 2023-01-24 05:08:38.848386: step: 888/466, loss: 0.0027999801095575094 2023-01-24 05:08:39.478335: step: 890/466, loss: 0.007054173853248358 2023-01-24 05:08:40.035030: step: 892/466, loss: 0.0005048189777880907 2023-01-24 05:08:40.615901: step: 894/466, loss: 0.009962208569049835 2023-01-24 05:08:41.282304: step: 896/466, loss: 2.0304094505263492e-05 2023-01-24 05:08:41.915492: step: 898/466, loss: 0.00035148989991284907 2023-01-24 05:08:42.465113: step: 900/466, loss: 1.965487354027573e-05 2023-01-24 05:08:43.139160: step: 902/466, loss: 1.1942763194383588e-05 2023-01-24 05:08:43.747406: step: 904/466, loss: 0.011142687872052193 2023-01-24 05:08:44.417089: step: 906/466, loss: 7.367681246250868e-05 2023-01-24 05:08:45.078087: step: 908/466, loss: 0.056911956518888474 2023-01-24 05:08:45.763931: step: 910/466, loss: 0.001020154682919383 2023-01-24 05:08:46.349740: step: 912/466, loss: 0.00010358607687521726 2023-01-24 05:08:46.944943: step: 914/466, loss: 0.009350110776722431 2023-01-24 05:08:47.615108: step: 916/466, loss: 0.07540338486433029 2023-01-24 05:08:48.172385: step: 918/466, loss: 0.0008968925685621798 2023-01-24 05:08:48.787918: step: 920/466, loss: 0.0002797171182464808 2023-01-24 05:08:49.414478: step: 922/466, loss: 0.012770457193255424 2023-01-24 05:08:50.116447: step: 924/466, loss: 0.0036673741415143013 2023-01-24 05:08:50.746549: step: 926/466, loss: 0.015117192640900612 2023-01-24 05:08:51.404026: step: 928/466, loss: 0.0056894137524068356 2023-01-24 05:08:51.989372: step: 930/466, loss: 0.00023494100605603307 2023-01-24 05:08:52.596877: step: 932/466, loss: 0.009276535362005234 ================================================== Loss: 0.076 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36207322638234113, 'r': 0.3284079738344574, 'f1': 0.34441990489703295}, 'combined': 0.2537830878188664, 'epoch': 38} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.35005329881693753, 'r': 0.2764410076397295, 'f1': 0.3089224775237724}, 'combined': 0.19343743919712852, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3524161660823415, 'r': 0.33101708199385016, 'f1': 0.34138160902301184}, 'combined': 0.2515443434906403, 'epoch': 38} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.36495489769135536, 'r': 0.29347907386552136, 'f1': 0.3253374825842898}, 'combined': 0.2016175948409683, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32636911285407755, 'r': 0.31893755810218205, 'f1': 0.3226105434162187}, 'combined': 0.23771303199089797, 'epoch': 38} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36122216365675663, 'r': 0.2806242644487902, 'f1': 0.31586279689355434}, 'combined': 0.20953274645414002, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39285714285714285, 'r': 0.3142857142857143, 'f1': 0.34920634920634924}, 'combined': 0.23280423280423282, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4166666666666667, 'r': 0.43478260869565216, 'f1': 0.425531914893617}, 'combined': 0.2127659574468085, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32324861099239804, 'r': 0.3201817361252975, 'f1': 0.3217078645148366}, 'combined': 0.23704790016882693, 'epoch': 31} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.36854991917508884, 'r': 0.2943742585468896, 'f1': 0.32731227141992336}, 'combined': 0.2171279424270779, 'epoch': 31} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 31} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:11:25.560917: step: 2/466, loss: 0.015146813355386257 2023-01-24 05:11:26.145672: step: 4/466, loss: 0.0003074166306760162 2023-01-24 05:11:26.710222: step: 6/466, loss: 0.0002357083430979401 2023-01-24 05:11:27.327993: step: 8/466, loss: 0.000184124888619408 2023-01-24 05:11:27.993679: step: 10/466, loss: 0.001829985179938376 2023-01-24 05:11:28.585456: step: 12/466, loss: 0.012212690897285938 2023-01-24 05:11:29.140661: step: 14/466, loss: 0.00026231296942569315 2023-01-24 05:11:29.845703: step: 16/466, loss: 0.020328694954514503 2023-01-24 05:11:30.392811: step: 18/466, loss: 0.0020918946247547865 2023-01-24 05:11:31.010456: step: 20/466, loss: 0.4596867561340332 2023-01-24 05:11:31.615786: step: 22/466, loss: 0.005000561010092497 2023-01-24 05:11:32.280286: step: 24/466, loss: 0.0014943135902285576 2023-01-24 05:11:32.854312: step: 26/466, loss: 0.0005825718399137259 2023-01-24 05:11:33.503841: step: 28/466, loss: 0.0016708880430087447 2023-01-24 05:11:34.170467: step: 30/466, loss: 0.00558796850964427 2023-01-24 05:11:34.952717: step: 32/466, loss: 0.009061440825462341 2023-01-24 05:11:35.628086: step: 34/466, loss: 0.004432479850947857 2023-01-24 05:11:36.226164: step: 36/466, loss: 0.001037839101627469 2023-01-24 05:11:36.871913: step: 38/466, loss: 0.02181771956384182 2023-01-24 05:11:37.560968: step: 40/466, loss: 0.004848001524806023 2023-01-24 05:11:38.158395: step: 42/466, loss: 0.0003929196682292968 2023-01-24 05:11:38.743733: step: 44/466, loss: 0.004267378244549036 2023-01-24 05:11:39.380615: step: 46/466, loss: 0.004441943950951099 2023-01-24 05:11:39.939990: step: 48/466, loss: 0.0002382261009188369 2023-01-24 05:11:40.500079: step: 50/466, loss: 0.022365359589457512 2023-01-24 05:11:41.168213: step: 52/466, loss: 0.003134834812954068 2023-01-24 05:11:41.792180: step: 54/466, loss: 0.01796906441450119 2023-01-24 05:11:42.414346: step: 56/466, loss: 0.00047307138447649777 2023-01-24 05:11:43.015625: step: 58/466, loss: 0.0003856524417642504 2023-01-24 05:11:43.618392: step: 60/466, loss: 0.0001241409918293357 2023-01-24 05:11:44.264833: step: 62/466, loss: 0.0018064368050545454 2023-01-24 05:11:44.822251: step: 64/466, loss: 0.00014263468619901687 2023-01-24 05:11:45.447512: step: 66/466, loss: 0.049406711012125015 2023-01-24 05:11:46.065052: step: 68/466, loss: 0.040563397109508514 2023-01-24 05:11:46.664860: step: 70/466, loss: 6.365009903674945e-05 2023-01-24 05:11:47.345199: step: 72/466, loss: 2.4744760594330728e-05 2023-01-24 05:11:47.940925: step: 74/466, loss: 0.0002055208751698956 2023-01-24 05:11:48.604569: step: 76/466, loss: 0.00016762428276706487 2023-01-24 05:11:49.226621: step: 78/466, loss: 0.0049255709163844585 2023-01-24 05:11:49.832869: step: 80/466, loss: 0.022715115919709206 2023-01-24 05:11:50.510450: step: 82/466, loss: 0.047502294182777405 2023-01-24 05:11:51.152737: step: 84/466, loss: 0.0009096862049773335 2023-01-24 05:11:51.745175: step: 86/466, loss: 0.0430033914744854 2023-01-24 05:11:52.350928: step: 88/466, loss: 0.0008197950082831085 2023-01-24 05:11:52.940019: step: 90/466, loss: 0.010219205170869827 2023-01-24 05:11:53.571707: step: 92/466, loss: 0.000343933526892215 2023-01-24 05:11:54.140248: step: 94/466, loss: 0.0016889260150492191 2023-01-24 05:11:54.837572: step: 96/466, loss: 0.001990893855690956 2023-01-24 05:11:55.459678: step: 98/466, loss: 0.001507123582996428 2023-01-24 05:11:56.001702: step: 100/466, loss: 0.003949206788092852 2023-01-24 05:11:56.637214: step: 102/466, loss: 0.00011206905037397519 2023-01-24 05:11:57.186184: step: 104/466, loss: 0.0004756792332045734 2023-01-24 05:11:57.837651: step: 106/466, loss: 0.19810990989208221 2023-01-24 05:11:58.406617: step: 108/466, loss: 0.0008925989968702197 2023-01-24 05:11:59.097015: step: 110/466, loss: 0.019793085753917694 2023-01-24 05:11:59.698121: step: 112/466, loss: 0.0001608679158380255 2023-01-24 05:12:00.309349: step: 114/466, loss: 0.0023188581690192223 2023-01-24 05:12:00.917323: step: 116/466, loss: 0.00044002002687193453 2023-01-24 05:12:01.533866: step: 118/466, loss: 0.0035514081828296185 2023-01-24 05:12:02.140306: step: 120/466, loss: 0.017122356221079826 2023-01-24 05:12:02.763723: step: 122/466, loss: 0.0045568267814815044 2023-01-24 05:12:03.391683: step: 124/466, loss: 0.0007005815277807415 2023-01-24 05:12:03.986626: step: 126/466, loss: 0.056621044874191284 2023-01-24 05:12:04.618018: step: 128/466, loss: 0.009771609678864479 2023-01-24 05:12:05.250486: step: 130/466, loss: 0.025190964341163635 2023-01-24 05:12:05.788422: step: 132/466, loss: 0.0009690226870588958 2023-01-24 05:12:06.399549: step: 134/466, loss: 0.007555143907666206 2023-01-24 05:12:06.983850: step: 136/466, loss: 0.0006410313653759658 2023-01-24 05:12:07.651942: step: 138/466, loss: 0.0059431311674416065 2023-01-24 05:12:08.278631: step: 140/466, loss: 0.01306483056396246 2023-01-24 05:12:08.867491: step: 142/466, loss: 0.000849867588840425 2023-01-24 05:12:09.463042: step: 144/466, loss: 0.005498063750565052 2023-01-24 05:12:10.092583: step: 146/466, loss: 0.011659790761768818 2023-01-24 05:12:10.698472: step: 148/466, loss: 0.00012292210885789245 2023-01-24 05:12:11.279292: step: 150/466, loss: 0.003171361517161131 2023-01-24 05:12:11.899249: step: 152/466, loss: 0.0027298645582050085 2023-01-24 05:12:12.579451: step: 154/466, loss: 0.0005431473255157471 2023-01-24 05:12:13.165265: step: 156/466, loss: 0.005113815888762474 2023-01-24 05:12:13.726343: step: 158/466, loss: 0.0003711992467287928 2023-01-24 05:12:14.330312: step: 160/466, loss: 0.01225435733795166 2023-01-24 05:12:14.914455: step: 162/466, loss: 0.007259010802954435 2023-01-24 05:12:15.540974: step: 164/466, loss: 0.011203518137335777 2023-01-24 05:12:16.170603: step: 166/466, loss: 0.0008613622630946338 2023-01-24 05:12:16.808827: step: 168/466, loss: 0.015586303547024727 2023-01-24 05:12:17.448952: step: 170/466, loss: 0.032504454255104065 2023-01-24 05:12:18.066980: step: 172/466, loss: 0.0831817090511322 2023-01-24 05:12:18.670514: step: 174/466, loss: 0.051060471683740616 2023-01-24 05:12:19.310316: step: 176/466, loss: 0.0001223188592121005 2023-01-24 05:12:19.954991: step: 178/466, loss: 0.01799921505153179 2023-01-24 05:12:20.546840: step: 180/466, loss: 0.07070158421993256 2023-01-24 05:12:21.095490: step: 182/466, loss: 5.117091131978668e-05 2023-01-24 05:12:21.758532: step: 184/466, loss: 0.026660479605197906 2023-01-24 05:12:22.433769: step: 186/466, loss: 0.0007928465493023396 2023-01-24 05:12:23.115632: step: 188/466, loss: 0.025318864732980728 2023-01-24 05:12:23.711350: step: 190/466, loss: 0.0017006566049531102 2023-01-24 05:12:24.332216: step: 192/466, loss: 0.0001773086842149496 2023-01-24 05:12:24.914622: step: 194/466, loss: 0.0009296719217672944 2023-01-24 05:12:25.550967: step: 196/466, loss: 0.974057674407959 2023-01-24 05:12:26.182870: step: 198/466, loss: 0.002374091884121299 2023-01-24 05:12:26.770370: step: 200/466, loss: 0.0025441867765039206 2023-01-24 05:12:27.427550: step: 202/466, loss: 0.0020350173581391573 2023-01-24 05:12:28.095240: step: 204/466, loss: 0.0001868289109552279 2023-01-24 05:12:28.693926: step: 206/466, loss: 0.007404529023915529 2023-01-24 05:12:29.317362: step: 208/466, loss: 0.06039193645119667 2023-01-24 05:12:29.916134: step: 210/466, loss: 0.034476589411497116 2023-01-24 05:12:30.566211: step: 212/466, loss: 0.0005918517708778381 2023-01-24 05:12:31.201879: step: 214/466, loss: 0.002764403820037842 2023-01-24 05:12:31.778860: step: 216/466, loss: 0.0005349020357243717 2023-01-24 05:12:32.462827: step: 218/466, loss: 0.11122305691242218 2023-01-24 05:12:33.123223: step: 220/466, loss: 0.03173927962779999 2023-01-24 05:12:33.722435: step: 222/466, loss: 0.4182344079017639 2023-01-24 05:12:34.298610: step: 224/466, loss: 0.0015414378140121698 2023-01-24 05:12:34.853128: step: 226/466, loss: 0.005522494204342365 2023-01-24 05:12:35.561373: step: 228/466, loss: 0.0012009440688416362 2023-01-24 05:12:36.160090: step: 230/466, loss: 0.006269220262765884 2023-01-24 05:12:36.772724: step: 232/466, loss: 0.0009267473360523582 2023-01-24 05:12:37.361332: step: 234/466, loss: 0.0010035564191639423 2023-01-24 05:12:38.002161: step: 236/466, loss: 0.007629994302988052 2023-01-24 05:12:38.598162: step: 238/466, loss: 0.0006880948203615844 2023-01-24 05:12:39.232066: step: 240/466, loss: 0.017418693751096725 2023-01-24 05:12:39.886866: step: 242/466, loss: 0.018945759162306786 2023-01-24 05:12:40.515605: step: 244/466, loss: 0.045282479375600815 2023-01-24 05:12:41.089228: step: 246/466, loss: 3.4216670883324696e-06 2023-01-24 05:12:41.622794: step: 248/466, loss: 0.013945143669843674 2023-01-24 05:12:42.309595: step: 250/466, loss: 0.0008457014337182045 2023-01-24 05:12:42.953250: step: 252/466, loss: 0.0006486780475825071 2023-01-24 05:12:43.543360: step: 254/466, loss: 0.0004897922044619918 2023-01-24 05:12:44.147944: step: 256/466, loss: 0.04232799634337425 2023-01-24 05:12:44.756342: step: 258/466, loss: 0.0715564414858818 2023-01-24 05:12:45.435641: step: 260/466, loss: 0.0019753577653318644 2023-01-24 05:12:45.941519: step: 262/466, loss: 1.726490336295683e-05 2023-01-24 05:12:46.516470: step: 264/466, loss: 0.032654985785484314 2023-01-24 05:12:47.122486: step: 266/466, loss: 0.005304881837219 2023-01-24 05:12:47.822018: step: 268/466, loss: 0.00436329934746027 2023-01-24 05:12:48.450076: step: 270/466, loss: 0.025691457092761993 2023-01-24 05:12:49.063004: step: 272/466, loss: 0.11292967945337296 2023-01-24 05:12:49.641406: step: 274/466, loss: 0.02001877874135971 2023-01-24 05:12:50.197930: step: 276/466, loss: 0.031532302498817444 2023-01-24 05:12:50.791874: step: 278/466, loss: 0.01248552929610014 2023-01-24 05:12:51.375336: step: 280/466, loss: 0.007771877571940422 2023-01-24 05:12:51.940603: step: 282/466, loss: 0.00010319840657757595 2023-01-24 05:12:52.539793: step: 284/466, loss: 0.002683585975319147 2023-01-24 05:12:53.122041: step: 286/466, loss: 0.017789045348763466 2023-01-24 05:12:53.679295: step: 288/466, loss: 0.03899354115128517 2023-01-24 05:12:54.295991: step: 290/466, loss: 0.0007138618966564536 2023-01-24 05:12:54.861763: step: 292/466, loss: 0.01902945153415203 2023-01-24 05:12:55.504644: step: 294/466, loss: 0.001563211902976036 2023-01-24 05:12:56.072750: step: 296/466, loss: 0.12047767639160156 2023-01-24 05:12:56.649197: step: 298/466, loss: 0.0019326814217492938 2023-01-24 05:12:57.227412: step: 300/466, loss: 0.1401207000017166 2023-01-24 05:12:57.820275: step: 302/466, loss: 0.0678744688630104 2023-01-24 05:12:58.336195: step: 304/466, loss: 1.8989589079865254e-05 2023-01-24 05:12:58.919477: step: 306/466, loss: 0.0033861834090203047 2023-01-24 05:12:59.551834: step: 308/466, loss: 0.006160313729196787 2023-01-24 05:13:00.225204: step: 310/466, loss: 0.00039482087595388293 2023-01-24 05:13:00.800415: step: 312/466, loss: 0.0005335321184247732 2023-01-24 05:13:01.420301: step: 314/466, loss: 0.003995128907263279 2023-01-24 05:13:02.075409: step: 316/466, loss: 0.006496662739664316 2023-01-24 05:13:02.636257: step: 318/466, loss: 0.013022312894463539 2023-01-24 05:13:03.221415: step: 320/466, loss: 0.004884499125182629 2023-01-24 05:13:03.891032: step: 322/466, loss: 0.006754643749445677 2023-01-24 05:13:04.503664: step: 324/466, loss: 0.003081450704485178 2023-01-24 05:13:05.094821: step: 326/466, loss: 0.0004135740746278316 2023-01-24 05:13:05.680343: step: 328/466, loss: 0.017562659457325935 2023-01-24 05:13:06.333235: step: 330/466, loss: 0.023915909230709076 2023-01-24 05:13:06.956063: step: 332/466, loss: 0.006121632177382708 2023-01-24 05:13:07.553504: step: 334/466, loss: 0.0018397157546132803 2023-01-24 05:13:08.149242: step: 336/466, loss: 0.017214152961969376 2023-01-24 05:13:08.738015: step: 338/466, loss: 0.0007691142382100224 2023-01-24 05:13:09.336543: step: 340/466, loss: 0.0006501898751594126 2023-01-24 05:13:09.944776: step: 342/466, loss: 0.0071964929811656475 2023-01-24 05:13:10.532272: step: 344/466, loss: 0.00021978993027005345 2023-01-24 05:13:11.176962: step: 346/466, loss: 0.0007107586716301739 2023-01-24 05:13:11.780516: step: 348/466, loss: 0.00016834375855978578 2023-01-24 05:13:12.401560: step: 350/466, loss: 0.002281171502545476 2023-01-24 05:13:13.020722: step: 352/466, loss: 0.26626327633857727 2023-01-24 05:13:13.627223: step: 354/466, loss: 0.002194957807660103 2023-01-24 05:13:14.188903: step: 356/466, loss: 0.006199446506798267 2023-01-24 05:13:14.807372: step: 358/466, loss: 0.00025742349680513144 2023-01-24 05:13:15.421562: step: 360/466, loss: 0.0033204129431396723 2023-01-24 05:13:16.015416: step: 362/466, loss: 0.0002158809220418334 2023-01-24 05:13:16.669263: step: 364/466, loss: 0.01004930678755045 2023-01-24 05:13:17.319156: step: 366/466, loss: 0.012771162204444408 2023-01-24 05:13:17.981632: step: 368/466, loss: 0.3831963539123535 2023-01-24 05:13:18.607856: step: 370/466, loss: 0.0004712164809461683 2023-01-24 05:13:19.211201: step: 372/466, loss: 8.565105963498354e-05 2023-01-24 05:13:19.818159: step: 374/466, loss: 0.008703215047717094 2023-01-24 05:13:20.457222: step: 376/466, loss: 0.0017890515737235546 2023-01-24 05:13:21.147423: step: 378/466, loss: 0.030411401763558388 2023-01-24 05:13:21.714233: step: 380/466, loss: 0.011570041067898273 2023-01-24 05:13:22.368803: step: 382/466, loss: 6.799383845645934e-05 2023-01-24 05:13:22.959122: step: 384/466, loss: 0.0009624511003494263 2023-01-24 05:13:23.570116: step: 386/466, loss: 0.010127180255949497 2023-01-24 05:13:24.100573: step: 388/466, loss: 0.04178277775645256 2023-01-24 05:13:24.722641: step: 390/466, loss: 0.29337212443351746 2023-01-24 05:13:25.372662: step: 392/466, loss: 0.01280940417200327 2023-01-24 05:13:26.196202: step: 394/466, loss: 0.0004417002492118627 2023-01-24 05:13:26.804160: step: 396/466, loss: 1.205349326133728 2023-01-24 05:13:27.467266: step: 398/466, loss: 0.01344842929393053 2023-01-24 05:13:28.080020: step: 400/466, loss: 0.009408276528120041 2023-01-24 05:13:28.704009: step: 402/466, loss: 0.004019880201667547 2023-01-24 05:13:29.354382: step: 404/466, loss: 0.0017869789153337479 2023-01-24 05:13:30.016922: step: 406/466, loss: 0.0024829041212797165 2023-01-24 05:13:30.607983: step: 408/466, loss: 0.0006193328881636262 2023-01-24 05:13:31.220289: step: 410/466, loss: 7.454582373611629e-05 2023-01-24 05:13:31.768491: step: 412/466, loss: 0.002369493246078491 2023-01-24 05:13:32.388157: step: 414/466, loss: 0.008629947900772095 2023-01-24 05:13:32.984950: step: 416/466, loss: 0.0034468895755708218 2023-01-24 05:13:33.656731: step: 418/466, loss: 0.043524369597435 2023-01-24 05:13:34.288324: step: 420/466, loss: 9.258293721359223e-05 2023-01-24 05:13:34.870160: step: 422/466, loss: 0.0004924088134430349 2023-01-24 05:13:35.463351: step: 424/466, loss: 0.0020500586833804846 2023-01-24 05:13:36.055027: step: 426/466, loss: 0.000378899450879544 2023-01-24 05:13:36.684780: step: 428/466, loss: 0.0009215320460498333 2023-01-24 05:13:37.278452: step: 430/466, loss: 0.0014220247976481915 2023-01-24 05:13:37.934681: step: 432/466, loss: 0.03277864679694176 2023-01-24 05:13:38.549157: step: 434/466, loss: 0.0034149999264627695 2023-01-24 05:13:39.213747: step: 436/466, loss: 0.05704144015908241 2023-01-24 05:13:39.777147: step: 438/466, loss: 0.0003729193704202771 2023-01-24 05:13:40.353461: step: 440/466, loss: 0.0002809804864227772 2023-01-24 05:13:40.987390: step: 442/466, loss: 0.022944778203964233 2023-01-24 05:13:41.550264: step: 444/466, loss: 1.9058319594478235e-05 2023-01-24 05:13:42.181530: step: 446/466, loss: 0.011696002446115017 2023-01-24 05:13:42.798939: step: 448/466, loss: 0.011733698658645153 2023-01-24 05:13:43.479023: step: 450/466, loss: 0.011339561082422733 2023-01-24 05:13:44.122361: step: 452/466, loss: 0.02468111738562584 2023-01-24 05:13:44.798344: step: 454/466, loss: 0.00014265567006077617 2023-01-24 05:13:45.449094: step: 456/466, loss: 0.005921643693000078 2023-01-24 05:13:46.119739: step: 458/466, loss: 3.866276074404595e-06 2023-01-24 05:13:46.851015: step: 460/466, loss: 0.0376657210290432 2023-01-24 05:13:47.450767: step: 462/466, loss: 0.027660418301820755 2023-01-24 05:13:48.092662: step: 464/466, loss: 0.030926374718546867 2023-01-24 05:13:48.670528: step: 466/466, loss: 0.05319409817457199 2023-01-24 05:13:49.253676: step: 468/466, loss: 0.01862410455942154 2023-01-24 05:13:49.833438: step: 470/466, loss: 0.05574251338839531 2023-01-24 05:13:50.417894: step: 472/466, loss: 1.8421047570882365e-05 2023-01-24 05:13:51.217085: step: 474/466, loss: 0.009457133710384369 2023-01-24 05:13:51.829939: step: 476/466, loss: 0.055761680006980896 2023-01-24 05:13:52.456402: step: 478/466, loss: 0.08062682300806046 2023-01-24 05:13:53.060998: step: 480/466, loss: 0.03951627388596535 2023-01-24 05:13:53.675660: step: 482/466, loss: 0.002509194193407893 2023-01-24 05:13:54.232797: step: 484/466, loss: 0.010882500559091568 2023-01-24 05:13:54.848498: step: 486/466, loss: 0.00012993431300856173 2023-01-24 05:13:55.393592: step: 488/466, loss: 0.0002025272697210312 2023-01-24 05:13:56.074330: step: 490/466, loss: 0.0013053443981334567 2023-01-24 05:13:56.636290: step: 492/466, loss: 0.0002276983723277226 2023-01-24 05:13:57.219071: step: 494/466, loss: 0.00048199991579167545 2023-01-24 05:13:57.864257: step: 496/466, loss: 0.005118025932461023 2023-01-24 05:13:58.468406: step: 498/466, loss: 0.0019508281257003546 2023-01-24 05:13:59.073779: step: 500/466, loss: 0.0013491560239344835 2023-01-24 05:13:59.722643: step: 502/466, loss: 0.007075273897498846 2023-01-24 05:14:00.354090: step: 504/466, loss: 0.005225518252700567 2023-01-24 05:14:01.044246: step: 506/466, loss: 0.003914638888090849 2023-01-24 05:14:01.599499: step: 508/466, loss: 0.0018765986897051334 2023-01-24 05:14:02.224852: step: 510/466, loss: 0.00469422759488225 2023-01-24 05:14:02.836876: step: 512/466, loss: 0.0008197916322387755 2023-01-24 05:14:03.443714: step: 514/466, loss: 0.0004325522168073803 2023-01-24 05:14:04.104501: step: 516/466, loss: 0.02749079279601574 2023-01-24 05:14:04.704155: step: 518/466, loss: 0.010200398974120617 2023-01-24 05:14:05.319132: step: 520/466, loss: 0.006094923242926598 2023-01-24 05:14:05.977322: step: 522/466, loss: 0.029264384880661964 2023-01-24 05:14:06.593943: step: 524/466, loss: 0.008493440225720406 2023-01-24 05:14:07.174032: step: 526/466, loss: 0.010457186959683895 2023-01-24 05:14:07.761235: step: 528/466, loss: 0.0030657630413770676 2023-01-24 05:14:08.354529: step: 530/466, loss: 0.011401893571019173 2023-01-24 05:14:08.928180: step: 532/466, loss: 0.0011566388420760632 2023-01-24 05:14:09.665625: step: 534/466, loss: 0.0009276172495447099 2023-01-24 05:14:10.252357: step: 536/466, loss: 0.0007449205149896443 2023-01-24 05:14:10.849141: step: 538/466, loss: 0.04266156628727913 2023-01-24 05:14:11.461590: step: 540/466, loss: 2.943605613836553e-05 2023-01-24 05:14:12.082477: step: 542/466, loss: 0.005300668999552727 2023-01-24 05:14:12.705814: step: 544/466, loss: 0.004938418511301279 2023-01-24 05:14:13.265986: step: 546/466, loss: 0.0005849276203662157 2023-01-24 05:14:13.894293: step: 548/466, loss: 0.00024039176059886813 2023-01-24 05:14:14.541719: step: 550/466, loss: 0.05814645066857338 2023-01-24 05:14:15.331667: step: 552/466, loss: 0.034741807729005814 2023-01-24 05:14:16.013126: step: 554/466, loss: 0.010376464575529099 2023-01-24 05:14:16.624752: step: 556/466, loss: 0.007738225162029266 2023-01-24 05:14:17.231164: step: 558/466, loss: 0.015268164686858654 2023-01-24 05:14:17.869421: step: 560/466, loss: 0.0005753119476139545 2023-01-24 05:14:18.463273: step: 562/466, loss: 0.004864976741373539 2023-01-24 05:14:19.170209: step: 564/466, loss: 0.00046194621245376766 2023-01-24 05:14:19.709235: step: 566/466, loss: 5.7880228268913925e-05 2023-01-24 05:14:20.369712: step: 568/466, loss: 0.0002593135286588222 2023-01-24 05:14:21.022085: step: 570/466, loss: 0.00015103169425856322 2023-01-24 05:14:21.629914: step: 572/466, loss: 0.0005016021896153688 2023-01-24 05:14:22.285405: step: 574/466, loss: 0.00017170878709293902 2023-01-24 05:14:22.938921: step: 576/466, loss: 0.002684682607650757 2023-01-24 05:14:23.588772: step: 578/466, loss: 0.02178850769996643 2023-01-24 05:14:24.153141: step: 580/466, loss: 0.001377226086333394 2023-01-24 05:14:24.792665: step: 582/466, loss: 0.016383491456508636 2023-01-24 05:14:25.477212: step: 584/466, loss: 0.026128072291612625 2023-01-24 05:14:26.132420: step: 586/466, loss: 0.8927581310272217 2023-01-24 05:14:26.757421: step: 588/466, loss: 0.002846858697012067 2023-01-24 05:14:27.380960: step: 590/466, loss: 0.010788768529891968 2023-01-24 05:14:27.999866: step: 592/466, loss: 0.016432126984000206 2023-01-24 05:14:28.570990: step: 594/466, loss: 0.00408394169062376 2023-01-24 05:14:29.169607: step: 596/466, loss: 0.0014608690980821848 2023-01-24 05:14:29.789516: step: 598/466, loss: 0.021469846367836 2023-01-24 05:14:30.410475: step: 600/466, loss: 0.010216777212917805 2023-01-24 05:14:31.029093: step: 602/466, loss: 0.0032048202119767666 2023-01-24 05:14:31.639772: step: 604/466, loss: 0.001778666046448052 2023-01-24 05:14:32.272107: step: 606/466, loss: 0.0009655572939664125 2023-01-24 05:14:32.881347: step: 608/466, loss: 0.02794690988957882 2023-01-24 05:14:33.457759: step: 610/466, loss: 0.0011128874029964209 2023-01-24 05:14:34.090436: step: 612/466, loss: 0.8146460056304932 2023-01-24 05:14:34.695293: step: 614/466, loss: 0.07190607488155365 2023-01-24 05:14:35.341080: step: 616/466, loss: 0.0043852198868989944 2023-01-24 05:14:35.972784: step: 618/466, loss: 0.00029586939490400255 2023-01-24 05:14:36.604223: step: 620/466, loss: 0.00011438815272413194 2023-01-24 05:14:37.233395: step: 622/466, loss: 0.03767530992627144 2023-01-24 05:14:37.880964: step: 624/466, loss: 0.001130322809331119 2023-01-24 05:14:38.557916: step: 626/466, loss: 0.012164980173110962 2023-01-24 05:14:39.167899: step: 628/466, loss: 0.0004524152318481356 2023-01-24 05:14:39.889469: step: 630/466, loss: 0.5553993582725525 2023-01-24 05:14:40.483532: step: 632/466, loss: 0.0335814394056797 2023-01-24 05:14:41.081195: step: 634/466, loss: 0.006767832674086094 2023-01-24 05:14:41.705003: step: 636/466, loss: 0.0005514143267646432 2023-01-24 05:14:42.363104: step: 638/466, loss: 0.03491172939538956 2023-01-24 05:14:42.982954: step: 640/466, loss: 0.00022917038586456329 2023-01-24 05:14:43.605922: step: 642/466, loss: 0.013450004160404205 2023-01-24 05:14:44.223484: step: 644/466, loss: 0.0004298327548895031 2023-01-24 05:14:44.887334: step: 646/466, loss: 0.05986448749899864 2023-01-24 05:14:45.507129: step: 648/466, loss: 0.09393955022096634 2023-01-24 05:14:46.112211: step: 650/466, loss: 0.01259876973927021 2023-01-24 05:14:46.716631: step: 652/466, loss: 0.005960284266620874 2023-01-24 05:14:47.314311: step: 654/466, loss: 0.0038826039526611567 2023-01-24 05:14:47.941441: step: 656/466, loss: 0.02023625746369362 2023-01-24 05:14:48.535442: step: 658/466, loss: 0.013375281356275082 2023-01-24 05:14:49.171985: step: 660/466, loss: 0.009454164654016495 2023-01-24 05:14:49.762833: step: 662/466, loss: 0.03510063886642456 2023-01-24 05:14:50.396365: step: 664/466, loss: 2.4045333702815697e-05 2023-01-24 05:14:51.121295: step: 666/466, loss: 0.03811146691441536 2023-01-24 05:14:51.742170: step: 668/466, loss: 0.004611080978065729 2023-01-24 05:14:52.430149: step: 670/466, loss: 0.01828666590154171 2023-01-24 05:14:53.166959: step: 672/466, loss: 0.08229411393404007 2023-01-24 05:14:53.806105: step: 674/466, loss: 0.07620945572853088 2023-01-24 05:14:54.427287: step: 676/466, loss: 0.020200613886117935 2023-01-24 05:14:55.097996: step: 678/466, loss: 0.011386757716536522 2023-01-24 05:14:55.738910: step: 680/466, loss: 0.06650497019290924 2023-01-24 05:14:56.365058: step: 682/466, loss: 0.009418701753020287 2023-01-24 05:14:57.015941: step: 684/466, loss: 0.011599413119256496 2023-01-24 05:14:57.550501: step: 686/466, loss: 2.19941957766423e-05 2023-01-24 05:14:58.141483: step: 688/466, loss: 7.218680548248813e-05 2023-01-24 05:14:58.747710: step: 690/466, loss: 0.000757165253162384 2023-01-24 05:14:59.290749: step: 692/466, loss: 0.0006082160398364067 2023-01-24 05:14:59.847213: step: 694/466, loss: 0.009860683232545853 2023-01-24 05:15:00.439594: step: 696/466, loss: 0.0037478036247193813 2023-01-24 05:15:01.063683: step: 698/466, loss: 0.00012133311975048855 2023-01-24 05:15:01.665719: step: 700/466, loss: 1.673118233680725 2023-01-24 05:15:02.367711: step: 702/466, loss: 0.012901155278086662 2023-01-24 05:15:02.913233: step: 704/466, loss: 0.03286221623420715 2023-01-24 05:15:03.505252: step: 706/466, loss: 0.0008559783454984426 2023-01-24 05:15:04.657018: step: 708/466, loss: 0.014345237985253334 2023-01-24 05:15:05.208640: step: 710/466, loss: 0.03778726980090141 2023-01-24 05:15:05.885986: step: 712/466, loss: 0.0023969518952071667 2023-01-24 05:15:06.533453: step: 714/466, loss: 0.0012106853537261486 2023-01-24 05:15:07.128235: step: 716/466, loss: 6.305293209152296e-05 2023-01-24 05:15:07.695233: step: 718/466, loss: 0.0016548899002373219 2023-01-24 05:15:08.300518: step: 720/466, loss: 0.01998119428753853 2023-01-24 05:15:08.939263: step: 722/466, loss: 0.004715107847005129 2023-01-24 05:15:09.501424: step: 724/466, loss: 0.018795287236571312 2023-01-24 05:15:10.156670: step: 726/466, loss: 0.001077094697393477 2023-01-24 05:15:10.790315: step: 728/466, loss: 0.016393447294831276 2023-01-24 05:15:11.459611: step: 730/466, loss: 0.0015165505465120077 2023-01-24 05:15:12.117570: step: 732/466, loss: 0.24392935633659363 2023-01-24 05:15:12.758669: step: 734/466, loss: 0.009406911209225655 2023-01-24 05:15:13.339569: step: 736/466, loss: 0.0006833647494204342 2023-01-24 05:15:13.978556: step: 738/466, loss: 0.01145173143595457 2023-01-24 05:15:14.603193: step: 740/466, loss: 0.017168890684843063 2023-01-24 05:15:15.268398: step: 742/466, loss: 0.039287131279706955 2023-01-24 05:15:15.879924: step: 744/466, loss: 0.0018259059870615602 2023-01-24 05:15:16.524515: step: 746/466, loss: 0.04990117996931076 2023-01-24 05:15:17.161659: step: 748/466, loss: 0.005614149384200573 2023-01-24 05:15:17.773451: step: 750/466, loss: 0.34750282764434814 2023-01-24 05:15:18.400720: step: 752/466, loss: 0.007629684172570705 2023-01-24 05:15:19.158517: step: 754/466, loss: 0.000609358714427799 2023-01-24 05:15:19.735390: step: 756/466, loss: 6.950379611225799e-05 2023-01-24 05:15:20.348063: step: 758/466, loss: 0.007717175409197807 2023-01-24 05:15:20.960874: step: 760/466, loss: 0.0005427895812317729 2023-01-24 05:15:21.592171: step: 762/466, loss: 0.009419084526598454 2023-01-24 05:15:22.220829: step: 764/466, loss: 0.003427655203267932 2023-01-24 05:15:22.764501: step: 766/466, loss: 0.004435101989656687 2023-01-24 05:15:23.437664: step: 768/466, loss: 0.02116675302386284 2023-01-24 05:15:24.074427: step: 770/466, loss: 0.0014157030964270234 2023-01-24 05:15:24.703838: step: 772/466, loss: 0.024654695764183998 2023-01-24 05:15:25.372030: step: 774/466, loss: 0.01110147312283516 2023-01-24 05:15:25.955560: step: 776/466, loss: 0.011182224377989769 2023-01-24 05:15:26.573920: step: 778/466, loss: 0.0026764869689941406 2023-01-24 05:15:27.150277: step: 780/466, loss: 0.03134223818778992 2023-01-24 05:15:27.803509: step: 782/466, loss: 0.04435329511761665 2023-01-24 05:15:28.416212: step: 784/466, loss: 0.0007561178063042462 2023-01-24 05:15:29.030201: step: 786/466, loss: 0.0002951621718239039 2023-01-24 05:15:29.653015: step: 788/466, loss: 0.0003678632201626897 2023-01-24 05:15:30.245825: step: 790/466, loss: 0.00023044607951305807 2023-01-24 05:15:30.891820: step: 792/466, loss: 0.004554104525595903 2023-01-24 05:15:31.524936: step: 794/466, loss: 0.03749992698431015 2023-01-24 05:15:32.127488: step: 796/466, loss: 0.0050889598205685616 2023-01-24 05:15:32.734611: step: 798/466, loss: 0.022501587867736816 2023-01-24 05:15:33.379751: step: 800/466, loss: 0.0016455540899187326 2023-01-24 05:15:33.950698: step: 802/466, loss: 0.02270793356001377 2023-01-24 05:15:34.524342: step: 804/466, loss: 0.02387150004506111 2023-01-24 05:15:35.120990: step: 806/466, loss: 0.00879066064953804 2023-01-24 05:15:35.722380: step: 808/466, loss: 0.02466905117034912 2023-01-24 05:15:36.321063: step: 810/466, loss: 0.0003841761499643326 2023-01-24 05:15:36.865880: step: 812/466, loss: 0.022657496854662895 2023-01-24 05:15:37.472328: step: 814/466, loss: 0.021001193672418594 2023-01-24 05:15:38.069863: step: 816/466, loss: 0.0012862952426075935 2023-01-24 05:15:38.714086: step: 818/466, loss: 0.0014121142448857427 2023-01-24 05:15:39.363887: step: 820/466, loss: 0.07674619555473328 2023-01-24 05:15:39.930419: step: 822/466, loss: 0.00023735903960186988 2023-01-24 05:15:40.514940: step: 824/466, loss: 0.008518465794622898 2023-01-24 05:15:41.154407: step: 826/466, loss: 0.047690004110336304 2023-01-24 05:15:41.782283: step: 828/466, loss: 0.012457768432796001 2023-01-24 05:15:42.459031: step: 830/466, loss: 0.0053737955167889595 2023-01-24 05:15:43.068218: step: 832/466, loss: 0.02782123163342476 2023-01-24 05:15:43.702362: step: 834/466, loss: 0.012997192330658436 2023-01-24 05:15:44.349300: step: 836/466, loss: 0.0006396679091267288 2023-01-24 05:15:44.975970: step: 838/466, loss: 0.06348526477813721 2023-01-24 05:15:45.615994: step: 840/466, loss: 0.009664001874625683 2023-01-24 05:15:46.241719: step: 842/466, loss: 6.664659304078668e-05 2023-01-24 05:15:46.844520: step: 844/466, loss: 0.0036700644996017218 2023-01-24 05:15:47.471412: step: 846/466, loss: 0.00011915640061488375 2023-01-24 05:15:48.052460: step: 848/466, loss: 0.019163085147738457 2023-01-24 05:15:48.755157: step: 850/466, loss: 0.003196119097992778 2023-01-24 05:15:49.421885: step: 852/466, loss: 0.023471256718039513 2023-01-24 05:15:50.057766: step: 854/466, loss: 0.03895140066742897 2023-01-24 05:15:50.640456: step: 856/466, loss: 0.14801856875419617 2023-01-24 05:15:51.212873: step: 858/466, loss: 0.002836451167240739 2023-01-24 05:15:51.807922: step: 860/466, loss: 0.03754058852791786 2023-01-24 05:15:52.392803: step: 862/466, loss: 0.0010937509359791875 2023-01-24 05:15:52.986072: step: 864/466, loss: 0.013703150674700737 2023-01-24 05:15:53.533203: step: 866/466, loss: 0.005252503789961338 2023-01-24 05:15:54.076642: step: 868/466, loss: 0.0045418632216751575 2023-01-24 05:15:54.604444: step: 870/466, loss: 0.001951231388375163 2023-01-24 05:15:55.203601: step: 872/466, loss: 0.00119210802949965 2023-01-24 05:15:55.847274: step: 874/466, loss: 0.030209461227059364 2023-01-24 05:15:56.463789: step: 876/466, loss: 0.011322562582790852 2023-01-24 05:15:57.134472: step: 878/466, loss: 0.010349459014832973 2023-01-24 05:15:57.786300: step: 880/466, loss: 0.012835453264415264 2023-01-24 05:15:58.402523: step: 882/466, loss: 0.01803370751440525 2023-01-24 05:15:58.960882: step: 884/466, loss: 0.01328533235937357 2023-01-24 05:15:59.625997: step: 886/466, loss: 0.04617784172296524 2023-01-24 05:16:00.214880: step: 888/466, loss: 0.004265455529093742 2023-01-24 05:16:00.824940: step: 890/466, loss: 0.0004143684927839786 2023-01-24 05:16:01.445986: step: 892/466, loss: 0.00028480804758146405 2023-01-24 05:16:02.084179: step: 894/466, loss: 0.013935239054262638 2023-01-24 05:16:02.649024: step: 896/466, loss: 0.0036510045174509287 2023-01-24 05:16:03.262109: step: 898/466, loss: 0.01588931865990162 2023-01-24 05:16:03.886500: step: 900/466, loss: 0.0030068140476942062 2023-01-24 05:16:04.498195: step: 902/466, loss: 1.1271354196651373e-05 2023-01-24 05:16:05.084724: step: 904/466, loss: 0.03463272377848625 2023-01-24 05:16:05.688426: step: 906/466, loss: 0.00010575733904261142 2023-01-24 05:16:06.251685: step: 908/466, loss: 0.010893854312598705 2023-01-24 05:16:06.874132: step: 910/466, loss: 0.012717542238533497 2023-01-24 05:16:07.423455: step: 912/466, loss: 0.003962480928748846 2023-01-24 05:16:08.040183: step: 914/466, loss: 0.00039833056507632136 2023-01-24 05:16:08.682612: step: 916/466, loss: 0.02264953963458538 2023-01-24 05:16:09.291705: step: 918/466, loss: 0.019269829615950584 2023-01-24 05:16:09.910075: step: 920/466, loss: 0.016555093228816986 2023-01-24 05:16:10.538481: step: 922/466, loss: 0.0030862840358167887 2023-01-24 05:16:11.130924: step: 924/466, loss: 0.032286155968904495 2023-01-24 05:16:11.804003: step: 926/466, loss: 0.0031867055222392082 2023-01-24 05:16:12.432180: step: 928/466, loss: 0.00030876160599291325 2023-01-24 05:16:13.047279: step: 930/466, loss: 0.004123792517930269 2023-01-24 05:16:13.673204: step: 932/466, loss: 0.022024938836693764 ================================================== Loss: 0.032 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3717006416364311, 'r': 0.33220304024811964, 'f1': 0.3508436918051283}, 'combined': 0.2585164044879893, 'epoch': 39} Test Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3591893141305069, 'r': 0.274655129290054, 'f1': 0.3112851695275114}, 'combined': 0.1949168818536754, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3533366533864542, 'r': 0.3365749525616698, 'f1': 0.34475218658892126}, 'combined': 0.2540279269602578, 'epoch': 39} Test Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3770961485172942, 'r': 0.2883476410664252, 'f1': 0.32680381598637254}, 'combined': 0.20252630849859704, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32239561429629265, 'r': 0.32178385791242875, 'f1': 0.3220894456217473}, 'combined': 0.23732906519497168, 'epoch': 39} Test Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3822930310989561, 'r': 0.2853396540783725, 'f1': 0.3267765754199862}, 'combined': 0.21677257973405029, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3142857142857143, 'r': 0.3142857142857143, 'f1': 0.3142857142857143}, 'combined': 0.2095238095238095, 'epoch': 39} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.40217391304347827, 'r': 0.40217391304347827, 'f1': 0.40217391304347827}, 'combined': 0.20108695652173914, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 39} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3648356802322833, 'r': 0.3246829867721838, 'f1': 0.3435902289737769}, 'combined': 0.2531717476648882, 'epoch': 30} Test for Chinese: {'template': {'p': 0.8375, 'r': 0.5, 'f1': 0.6261682242990655}, 'slot': {'p': 0.3667961673520445, 'r': 0.27484906678949417, 'f1': 0.3142346547108753}, 'combined': 0.19676375575353877, 'epoch': 30} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4322916666666667, 'r': 0.29642857142857143, 'f1': 0.35169491525423724}, 'combined': 0.23446327683615814, 'epoch': 30} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35229085235153196, 'r': 0.34426904926193347, 'f1': 0.3482337600019942}, 'combined': 0.2565932968435746, 'epoch': 27} Test for Korean: {'template': {'p': 0.8354430379746836, 'r': 0.4925373134328358, 'f1': 0.6197183098591549}, 'slot': {'p': 0.3719346926915001, 'r': 0.28976697428805775, 'f1': 0.3257491885306194}, 'combined': 0.20187273655418667, 'epoch': 27} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4230769230769231, 'r': 0.4782608695652174, 'f1': 0.44897959183673475}, 'combined': 0.22448979591836737, 'epoch': 27} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32239561429629265, 'r': 0.32178385791242875, 'f1': 0.3220894456217473}, 'combined': 0.23732906519497168, 'epoch': 39} Test for Russian: {'template': {'p': 0.9852941176470589, 'r': 0.5, 'f1': 0.6633663366336634}, 'slot': {'p': 0.3822930310989561, 'r': 0.2853396540783725, 'f1': 0.3267765754199862}, 'combined': 0.21677257973405029, 'epoch': 39} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 39}