Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1127]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 613743345, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:43:28.609947: step: 2/464, loss: 25.893234252929688 2023-01-24 00:43:29.193585: step: 4/464, loss: 7.9878716468811035 2023-01-24 00:43:29.758352: step: 6/464, loss: 34.16461181640625 2023-01-24 00:43:30.423379: step: 8/464, loss: 18.327600479125977 2023-01-24 00:43:31.023228: step: 10/464, loss: 23.868892669677734 2023-01-24 00:43:31.640976: step: 12/464, loss: 7.332606315612793 2023-01-24 00:43:32.283228: step: 14/464, loss: 13.878583908081055 2023-01-24 00:43:32.892391: step: 16/464, loss: 22.151412963867188 2023-01-24 00:43:33.504903: step: 18/464, loss: 14.98178482055664 2023-01-24 00:43:34.203404: step: 20/464, loss: 5.897348403930664 2023-01-24 00:43:34.792259: step: 22/464, loss: 10.577079772949219 2023-01-24 00:43:35.380902: step: 24/464, loss: 21.333738327026367 2023-01-24 00:43:35.990768: step: 26/464, loss: 6.832368850708008 2023-01-24 00:43:36.605840: step: 28/464, loss: 8.707086563110352 2023-01-24 00:43:37.239517: step: 30/464, loss: 21.255126953125 2023-01-24 00:43:37.922278: step: 32/464, loss: 23.188125610351562 2023-01-24 00:43:38.503522: step: 34/464, loss: 11.354782104492188 2023-01-24 00:43:39.170373: step: 36/464, loss: 9.144607543945312 2023-01-24 00:43:39.817682: step: 38/464, loss: 9.938633918762207 2023-01-24 00:43:40.503069: step: 40/464, loss: 9.579240798950195 2023-01-24 00:43:41.121009: step: 42/464, loss: 7.6820969581604 2023-01-24 00:43:41.718750: step: 44/464, loss: 8.154853820800781 2023-01-24 00:43:42.344999: step: 46/464, loss: 17.464797973632812 2023-01-24 00:43:42.954272: step: 48/464, loss: 24.921092987060547 2023-01-24 00:43:43.624523: step: 50/464, loss: 10.282855987548828 2023-01-24 00:43:44.272472: step: 52/464, loss: 9.140432357788086 2023-01-24 00:43:44.915973: step: 54/464, loss: 9.383966445922852 2023-01-24 00:43:45.523027: step: 56/464, loss: 41.08988571166992 2023-01-24 00:43:46.075171: step: 58/464, loss: 16.880178451538086 2023-01-24 00:43:46.645383: step: 60/464, loss: 5.725705146789551 2023-01-24 00:43:47.190940: step: 62/464, loss: 16.076332092285156 2023-01-24 00:43:47.833107: step: 64/464, loss: 23.628440856933594 2023-01-24 00:43:48.468385: step: 66/464, loss: 24.215709686279297 2023-01-24 00:43:49.079800: step: 68/464, loss: 4.20828914642334 2023-01-24 00:43:49.690919: step: 70/464, loss: 8.635614395141602 2023-01-24 00:43:50.257298: step: 72/464, loss: 25.89919662475586 2023-01-24 00:43:50.841278: step: 74/464, loss: 6.081369400024414 2023-01-24 00:43:51.425624: step: 76/464, loss: 8.278801918029785 2023-01-24 00:43:52.038843: step: 78/464, loss: 18.827049255371094 2023-01-24 00:43:52.588586: step: 80/464, loss: 15.493717193603516 2023-01-24 00:43:53.204092: step: 82/464, loss: 14.888334274291992 2023-01-24 00:43:53.826571: step: 84/464, loss: 7.287348747253418 2023-01-24 00:43:54.429225: step: 86/464, loss: 4.9151458740234375 2023-01-24 00:43:55.102630: step: 88/464, loss: 19.12151336669922 2023-01-24 00:43:55.663471: step: 90/464, loss: 10.497282981872559 2023-01-24 00:43:56.238396: step: 92/464, loss: 21.297636032104492 2023-01-24 00:43:56.936782: step: 94/464, loss: 15.331517219543457 2023-01-24 00:43:57.555639: step: 96/464, loss: 23.956851959228516 2023-01-24 00:43:58.196647: step: 98/464, loss: 11.051862716674805 2023-01-24 00:43:58.788010: step: 100/464, loss: 10.751426696777344 2023-01-24 00:43:59.369226: step: 102/464, loss: 13.166668891906738 2023-01-24 00:43:59.906993: step: 104/464, loss: 13.671867370605469 2023-01-24 00:44:00.493976: step: 106/464, loss: 14.124359130859375 2023-01-24 00:44:01.141726: step: 108/464, loss: 25.87872314453125 2023-01-24 00:44:01.771436: step: 110/464, loss: 5.732607364654541 2023-01-24 00:44:02.375524: step: 112/464, loss: 11.07235336303711 2023-01-24 00:44:03.044631: step: 114/464, loss: 9.082109451293945 2023-01-24 00:44:03.673789: step: 116/464, loss: 12.189888000488281 2023-01-24 00:44:04.283862: step: 118/464, loss: 11.501178741455078 2023-01-24 00:44:04.921615: step: 120/464, loss: 19.088016510009766 2023-01-24 00:44:05.595389: step: 122/464, loss: 14.197904586791992 2023-01-24 00:44:06.165506: step: 124/464, loss: 4.7059149742126465 2023-01-24 00:44:06.761555: step: 126/464, loss: 7.997165679931641 2023-01-24 00:44:07.322850: step: 128/464, loss: 11.891801834106445 2023-01-24 00:44:07.921495: step: 130/464, loss: 6.820798397064209 2023-01-24 00:44:08.611778: step: 132/464, loss: 9.701921463012695 2023-01-24 00:44:09.220320: step: 134/464, loss: 6.925141334533691 2023-01-24 00:44:09.788383: step: 136/464, loss: 13.604503631591797 2023-01-24 00:44:10.404773: step: 138/464, loss: 19.978195190429688 2023-01-24 00:44:11.069691: step: 140/464, loss: 6.376090049743652 2023-01-24 00:44:11.673350: step: 142/464, loss: 7.691537857055664 2023-01-24 00:44:12.312238: step: 144/464, loss: 5.879671573638916 2023-01-24 00:44:12.954315: step: 146/464, loss: 9.334442138671875 2023-01-24 00:44:13.623324: step: 148/464, loss: 5.401294708251953 2023-01-24 00:44:14.211914: step: 150/464, loss: 13.273971557617188 2023-01-24 00:44:14.856034: step: 152/464, loss: 12.276399612426758 2023-01-24 00:44:15.418162: step: 154/464, loss: 6.026156425476074 2023-01-24 00:44:16.074438: step: 156/464, loss: 16.545625686645508 2023-01-24 00:44:16.691328: step: 158/464, loss: 16.53818130493164 2023-01-24 00:44:17.382220: step: 160/464, loss: 8.795269012451172 2023-01-24 00:44:18.086359: step: 162/464, loss: 7.852406024932861 2023-01-24 00:44:18.694763: step: 164/464, loss: 4.57177734375 2023-01-24 00:44:19.390283: step: 166/464, loss: 4.372272491455078 2023-01-24 00:44:19.938515: step: 168/464, loss: 14.673152923583984 2023-01-24 00:44:20.523110: step: 170/464, loss: 2.6895434856414795 2023-01-24 00:44:21.115313: step: 172/464, loss: 8.476045608520508 2023-01-24 00:44:21.746973: step: 174/464, loss: 11.554985046386719 2023-01-24 00:44:22.325703: step: 176/464, loss: 4.613548755645752 2023-01-24 00:44:22.988976: step: 178/464, loss: 17.155364990234375 2023-01-24 00:44:23.578426: step: 180/464, loss: 13.597175598144531 2023-01-24 00:44:24.217519: step: 182/464, loss: 13.538841247558594 2023-01-24 00:44:24.769765: step: 184/464, loss: 4.875265121459961 2023-01-24 00:44:25.325220: step: 186/464, loss: 7.670773983001709 2023-01-24 00:44:25.897157: step: 188/464, loss: 10.10245418548584 2023-01-24 00:44:26.529913: step: 190/464, loss: 15.357199668884277 2023-01-24 00:44:27.099414: step: 192/464, loss: 14.560921669006348 2023-01-24 00:44:27.721425: step: 194/464, loss: 7.032919406890869 2023-01-24 00:44:28.360202: step: 196/464, loss: 10.493429183959961 2023-01-24 00:44:28.990218: step: 198/464, loss: 12.067194938659668 2023-01-24 00:44:29.618165: step: 200/464, loss: 15.097469329833984 2023-01-24 00:44:30.205113: step: 202/464, loss: 5.528450012207031 2023-01-24 00:44:30.795654: step: 204/464, loss: 10.181692123413086 2023-01-24 00:44:31.406294: step: 206/464, loss: 10.08667278289795 2023-01-24 00:44:31.998793: step: 208/464, loss: 8.108156204223633 2023-01-24 00:44:32.631799: step: 210/464, loss: 3.823437213897705 2023-01-24 00:44:33.240565: step: 212/464, loss: 3.2625980377197266 2023-01-24 00:44:33.850758: step: 214/464, loss: 9.450902938842773 2023-01-24 00:44:34.440415: step: 216/464, loss: 7.178184509277344 2023-01-24 00:44:35.035525: step: 218/464, loss: 15.258796691894531 2023-01-24 00:44:35.710866: step: 220/464, loss: 13.636726379394531 2023-01-24 00:44:36.391039: step: 222/464, loss: 2.666865348815918 2023-01-24 00:44:37.109997: step: 224/464, loss: 10.846983909606934 2023-01-24 00:44:37.746257: step: 226/464, loss: 6.188336372375488 2023-01-24 00:44:38.337277: step: 228/464, loss: 5.240720748901367 2023-01-24 00:44:38.960114: step: 230/464, loss: 12.24063491821289 2023-01-24 00:44:39.537791: step: 232/464, loss: 4.9737372398376465 2023-01-24 00:44:40.136407: step: 234/464, loss: 3.9984071254730225 2023-01-24 00:44:40.751564: step: 236/464, loss: 7.2288618087768555 2023-01-24 00:44:41.327302: step: 238/464, loss: 13.265220642089844 2023-01-24 00:44:41.934120: step: 240/464, loss: 3.304727077484131 2023-01-24 00:44:42.563638: step: 242/464, loss: 7.927270889282227 2023-01-24 00:44:43.177279: step: 244/464, loss: 6.870214462280273 2023-01-24 00:44:43.765979: step: 246/464, loss: 15.09991455078125 2023-01-24 00:44:44.395128: step: 248/464, loss: 4.424330234527588 2023-01-24 00:44:45.074550: step: 250/464, loss: 20.561283111572266 2023-01-24 00:44:45.699024: step: 252/464, loss: 16.219852447509766 2023-01-24 00:44:46.334011: step: 254/464, loss: 9.015839576721191 2023-01-24 00:44:46.912559: step: 256/464, loss: 6.264186859130859 2023-01-24 00:44:47.539598: step: 258/464, loss: 3.0174646377563477 2023-01-24 00:44:48.280467: step: 260/464, loss: 13.42202377319336 2023-01-24 00:44:48.962600: step: 262/464, loss: 5.563702583312988 2023-01-24 00:44:49.614276: step: 264/464, loss: 13.135787010192871 2023-01-24 00:44:50.172447: step: 266/464, loss: 6.930896282196045 2023-01-24 00:44:50.822427: step: 268/464, loss: 5.268439769744873 2023-01-24 00:44:51.370406: step: 270/464, loss: 4.526968002319336 2023-01-24 00:44:51.967958: step: 272/464, loss: 1.6564035415649414 2023-01-24 00:44:52.615777: step: 274/464, loss: 2.870774507522583 2023-01-24 00:44:53.187875: step: 276/464, loss: 9.916946411132812 2023-01-24 00:44:53.876076: step: 278/464, loss: 3.924294948577881 2023-01-24 00:44:54.475826: step: 280/464, loss: 10.471038818359375 2023-01-24 00:44:55.137706: step: 282/464, loss: 10.573553085327148 2023-01-24 00:44:55.764208: step: 284/464, loss: 13.430482864379883 2023-01-24 00:44:56.504487: step: 286/464, loss: 2.6837880611419678 2023-01-24 00:44:57.100491: step: 288/464, loss: 9.279662132263184 2023-01-24 00:44:57.671837: step: 290/464, loss: 15.841981887817383 2023-01-24 00:44:58.230746: step: 292/464, loss: 10.159821510314941 2023-01-24 00:44:58.801043: step: 294/464, loss: 4.929985046386719 2023-01-24 00:44:59.484026: step: 296/464, loss: 5.215975761413574 2023-01-24 00:45:00.085987: step: 298/464, loss: 3.1540944576263428 2023-01-24 00:45:00.677829: step: 300/464, loss: 11.945456504821777 2023-01-24 00:45:01.224169: step: 302/464, loss: 4.5313920974731445 2023-01-24 00:45:01.826950: step: 304/464, loss: 8.657771110534668 2023-01-24 00:45:02.460820: step: 306/464, loss: 2.949284553527832 2023-01-24 00:45:03.014546: step: 308/464, loss: 2.8570871353149414 2023-01-24 00:45:03.620502: step: 310/464, loss: 9.278157234191895 2023-01-24 00:45:04.229990: step: 312/464, loss: 6.604586601257324 2023-01-24 00:45:04.868044: step: 314/464, loss: 9.309601783752441 2023-01-24 00:45:05.470987: step: 316/464, loss: 4.563741207122803 2023-01-24 00:45:06.114708: step: 318/464, loss: 5.5995283126831055 2023-01-24 00:45:06.797910: step: 320/464, loss: 1.8042103052139282 2023-01-24 00:45:07.468669: step: 322/464, loss: 22.809059143066406 2023-01-24 00:45:08.042916: step: 324/464, loss: 5.388643741607666 2023-01-24 00:45:08.668907: step: 326/464, loss: 5.1678266525268555 2023-01-24 00:45:09.280319: step: 328/464, loss: 11.327838897705078 2023-01-24 00:45:09.892457: step: 330/464, loss: 7.059610843658447 2023-01-24 00:45:10.550180: step: 332/464, loss: 23.0896053314209 2023-01-24 00:45:11.169846: step: 334/464, loss: 4.956096172332764 2023-01-24 00:45:11.831334: step: 336/464, loss: 13.520605087280273 2023-01-24 00:45:12.498449: step: 338/464, loss: 6.89166259765625 2023-01-24 00:45:13.167009: step: 340/464, loss: 3.214412212371826 2023-01-24 00:45:13.737383: step: 342/464, loss: 2.595851421356201 2023-01-24 00:45:14.350961: step: 344/464, loss: 18.70476531982422 2023-01-24 00:45:14.985199: step: 346/464, loss: 6.768799781799316 2023-01-24 00:45:15.618957: step: 348/464, loss: 5.832672119140625 2023-01-24 00:45:16.269371: step: 350/464, loss: 6.987588882446289 2023-01-24 00:45:16.980693: step: 352/464, loss: 31.79225730895996 2023-01-24 00:45:17.688407: step: 354/464, loss: 6.638126373291016 2023-01-24 00:45:18.281385: step: 356/464, loss: 8.499835968017578 2023-01-24 00:45:18.896093: step: 358/464, loss: 2.6248958110809326 2023-01-24 00:45:19.553631: step: 360/464, loss: 23.990123748779297 2023-01-24 00:45:20.262192: step: 362/464, loss: 7.547783374786377 2023-01-24 00:45:20.857099: step: 364/464, loss: 1.7016931772232056 2023-01-24 00:45:21.381232: step: 366/464, loss: 1.898937463760376 2023-01-24 00:45:22.001201: step: 368/464, loss: 3.6756973266601562 2023-01-24 00:45:22.627513: step: 370/464, loss: 6.603666305541992 2023-01-24 00:45:23.233396: step: 372/464, loss: 3.9010848999023438 2023-01-24 00:45:23.834133: step: 374/464, loss: 7.395991325378418 2023-01-24 00:45:24.438807: step: 376/464, loss: 5.332321643829346 2023-01-24 00:45:25.117311: step: 378/464, loss: 4.313864707946777 2023-01-24 00:45:25.659099: step: 380/464, loss: 4.549687385559082 2023-01-24 00:45:26.215377: step: 382/464, loss: 7.73099422454834 2023-01-24 00:45:26.740525: step: 384/464, loss: 1.9287822246551514 2023-01-24 00:45:27.431908: step: 386/464, loss: 2.0085694789886475 2023-01-24 00:45:28.025778: step: 388/464, loss: 3.305400848388672 2023-01-24 00:45:28.733045: step: 390/464, loss: 3.4623374938964844 2023-01-24 00:45:29.363434: step: 392/464, loss: 5.293943405151367 2023-01-24 00:45:29.992639: step: 394/464, loss: 5.717831134796143 2023-01-24 00:45:30.574827: step: 396/464, loss: 1.6635620594024658 2023-01-24 00:45:31.202283: step: 398/464, loss: 0.9851242303848267 2023-01-24 00:45:31.784952: step: 400/464, loss: 1.1627798080444336 2023-01-24 00:45:32.409999: step: 402/464, loss: 2.4061439037323 2023-01-24 00:45:32.995498: step: 404/464, loss: 3.4080448150634766 2023-01-24 00:45:33.624489: step: 406/464, loss: 6.438791751861572 2023-01-24 00:45:34.257971: step: 408/464, loss: 1.6807105541229248 2023-01-24 00:45:34.862535: step: 410/464, loss: 2.9713494777679443 2023-01-24 00:45:35.477170: step: 412/464, loss: 1.5428835153579712 2023-01-24 00:45:36.141186: step: 414/464, loss: 1.4319688081741333 2023-01-24 00:45:36.763823: step: 416/464, loss: 1.5449755191802979 2023-01-24 00:45:37.400378: step: 418/464, loss: 2.1071863174438477 2023-01-24 00:45:37.977697: step: 420/464, loss: 1.7880160808563232 2023-01-24 00:45:38.595585: step: 422/464, loss: 3.24936580657959 2023-01-24 00:45:39.200494: step: 424/464, loss: 3.7851812839508057 2023-01-24 00:45:39.807273: step: 426/464, loss: 3.3374154567718506 2023-01-24 00:45:40.421487: step: 428/464, loss: 1.0898224115371704 2023-01-24 00:45:41.009615: step: 430/464, loss: 1.4387747049331665 2023-01-24 00:45:41.691411: step: 432/464, loss: 1.4988466501235962 2023-01-24 00:45:42.323560: step: 434/464, loss: 4.100554466247559 2023-01-24 00:45:42.969451: step: 436/464, loss: 5.842254638671875 2023-01-24 00:45:43.593076: step: 438/464, loss: 1.7818788290023804 2023-01-24 00:45:44.290926: step: 440/464, loss: 1.24330472946167 2023-01-24 00:45:44.992116: step: 442/464, loss: 1.083603024482727 2023-01-24 00:45:45.589346: step: 444/464, loss: 3.9309680461883545 2023-01-24 00:45:46.148415: step: 446/464, loss: 1.6359953880310059 2023-01-24 00:45:46.772272: step: 448/464, loss: 8.672527313232422 2023-01-24 00:45:47.354661: step: 450/464, loss: 1.3632621765136719 2023-01-24 00:45:47.975218: step: 452/464, loss: 1.7263782024383545 2023-01-24 00:45:48.659650: step: 454/464, loss: 3.074094772338867 2023-01-24 00:45:49.299635: step: 456/464, loss: 1.5102462768554688 2023-01-24 00:45:49.891669: step: 458/464, loss: 3.697157621383667 2023-01-24 00:45:50.531253: step: 460/464, loss: 6.106800079345703 2023-01-24 00:45:51.181543: step: 462/464, loss: 0.9169987440109253 2023-01-24 00:45:51.861025: step: 464/464, loss: 6.377109527587891 2023-01-24 00:45:52.432656: step: 466/464, loss: 0.525560200214386 2023-01-24 00:45:53.025618: step: 468/464, loss: 2.8090384006500244 2023-01-24 00:45:53.640781: step: 470/464, loss: 1.1070313453674316 2023-01-24 00:45:54.272001: step: 472/464, loss: 1.8549484014511108 2023-01-24 00:45:54.850220: step: 474/464, loss: 8.1439208984375 2023-01-24 00:45:55.505083: step: 476/464, loss: 2.7541751861572266 2023-01-24 00:45:56.118157: step: 478/464, loss: 2.8204009532928467 2023-01-24 00:45:56.695106: step: 480/464, loss: 1.782137155532837 2023-01-24 00:45:57.307203: step: 482/464, loss: 0.5694196820259094 2023-01-24 00:45:57.933612: step: 484/464, loss: 2.7259013652801514 2023-01-24 00:45:58.551015: step: 486/464, loss: 0.5797066688537598 2023-01-24 00:45:59.174796: step: 488/464, loss: 1.6275838613510132 2023-01-24 00:45:59.787856: step: 490/464, loss: 2.965346574783325 2023-01-24 00:46:00.431225: step: 492/464, loss: 5.800957679748535 2023-01-24 00:46:01.057277: step: 494/464, loss: 8.127657890319824 2023-01-24 00:46:01.655232: step: 496/464, loss: 1.8809348344802856 2023-01-24 00:46:02.263555: step: 498/464, loss: 0.6403765082359314 2023-01-24 00:46:02.912073: step: 500/464, loss: 0.5952467918395996 2023-01-24 00:46:03.459529: step: 502/464, loss: 6.675691604614258 2023-01-24 00:46:04.099621: step: 504/464, loss: 1.6062860488891602 2023-01-24 00:46:04.694470: step: 506/464, loss: 8.55487060546875 2023-01-24 00:46:05.317185: step: 508/464, loss: 4.537815093994141 2023-01-24 00:46:05.883626: step: 510/464, loss: 2.048262596130371 2023-01-24 00:46:06.515995: step: 512/464, loss: 3.5662026405334473 2023-01-24 00:46:07.136923: step: 514/464, loss: 0.9425341486930847 2023-01-24 00:46:07.797812: step: 516/464, loss: 0.9256479740142822 2023-01-24 00:46:08.472258: step: 518/464, loss: 2.127551794052124 2023-01-24 00:46:09.094906: step: 520/464, loss: 3.395993709564209 2023-01-24 00:46:09.734822: step: 522/464, loss: 2.3432466983795166 2023-01-24 00:46:10.393808: step: 524/464, loss: 1.383481502532959 2023-01-24 00:46:10.957443: step: 526/464, loss: 3.4100661277770996 2023-01-24 00:46:11.656919: step: 528/464, loss: 2.922839641571045 2023-01-24 00:46:12.224644: step: 530/464, loss: 1.9120745658874512 2023-01-24 00:46:12.925142: step: 532/464, loss: 1.312376618385315 2023-01-24 00:46:13.547932: step: 534/464, loss: 3.5674753189086914 2023-01-24 00:46:14.127265: step: 536/464, loss: 3.0240063667297363 2023-01-24 00:46:14.748511: step: 538/464, loss: 1.7374709844589233 2023-01-24 00:46:15.331490: step: 540/464, loss: 0.6851668953895569 2023-01-24 00:46:15.946513: step: 542/464, loss: 4.37807035446167 2023-01-24 00:46:16.588668: step: 544/464, loss: 1.2854983806610107 2023-01-24 00:46:17.253111: step: 546/464, loss: 6.369767189025879 2023-01-24 00:46:17.852327: step: 548/464, loss: 2.6160709857940674 2023-01-24 00:46:18.428603: step: 550/464, loss: 4.167600154876709 2023-01-24 00:46:19.017012: step: 552/464, loss: 1.1332805156707764 2023-01-24 00:46:19.619149: step: 554/464, loss: 0.7444527745246887 2023-01-24 00:46:20.209008: step: 556/464, loss: 12.333288192749023 2023-01-24 00:46:20.812456: step: 558/464, loss: 8.097074508666992 2023-01-24 00:46:21.456065: step: 560/464, loss: 2.7238166332244873 2023-01-24 00:46:22.021894: step: 562/464, loss: 3.192190170288086 2023-01-24 00:46:22.624292: step: 564/464, loss: 0.49808821082115173 2023-01-24 00:46:23.249862: step: 566/464, loss: 1.5350732803344727 2023-01-24 00:46:23.880645: step: 568/464, loss: 2.190887928009033 2023-01-24 00:46:24.540821: step: 570/464, loss: 1.2030106782913208 2023-01-24 00:46:25.196023: step: 572/464, loss: 1.8200008869171143 2023-01-24 00:46:25.800285: step: 574/464, loss: 1.8087153434753418 2023-01-24 00:46:26.388804: step: 576/464, loss: 0.961233377456665 2023-01-24 00:46:27.068278: step: 578/464, loss: 1.1750059127807617 2023-01-24 00:46:27.662707: step: 580/464, loss: 3.103426933288574 2023-01-24 00:46:28.243742: step: 582/464, loss: 8.684738159179688 2023-01-24 00:46:28.830308: step: 584/464, loss: 3.7423057556152344 2023-01-24 00:46:29.431081: step: 586/464, loss: 7.903364658355713 2023-01-24 00:46:30.064053: step: 588/464, loss: 2.912686586380005 2023-01-24 00:46:30.719241: step: 590/464, loss: 2.24096941947937 2023-01-24 00:46:31.331420: step: 592/464, loss: 2.5934605598449707 2023-01-24 00:46:31.910164: step: 594/464, loss: 2.838994264602661 2023-01-24 00:46:32.508459: step: 596/464, loss: 2.592299699783325 2023-01-24 00:46:33.104947: step: 598/464, loss: 2.6744465827941895 2023-01-24 00:46:33.741343: step: 600/464, loss: 3.6681036949157715 2023-01-24 00:46:34.318255: step: 602/464, loss: 1.1927385330200195 2023-01-24 00:46:34.907305: step: 604/464, loss: 0.988146960735321 2023-01-24 00:46:35.496068: step: 606/464, loss: 0.9232864379882812 2023-01-24 00:46:36.095689: step: 608/464, loss: 2.6107113361358643 2023-01-24 00:46:36.695581: step: 610/464, loss: 6.5684614181518555 2023-01-24 00:46:37.337882: step: 612/464, loss: 2.9056572914123535 2023-01-24 00:46:37.976283: step: 614/464, loss: 1.2010129690170288 2023-01-24 00:46:38.628990: step: 616/464, loss: 2.280693292617798 2023-01-24 00:46:39.231411: step: 618/464, loss: 1.2569115161895752 2023-01-24 00:46:39.812464: step: 620/464, loss: 0.6354900002479553 2023-01-24 00:46:40.498884: step: 622/464, loss: 6.259234428405762 2023-01-24 00:46:41.157931: step: 624/464, loss: 1.5421119928359985 2023-01-24 00:46:41.675835: step: 626/464, loss: 1.037724256515503 2023-01-24 00:46:42.316200: step: 628/464, loss: 3.7696783542633057 2023-01-24 00:46:42.958953: step: 630/464, loss: 7.192898750305176 2023-01-24 00:46:43.605380: step: 632/464, loss: 3.1449084281921387 2023-01-24 00:46:44.356570: step: 634/464, loss: 3.239497423171997 2023-01-24 00:46:45.073161: step: 636/464, loss: 3.8216047286987305 2023-01-24 00:46:45.716474: step: 638/464, loss: 1.120566725730896 2023-01-24 00:46:46.341914: step: 640/464, loss: 0.9647707939147949 2023-01-24 00:46:47.022644: step: 642/464, loss: 3.0738391876220703 2023-01-24 00:46:47.763603: step: 644/464, loss: 0.7195495963096619 2023-01-24 00:46:48.361155: step: 646/464, loss: 4.335334300994873 2023-01-24 00:46:48.954428: step: 648/464, loss: 1.711287498474121 2023-01-24 00:46:49.611295: step: 650/464, loss: 1.3882620334625244 2023-01-24 00:46:50.209750: step: 652/464, loss: 2.3732476234436035 2023-01-24 00:46:50.818353: step: 654/464, loss: 0.8575702905654907 2023-01-24 00:46:51.418156: step: 656/464, loss: 0.5496256947517395 2023-01-24 00:46:52.051920: step: 658/464, loss: 1.9140018224716187 2023-01-24 00:46:52.584838: step: 660/464, loss: 2.0099096298217773 2023-01-24 00:46:53.249055: step: 662/464, loss: 2.6142263412475586 2023-01-24 00:46:53.916611: step: 664/464, loss: 5.653798580169678 2023-01-24 00:46:54.507472: step: 666/464, loss: 1.4585859775543213 2023-01-24 00:46:55.095235: step: 668/464, loss: 4.889749526977539 2023-01-24 00:46:55.635927: step: 670/464, loss: 2.410616159439087 2023-01-24 00:46:56.219650: step: 672/464, loss: 0.8650250434875488 2023-01-24 00:46:56.781045: step: 674/464, loss: 1.1738193035125732 2023-01-24 00:46:57.332249: step: 676/464, loss: 3.919182062149048 2023-01-24 00:46:57.951089: step: 678/464, loss: 3.5058436393737793 2023-01-24 00:46:58.503887: step: 680/464, loss: 0.35765209794044495 2023-01-24 00:46:59.120746: step: 682/464, loss: 6.447207450866699 2023-01-24 00:46:59.725131: step: 684/464, loss: 2.850321054458618 2023-01-24 00:47:00.307772: step: 686/464, loss: 6.106337070465088 2023-01-24 00:47:01.000617: step: 688/464, loss: 0.6156391501426697 2023-01-24 00:47:01.609564: step: 690/464, loss: 5.986627578735352 2023-01-24 00:47:02.213044: step: 692/464, loss: 1.725941777229309 2023-01-24 00:47:02.860525: step: 694/464, loss: 3.855546236038208 2023-01-24 00:47:03.511750: step: 696/464, loss: 12.777762413024902 2023-01-24 00:47:04.172867: step: 698/464, loss: 0.6011559963226318 2023-01-24 00:47:04.910122: step: 700/464, loss: 16.054662704467773 2023-01-24 00:47:05.522519: step: 702/464, loss: 2.418577194213867 2023-01-24 00:47:06.139712: step: 704/464, loss: 1.305466651916504 2023-01-24 00:47:06.782835: step: 706/464, loss: 5.697353363037109 2023-01-24 00:47:07.437385: step: 708/464, loss: 1.6900533437728882 2023-01-24 00:47:08.210683: step: 710/464, loss: 0.9690849781036377 2023-01-24 00:47:08.827282: step: 712/464, loss: 0.8801342844963074 2023-01-24 00:47:09.495829: step: 714/464, loss: 2.1792311668395996 2023-01-24 00:47:10.189938: step: 716/464, loss: 4.735498905181885 2023-01-24 00:47:10.987372: step: 718/464, loss: 0.9284535646438599 2023-01-24 00:47:11.606024: step: 720/464, loss: 2.369762420654297 2023-01-24 00:47:12.234068: step: 722/464, loss: 10.625313758850098 2023-01-24 00:47:12.967822: step: 724/464, loss: 2.382413864135742 2023-01-24 00:47:13.594299: step: 726/464, loss: 7.8514180183410645 2023-01-24 00:47:14.229135: step: 728/464, loss: 1.2403920888900757 2023-01-24 00:47:14.871250: step: 730/464, loss: 2.554053544998169 2023-01-24 00:47:15.427332: step: 732/464, loss: 1.515782356262207 2023-01-24 00:47:16.053896: step: 734/464, loss: 2.2663307189941406 2023-01-24 00:47:16.692995: step: 736/464, loss: 4.186933994293213 2023-01-24 00:47:17.311597: step: 738/464, loss: 0.7283748984336853 2023-01-24 00:47:17.982008: step: 740/464, loss: 3.5390701293945312 2023-01-24 00:47:18.601038: step: 742/464, loss: 0.6066951751708984 2023-01-24 00:47:19.196980: step: 744/464, loss: 2.166688919067383 2023-01-24 00:47:19.755803: step: 746/464, loss: 1.1567081212997437 2023-01-24 00:47:20.392559: step: 748/464, loss: 1.163918137550354 2023-01-24 00:47:20.946253: step: 750/464, loss: 0.7749082446098328 2023-01-24 00:47:21.592281: step: 752/464, loss: 0.47420597076416016 2023-01-24 00:47:22.207535: step: 754/464, loss: 1.50184166431427 2023-01-24 00:47:22.804277: step: 756/464, loss: 6.195437431335449 2023-01-24 00:47:23.464313: step: 758/464, loss: 1.8309895992279053 2023-01-24 00:47:24.092137: step: 760/464, loss: 5.113033294677734 2023-01-24 00:47:24.764187: step: 762/464, loss: 0.9480839371681213 2023-01-24 00:47:25.430653: step: 764/464, loss: 5.2821478843688965 2023-01-24 00:47:26.078019: step: 766/464, loss: 0.63105708360672 2023-01-24 00:47:26.689059: step: 768/464, loss: 8.196881294250488 2023-01-24 00:47:27.268955: step: 770/464, loss: 0.6351608037948608 2023-01-24 00:47:27.879582: step: 772/464, loss: 1.5293173789978027 2023-01-24 00:47:28.454559: step: 774/464, loss: 1.171482801437378 2023-01-24 00:47:29.047970: step: 776/464, loss: 10.900691986083984 2023-01-24 00:47:29.707023: step: 778/464, loss: 4.51688814163208 2023-01-24 00:47:30.375157: step: 780/464, loss: 6.444057941436768 2023-01-24 00:47:30.967061: step: 782/464, loss: 2.60901141166687 2023-01-24 00:47:31.583115: step: 784/464, loss: 0.8947842717170715 2023-01-24 00:47:32.230365: step: 786/464, loss: 1.6689441204071045 2023-01-24 00:47:32.897963: step: 788/464, loss: 1.3592464923858643 2023-01-24 00:47:33.495021: step: 790/464, loss: 0.7408801317214966 2023-01-24 00:47:34.147618: step: 792/464, loss: 0.40166574716567993 2023-01-24 00:47:34.808903: step: 794/464, loss: 2.8900511264801025 2023-01-24 00:47:35.413506: step: 796/464, loss: 5.074830532073975 2023-01-24 00:47:36.057779: step: 798/464, loss: 3.0470948219299316 2023-01-24 00:47:36.634841: step: 800/464, loss: 1.3238725662231445 2023-01-24 00:47:37.296612: step: 802/464, loss: 2.3110499382019043 2023-01-24 00:47:37.930247: step: 804/464, loss: 4.000260829925537 2023-01-24 00:47:38.487146: step: 806/464, loss: 5.249054431915283 2023-01-24 00:47:39.144574: step: 808/464, loss: 1.7909568548202515 2023-01-24 00:47:39.743396: step: 810/464, loss: 2.6298608779907227 2023-01-24 00:47:40.337336: step: 812/464, loss: 2.052001714706421 2023-01-24 00:47:40.923346: step: 814/464, loss: 7.3221211433410645 2023-01-24 00:47:41.559172: step: 816/464, loss: 1.4111716747283936 2023-01-24 00:47:42.280316: step: 818/464, loss: 1.5203192234039307 2023-01-24 00:47:42.834011: step: 820/464, loss: 1.7173218727111816 2023-01-24 00:47:43.450510: step: 822/464, loss: 4.458634376525879 2023-01-24 00:47:44.035418: step: 824/464, loss: 0.8378711938858032 2023-01-24 00:47:44.626467: step: 826/464, loss: 0.9857144355773926 2023-01-24 00:47:45.305573: step: 828/464, loss: 1.1376981735229492 2023-01-24 00:47:45.962585: step: 830/464, loss: 2.9880220890045166 2023-01-24 00:47:46.476518: step: 832/464, loss: 1.2170941829681396 2023-01-24 00:47:47.057868: step: 834/464, loss: 0.9180886745452881 2023-01-24 00:47:47.695450: step: 836/464, loss: 3.690925121307373 2023-01-24 00:47:48.367019: step: 838/464, loss: 2.100409746170044 2023-01-24 00:47:48.973863: step: 840/464, loss: 2.7280213832855225 2023-01-24 00:47:49.564752: step: 842/464, loss: 0.4261971116065979 2023-01-24 00:47:50.127296: step: 844/464, loss: 1.1917791366577148 2023-01-24 00:47:50.818543: step: 846/464, loss: 2.6262195110321045 2023-01-24 00:47:51.466873: step: 848/464, loss: 2.6037867069244385 2023-01-24 00:47:52.083285: step: 850/464, loss: 5.234838485717773 2023-01-24 00:47:52.672411: step: 852/464, loss: 1.619897723197937 2023-01-24 00:47:53.382498: step: 854/464, loss: 1.9957997798919678 2023-01-24 00:47:53.969928: step: 856/464, loss: 1.6528217792510986 2023-01-24 00:47:54.567817: step: 858/464, loss: 2.4378433227539062 2023-01-24 00:47:55.145496: step: 860/464, loss: 0.8490089178085327 2023-01-24 00:47:55.765926: step: 862/464, loss: 0.8172425031661987 2023-01-24 00:47:56.325360: step: 864/464, loss: 1.6703541278839111 2023-01-24 00:47:56.925424: step: 866/464, loss: 1.3778300285339355 2023-01-24 00:47:57.486431: step: 868/464, loss: 2.208710193634033 2023-01-24 00:47:58.115657: step: 870/464, loss: 2.3513078689575195 2023-01-24 00:47:58.753030: step: 872/464, loss: 1.8490746021270752 2023-01-24 00:47:59.389181: step: 874/464, loss: 0.4844001531600952 2023-01-24 00:48:00.081007: step: 876/464, loss: 0.6247026324272156 2023-01-24 00:48:00.682796: step: 878/464, loss: 3.3467068672180176 2023-01-24 00:48:01.398102: step: 880/464, loss: 0.645189106464386 2023-01-24 00:48:02.077797: step: 882/464, loss: 2.4063289165496826 2023-01-24 00:48:02.644026: step: 884/464, loss: 1.1080090999603271 2023-01-24 00:48:03.244199: step: 886/464, loss: 1.222339153289795 2023-01-24 00:48:03.938559: step: 888/464, loss: 1.995471715927124 2023-01-24 00:48:04.568996: step: 890/464, loss: 0.5842683911323547 2023-01-24 00:48:05.191271: step: 892/464, loss: 2.0001533031463623 2023-01-24 00:48:05.808347: step: 894/464, loss: 3.2898120880126953 2023-01-24 00:48:06.469068: step: 896/464, loss: 1.5262447595596313 2023-01-24 00:48:07.112739: step: 898/464, loss: 1.5730087757110596 2023-01-24 00:48:07.744437: step: 900/464, loss: 0.9650188088417053 2023-01-24 00:48:08.351928: step: 902/464, loss: 2.3240749835968018 2023-01-24 00:48:08.953365: step: 904/464, loss: 5.325174808502197 2023-01-24 00:48:09.499374: step: 906/464, loss: 1.3020225763320923 2023-01-24 00:48:10.107843: step: 908/464, loss: 2.313237190246582 2023-01-24 00:48:10.772975: step: 910/464, loss: 3.4341206550598145 2023-01-24 00:48:11.394023: step: 912/464, loss: 5.504545211791992 2023-01-24 00:48:12.003213: step: 914/464, loss: 2.7761149406433105 2023-01-24 00:48:12.619566: step: 916/464, loss: 2.362014055252075 2023-01-24 00:48:13.186591: step: 918/464, loss: 0.7432173490524292 2023-01-24 00:48:13.742314: step: 920/464, loss: 9.008573532104492 2023-01-24 00:48:14.355236: step: 922/464, loss: 0.6847949624061584 2023-01-24 00:48:14.974281: step: 924/464, loss: 0.9933121204376221 2023-01-24 00:48:15.600892: step: 926/464, loss: 0.7144534587860107 2023-01-24 00:48:16.236297: step: 928/464, loss: 3.1983590126037598 2023-01-24 00:48:16.728433: step: 930/464, loss: 0.08428919315338135 ================================================== Loss: 6.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3337722274835044, 'r': 0.10473759879444561, 'f1': 0.15944227269715605}, 'combined': 0.11748377988211497, 'epoch': 0} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3883802308802308, 'r': 0.05868382609454037, 'f1': 0.10196139712846156}, 'combined': 0.0665654717004464, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.334933666718221, 'r': 0.10320262320429116, 'f1': 0.15778666958112722}, 'combined': 0.11626386179662004, 'epoch': 0} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3850093005952381, 'r': 0.05641161913483342, 'f1': 0.09840493305948578}, 'combined': 0.06424363505437931, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3400741327397006, 'r': 0.09128644016831282, 'f1': 0.14393599656941128}, 'combined': 0.10605810273535567, 'epoch': 0} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.41668129346545174, 'r': 0.057618017686156785, 'f1': 0.10123712842040741}, 'combined': 0.06609263306202763, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.13333333333333333, 'f1': 0.2074074074074074}, 'combined': 0.1382716049382716, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.75, 'r': 0.10344827586206896, 'f1': 0.18181818181818182}, 'combined': 0.12121212121212122, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3337722274835044, 'r': 0.10473759879444561, 'f1': 0.15944227269715605}, 'combined': 0.11748377988211497, 'epoch': 0} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3883802308802308, 'r': 0.05868382609454037, 'f1': 0.10196139712846156}, 'combined': 0.0665654717004464, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.13333333333333333, 'f1': 0.2074074074074074}, 'combined': 0.1382716049382716, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.334933666718221, 'r': 0.10320262320429116, 'f1': 0.15778666958112722}, 'combined': 0.11626386179662004, 'epoch': 0} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3850093005952381, 'r': 0.05641161913483342, 'f1': 0.09840493305948578}, 'combined': 0.06424363505437931, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3400741327397006, 'r': 0.09128644016831282, 'f1': 0.14393599656941128}, 'combined': 0.10605810273535567, 'epoch': 0} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.41668129346545174, 'r': 0.057618017686156785, 'f1': 0.10123712842040741}, 'combined': 0.06609263306202763, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.75, 'r': 0.10344827586206896, 'f1': 0.18181818181818182}, 'combined': 0.12121212121212122, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:51:00.871755: step: 2/464, loss: 2.4085278511047363 2023-01-24 00:51:01.433708: step: 4/464, loss: 4.375063896179199 2023-01-24 00:51:02.088363: step: 6/464, loss: 1.7053239345550537 2023-01-24 00:51:02.761377: step: 8/464, loss: 2.6834206581115723 2023-01-24 00:51:03.373863: step: 10/464, loss: 5.249751091003418 2023-01-24 00:51:04.007104: step: 12/464, loss: 0.7104110717773438 2023-01-24 00:51:04.614822: step: 14/464, loss: 3.3045058250427246 2023-01-24 00:51:05.187126: step: 16/464, loss: 2.128939151763916 2023-01-24 00:51:05.789265: step: 18/464, loss: 1.1264522075653076 2023-01-24 00:51:06.349319: step: 20/464, loss: 1.1155978441238403 2023-01-24 00:51:06.996234: step: 22/464, loss: 5.055933952331543 2023-01-24 00:51:07.627406: step: 24/464, loss: 1.4645519256591797 2023-01-24 00:51:08.271185: step: 26/464, loss: 1.1370962858200073 2023-01-24 00:51:08.848163: step: 28/464, loss: 0.4650828242301941 2023-01-24 00:51:09.535434: step: 30/464, loss: 0.6294041872024536 2023-01-24 00:51:10.161760: step: 32/464, loss: 1.4798829555511475 2023-01-24 00:51:10.787375: step: 34/464, loss: 6.095230579376221 2023-01-24 00:51:11.371296: step: 36/464, loss: 0.5478473901748657 2023-01-24 00:51:12.041246: step: 38/464, loss: 2.8686866760253906 2023-01-24 00:51:12.636333: step: 40/464, loss: 1.9995484352111816 2023-01-24 00:51:13.277120: step: 42/464, loss: 2.31650972366333 2023-01-24 00:51:13.928757: step: 44/464, loss: 1.5859663486480713 2023-01-24 00:51:14.494632: step: 46/464, loss: 1.5173289775848389 2023-01-24 00:51:15.135141: step: 48/464, loss: 6.074652671813965 2023-01-24 00:51:15.779262: step: 50/464, loss: 1.6784247159957886 2023-01-24 00:51:16.411707: step: 52/464, loss: 1.4369029998779297 2023-01-24 00:51:17.016559: step: 54/464, loss: 2.3493897914886475 2023-01-24 00:51:17.620007: step: 56/464, loss: 2.295942544937134 2023-01-24 00:51:18.263930: step: 58/464, loss: 1.5282453298568726 2023-01-24 00:51:18.898625: step: 60/464, loss: 1.5331041812896729 2023-01-24 00:51:19.497281: step: 62/464, loss: 3.563119888305664 2023-01-24 00:51:20.206518: step: 64/464, loss: 1.34977126121521 2023-01-24 00:51:20.824848: step: 66/464, loss: 0.8964823484420776 2023-01-24 00:51:21.432828: step: 68/464, loss: 0.8326548933982849 2023-01-24 00:51:22.023629: step: 70/464, loss: 0.6099372506141663 2023-01-24 00:51:22.564266: step: 72/464, loss: 0.5478998422622681 2023-01-24 00:51:23.241425: step: 74/464, loss: 4.812145709991455 2023-01-24 00:51:23.801605: step: 76/464, loss: 1.0465631484985352 2023-01-24 00:51:24.437367: step: 78/464, loss: 0.7286931276321411 2023-01-24 00:51:25.052515: step: 80/464, loss: 4.54271125793457 2023-01-24 00:51:25.688904: step: 82/464, loss: 2.443305253982544 2023-01-24 00:51:26.301089: step: 84/464, loss: 1.6770225763320923 2023-01-24 00:51:26.854734: step: 86/464, loss: 0.9204431772232056 2023-01-24 00:51:27.503847: step: 88/464, loss: 0.9454526305198669 2023-01-24 00:51:28.109299: step: 90/464, loss: 2.1437172889709473 2023-01-24 00:51:28.785019: step: 92/464, loss: 1.6663503646850586 2023-01-24 00:51:29.379356: step: 94/464, loss: 4.770730972290039 2023-01-24 00:51:29.992252: step: 96/464, loss: 1.5186935663223267 2023-01-24 00:51:30.608155: step: 98/464, loss: 0.8113821744918823 2023-01-24 00:51:31.244271: step: 100/464, loss: 1.9256279468536377 2023-01-24 00:51:31.834001: step: 102/464, loss: 1.964971899986267 2023-01-24 00:51:32.432707: step: 104/464, loss: 1.2278070449829102 2023-01-24 00:51:33.063279: step: 106/464, loss: 2.1178746223449707 2023-01-24 00:51:33.738960: step: 108/464, loss: 1.0850036144256592 2023-01-24 00:51:34.298490: step: 110/464, loss: 3.908975601196289 2023-01-24 00:51:34.944383: step: 112/464, loss: 8.245256423950195 2023-01-24 00:51:35.559553: step: 114/464, loss: 0.6226215362548828 2023-01-24 00:51:36.205187: step: 116/464, loss: 1.4349032640457153 2023-01-24 00:51:36.820975: step: 118/464, loss: 1.4965016841888428 2023-01-24 00:51:37.414759: step: 120/464, loss: 5.736588954925537 2023-01-24 00:51:38.023740: step: 122/464, loss: 0.5515249371528625 2023-01-24 00:51:38.631377: step: 124/464, loss: 2.035945415496826 2023-01-24 00:51:39.249102: step: 126/464, loss: 0.27566272020339966 2023-01-24 00:51:39.857444: step: 128/464, loss: 1.9847958087921143 2023-01-24 00:51:40.503437: step: 130/464, loss: 1.4272325038909912 2023-01-24 00:51:41.142683: step: 132/464, loss: 0.5434401035308838 2023-01-24 00:51:41.736932: step: 134/464, loss: 4.213326930999756 2023-01-24 00:51:42.282324: step: 136/464, loss: 1.4150797128677368 2023-01-24 00:51:42.909442: step: 138/464, loss: 2.063068389892578 2023-01-24 00:51:43.570197: step: 140/464, loss: 1.975632667541504 2023-01-24 00:51:44.168459: step: 142/464, loss: 1.9226182699203491 2023-01-24 00:51:44.839399: step: 144/464, loss: 1.3040951490402222 2023-01-24 00:51:45.464841: step: 146/464, loss: 2.4764351844787598 2023-01-24 00:51:46.051590: step: 148/464, loss: 0.8127474188804626 2023-01-24 00:51:46.596395: step: 150/464, loss: 0.4643832743167877 2023-01-24 00:51:47.165416: step: 152/464, loss: 1.3815553188323975 2023-01-24 00:51:47.897842: step: 154/464, loss: 1.9366369247436523 2023-01-24 00:51:48.533473: step: 156/464, loss: 0.44186994433403015 2023-01-24 00:51:49.157194: step: 158/464, loss: 1.2369760274887085 2023-01-24 00:51:49.771281: step: 160/464, loss: 3.2742278575897217 2023-01-24 00:51:50.312533: step: 162/464, loss: 0.3232155740261078 2023-01-24 00:51:50.960252: step: 164/464, loss: 13.85115909576416 2023-01-24 00:51:51.551694: step: 166/464, loss: 0.5114213824272156 2023-01-24 00:51:52.213603: step: 168/464, loss: 1.908266544342041 2023-01-24 00:51:52.827718: step: 170/464, loss: 5.393420219421387 2023-01-24 00:51:53.439658: step: 172/464, loss: 0.997491180896759 2023-01-24 00:51:54.077246: step: 174/464, loss: 2.017730951309204 2023-01-24 00:51:54.729577: step: 176/464, loss: 3.1283817291259766 2023-01-24 00:51:55.375555: step: 178/464, loss: 2.6031904220581055 2023-01-24 00:51:56.051832: step: 180/464, loss: 0.28233417868614197 2023-01-24 00:51:56.701657: step: 182/464, loss: 1.887563705444336 2023-01-24 00:51:57.334525: step: 184/464, loss: 1.3217190504074097 2023-01-24 00:51:57.884668: step: 186/464, loss: 2.251591205596924 2023-01-24 00:51:58.464122: step: 188/464, loss: 1.1629953384399414 2023-01-24 00:51:59.025087: step: 190/464, loss: 1.2898328304290771 2023-01-24 00:51:59.629398: step: 192/464, loss: 5.701601028442383 2023-01-24 00:52:00.212524: step: 194/464, loss: 2.956191062927246 2023-01-24 00:52:00.807507: step: 196/464, loss: 1.9662350416183472 2023-01-24 00:52:01.386787: step: 198/464, loss: 4.0824055671691895 2023-01-24 00:52:02.018047: step: 200/464, loss: 1.9693928956985474 2023-01-24 00:52:02.644336: step: 202/464, loss: 2.0761656761169434 2023-01-24 00:52:03.271281: step: 204/464, loss: 4.071382999420166 2023-01-24 00:52:03.866206: step: 206/464, loss: 0.8178318738937378 2023-01-24 00:52:04.480201: step: 208/464, loss: 6.27664852142334 2023-01-24 00:52:05.105053: step: 210/464, loss: 2.5243842601776123 2023-01-24 00:52:05.714956: step: 212/464, loss: 1.3201401233673096 2023-01-24 00:52:06.348341: step: 214/464, loss: 0.9902519583702087 2023-01-24 00:52:06.981875: step: 216/464, loss: 3.904649496078491 2023-01-24 00:52:07.588192: step: 218/464, loss: 0.5062090158462524 2023-01-24 00:52:08.153063: step: 220/464, loss: 1.3447294235229492 2023-01-24 00:52:08.895199: step: 222/464, loss: 0.7208210229873657 2023-01-24 00:52:09.485588: step: 224/464, loss: 0.9612250328063965 2023-01-24 00:52:10.059076: step: 226/464, loss: 1.8179367780685425 2023-01-24 00:52:10.677969: step: 228/464, loss: 0.6544939279556274 2023-01-24 00:52:11.325280: step: 230/464, loss: 4.567896366119385 2023-01-24 00:52:11.939950: step: 232/464, loss: 1.5239843130111694 2023-01-24 00:52:12.544260: step: 234/464, loss: 1.2557464838027954 2023-01-24 00:52:13.187504: step: 236/464, loss: 0.5024946331977844 2023-01-24 00:52:13.779079: step: 238/464, loss: 3.551517963409424 2023-01-24 00:52:14.409038: step: 240/464, loss: 0.9418332576751709 2023-01-24 00:52:14.979093: step: 242/464, loss: 1.2138640880584717 2023-01-24 00:52:15.566307: step: 244/464, loss: 1.2311737537384033 2023-01-24 00:52:16.171479: step: 246/464, loss: 1.031320571899414 2023-01-24 00:52:16.783962: step: 248/464, loss: 1.4449968338012695 2023-01-24 00:52:17.423974: step: 250/464, loss: 0.6216345429420471 2023-01-24 00:52:17.981588: step: 252/464, loss: 1.7167075872421265 2023-01-24 00:52:18.611848: step: 254/464, loss: 1.0585178136825562 2023-01-24 00:52:19.181106: step: 256/464, loss: 0.9872919917106628 2023-01-24 00:52:19.761538: step: 258/464, loss: 2.103480100631714 2023-01-24 00:52:20.356243: step: 260/464, loss: 3.0257513523101807 2023-01-24 00:52:20.936982: step: 262/464, loss: 0.34053394198417664 2023-01-24 00:52:21.642705: step: 264/464, loss: 3.0975921154022217 2023-01-24 00:52:22.257549: step: 266/464, loss: 1.6566007137298584 2023-01-24 00:52:22.878495: step: 268/464, loss: 1.2498109340667725 2023-01-24 00:52:23.460492: step: 270/464, loss: 0.18298912048339844 2023-01-24 00:52:24.089821: step: 272/464, loss: 2.479445457458496 2023-01-24 00:52:24.770438: step: 274/464, loss: 2.039584159851074 2023-01-24 00:52:25.357650: step: 276/464, loss: 3.011651039123535 2023-01-24 00:52:26.012417: step: 278/464, loss: 1.4087164402008057 2023-01-24 00:52:26.658069: step: 280/464, loss: 1.6842014789581299 2023-01-24 00:52:27.332970: step: 282/464, loss: 2.3197803497314453 2023-01-24 00:52:27.965635: step: 284/464, loss: 3.6316616535186768 2023-01-24 00:52:28.588629: step: 286/464, loss: 2.2884769439697266 2023-01-24 00:52:29.192073: step: 288/464, loss: 1.5190672874450684 2023-01-24 00:52:29.805660: step: 290/464, loss: 0.6203249096870422 2023-01-24 00:52:30.371088: step: 292/464, loss: 1.1577428579330444 2023-01-24 00:52:30.945960: step: 294/464, loss: 0.1644885390996933 2023-01-24 00:52:31.523833: step: 296/464, loss: 5.212287425994873 2023-01-24 00:52:32.178049: step: 298/464, loss: 1.941894769668579 2023-01-24 00:52:32.894467: step: 300/464, loss: 1.883209228515625 2023-01-24 00:52:33.546107: step: 302/464, loss: 3.0236852169036865 2023-01-24 00:52:34.218125: step: 304/464, loss: 1.6562798023223877 2023-01-24 00:52:34.853856: step: 306/464, loss: 1.2389827966690063 2023-01-24 00:52:35.459843: step: 308/464, loss: 5.742254734039307 2023-01-24 00:52:36.084516: step: 310/464, loss: 3.27360463142395 2023-01-24 00:52:36.685954: step: 312/464, loss: 4.364145278930664 2023-01-24 00:52:37.363025: step: 314/464, loss: 6.163469314575195 2023-01-24 00:52:38.031724: step: 316/464, loss: 7.255561828613281 2023-01-24 00:52:38.654931: step: 318/464, loss: 0.9108213782310486 2023-01-24 00:52:39.262702: step: 320/464, loss: 0.9451639652252197 2023-01-24 00:52:39.834590: step: 322/464, loss: 1.8270184993743896 2023-01-24 00:52:40.418305: step: 324/464, loss: 0.9697843194007874 2023-01-24 00:52:41.047290: step: 326/464, loss: 3.6913609504699707 2023-01-24 00:52:41.578557: step: 328/464, loss: 4.866645812988281 2023-01-24 00:52:42.237740: step: 330/464, loss: 0.8567318320274353 2023-01-24 00:52:42.900829: step: 332/464, loss: 2.2915091514587402 2023-01-24 00:52:43.495790: step: 334/464, loss: 3.5241568088531494 2023-01-24 00:52:44.121995: step: 336/464, loss: 0.7516891360282898 2023-01-24 00:52:44.732977: step: 338/464, loss: 1.6842435598373413 2023-01-24 00:52:45.334945: step: 340/464, loss: 2.723252296447754 2023-01-24 00:52:45.988139: step: 342/464, loss: 1.7390908002853394 2023-01-24 00:52:46.725651: step: 344/464, loss: 3.8381030559539795 2023-01-24 00:52:47.355097: step: 346/464, loss: 0.8049541711807251 2023-01-24 00:52:47.904838: step: 348/464, loss: 0.7041277885437012 2023-01-24 00:52:48.609032: step: 350/464, loss: 2.2053818702697754 2023-01-24 00:52:49.270146: step: 352/464, loss: 8.421344757080078 2023-01-24 00:52:49.864855: step: 354/464, loss: 0.6538692116737366 2023-01-24 00:52:50.423776: step: 356/464, loss: 0.3336998224258423 2023-01-24 00:52:51.093486: step: 358/464, loss: 0.8734704256057739 2023-01-24 00:52:51.672378: step: 360/464, loss: 1.0664860010147095 2023-01-24 00:52:52.264823: step: 362/464, loss: 0.6072345972061157 2023-01-24 00:52:52.868131: step: 364/464, loss: 0.42965126037597656 2023-01-24 00:52:53.429674: step: 366/464, loss: 2.433159112930298 2023-01-24 00:52:54.042812: step: 368/464, loss: 1.0689464807510376 2023-01-24 00:52:54.610946: step: 370/464, loss: 0.9118122458457947 2023-01-24 00:52:55.279547: step: 372/464, loss: 1.2723240852355957 2023-01-24 00:52:55.956206: step: 374/464, loss: 2.288015842437744 2023-01-24 00:52:56.537972: step: 376/464, loss: 1.927898645401001 2023-01-24 00:52:57.141641: step: 378/464, loss: 2.154618740081787 2023-01-24 00:52:57.764916: step: 380/464, loss: 2.2089216709136963 2023-01-24 00:52:58.425884: step: 382/464, loss: 3.4999170303344727 2023-01-24 00:52:59.099846: step: 384/464, loss: 0.4415696859359741 2023-01-24 00:52:59.698664: step: 386/464, loss: 5.115442752838135 2023-01-24 00:53:00.326352: step: 388/464, loss: 1.9158753156661987 2023-01-24 00:53:00.921579: step: 390/464, loss: 1.7842466831207275 2023-01-24 00:53:01.571348: step: 392/464, loss: 0.6219794750213623 2023-01-24 00:53:02.260634: step: 394/464, loss: 1.2650036811828613 2023-01-24 00:53:02.877262: step: 396/464, loss: 2.0196750164031982 2023-01-24 00:53:03.573121: step: 398/464, loss: 1.8090322017669678 2023-01-24 00:53:04.213145: step: 400/464, loss: 1.7102181911468506 2023-01-24 00:53:04.869152: step: 402/464, loss: 4.04551362991333 2023-01-24 00:53:05.503940: step: 404/464, loss: 0.5374937653541565 2023-01-24 00:53:06.108462: step: 406/464, loss: 1.2260247468948364 2023-01-24 00:53:06.765164: step: 408/464, loss: 0.8174344301223755 2023-01-24 00:53:07.414048: step: 410/464, loss: 1.277963399887085 2023-01-24 00:53:08.002890: step: 412/464, loss: 1.2115471363067627 2023-01-24 00:53:08.626469: step: 414/464, loss: 1.9816060066223145 2023-01-24 00:53:09.247761: step: 416/464, loss: 3.267216682434082 2023-01-24 00:53:09.881175: step: 418/464, loss: 1.381102204322815 2023-01-24 00:53:10.475106: step: 420/464, loss: 1.9319000244140625 2023-01-24 00:53:11.121127: step: 422/464, loss: 0.7982077598571777 2023-01-24 00:53:11.760040: step: 424/464, loss: 1.7125730514526367 2023-01-24 00:53:12.331217: step: 426/464, loss: 1.6744165420532227 2023-01-24 00:53:12.982009: step: 428/464, loss: 1.3618673086166382 2023-01-24 00:53:13.566387: step: 430/464, loss: 0.8129212260246277 2023-01-24 00:53:14.099725: step: 432/464, loss: 1.3697012662887573 2023-01-24 00:53:14.661857: step: 434/464, loss: 1.0622531175613403 2023-01-24 00:53:15.216741: step: 436/464, loss: 1.1265355348587036 2023-01-24 00:53:15.860856: step: 438/464, loss: 6.394579887390137 2023-01-24 00:53:16.464017: step: 440/464, loss: 1.1750330924987793 2023-01-24 00:53:17.045014: step: 442/464, loss: 3.9063398838043213 2023-01-24 00:53:17.650686: step: 444/464, loss: 7.915856838226318 2023-01-24 00:53:18.223534: step: 446/464, loss: 2.0682547092437744 2023-01-24 00:53:18.803815: step: 448/464, loss: 1.599812626838684 2023-01-24 00:53:19.376571: step: 450/464, loss: 0.47663643956184387 2023-01-24 00:53:20.056763: step: 452/464, loss: 1.7338075637817383 2023-01-24 00:53:20.679265: step: 454/464, loss: 1.2220450639724731 2023-01-24 00:53:21.381404: step: 456/464, loss: 0.8941260576248169 2023-01-24 00:53:22.014314: step: 458/464, loss: 1.4423011541366577 2023-01-24 00:53:22.632829: step: 460/464, loss: 1.9423093795776367 2023-01-24 00:53:23.363101: step: 462/464, loss: 5.917514801025391 2023-01-24 00:53:23.972288: step: 464/464, loss: 0.6594303250312805 2023-01-24 00:53:24.583691: step: 466/464, loss: 5.047465801239014 2023-01-24 00:53:25.205083: step: 468/464, loss: 6.084995746612549 2023-01-24 00:53:25.786301: step: 470/464, loss: 0.6543664336204529 2023-01-24 00:53:26.359486: step: 472/464, loss: 1.768980860710144 2023-01-24 00:53:27.036095: step: 474/464, loss: 1.1559669971466064 2023-01-24 00:53:27.583926: step: 476/464, loss: 1.564383625984192 2023-01-24 00:53:28.199102: step: 478/464, loss: 0.354350745677948 2023-01-24 00:53:28.884024: step: 480/464, loss: 7.63833475112915 2023-01-24 00:53:29.472915: step: 482/464, loss: 3.8567113876342773 2023-01-24 00:53:30.089446: step: 484/464, loss: 3.6513772010803223 2023-01-24 00:53:30.669973: step: 486/464, loss: 1.1392251253128052 2023-01-24 00:53:31.308375: step: 488/464, loss: 0.7947462201118469 2023-01-24 00:53:31.948424: step: 490/464, loss: 0.6445846557617188 2023-01-24 00:53:32.561743: step: 492/464, loss: 0.7971004247665405 2023-01-24 00:53:33.142912: step: 494/464, loss: 2.8422393798828125 2023-01-24 00:53:33.734438: step: 496/464, loss: 1.5413612127304077 2023-01-24 00:53:34.404735: step: 498/464, loss: 1.6812660694122314 2023-01-24 00:53:34.977428: step: 500/464, loss: 0.44464367628097534 2023-01-24 00:53:35.609454: step: 502/464, loss: 1.7780667543411255 2023-01-24 00:53:36.192597: step: 504/464, loss: 3.0602879524230957 2023-01-24 00:53:36.888170: step: 506/464, loss: 2.5655436515808105 2023-01-24 00:53:37.468798: step: 508/464, loss: 0.34945783019065857 2023-01-24 00:53:38.141256: step: 510/464, loss: 1.001344919204712 2023-01-24 00:53:38.782517: step: 512/464, loss: 0.6395118236541748 2023-01-24 00:53:39.437125: step: 514/464, loss: 1.4813051223754883 2023-01-24 00:53:40.030056: step: 516/464, loss: 1.1046370267868042 2023-01-24 00:53:40.664980: step: 518/464, loss: 0.7151631712913513 2023-01-24 00:53:41.275355: step: 520/464, loss: 1.0134127140045166 2023-01-24 00:53:41.841809: step: 522/464, loss: 1.9913588762283325 2023-01-24 00:53:42.529328: step: 524/464, loss: 6.719761848449707 2023-01-24 00:53:43.170785: step: 526/464, loss: 0.5916548371315002 2023-01-24 00:53:43.773111: step: 528/464, loss: 1.092638373374939 2023-01-24 00:53:44.367107: step: 530/464, loss: 0.8220130801200867 2023-01-24 00:53:45.046125: step: 532/464, loss: 0.7773857116699219 2023-01-24 00:53:45.604090: step: 534/464, loss: 1.0024209022521973 2023-01-24 00:53:46.213364: step: 536/464, loss: 2.910573959350586 2023-01-24 00:53:46.834859: step: 538/464, loss: 3.8845930099487305 2023-01-24 00:53:47.434788: step: 540/464, loss: 0.8535181879997253 2023-01-24 00:53:48.000428: step: 542/464, loss: 0.4238739609718323 2023-01-24 00:53:48.640173: step: 544/464, loss: 1.4098758697509766 2023-01-24 00:53:49.240100: step: 546/464, loss: 0.9261318445205688 2023-01-24 00:53:49.825139: step: 548/464, loss: 0.5454732179641724 2023-01-24 00:53:50.360474: step: 550/464, loss: 0.25435954332351685 2023-01-24 00:53:50.871595: step: 552/464, loss: 0.3379531502723694 2023-01-24 00:53:51.550494: step: 554/464, loss: 0.7175856828689575 2023-01-24 00:53:52.167072: step: 556/464, loss: 2.1935458183288574 2023-01-24 00:53:52.796652: step: 558/464, loss: 2.169313907623291 2023-01-24 00:53:53.364193: step: 560/464, loss: 1.6759428977966309 2023-01-24 00:53:53.952561: step: 562/464, loss: 0.8559223413467407 2023-01-24 00:53:54.557497: step: 564/464, loss: 1.2192752361297607 2023-01-24 00:53:55.184528: step: 566/464, loss: 6.734214782714844 2023-01-24 00:53:55.835841: step: 568/464, loss: 1.4761178493499756 2023-01-24 00:53:56.464981: step: 570/464, loss: 0.6868404150009155 2023-01-24 00:53:57.039130: step: 572/464, loss: 0.4026981592178345 2023-01-24 00:53:57.694165: step: 574/464, loss: 1.4963315725326538 2023-01-24 00:53:58.374813: step: 576/464, loss: 0.5705739259719849 2023-01-24 00:53:59.007295: step: 578/464, loss: 0.6995615363121033 2023-01-24 00:53:59.574924: step: 580/464, loss: 2.8022780418395996 2023-01-24 00:54:00.210246: step: 582/464, loss: 3.60697340965271 2023-01-24 00:54:00.826779: step: 584/464, loss: 3.5830140113830566 2023-01-24 00:54:01.470095: step: 586/464, loss: 1.1568769216537476 2023-01-24 00:54:02.051868: step: 588/464, loss: 0.32793667912483215 2023-01-24 00:54:02.632452: step: 590/464, loss: 0.7447863817214966 2023-01-24 00:54:03.273584: step: 592/464, loss: 1.2953349351882935 2023-01-24 00:54:03.842321: step: 594/464, loss: 6.4309587478637695 2023-01-24 00:54:04.444033: step: 596/464, loss: 1.7616764307022095 2023-01-24 00:54:05.145162: step: 598/464, loss: 1.6291272640228271 2023-01-24 00:54:05.803014: step: 600/464, loss: 2.5082497596740723 2023-01-24 00:54:06.378465: step: 602/464, loss: 3.272620916366577 2023-01-24 00:54:06.965535: step: 604/464, loss: 0.9270907640457153 2023-01-24 00:54:07.577378: step: 606/464, loss: 0.6672835350036621 2023-01-24 00:54:08.159546: step: 608/464, loss: 0.9879858493804932 2023-01-24 00:54:08.698016: step: 610/464, loss: 0.5429264307022095 2023-01-24 00:54:09.351492: step: 612/464, loss: 2.867297887802124 2023-01-24 00:54:09.972522: step: 614/464, loss: 0.9067877531051636 2023-01-24 00:54:10.715583: step: 616/464, loss: 0.9723592400550842 2023-01-24 00:54:11.370334: step: 618/464, loss: 1.9591346979141235 2023-01-24 00:54:11.974449: step: 620/464, loss: 3.082007884979248 2023-01-24 00:54:12.593290: step: 622/464, loss: 0.9543931484222412 2023-01-24 00:54:13.216179: step: 624/464, loss: 1.2434319257736206 2023-01-24 00:54:13.895625: step: 626/464, loss: 0.28026509284973145 2023-01-24 00:54:14.581594: step: 628/464, loss: 1.2699750661849976 2023-01-24 00:54:15.217976: step: 630/464, loss: 10.263710975646973 2023-01-24 00:54:15.853470: step: 632/464, loss: 3.928663730621338 2023-01-24 00:54:16.456602: step: 634/464, loss: 0.27222734689712524 2023-01-24 00:54:17.119912: step: 636/464, loss: 0.46683162450790405 2023-01-24 00:54:17.664303: step: 638/464, loss: 0.9551849365234375 2023-01-24 00:54:18.304134: step: 640/464, loss: 2.2991268634796143 2023-01-24 00:54:18.913833: step: 642/464, loss: 0.5585823059082031 2023-01-24 00:54:19.534622: step: 644/464, loss: 1.1316943168640137 2023-01-24 00:54:20.261371: step: 646/464, loss: 1.7934308052062988 2023-01-24 00:54:20.839098: step: 648/464, loss: 0.7643765807151794 2023-01-24 00:54:21.482227: step: 650/464, loss: 0.8959051370620728 2023-01-24 00:54:22.099460: step: 652/464, loss: 1.3295584917068481 2023-01-24 00:54:22.726768: step: 654/464, loss: 0.3932426869869232 2023-01-24 00:54:23.373192: step: 656/464, loss: 0.37686648964881897 2023-01-24 00:54:23.984232: step: 658/464, loss: 1.5109634399414062 2023-01-24 00:54:24.598748: step: 660/464, loss: 1.0998424291610718 2023-01-24 00:54:25.231344: step: 662/464, loss: 4.340769290924072 2023-01-24 00:54:25.790422: step: 664/464, loss: 1.1464810371398926 2023-01-24 00:54:26.334368: step: 666/464, loss: 1.1062222719192505 2023-01-24 00:54:26.982895: step: 668/464, loss: 1.909510612487793 2023-01-24 00:54:27.590094: step: 670/464, loss: 3.512485980987549 2023-01-24 00:54:28.182394: step: 672/464, loss: 0.8809099197387695 2023-01-24 00:54:28.811102: step: 674/464, loss: 1.7871596813201904 2023-01-24 00:54:29.393310: step: 676/464, loss: 0.6380197405815125 2023-01-24 00:54:30.048231: step: 678/464, loss: 0.8312307596206665 2023-01-24 00:54:30.641630: step: 680/464, loss: 1.8398313522338867 2023-01-24 00:54:31.294573: step: 682/464, loss: 2.106290340423584 2023-01-24 00:54:31.866592: step: 684/464, loss: 0.15295329689979553 2023-01-24 00:54:32.459434: step: 686/464, loss: 1.5513062477111816 2023-01-24 00:54:33.086813: step: 688/464, loss: 1.4429486989974976 2023-01-24 00:54:33.714944: step: 690/464, loss: 2.961740493774414 2023-01-24 00:54:34.276815: step: 692/464, loss: 1.389230728149414 2023-01-24 00:54:34.901243: step: 694/464, loss: 0.4239702820777893 2023-01-24 00:54:35.507515: step: 696/464, loss: 1.065203309059143 2023-01-24 00:54:36.104496: step: 698/464, loss: 1.3233675956726074 2023-01-24 00:54:36.694392: step: 700/464, loss: 0.9995968341827393 2023-01-24 00:54:37.334622: step: 702/464, loss: 0.5759196281433105 2023-01-24 00:54:38.010111: step: 704/464, loss: 2.6891186237335205 2023-01-24 00:54:38.686830: step: 706/464, loss: 5.920416355133057 2023-01-24 00:54:39.303044: step: 708/464, loss: 0.6381198763847351 2023-01-24 00:54:39.901891: step: 710/464, loss: 1.4230495691299438 2023-01-24 00:54:40.512971: step: 712/464, loss: 2.112436294555664 2023-01-24 00:54:41.117879: step: 714/464, loss: 0.27687883377075195 2023-01-24 00:54:41.646332: step: 716/464, loss: 0.7682527303695679 2023-01-24 00:54:42.261940: step: 718/464, loss: 0.506624698638916 2023-01-24 00:54:42.869336: step: 720/464, loss: 3.78389310836792 2023-01-24 00:54:43.433437: step: 722/464, loss: 2.2654786109924316 2023-01-24 00:54:44.008159: step: 724/464, loss: 0.934356689453125 2023-01-24 00:54:44.621238: step: 726/464, loss: 0.2805936932563782 2023-01-24 00:54:45.217663: step: 728/464, loss: 3.379147529602051 2023-01-24 00:54:45.863557: step: 730/464, loss: 1.473331332206726 2023-01-24 00:54:46.472359: step: 732/464, loss: 1.9181344509124756 2023-01-24 00:54:47.170598: step: 734/464, loss: 1.289449691772461 2023-01-24 00:54:47.712444: step: 736/464, loss: 0.7308832406997681 2023-01-24 00:54:48.366189: step: 738/464, loss: 1.2970857620239258 2023-01-24 00:54:48.946088: step: 740/464, loss: 1.1372034549713135 2023-01-24 00:54:49.646765: step: 742/464, loss: 9.448132514953613 2023-01-24 00:54:50.286755: step: 744/464, loss: 15.97134017944336 2023-01-24 00:54:50.913793: step: 746/464, loss: 0.32879093289375305 2023-01-24 00:54:51.557907: step: 748/464, loss: 1.876343011856079 2023-01-24 00:54:52.145782: step: 750/464, loss: 2.1643118858337402 2023-01-24 00:54:52.707295: step: 752/464, loss: 4.110321044921875 2023-01-24 00:54:53.284533: step: 754/464, loss: 2.606478452682495 2023-01-24 00:54:53.918890: step: 756/464, loss: 0.7163191437721252 2023-01-24 00:54:54.509191: step: 758/464, loss: 1.9252287149429321 2023-01-24 00:54:55.087723: step: 760/464, loss: 0.46068650484085083 2023-01-24 00:54:55.710721: step: 762/464, loss: 1.9304959774017334 2023-01-24 00:54:56.370076: step: 764/464, loss: 0.5159561038017273 2023-01-24 00:54:56.985997: step: 766/464, loss: 2.7147603034973145 2023-01-24 00:54:57.581878: step: 768/464, loss: 1.8933334350585938 2023-01-24 00:54:58.259286: step: 770/464, loss: 1.7268967628479004 2023-01-24 00:54:58.882897: step: 772/464, loss: 0.45652639865875244 2023-01-24 00:54:59.436501: step: 774/464, loss: 2.0273325443267822 2023-01-24 00:55:00.096662: step: 776/464, loss: 2.12650203704834 2023-01-24 00:55:00.755991: step: 778/464, loss: 1.441702961921692 2023-01-24 00:55:01.416187: step: 780/464, loss: 0.38216400146484375 2023-01-24 00:55:02.074706: step: 782/464, loss: 2.160226821899414 2023-01-24 00:55:02.744949: step: 784/464, loss: 0.460218608379364 2023-01-24 00:55:03.404192: step: 786/464, loss: 0.9313029050827026 2023-01-24 00:55:04.053324: step: 788/464, loss: 8.107101440429688 2023-01-24 00:55:04.655185: step: 790/464, loss: 1.3128178119659424 2023-01-24 00:55:05.264529: step: 792/464, loss: 0.8122116923332214 2023-01-24 00:55:05.831762: step: 794/464, loss: 0.34943875670433044 2023-01-24 00:55:06.448932: step: 796/464, loss: 0.8123091459274292 2023-01-24 00:55:07.075891: step: 798/464, loss: 2.5083839893341064 2023-01-24 00:55:07.704446: step: 800/464, loss: 1.1529542207717896 2023-01-24 00:55:08.303256: step: 802/464, loss: 1.4196804761886597 2023-01-24 00:55:08.858555: step: 804/464, loss: 0.8466091752052307 2023-01-24 00:55:09.475117: step: 806/464, loss: 2.2197837829589844 2023-01-24 00:55:10.101873: step: 808/464, loss: 1.2632802724838257 2023-01-24 00:55:10.777015: step: 810/464, loss: 0.4522426128387451 2023-01-24 00:55:11.386668: step: 812/464, loss: 0.1866062730550766 2023-01-24 00:55:11.942338: step: 814/464, loss: 1.1612019538879395 2023-01-24 00:55:12.557202: step: 816/464, loss: 3.2337965965270996 2023-01-24 00:55:13.196522: step: 818/464, loss: 1.704648494720459 2023-01-24 00:55:13.882509: step: 820/464, loss: 1.4136182069778442 2023-01-24 00:55:14.535017: step: 822/464, loss: 0.6175124645233154 2023-01-24 00:55:15.169262: step: 824/464, loss: 3.2209134101867676 2023-01-24 00:55:15.763502: step: 826/464, loss: 0.3057088851928711 2023-01-24 00:55:16.354160: step: 828/464, loss: 0.34447264671325684 2023-01-24 00:55:17.057663: step: 830/464, loss: 0.23444117605686188 2023-01-24 00:55:17.687869: step: 832/464, loss: 1.7572524547576904 2023-01-24 00:55:18.314901: step: 834/464, loss: 1.0998787879943848 2023-01-24 00:55:18.937161: step: 836/464, loss: 1.7545276880264282 2023-01-24 00:55:19.481320: step: 838/464, loss: 0.498182088136673 2023-01-24 00:55:20.111801: step: 840/464, loss: 0.8405647277832031 2023-01-24 00:55:20.727334: step: 842/464, loss: 0.36363208293914795 2023-01-24 00:55:21.360419: step: 844/464, loss: 3.207554340362549 2023-01-24 00:55:21.906918: step: 846/464, loss: 0.6012241840362549 2023-01-24 00:55:22.553149: step: 848/464, loss: 0.46905815601348877 2023-01-24 00:55:23.131327: step: 850/464, loss: 0.22596511244773865 2023-01-24 00:55:23.766617: step: 852/464, loss: 1.1187995672225952 2023-01-24 00:55:24.391624: step: 854/464, loss: 8.382316589355469 2023-01-24 00:55:24.989789: step: 856/464, loss: 0.8991714715957642 2023-01-24 00:55:25.627391: step: 858/464, loss: 1.5431783199310303 2023-01-24 00:55:26.366173: step: 860/464, loss: 1.9879878759384155 2023-01-24 00:55:27.018126: step: 862/464, loss: 4.223198890686035 2023-01-24 00:55:27.558036: step: 864/464, loss: 1.4735733270645142 2023-01-24 00:55:28.186029: step: 866/464, loss: 1.8661390542984009 2023-01-24 00:55:28.799623: step: 868/464, loss: 2.429267406463623 2023-01-24 00:55:29.483460: step: 870/464, loss: 2.5032222270965576 2023-01-24 00:55:30.059120: step: 872/464, loss: 3.873025417327881 2023-01-24 00:55:30.713029: step: 874/464, loss: 9.972686767578125 2023-01-24 00:55:31.269095: step: 876/464, loss: 0.4837803840637207 2023-01-24 00:55:31.840605: step: 878/464, loss: 1.799896240234375 2023-01-24 00:55:32.526200: step: 880/464, loss: 0.8232937455177307 2023-01-24 00:55:33.080280: step: 882/464, loss: 4.3672332763671875 2023-01-24 00:55:33.681100: step: 884/464, loss: 1.8989098072052002 2023-01-24 00:55:34.278735: step: 886/464, loss: 1.4259133338928223 2023-01-24 00:55:34.884521: step: 888/464, loss: 1.350890874862671 2023-01-24 00:55:35.529012: step: 890/464, loss: 1.217652440071106 2023-01-24 00:55:36.147533: step: 892/464, loss: 2.2741332054138184 2023-01-24 00:55:36.776687: step: 894/464, loss: 1.9730730056762695 2023-01-24 00:55:37.464999: step: 896/464, loss: 1.6086657047271729 2023-01-24 00:55:38.094421: step: 898/464, loss: 0.7476229667663574 2023-01-24 00:55:38.685520: step: 900/464, loss: 1.049003005027771 2023-01-24 00:55:39.393674: step: 902/464, loss: 1.2468827962875366 2023-01-24 00:55:40.024401: step: 904/464, loss: 0.4603670835494995 2023-01-24 00:55:40.656442: step: 906/464, loss: 0.921544075012207 2023-01-24 00:55:41.272707: step: 908/464, loss: 1.3425829410552979 2023-01-24 00:55:41.889247: step: 910/464, loss: 5.488288879394531 2023-01-24 00:55:42.456423: step: 912/464, loss: 0.4062989354133606 2023-01-24 00:55:43.054708: step: 914/464, loss: 2.260221481323242 2023-01-24 00:55:43.737078: step: 916/464, loss: 0.9517034292221069 2023-01-24 00:55:44.312100: step: 918/464, loss: 1.0265724658966064 2023-01-24 00:55:44.918494: step: 920/464, loss: 0.3605181574821472 2023-01-24 00:55:45.556512: step: 922/464, loss: 0.7988200187683105 2023-01-24 00:55:46.196329: step: 924/464, loss: 0.6456014513969421 2023-01-24 00:55:46.802012: step: 926/464, loss: 0.7226334810256958 2023-01-24 00:55:47.455884: step: 928/464, loss: 4.191205024719238 2023-01-24 00:55:47.944346: step: 930/464, loss: 0.06958475708961487 ================================================== Loss: 1.974 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28450912640710624, 'r': 0.223619862611646, 'f1': 0.25041630426076156}, 'combined': 0.18451727682371905, 'epoch': 1} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3125490553042806, 'r': 0.20807903130909405, 'f1': 0.2498324863237083}, 'combined': 0.1631030739729909, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2870004073991143, 'r': 0.2364491992776794, 'f1': 0.25928385715184776}, 'combined': 0.1910512631645194, 'epoch': 1} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32552310219073954, 'r': 0.21801088495343107, 'f1': 0.2611339171420218}, 'combined': 0.17048121015489504, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2915343697675265, 'r': 0.224172261601545, 'f1': 0.2534538632240166}, 'combined': 0.18675547816506485, 'epoch': 1} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3298886092830557, 'r': 0.21841109944268425, 'f1': 0.2628173340255062}, 'combined': 0.1715802284311595, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2633333333333333, 'r': 0.1880952380952381, 'f1': 0.21944444444444441}, 'combined': 0.14629629629629626, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.22826086956521738, 'f1': 0.23863636363636362}, 'combined': 0.11931818181818181, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.12931034482758622, 'f1': 0.2142857142857143}, 'combined': 0.14285714285714285, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28450912640710624, 'r': 0.223619862611646, 'f1': 0.25041630426076156}, 'combined': 0.18451727682371905, 'epoch': 1} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3125490553042806, 'r': 0.20807903130909405, 'f1': 0.2498324863237083}, 'combined': 0.1631030739729909, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2633333333333333, 'r': 0.1880952380952381, 'f1': 0.21944444444444441}, 'combined': 0.14629629629629626, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2870004073991143, 'r': 0.2364491992776794, 'f1': 0.25928385715184776}, 'combined': 0.1910512631645194, 'epoch': 1} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32552310219073954, 'r': 0.21801088495343107, 'f1': 0.2611339171420218}, 'combined': 0.17048121015489504, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.22826086956521738, 'f1': 0.23863636363636362}, 'combined': 0.11931818181818181, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2915343697675265, 'r': 0.224172261601545, 'f1': 0.2534538632240166}, 'combined': 0.18675547816506485, 'epoch': 1} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3298886092830557, 'r': 0.21841109944268425, 'f1': 0.2628173340255062}, 'combined': 0.1715802284311595, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.12931034482758622, 'f1': 0.2142857142857143}, 'combined': 0.14285714285714285, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:58:33.930113: step: 2/464, loss: 2.4187142848968506 2023-01-24 00:58:34.612011: step: 4/464, loss: 1.1150403022766113 2023-01-24 00:58:35.227290: step: 6/464, loss: 0.42951124906539917 2023-01-24 00:58:35.821767: step: 8/464, loss: 0.3977397382259369 2023-01-24 00:58:36.393738: step: 10/464, loss: 0.8529415726661682 2023-01-24 00:58:36.979085: step: 12/464, loss: 1.3303990364074707 2023-01-24 00:58:37.567874: step: 14/464, loss: 1.2783358097076416 2023-01-24 00:58:38.204959: step: 16/464, loss: 3.2577548027038574 2023-01-24 00:58:38.846631: step: 18/464, loss: 4.058514595031738 2023-01-24 00:58:39.499559: step: 20/464, loss: 3.979752540588379 2023-01-24 00:58:40.218677: step: 22/464, loss: 0.47690603137016296 2023-01-24 00:58:40.891478: step: 24/464, loss: 13.170414924621582 2023-01-24 00:58:41.483752: step: 26/464, loss: 2.2249720096588135 2023-01-24 00:58:42.068037: step: 28/464, loss: 5.017965316772461 2023-01-24 00:58:42.769779: step: 30/464, loss: 1.719996690750122 2023-01-24 00:58:43.461609: step: 32/464, loss: 0.8921970129013062 2023-01-24 00:58:44.061296: step: 34/464, loss: 2.103689670562744 2023-01-24 00:58:44.689301: step: 36/464, loss: 1.3535950183868408 2023-01-24 00:58:45.342859: step: 38/464, loss: 0.5213176012039185 2023-01-24 00:58:45.970218: step: 40/464, loss: 2.4116697311401367 2023-01-24 00:58:46.579965: step: 42/464, loss: 0.6674009561538696 2023-01-24 00:58:47.201802: step: 44/464, loss: 0.758773684501648 2023-01-24 00:58:47.829402: step: 46/464, loss: 1.5063775777816772 2023-01-24 00:58:48.564195: step: 48/464, loss: 0.4901307225227356 2023-01-24 00:58:49.078880: step: 50/464, loss: 1.0143728256225586 2023-01-24 00:58:49.608580: step: 52/464, loss: 0.5364481806755066 2023-01-24 00:58:50.213314: step: 54/464, loss: 0.7740236520767212 2023-01-24 00:58:50.852024: step: 56/464, loss: 1.2117143869400024 2023-01-24 00:58:51.464850: step: 58/464, loss: 1.6247572898864746 2023-01-24 00:58:52.113907: step: 60/464, loss: 7.515179634094238 2023-01-24 00:58:52.757228: step: 62/464, loss: 1.646538496017456 2023-01-24 00:58:53.409684: step: 64/464, loss: 0.550868809223175 2023-01-24 00:58:54.053086: step: 66/464, loss: 0.6326333284378052 2023-01-24 00:58:54.705153: step: 68/464, loss: 1.768183708190918 2023-01-24 00:58:55.310183: step: 70/464, loss: 1.3389290571212769 2023-01-24 00:58:55.876829: step: 72/464, loss: 2.8966360092163086 2023-01-24 00:58:56.486866: step: 74/464, loss: 2.0588395595550537 2023-01-24 00:58:57.206891: step: 76/464, loss: 0.4368211627006531 2023-01-24 00:58:57.852602: step: 78/464, loss: 1.182746410369873 2023-01-24 00:58:58.454465: step: 80/464, loss: 1.4138734340667725 2023-01-24 00:58:59.099690: step: 82/464, loss: 0.3119286298751831 2023-01-24 00:58:59.710212: step: 84/464, loss: 8.67248249053955 2023-01-24 00:59:00.307072: step: 86/464, loss: 0.36811041831970215 2023-01-24 00:59:00.925959: step: 88/464, loss: 0.8464750051498413 2023-01-24 00:59:01.587026: step: 90/464, loss: 0.6472318768501282 2023-01-24 00:59:02.181747: step: 92/464, loss: 1.5341274738311768 2023-01-24 00:59:02.784837: step: 94/464, loss: 1.2942731380462646 2023-01-24 00:59:03.403211: step: 96/464, loss: 0.365266352891922 2023-01-24 00:59:04.050694: step: 98/464, loss: 0.9241402745246887 2023-01-24 00:59:04.613790: step: 100/464, loss: 0.2900221347808838 2023-01-24 00:59:05.259381: step: 102/464, loss: 0.7217608094215393 2023-01-24 00:59:05.896113: step: 104/464, loss: 0.29744741320610046 2023-01-24 00:59:06.542321: step: 106/464, loss: 1.191608190536499 2023-01-24 00:59:07.114692: step: 108/464, loss: 3.7245192527770996 2023-01-24 00:59:07.728513: step: 110/464, loss: 2.004127025604248 2023-01-24 00:59:08.325457: step: 112/464, loss: 2.8814492225646973 2023-01-24 00:59:08.929287: step: 114/464, loss: 0.2999536991119385 2023-01-24 00:59:09.607356: step: 116/464, loss: 0.24473756551742554 2023-01-24 00:59:10.198599: step: 118/464, loss: 2.501936197280884 2023-01-24 00:59:10.737585: step: 120/464, loss: 0.9501038193702698 2023-01-24 00:59:11.345600: step: 122/464, loss: 0.9306862950325012 2023-01-24 00:59:11.967743: step: 124/464, loss: 1.8798160552978516 2023-01-24 00:59:12.604869: step: 126/464, loss: 4.3524956703186035 2023-01-24 00:59:13.265826: step: 128/464, loss: 1.222733974456787 2023-01-24 00:59:13.934496: step: 130/464, loss: 0.6787784099578857 2023-01-24 00:59:14.596626: step: 132/464, loss: 0.28239545226097107 2023-01-24 00:59:15.160839: step: 134/464, loss: 4.137892723083496 2023-01-24 00:59:15.789457: step: 136/464, loss: 3.7175168991088867 2023-01-24 00:59:16.435401: step: 138/464, loss: 0.6110906600952148 2023-01-24 00:59:17.026809: step: 140/464, loss: 3.6410927772521973 2023-01-24 00:59:17.644295: step: 142/464, loss: 3.896022319793701 2023-01-24 00:59:18.236979: step: 144/464, loss: 0.6825583577156067 2023-01-24 00:59:18.866531: step: 146/464, loss: 1.4839084148406982 2023-01-24 00:59:19.403257: step: 148/464, loss: 0.5772268772125244 2023-01-24 00:59:20.000508: step: 150/464, loss: 1.6391181945800781 2023-01-24 00:59:20.617703: step: 152/464, loss: 1.1953972578048706 2023-01-24 00:59:21.231691: step: 154/464, loss: 2.2237143516540527 2023-01-24 00:59:21.810978: step: 156/464, loss: 0.38013097643852234 2023-01-24 00:59:22.449056: step: 158/464, loss: 1.0890448093414307 2023-01-24 00:59:23.040982: step: 160/464, loss: 0.47497808933258057 2023-01-24 00:59:23.660868: step: 162/464, loss: 1.2158253192901611 2023-01-24 00:59:24.274139: step: 164/464, loss: 0.6530698537826538 2023-01-24 00:59:24.868672: step: 166/464, loss: 0.9841471910476685 2023-01-24 00:59:25.430155: step: 168/464, loss: 3.2851874828338623 2023-01-24 00:59:26.168846: step: 170/464, loss: 2.146338939666748 2023-01-24 00:59:26.749188: step: 172/464, loss: 0.4231458902359009 2023-01-24 00:59:27.437954: step: 174/464, loss: 0.24826262891292572 2023-01-24 00:59:28.027421: step: 176/464, loss: 1.625036597251892 2023-01-24 00:59:28.664542: step: 178/464, loss: 0.5881162881851196 2023-01-24 00:59:29.284859: step: 180/464, loss: 3.509032964706421 2023-01-24 00:59:30.006091: step: 182/464, loss: 1.0909960269927979 2023-01-24 00:59:30.615385: step: 184/464, loss: 3.288181781768799 2023-01-24 00:59:31.326353: step: 186/464, loss: 1.0657768249511719 2023-01-24 00:59:31.917465: step: 188/464, loss: 0.6416198015213013 2023-01-24 00:59:32.471974: step: 190/464, loss: 2.416868209838867 2023-01-24 00:59:33.052354: step: 192/464, loss: 1.6623948812484741 2023-01-24 00:59:33.639637: step: 194/464, loss: 0.4083830416202545 2023-01-24 00:59:34.253748: step: 196/464, loss: 0.6389298439025879 2023-01-24 00:59:34.809793: step: 198/464, loss: 1.5056620836257935 2023-01-24 00:59:35.467169: step: 200/464, loss: 0.6298701763153076 2023-01-24 00:59:36.055961: step: 202/464, loss: 1.815793514251709 2023-01-24 00:59:36.664470: step: 204/464, loss: 0.5246171951293945 2023-01-24 00:59:37.248460: step: 206/464, loss: 0.44010719656944275 2023-01-24 00:59:37.892345: step: 208/464, loss: 0.8312480449676514 2023-01-24 00:59:38.513251: step: 210/464, loss: 0.25231248140335083 2023-01-24 00:59:39.088883: step: 212/464, loss: 1.9889405965805054 2023-01-24 00:59:39.705804: step: 214/464, loss: 0.5998648405075073 2023-01-24 00:59:40.349900: step: 216/464, loss: 2.334963083267212 2023-01-24 00:59:40.897008: step: 218/464, loss: 3.4502251148223877 2023-01-24 00:59:41.644602: step: 220/464, loss: 1.4894522428512573 2023-01-24 00:59:42.165954: step: 222/464, loss: 0.4923945367336273 2023-01-24 00:59:42.893560: step: 224/464, loss: 2.6111702919006348 2023-01-24 00:59:43.489544: step: 226/464, loss: 0.5539352297782898 2023-01-24 00:59:44.117886: step: 228/464, loss: 0.38164007663726807 2023-01-24 00:59:44.825374: step: 230/464, loss: 6.053883075714111 2023-01-24 00:59:45.385743: step: 232/464, loss: 1.0229780673980713 2023-01-24 00:59:46.021020: step: 234/464, loss: 0.270619660615921 2023-01-24 00:59:46.609674: step: 236/464, loss: 0.4925817549228668 2023-01-24 00:59:47.235850: step: 238/464, loss: 0.16969110071659088 2023-01-24 00:59:47.875404: step: 240/464, loss: 1.587585687637329 2023-01-24 00:59:48.497552: step: 242/464, loss: 0.6882718801498413 2023-01-24 00:59:49.074035: step: 244/464, loss: 0.5376545786857605 2023-01-24 00:59:49.687226: step: 246/464, loss: 0.8686901330947876 2023-01-24 00:59:50.329388: step: 248/464, loss: 1.1710368394851685 2023-01-24 00:59:51.022096: step: 250/464, loss: 0.6487290263175964 2023-01-24 00:59:51.585791: step: 252/464, loss: 1.1861798763275146 2023-01-24 00:59:52.179837: step: 254/464, loss: 1.1853152513504028 2023-01-24 00:59:52.827023: step: 256/464, loss: 0.9062669277191162 2023-01-24 00:59:53.417793: step: 258/464, loss: 1.076135277748108 2023-01-24 00:59:54.016707: step: 260/464, loss: 1.5580992698669434 2023-01-24 00:59:54.603792: step: 262/464, loss: 1.5502890348434448 2023-01-24 00:59:55.213829: step: 264/464, loss: 2.189706325531006 2023-01-24 00:59:55.798159: step: 266/464, loss: 3.4544577598571777 2023-01-24 00:59:56.413718: step: 268/464, loss: 2.041504144668579 2023-01-24 00:59:57.029527: step: 270/464, loss: 0.7947772145271301 2023-01-24 00:59:57.680121: step: 272/464, loss: 0.48068466782569885 2023-01-24 00:59:58.284814: step: 274/464, loss: 1.1678788661956787 2023-01-24 00:59:58.998454: step: 276/464, loss: 1.2527238130569458 2023-01-24 00:59:59.635757: step: 278/464, loss: 1.8058478832244873 2023-01-24 01:00:00.215064: step: 280/464, loss: 0.164068803191185 2023-01-24 01:00:00.825030: step: 282/464, loss: 0.997909665107727 2023-01-24 01:00:01.437311: step: 284/464, loss: 0.9107556343078613 2023-01-24 01:00:02.013919: step: 286/464, loss: 0.9681934118270874 2023-01-24 01:00:02.675413: step: 288/464, loss: 2.2107839584350586 2023-01-24 01:00:03.292596: step: 290/464, loss: 0.7348326444625854 2023-01-24 01:00:03.933681: step: 292/464, loss: 1.357094407081604 2023-01-24 01:00:04.529497: step: 294/464, loss: 1.6372148990631104 2023-01-24 01:00:05.179264: step: 296/464, loss: 3.5598602294921875 2023-01-24 01:00:05.798964: step: 298/464, loss: 0.6690852046012878 2023-01-24 01:00:06.413350: step: 300/464, loss: 2.2038683891296387 2023-01-24 01:00:07.076454: step: 302/464, loss: 0.31484052538871765 2023-01-24 01:00:07.682340: step: 304/464, loss: 0.4014740586280823 2023-01-24 01:00:08.277471: step: 306/464, loss: 0.7648613452911377 2023-01-24 01:00:08.826998: step: 308/464, loss: 0.6005369424819946 2023-01-24 01:00:09.441469: step: 310/464, loss: 1.0281970500946045 2023-01-24 01:00:10.126868: step: 312/464, loss: 1.3290072679519653 2023-01-24 01:00:10.776873: step: 314/464, loss: 2.8620059490203857 2023-01-24 01:00:11.371686: step: 316/464, loss: 2.0859313011169434 2023-01-24 01:00:12.022081: step: 318/464, loss: 0.7931711673736572 2023-01-24 01:00:12.610001: step: 320/464, loss: 2.7562625408172607 2023-01-24 01:00:13.230580: step: 322/464, loss: 0.9217475056648254 2023-01-24 01:00:13.910908: step: 324/464, loss: 2.597505569458008 2023-01-24 01:00:14.474870: step: 326/464, loss: 0.5340583324432373 2023-01-24 01:00:15.079909: step: 328/464, loss: 0.7224334478378296 2023-01-24 01:00:15.710168: step: 330/464, loss: 1.8008909225463867 2023-01-24 01:00:16.362908: step: 332/464, loss: 1.970460057258606 2023-01-24 01:00:16.956102: step: 334/464, loss: 1.087660789489746 2023-01-24 01:00:17.558631: step: 336/464, loss: 0.8673958778381348 2023-01-24 01:00:18.173090: step: 338/464, loss: 0.8990617394447327 2023-01-24 01:00:18.810702: step: 340/464, loss: 1.4432100057601929 2023-01-24 01:00:19.398321: step: 342/464, loss: 0.6929177641868591 2023-01-24 01:00:19.927760: step: 344/464, loss: 0.22175689041614532 2023-01-24 01:00:20.519400: step: 346/464, loss: 1.3276338577270508 2023-01-24 01:00:21.074489: step: 348/464, loss: 3.061007499694824 2023-01-24 01:00:21.688136: step: 350/464, loss: 0.39631396532058716 2023-01-24 01:00:22.254730: step: 352/464, loss: 0.8200819492340088 2023-01-24 01:00:22.833295: step: 354/464, loss: 0.6805083751678467 2023-01-24 01:00:23.466091: step: 356/464, loss: 3.4740169048309326 2023-01-24 01:00:24.171379: step: 358/464, loss: 7.517362117767334 2023-01-24 01:00:24.851885: step: 360/464, loss: 0.15741147100925446 2023-01-24 01:00:25.452916: step: 362/464, loss: 0.9822812080383301 2023-01-24 01:00:26.043045: step: 364/464, loss: 1.2826683521270752 2023-01-24 01:00:26.599885: step: 366/464, loss: 1.1188969612121582 2023-01-24 01:00:27.233212: step: 368/464, loss: 7.362642288208008 2023-01-24 01:00:27.904625: step: 370/464, loss: 0.9471052885055542 2023-01-24 01:00:28.546471: step: 372/464, loss: 4.690070629119873 2023-01-24 01:00:29.197550: step: 374/464, loss: 1.3145390748977661 2023-01-24 01:00:29.826289: step: 376/464, loss: 0.794826865196228 2023-01-24 01:00:30.448576: step: 378/464, loss: 1.242023229598999 2023-01-24 01:00:31.011910: step: 380/464, loss: 5.607382774353027 2023-01-24 01:00:31.629913: step: 382/464, loss: 0.3236657381057739 2023-01-24 01:00:32.300722: step: 384/464, loss: 0.6186391711235046 2023-01-24 01:00:33.081172: step: 386/464, loss: 4.379059791564941 2023-01-24 01:00:33.737968: step: 388/464, loss: 2.559650421142578 2023-01-24 01:00:34.376399: step: 390/464, loss: 0.7290323972702026 2023-01-24 01:00:34.944418: step: 392/464, loss: 1.9199469089508057 2023-01-24 01:00:35.567326: step: 394/464, loss: 8.768730163574219 2023-01-24 01:00:36.148278: step: 396/464, loss: 7.07295036315918 2023-01-24 01:00:36.874375: step: 398/464, loss: 3.395118236541748 2023-01-24 01:00:37.558922: step: 400/464, loss: 1.324446201324463 2023-01-24 01:00:38.162377: step: 402/464, loss: 1.6456454992294312 2023-01-24 01:00:38.783915: step: 404/464, loss: 2.236362934112549 2023-01-24 01:00:39.374371: step: 406/464, loss: 1.138903021812439 2023-01-24 01:00:39.983344: step: 408/464, loss: 1.8457611799240112 2023-01-24 01:00:40.581380: step: 410/464, loss: 7.671128273010254 2023-01-24 01:00:41.214279: step: 412/464, loss: 1.9276256561279297 2023-01-24 01:00:41.836814: step: 414/464, loss: 0.4018891453742981 2023-01-24 01:00:42.477348: step: 416/464, loss: 1.0885846614837646 2023-01-24 01:00:43.140465: step: 418/464, loss: 1.0276798009872437 2023-01-24 01:00:43.747365: step: 420/464, loss: 1.6159963607788086 2023-01-24 01:00:44.363990: step: 422/464, loss: 2.1850693225860596 2023-01-24 01:00:44.972146: step: 424/464, loss: 0.8571640253067017 2023-01-24 01:00:45.616306: step: 426/464, loss: 0.421733021736145 2023-01-24 01:00:46.160344: step: 428/464, loss: 1.596917748451233 2023-01-24 01:00:46.727583: step: 430/464, loss: 1.4006236791610718 2023-01-24 01:00:47.271228: step: 432/464, loss: 0.28295522928237915 2023-01-24 01:00:47.861273: step: 434/464, loss: 4.537855625152588 2023-01-24 01:00:48.423370: step: 436/464, loss: 0.3357005715370178 2023-01-24 01:00:49.038556: step: 438/464, loss: 0.3452349901199341 2023-01-24 01:00:49.684862: step: 440/464, loss: 0.5913131237030029 2023-01-24 01:00:50.200027: step: 442/464, loss: 0.19197748601436615 2023-01-24 01:00:50.849671: step: 444/464, loss: 0.9170107245445251 2023-01-24 01:00:51.544000: step: 446/464, loss: 1.5506609678268433 2023-01-24 01:00:52.140447: step: 448/464, loss: 1.7220441102981567 2023-01-24 01:00:52.733090: step: 450/464, loss: 0.3225691020488739 2023-01-24 01:00:53.318209: step: 452/464, loss: 0.25700774788856506 2023-01-24 01:00:54.029341: step: 454/464, loss: 0.4294798672199249 2023-01-24 01:00:54.564446: step: 456/464, loss: 0.9342938661575317 2023-01-24 01:00:55.202873: step: 458/464, loss: 2.666210412979126 2023-01-24 01:00:55.826442: step: 460/464, loss: 1.6266505718231201 2023-01-24 01:00:56.541286: step: 462/464, loss: 1.9788882732391357 2023-01-24 01:00:57.107846: step: 464/464, loss: 1.51228928565979 2023-01-24 01:00:57.734084: step: 466/464, loss: 0.583839476108551 2023-01-24 01:00:58.324166: step: 468/464, loss: 0.7826869487762451 2023-01-24 01:00:58.886498: step: 470/464, loss: 0.9563395977020264 2023-01-24 01:00:59.508999: step: 472/464, loss: 1.4059982299804688 2023-01-24 01:01:00.111839: step: 474/464, loss: 0.27404946088790894 2023-01-24 01:01:00.751050: step: 476/464, loss: 1.0723012685775757 2023-01-24 01:01:01.360473: step: 478/464, loss: 0.6428566575050354 2023-01-24 01:01:01.961644: step: 480/464, loss: 1.2714120149612427 2023-01-24 01:01:02.615432: step: 482/464, loss: 1.4510433673858643 2023-01-24 01:01:03.228020: step: 484/464, loss: 0.9711257219314575 2023-01-24 01:01:03.825250: step: 486/464, loss: 0.8307426571846008 2023-01-24 01:01:04.417045: step: 488/464, loss: 1.4742562770843506 2023-01-24 01:01:05.033964: step: 490/464, loss: 1.3365199565887451 2023-01-24 01:01:05.654084: step: 492/464, loss: 0.7465637922286987 2023-01-24 01:01:06.291789: step: 494/464, loss: 0.24693168699741364 2023-01-24 01:01:06.924116: step: 496/464, loss: 0.6171448230743408 2023-01-24 01:01:07.535062: step: 498/464, loss: 1.3459293842315674 2023-01-24 01:01:08.168804: step: 500/464, loss: 1.4232227802276611 2023-01-24 01:01:08.819096: step: 502/464, loss: 0.6017249226570129 2023-01-24 01:01:09.421950: step: 504/464, loss: 0.8567928075790405 2023-01-24 01:01:10.058199: step: 506/464, loss: 0.2506132125854492 2023-01-24 01:01:10.612105: step: 508/464, loss: 0.15811972320079803 2023-01-24 01:01:11.199765: step: 510/464, loss: 2.0057129859924316 2023-01-24 01:01:11.840770: step: 512/464, loss: 8.11188793182373 2023-01-24 01:01:12.505525: step: 514/464, loss: 1.6561710834503174 2023-01-24 01:01:13.207813: step: 516/464, loss: 0.8971422910690308 2023-01-24 01:01:13.820068: step: 518/464, loss: 0.5069814920425415 2023-01-24 01:01:14.410283: step: 520/464, loss: 1.3453993797302246 2023-01-24 01:01:15.043424: step: 522/464, loss: 2.265744924545288 2023-01-24 01:01:15.663175: step: 524/464, loss: 1.7987972497940063 2023-01-24 01:01:16.278224: step: 526/464, loss: 1.4300066232681274 2023-01-24 01:01:16.893907: step: 528/464, loss: 1.3526700735092163 2023-01-24 01:01:17.530081: step: 530/464, loss: 2.729444980621338 2023-01-24 01:01:18.147974: step: 532/464, loss: 2.88145112991333 2023-01-24 01:01:18.710186: step: 534/464, loss: 0.34708070755004883 2023-01-24 01:01:19.303453: step: 536/464, loss: 10.343750953674316 2023-01-24 01:01:19.955352: step: 538/464, loss: 0.594245433807373 2023-01-24 01:01:20.505291: step: 540/464, loss: 0.765412449836731 2023-01-24 01:01:21.124878: step: 542/464, loss: 1.2842626571655273 2023-01-24 01:01:21.725007: step: 544/464, loss: 2.3602559566497803 2023-01-24 01:01:22.312091: step: 546/464, loss: 1.1977283954620361 2023-01-24 01:01:22.961975: step: 548/464, loss: 0.5755077600479126 2023-01-24 01:01:23.559414: step: 550/464, loss: 3.6580703258514404 2023-01-24 01:01:24.167655: step: 552/464, loss: 0.8802728652954102 2023-01-24 01:01:24.796113: step: 554/464, loss: 0.2585405707359314 2023-01-24 01:01:25.432333: step: 556/464, loss: 0.6584972739219666 2023-01-24 01:01:26.051075: step: 558/464, loss: 0.42822739481925964 2023-01-24 01:01:26.726001: step: 560/464, loss: 0.7816445827484131 2023-01-24 01:01:27.316061: step: 562/464, loss: 1.3169465065002441 2023-01-24 01:01:27.910199: step: 564/464, loss: 3.784709930419922 2023-01-24 01:01:28.492537: step: 566/464, loss: 1.1701374053955078 2023-01-24 01:01:29.114779: step: 568/464, loss: 3.610154628753662 2023-01-24 01:01:29.705347: step: 570/464, loss: 0.28687018156051636 2023-01-24 01:01:30.333381: step: 572/464, loss: 0.5567400455474854 2023-01-24 01:01:30.899193: step: 574/464, loss: 0.470517098903656 2023-01-24 01:01:31.567838: step: 576/464, loss: 2.1874237060546875 2023-01-24 01:01:32.144636: step: 578/464, loss: 0.5025588870048523 2023-01-24 01:01:32.716748: step: 580/464, loss: 1.1785072088241577 2023-01-24 01:01:33.344063: step: 582/464, loss: 0.6726641654968262 2023-01-24 01:01:33.951878: step: 584/464, loss: 0.5943033695220947 2023-01-24 01:01:34.608147: step: 586/464, loss: 0.500882625579834 2023-01-24 01:01:35.272023: step: 588/464, loss: 1.7816160917282104 2023-01-24 01:01:35.914482: step: 590/464, loss: 2.4452710151672363 2023-01-24 01:01:36.491318: step: 592/464, loss: 2.792174816131592 2023-01-24 01:01:37.064666: step: 594/464, loss: 7.6662163734436035 2023-01-24 01:01:37.816638: step: 596/464, loss: 0.7320204973220825 2023-01-24 01:01:38.390959: step: 598/464, loss: 0.4488712549209595 2023-01-24 01:01:38.978108: step: 600/464, loss: 0.5410355925559998 2023-01-24 01:01:39.582247: step: 602/464, loss: 0.4308883249759674 2023-01-24 01:01:40.183853: step: 604/464, loss: 0.9670007824897766 2023-01-24 01:01:40.853999: step: 606/464, loss: 0.26769569516181946 2023-01-24 01:01:41.493156: step: 608/464, loss: 1.1745327711105347 2023-01-24 01:01:42.100483: step: 610/464, loss: 2.1340060234069824 2023-01-24 01:01:42.728910: step: 612/464, loss: 1.5466679334640503 2023-01-24 01:01:43.407128: step: 614/464, loss: 1.054890513420105 2023-01-24 01:01:44.058607: step: 616/464, loss: 0.4689290523529053 2023-01-24 01:01:44.674413: step: 618/464, loss: 3.1759071350097656 2023-01-24 01:01:45.257694: step: 620/464, loss: 0.8746719360351562 2023-01-24 01:01:45.851694: step: 622/464, loss: 0.40230920910835266 2023-01-24 01:01:46.434472: step: 624/464, loss: 1.1251157522201538 2023-01-24 01:01:47.075559: step: 626/464, loss: 0.9146066904067993 2023-01-24 01:01:47.684217: step: 628/464, loss: 1.6216715574264526 2023-01-24 01:01:48.365354: step: 630/464, loss: 1.0551037788391113 2023-01-24 01:01:49.003687: step: 632/464, loss: 0.9549449682235718 2023-01-24 01:01:49.600907: step: 634/464, loss: 3.389556884765625 2023-01-24 01:01:50.185922: step: 636/464, loss: 1.8385090827941895 2023-01-24 01:01:50.831170: step: 638/464, loss: 1.2338461875915527 2023-01-24 01:01:51.450531: step: 640/464, loss: 1.8416494131088257 2023-01-24 01:01:52.033125: step: 642/464, loss: 1.1865665912628174 2023-01-24 01:01:52.670270: step: 644/464, loss: 0.439983993768692 2023-01-24 01:01:53.280976: step: 646/464, loss: 0.5343146324157715 2023-01-24 01:01:54.000280: step: 648/464, loss: 0.8855712413787842 2023-01-24 01:01:54.563856: step: 650/464, loss: 1.6070959568023682 2023-01-24 01:01:55.195911: step: 652/464, loss: 1.1842074394226074 2023-01-24 01:01:55.879234: step: 654/464, loss: 2.822164297103882 2023-01-24 01:01:56.426909: step: 656/464, loss: 0.7699689269065857 2023-01-24 01:01:57.060897: step: 658/464, loss: 24.550432205200195 2023-01-24 01:01:57.711757: step: 660/464, loss: 1.128835916519165 2023-01-24 01:01:58.397416: step: 662/464, loss: 1.0747103691101074 2023-01-24 01:01:58.970891: step: 664/464, loss: 1.582950472831726 2023-01-24 01:01:59.649880: step: 666/464, loss: 0.4008786976337433 2023-01-24 01:02:00.276576: step: 668/464, loss: 0.841454803943634 2023-01-24 01:02:00.875335: step: 670/464, loss: 1.05251145362854 2023-01-24 01:02:01.540141: step: 672/464, loss: 0.6999272108078003 2023-01-24 01:02:02.147111: step: 674/464, loss: 0.5895771980285645 2023-01-24 01:02:02.786617: step: 676/464, loss: 3.9432730674743652 2023-01-24 01:02:03.420283: step: 678/464, loss: 0.5963733196258545 2023-01-24 01:02:04.070814: step: 680/464, loss: 1.180455207824707 2023-01-24 01:02:04.635555: step: 682/464, loss: 0.7794288992881775 2023-01-24 01:02:05.304278: step: 684/464, loss: 0.4899601936340332 2023-01-24 01:02:05.882622: step: 686/464, loss: 0.9549975991249084 2023-01-24 01:02:06.525393: step: 688/464, loss: 0.47775551676750183 2023-01-24 01:02:07.091086: step: 690/464, loss: 4.92923641204834 2023-01-24 01:02:07.742533: step: 692/464, loss: 0.21919465065002441 2023-01-24 01:02:08.359826: step: 694/464, loss: 2.0028936862945557 2023-01-24 01:02:08.985831: step: 696/464, loss: 3.0337226390838623 2023-01-24 01:02:09.545875: step: 698/464, loss: 1.7912003993988037 2023-01-24 01:02:10.163249: step: 700/464, loss: 0.4943981468677521 2023-01-24 01:02:10.848186: step: 702/464, loss: 1.810803771018982 2023-01-24 01:02:11.532353: step: 704/464, loss: 1.6248208284378052 2023-01-24 01:02:12.199759: step: 706/464, loss: 3.5441529750823975 2023-01-24 01:02:12.830057: step: 708/464, loss: 3.9152140617370605 2023-01-24 01:02:13.440402: step: 710/464, loss: 1.0746217966079712 2023-01-24 01:02:14.009136: step: 712/464, loss: 1.0703041553497314 2023-01-24 01:02:14.608326: step: 714/464, loss: 1.0361018180847168 2023-01-24 01:02:15.228560: step: 716/464, loss: 0.34002020955085754 2023-01-24 01:02:15.846736: step: 718/464, loss: 1.2165015935897827 2023-01-24 01:02:16.428999: step: 720/464, loss: 0.5235038995742798 2023-01-24 01:02:17.012176: step: 722/464, loss: 0.7303531169891357 2023-01-24 01:02:17.608575: step: 724/464, loss: 5.70162296295166 2023-01-24 01:02:18.227651: step: 726/464, loss: 0.8262537717819214 2023-01-24 01:02:18.902312: step: 728/464, loss: 0.32231605052948 2023-01-24 01:02:19.481684: step: 730/464, loss: 7.0202178955078125 2023-01-24 01:02:20.168814: step: 732/464, loss: 1.515478491783142 2023-01-24 01:02:20.795051: step: 734/464, loss: 3.805112600326538 2023-01-24 01:02:21.410766: step: 736/464, loss: 1.0174000263214111 2023-01-24 01:02:21.953771: step: 738/464, loss: 1.1117806434631348 2023-01-24 01:02:22.599953: step: 740/464, loss: 1.7839009761810303 2023-01-24 01:02:23.190887: step: 742/464, loss: 5.611761569976807 2023-01-24 01:02:23.791565: step: 744/464, loss: 3.942810535430908 2023-01-24 01:02:24.373668: step: 746/464, loss: 1.0222467184066772 2023-01-24 01:02:25.072834: step: 748/464, loss: 0.8140878677368164 2023-01-24 01:02:25.719996: step: 750/464, loss: 0.8113280534744263 2023-01-24 01:02:26.363596: step: 752/464, loss: 0.7970805168151855 2023-01-24 01:02:26.938402: step: 754/464, loss: 0.326967716217041 2023-01-24 01:02:27.596876: step: 756/464, loss: 0.513617753982544 2023-01-24 01:02:28.205154: step: 758/464, loss: 4.979618072509766 2023-01-24 01:02:28.801485: step: 760/464, loss: 3.6950440406799316 2023-01-24 01:02:29.416094: step: 762/464, loss: 0.11817652732133865 2023-01-24 01:02:29.953865: step: 764/464, loss: 1.2285819053649902 2023-01-24 01:02:30.572528: step: 766/464, loss: 0.31264907121658325 2023-01-24 01:02:31.153744: step: 768/464, loss: 3.249337673187256 2023-01-24 01:02:31.748988: step: 770/464, loss: 2.1495447158813477 2023-01-24 01:02:32.374695: step: 772/464, loss: 0.2141517996788025 2023-01-24 01:02:33.050057: step: 774/464, loss: 0.7330082058906555 2023-01-24 01:02:33.698315: step: 776/464, loss: 2.1982250213623047 2023-01-24 01:02:34.311893: step: 778/464, loss: 2.4884581565856934 2023-01-24 01:02:34.923867: step: 780/464, loss: 1.1558810472488403 2023-01-24 01:02:35.577323: step: 782/464, loss: 1.8573219776153564 2023-01-24 01:02:36.190044: step: 784/464, loss: 2.610272169113159 2023-01-24 01:02:36.908315: step: 786/464, loss: 1.9637396335601807 2023-01-24 01:02:37.537398: step: 788/464, loss: 1.42550528049469 2023-01-24 01:02:38.175994: step: 790/464, loss: 1.9258475303649902 2023-01-24 01:02:38.751068: step: 792/464, loss: 0.8880046010017395 2023-01-24 01:02:39.354160: step: 794/464, loss: 1.1594154834747314 2023-01-24 01:02:39.968899: step: 796/464, loss: 1.5943104028701782 2023-01-24 01:02:40.595750: step: 798/464, loss: 0.8055059909820557 2023-01-24 01:02:41.184301: step: 800/464, loss: 0.7145674228668213 2023-01-24 01:02:41.791068: step: 802/464, loss: 0.6078277826309204 2023-01-24 01:02:42.387355: step: 804/464, loss: 2.397320032119751 2023-01-24 01:02:43.005258: step: 806/464, loss: 0.8714660406112671 2023-01-24 01:02:43.665958: step: 808/464, loss: 2.104137897491455 2023-01-24 01:02:44.231360: step: 810/464, loss: 0.40223950147628784 2023-01-24 01:02:44.846430: step: 812/464, loss: 1.0274685621261597 2023-01-24 01:02:45.387924: step: 814/464, loss: 2.062959909439087 2023-01-24 01:02:46.029426: step: 816/464, loss: 0.6237020492553711 2023-01-24 01:02:46.691427: step: 818/464, loss: 0.4401100277900696 2023-01-24 01:02:47.397774: step: 820/464, loss: 1.614302635192871 2023-01-24 01:02:48.037023: step: 822/464, loss: 0.7584272027015686 2023-01-24 01:02:48.610153: step: 824/464, loss: 1.8146684169769287 2023-01-24 01:02:49.215255: step: 826/464, loss: 1.495345115661621 2023-01-24 01:02:49.811616: step: 828/464, loss: 1.7539260387420654 2023-01-24 01:02:50.508194: step: 830/464, loss: 13.450841903686523 2023-01-24 01:02:51.165296: step: 832/464, loss: 0.9826687574386597 2023-01-24 01:02:51.821662: step: 834/464, loss: 0.8282465934753418 2023-01-24 01:02:52.449979: step: 836/464, loss: 1.085193157196045 2023-01-24 01:02:53.073838: step: 838/464, loss: 0.7056786417961121 2023-01-24 01:02:53.682536: step: 840/464, loss: 8.448920249938965 2023-01-24 01:02:54.359873: step: 842/464, loss: 1.2861590385437012 2023-01-24 01:02:54.999176: step: 844/464, loss: 6.431674003601074 2023-01-24 01:02:55.601099: step: 846/464, loss: 1.2567325830459595 2023-01-24 01:02:56.181487: step: 848/464, loss: 0.5662977695465088 2023-01-24 01:02:56.805971: step: 850/464, loss: 1.2586541175842285 2023-01-24 01:02:57.540046: step: 852/464, loss: 1.3401498794555664 2023-01-24 01:02:58.194189: step: 854/464, loss: 0.34222355484962463 2023-01-24 01:02:58.832438: step: 856/464, loss: 2.7347874641418457 2023-01-24 01:02:59.503891: step: 858/464, loss: 0.3046620488166809 2023-01-24 01:03:00.105387: step: 860/464, loss: 0.5517042279243469 2023-01-24 01:03:00.704590: step: 862/464, loss: 0.48601213097572327 2023-01-24 01:03:01.348758: step: 864/464, loss: 7.9639692306518555 2023-01-24 01:03:01.937933: step: 866/464, loss: 0.9728195667266846 2023-01-24 01:03:02.531974: step: 868/464, loss: 0.6252280473709106 2023-01-24 01:03:03.123905: step: 870/464, loss: 1.1837365627288818 2023-01-24 01:03:03.738147: step: 872/464, loss: 3.1168296337127686 2023-01-24 01:03:04.413308: step: 874/464, loss: 0.8699135184288025 2023-01-24 01:03:05.107230: step: 876/464, loss: 0.5066431164741516 2023-01-24 01:03:05.724879: step: 878/464, loss: 1.5564488172531128 2023-01-24 01:03:06.387455: step: 880/464, loss: 1.1878808736801147 2023-01-24 01:03:06.964295: step: 882/464, loss: 2.0188775062561035 2023-01-24 01:03:07.575263: step: 884/464, loss: 0.8318590521812439 2023-01-24 01:03:08.189159: step: 886/464, loss: 0.8224694132804871 2023-01-24 01:03:08.800370: step: 888/464, loss: 0.2953476309776306 2023-01-24 01:03:09.419730: step: 890/464, loss: 0.8902751207351685 2023-01-24 01:03:10.015503: step: 892/464, loss: 1.3326923847198486 2023-01-24 01:03:10.565080: step: 894/464, loss: 0.45923858880996704 2023-01-24 01:03:11.173671: step: 896/464, loss: 1.2682569026947021 2023-01-24 01:03:11.852013: step: 898/464, loss: 3.1980276107788086 2023-01-24 01:03:12.442817: step: 900/464, loss: 0.6890709400177002 2023-01-24 01:03:13.119825: step: 902/464, loss: 0.5486746430397034 2023-01-24 01:03:13.771376: step: 904/464, loss: 4.070559978485107 2023-01-24 01:03:14.436843: step: 906/464, loss: 1.2787551879882812 2023-01-24 01:03:15.067794: step: 908/464, loss: 0.7099980115890503 2023-01-24 01:03:15.681911: step: 910/464, loss: 0.9993118643760681 2023-01-24 01:03:16.308168: step: 912/464, loss: 0.677141547203064 2023-01-24 01:03:16.941709: step: 914/464, loss: 0.28165921568870544 2023-01-24 01:03:17.677488: step: 916/464, loss: 3.867194652557373 2023-01-24 01:03:18.275908: step: 918/464, loss: 1.677793025970459 2023-01-24 01:03:18.896608: step: 920/464, loss: 2.707850456237793 2023-01-24 01:03:19.476458: step: 922/464, loss: 7.062939643859863 2023-01-24 01:03:20.066752: step: 924/464, loss: 2.674006938934326 2023-01-24 01:03:20.676154: step: 926/464, loss: 1.025741696357727 2023-01-24 01:03:21.309485: step: 928/464, loss: 1.2850465774536133 2023-01-24 01:03:21.796672: step: 930/464, loss: 0.13279880583286285 ================================================== Loss: 1.699 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31939895310085986, 'r': 0.24513416816436503, 'f1': 0.27738176461807296}, 'combined': 0.2043865634027906, 'epoch': 2} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3219012744193015, 'r': 0.20731623361683454, 'f1': 0.2522039002704795}, 'combined': 0.16465125095378455, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3191723159171076, 'r': 0.2606473354937438, 'f1': 0.2869561716465983}, 'combined': 0.2114413896343356, 'epoch': 2} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33680597943266294, 'r': 0.21598842167287283, 'f1': 0.2631943875052335}, 'combined': 0.17182638769771721, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32021533568947363, 'r': 0.24636605222233604, 'f1': 0.27847786672140123}, 'combined': 0.20519421758419037, 'epoch': 2} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33284071746005606, 'r': 0.19799599049640365, 'f1': 0.24829152371445556}, 'combined': 0.16209705693275336, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2532175032175032, 'r': 0.267687074829932, 'f1': 0.26025132275132273}, 'combined': 0.17350088183421514, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2391304347826087, 'r': 0.2391304347826087, 'f1': 0.2391304347826087}, 'combined': 0.11956521739130435, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.575, 'r': 0.19827586206896552, 'f1': 0.2948717948717949}, 'combined': 0.19658119658119658, 'epoch': 2} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31939895310085986, 'r': 0.24513416816436503, 'f1': 0.27738176461807296}, 'combined': 0.2043865634027906, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3219012744193015, 'r': 0.20731623361683454, 'f1': 0.2522039002704795}, 'combined': 0.16465125095378455, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2532175032175032, 'r': 0.267687074829932, 'f1': 0.26025132275132273}, 'combined': 0.17350088183421514, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3191723159171076, 'r': 0.2606473354937438, 'f1': 0.2869561716465983}, 'combined': 0.2114413896343356, 'epoch': 2} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33680597943266294, 'r': 0.21598842167287283, 'f1': 0.2631943875052335}, 'combined': 0.17182638769771721, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2391304347826087, 'r': 0.2391304347826087, 'f1': 0.2391304347826087}, 'combined': 0.11956521739130435, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32021533568947363, 'r': 0.24636605222233604, 'f1': 0.27847786672140123}, 'combined': 0.20519421758419037, 'epoch': 2} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33284071746005606, 'r': 0.19799599049640365, 'f1': 0.24829152371445556}, 'combined': 0.16209705693275336, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.575, 'r': 0.19827586206896552, 'f1': 0.2948717948717949}, 'combined': 0.19658119658119658, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:06:07.306198: step: 2/464, loss: 0.760606050491333 2023-01-24 01:06:07.925251: step: 4/464, loss: 1.206289529800415 2023-01-24 01:06:08.582101: step: 6/464, loss: 0.6236059069633484 2023-01-24 01:06:09.144167: step: 8/464, loss: 0.686949610710144 2023-01-24 01:06:09.748181: step: 10/464, loss: 4.821654796600342 2023-01-24 01:06:10.340207: step: 12/464, loss: 3.201460123062134 2023-01-24 01:06:10.923908: step: 14/464, loss: 1.308962106704712 2023-01-24 01:06:11.562085: step: 16/464, loss: 1.7753371000289917 2023-01-24 01:06:12.121163: step: 18/464, loss: 1.2878186702728271 2023-01-24 01:06:12.708560: step: 20/464, loss: 0.9418953657150269 2023-01-24 01:06:13.318285: step: 22/464, loss: 0.35510966181755066 2023-01-24 01:06:13.931691: step: 24/464, loss: 0.8940156698226929 2023-01-24 01:06:14.540178: step: 26/464, loss: 1.737830400466919 2023-01-24 01:06:15.089608: step: 28/464, loss: 1.108651041984558 2023-01-24 01:06:15.718892: step: 30/464, loss: 0.2774873971939087 2023-01-24 01:06:16.342094: step: 32/464, loss: 1.181756615638733 2023-01-24 01:06:16.982706: step: 34/464, loss: 0.8835384845733643 2023-01-24 01:06:17.667149: step: 36/464, loss: 0.2640799582004547 2023-01-24 01:06:18.393003: step: 38/464, loss: 0.9748669266700745 2023-01-24 01:06:19.041610: step: 40/464, loss: 1.3259875774383545 2023-01-24 01:06:19.647946: step: 42/464, loss: 1.2881635427474976 2023-01-24 01:06:20.272818: step: 44/464, loss: 0.8446848392486572 2023-01-24 01:06:20.886905: step: 46/464, loss: 0.39714711904525757 2023-01-24 01:06:21.495325: step: 48/464, loss: 0.3975988030433655 2023-01-24 01:06:22.112110: step: 50/464, loss: 1.5401337146759033 2023-01-24 01:06:22.819974: step: 52/464, loss: 1.0109367370605469 2023-01-24 01:06:23.520064: step: 54/464, loss: 1.12022864818573 2023-01-24 01:06:24.138233: step: 56/464, loss: 1.0091583728790283 2023-01-24 01:06:24.784700: step: 58/464, loss: 1.8323918581008911 2023-01-24 01:06:25.394371: step: 60/464, loss: 2.5795531272888184 2023-01-24 01:06:26.013631: step: 62/464, loss: 0.4858267903327942 2023-01-24 01:06:26.724696: step: 64/464, loss: 0.16328811645507812 2023-01-24 01:06:27.347977: step: 66/464, loss: 0.4784179925918579 2023-01-24 01:06:27.981499: step: 68/464, loss: 1.4395607709884644 2023-01-24 01:06:28.608936: step: 70/464, loss: 0.24091686308383942 2023-01-24 01:06:29.168382: step: 72/464, loss: 1.3440728187561035 2023-01-24 01:06:29.761061: step: 74/464, loss: 0.4796639084815979 2023-01-24 01:06:30.330538: step: 76/464, loss: 1.3922030925750732 2023-01-24 01:06:30.899786: step: 78/464, loss: 1.6975597143173218 2023-01-24 01:06:31.624956: step: 80/464, loss: 0.6518625020980835 2023-01-24 01:06:32.172719: step: 82/464, loss: 0.5299268960952759 2023-01-24 01:06:32.783474: step: 84/464, loss: 0.3443664312362671 2023-01-24 01:06:33.359600: step: 86/464, loss: 1.5084147453308105 2023-01-24 01:06:33.967732: step: 88/464, loss: 0.21632380783557892 2023-01-24 01:06:34.563369: step: 90/464, loss: 0.5657486319541931 2023-01-24 01:06:35.139513: step: 92/464, loss: 1.2559709548950195 2023-01-24 01:06:35.770736: step: 94/464, loss: 0.5930248498916626 2023-01-24 01:06:36.357003: step: 96/464, loss: 0.35066333413124084 2023-01-24 01:06:36.907329: step: 98/464, loss: 1.4469847679138184 2023-01-24 01:06:37.554703: step: 100/464, loss: 1.8228155374526978 2023-01-24 01:06:38.261566: step: 102/464, loss: 0.4205220639705658 2023-01-24 01:06:38.858166: step: 104/464, loss: 0.4687854051589966 2023-01-24 01:06:39.557511: step: 106/464, loss: 0.39240720868110657 2023-01-24 01:06:40.125450: step: 108/464, loss: 1.0675050020217896 2023-01-24 01:06:40.771778: step: 110/464, loss: 1.3701432943344116 2023-01-24 01:06:41.470843: step: 112/464, loss: 0.5170474648475647 2023-01-24 01:06:42.106752: step: 114/464, loss: 1.0686296224594116 2023-01-24 01:06:42.827044: step: 116/464, loss: 0.533559262752533 2023-01-24 01:06:43.468392: step: 118/464, loss: 0.2963751554489136 2023-01-24 01:06:44.079588: step: 120/464, loss: 0.8292972445487976 2023-01-24 01:06:44.678160: step: 122/464, loss: 1.3835995197296143 2023-01-24 01:06:45.239349: step: 124/464, loss: 0.8213170766830444 2023-01-24 01:06:45.893302: step: 126/464, loss: 0.2727380692958832 2023-01-24 01:06:46.538611: step: 128/464, loss: 0.8483532071113586 2023-01-24 01:06:47.112308: step: 130/464, loss: 0.5043916702270508 2023-01-24 01:06:47.808369: step: 132/464, loss: 1.355796217918396 2023-01-24 01:06:48.454302: step: 134/464, loss: 0.757931649684906 2023-01-24 01:06:49.156588: step: 136/464, loss: 1.686948299407959 2023-01-24 01:06:49.902455: step: 138/464, loss: 0.601470410823822 2023-01-24 01:06:50.514312: step: 140/464, loss: 0.8765469789505005 2023-01-24 01:06:51.109452: step: 142/464, loss: 3.985328197479248 2023-01-24 01:06:51.762139: step: 144/464, loss: 1.0359535217285156 2023-01-24 01:06:52.354337: step: 146/464, loss: 2.8616089820861816 2023-01-24 01:06:53.003851: step: 148/464, loss: 0.5264968276023865 2023-01-24 01:06:53.646177: step: 150/464, loss: 0.31478896737098694 2023-01-24 01:06:54.263246: step: 152/464, loss: 0.9130817651748657 2023-01-24 01:06:54.882535: step: 154/464, loss: 14.749269485473633 2023-01-24 01:06:55.469827: step: 156/464, loss: 1.5816093683242798 2023-01-24 01:06:56.087406: step: 158/464, loss: 0.45651036500930786 2023-01-24 01:06:56.711149: step: 160/464, loss: 0.591182291507721 2023-01-24 01:06:57.395033: step: 162/464, loss: 0.930648684501648 2023-01-24 01:06:58.023609: step: 164/464, loss: 2.3614890575408936 2023-01-24 01:06:58.649485: step: 166/464, loss: 1.4386388063430786 2023-01-24 01:06:59.212056: step: 168/464, loss: 0.519232988357544 2023-01-24 01:06:59.899290: step: 170/464, loss: 2.4041099548339844 2023-01-24 01:07:00.577235: step: 172/464, loss: 1.3577077388763428 2023-01-24 01:07:01.154647: step: 174/464, loss: 0.6223466396331787 2023-01-24 01:07:01.887500: step: 176/464, loss: 0.2883419990539551 2023-01-24 01:07:02.469386: step: 178/464, loss: 0.300167977809906 2023-01-24 01:07:03.106850: step: 180/464, loss: 0.3131827712059021 2023-01-24 01:07:03.717082: step: 182/464, loss: 2.6632723808288574 2023-01-24 01:07:04.348990: step: 184/464, loss: 1.6046333312988281 2023-01-24 01:07:04.976687: step: 186/464, loss: 1.1761341094970703 2023-01-24 01:07:05.607073: step: 188/464, loss: 2.0994763374328613 2023-01-24 01:07:06.269042: step: 190/464, loss: 0.5785619020462036 2023-01-24 01:07:06.919964: step: 192/464, loss: 0.3165262043476105 2023-01-24 01:07:07.505276: step: 194/464, loss: 0.565475344657898 2023-01-24 01:07:08.133976: step: 196/464, loss: 0.776207685470581 2023-01-24 01:07:08.768605: step: 198/464, loss: 0.8251913189888 2023-01-24 01:07:09.367135: step: 200/464, loss: 0.42415738105773926 2023-01-24 01:07:09.968424: step: 202/464, loss: 0.7484412789344788 2023-01-24 01:07:10.591769: step: 204/464, loss: 0.5022807717323303 2023-01-24 01:07:11.139948: step: 206/464, loss: 0.808050274848938 2023-01-24 01:07:11.777453: step: 208/464, loss: 1.0661100149154663 2023-01-24 01:07:12.331610: step: 210/464, loss: 1.1805107593536377 2023-01-24 01:07:12.974683: step: 212/464, loss: 1.4876399040222168 2023-01-24 01:07:13.618431: step: 214/464, loss: 1.7432225942611694 2023-01-24 01:07:14.210297: step: 216/464, loss: 0.44966259598731995 2023-01-24 01:07:14.886041: step: 218/464, loss: 0.4931964874267578 2023-01-24 01:07:15.509872: step: 220/464, loss: 0.9156018495559692 2023-01-24 01:07:16.170677: step: 222/464, loss: 1.7440860271453857 2023-01-24 01:07:16.841553: step: 224/464, loss: 1.1128723621368408 2023-01-24 01:07:17.424933: step: 226/464, loss: 0.9241154193878174 2023-01-24 01:07:17.977918: step: 228/464, loss: 1.0193665027618408 2023-01-24 01:07:18.823014: step: 230/464, loss: 3.656179904937744 2023-01-24 01:07:19.440560: step: 232/464, loss: 2.219571590423584 2023-01-24 01:07:20.057884: step: 234/464, loss: 0.6206840872764587 2023-01-24 01:07:20.732215: step: 236/464, loss: 0.8857594132423401 2023-01-24 01:07:21.413147: step: 238/464, loss: 2.9234108924865723 2023-01-24 01:07:22.048837: step: 240/464, loss: 1.119886875152588 2023-01-24 01:07:22.621227: step: 242/464, loss: 0.18582743406295776 2023-01-24 01:07:23.290091: step: 244/464, loss: 2.426466941833496 2023-01-24 01:07:23.959517: step: 246/464, loss: 0.4373409152030945 2023-01-24 01:07:24.581611: step: 248/464, loss: 1.3089731931686401 2023-01-24 01:07:25.167564: step: 250/464, loss: 1.0849674940109253 2023-01-24 01:07:25.767245: step: 252/464, loss: 0.937401294708252 2023-01-24 01:07:26.421183: step: 254/464, loss: 0.90260910987854 2023-01-24 01:07:27.124175: step: 256/464, loss: 0.1584082692861557 2023-01-24 01:07:27.747914: step: 258/464, loss: 2.6713130474090576 2023-01-24 01:07:28.404378: step: 260/464, loss: 0.5284579396247864 2023-01-24 01:07:29.019913: step: 262/464, loss: 7.9691290855407715 2023-01-24 01:07:29.678277: step: 264/464, loss: 0.2675029933452606 2023-01-24 01:07:30.268714: step: 266/464, loss: 1.0456719398498535 2023-01-24 01:07:30.915290: step: 268/464, loss: 1.667466163635254 2023-01-24 01:07:31.492133: step: 270/464, loss: 3.5396268367767334 2023-01-24 01:07:32.109162: step: 272/464, loss: 1.0372648239135742 2023-01-24 01:07:32.764559: step: 274/464, loss: 1.1810011863708496 2023-01-24 01:07:33.338159: step: 276/464, loss: 1.4830915927886963 2023-01-24 01:07:33.911722: step: 278/464, loss: 0.495393842458725 2023-01-24 01:07:34.509676: step: 280/464, loss: 0.6834374070167542 2023-01-24 01:07:35.068339: step: 282/464, loss: 1.3428385257720947 2023-01-24 01:07:35.708720: step: 284/464, loss: 4.434447288513184 2023-01-24 01:07:36.287957: step: 286/464, loss: 0.7110435962677002 2023-01-24 01:07:36.942829: step: 288/464, loss: 1.4327901601791382 2023-01-24 01:07:37.569103: step: 290/464, loss: 1.5169810056686401 2023-01-24 01:07:38.164528: step: 292/464, loss: 0.9470404386520386 2023-01-24 01:07:38.771167: step: 294/464, loss: 0.6485475897789001 2023-01-24 01:07:39.405619: step: 296/464, loss: 0.8116719126701355 2023-01-24 01:07:40.032988: step: 298/464, loss: 0.4988217055797577 2023-01-24 01:07:40.673164: step: 300/464, loss: 0.7799360752105713 2023-01-24 01:07:41.331010: step: 302/464, loss: 0.10422151535749435 2023-01-24 01:07:41.942662: step: 304/464, loss: 4.046075820922852 2023-01-24 01:07:42.504302: step: 306/464, loss: 1.3420652151107788 2023-01-24 01:07:43.136704: step: 308/464, loss: 4.418938159942627 2023-01-24 01:07:43.760536: step: 310/464, loss: 0.7048822641372681 2023-01-24 01:07:44.407184: step: 312/464, loss: 0.4731024503707886 2023-01-24 01:07:44.997461: step: 314/464, loss: 0.5880229473114014 2023-01-24 01:07:45.610236: step: 316/464, loss: 0.9250689148902893 2023-01-24 01:07:46.161189: step: 318/464, loss: 1.7018532752990723 2023-01-24 01:07:46.759660: step: 320/464, loss: 0.2926866114139557 2023-01-24 01:07:47.350772: step: 322/464, loss: 0.2836105525493622 2023-01-24 01:07:47.889204: step: 324/464, loss: 1.6393259763717651 2023-01-24 01:07:48.500299: step: 326/464, loss: 1.966665267944336 2023-01-24 01:07:49.081902: step: 328/464, loss: 1.082220196723938 2023-01-24 01:07:49.659318: step: 330/464, loss: 0.8523777723312378 2023-01-24 01:07:50.203011: step: 332/464, loss: 0.9466300010681152 2023-01-24 01:07:50.830536: step: 334/464, loss: 0.7340304851531982 2023-01-24 01:07:51.469006: step: 336/464, loss: 0.5134017467498779 2023-01-24 01:07:52.072010: step: 338/464, loss: 4.246328353881836 2023-01-24 01:07:52.749469: step: 340/464, loss: 1.5157082080841064 2023-01-24 01:07:53.357232: step: 342/464, loss: 0.7334668636322021 2023-01-24 01:07:53.926744: step: 344/464, loss: 1.2602307796478271 2023-01-24 01:07:54.537790: step: 346/464, loss: 0.20811858773231506 2023-01-24 01:07:55.084176: step: 348/464, loss: 2.6614458560943604 2023-01-24 01:07:55.696342: step: 350/464, loss: 1.1471977233886719 2023-01-24 01:07:56.333187: step: 352/464, loss: 0.416929692029953 2023-01-24 01:07:56.931259: step: 354/464, loss: 1.1446795463562012 2023-01-24 01:07:57.639623: step: 356/464, loss: 6.461057662963867 2023-01-24 01:07:58.251663: step: 358/464, loss: 1.036741018295288 2023-01-24 01:07:58.930815: step: 360/464, loss: 1.5848876237869263 2023-01-24 01:07:59.528352: step: 362/464, loss: 1.4145913124084473 2023-01-24 01:08:00.191752: step: 364/464, loss: 0.7970523834228516 2023-01-24 01:08:00.833510: step: 366/464, loss: 1.8541629314422607 2023-01-24 01:08:01.470233: step: 368/464, loss: 1.0937998294830322 2023-01-24 01:08:02.088844: step: 370/464, loss: 0.8395752310752869 2023-01-24 01:08:02.672412: step: 372/464, loss: 0.637897253036499 2023-01-24 01:08:03.238002: step: 374/464, loss: 1.4895851612091064 2023-01-24 01:08:03.874153: step: 376/464, loss: 0.5075844526290894 2023-01-24 01:08:04.499220: step: 378/464, loss: 0.44364073872566223 2023-01-24 01:08:05.152540: step: 380/464, loss: 0.4025718867778778 2023-01-24 01:08:05.700522: step: 382/464, loss: 4.496883869171143 2023-01-24 01:08:06.408289: step: 384/464, loss: 0.6705805659294128 2023-01-24 01:08:06.996916: step: 386/464, loss: 1.7309107780456543 2023-01-24 01:08:07.564256: step: 388/464, loss: 1.173380732536316 2023-01-24 01:08:08.153423: step: 390/464, loss: 0.8357263803482056 2023-01-24 01:08:08.755143: step: 392/464, loss: 3.84114146232605 2023-01-24 01:08:09.328281: step: 394/464, loss: 0.624237596988678 2023-01-24 01:08:09.923957: step: 396/464, loss: 11.576501846313477 2023-01-24 01:08:10.551585: step: 398/464, loss: 0.8219079971313477 2023-01-24 01:08:11.203716: step: 400/464, loss: 0.5659942030906677 2023-01-24 01:08:11.801351: step: 402/464, loss: 0.94480961561203 2023-01-24 01:08:12.426104: step: 404/464, loss: 1.4206219911575317 2023-01-24 01:08:13.017439: step: 406/464, loss: 0.8018251061439514 2023-01-24 01:08:13.657680: step: 408/464, loss: 1.2566442489624023 2023-01-24 01:08:14.244971: step: 410/464, loss: 1.8468358516693115 2023-01-24 01:08:14.853632: step: 412/464, loss: 0.3620869815349579 2023-01-24 01:08:15.423051: step: 414/464, loss: 0.9323358535766602 2023-01-24 01:08:15.995032: step: 416/464, loss: 0.6399459838867188 2023-01-24 01:08:16.721204: step: 418/464, loss: 4.124539375305176 2023-01-24 01:08:17.340230: step: 420/464, loss: 3.8888773918151855 2023-01-24 01:08:17.978765: step: 422/464, loss: 1.2105002403259277 2023-01-24 01:08:18.625293: step: 424/464, loss: 0.8193405270576477 2023-01-24 01:08:19.193078: step: 426/464, loss: 0.2333287000656128 2023-01-24 01:08:19.796861: step: 428/464, loss: 1.331892967224121 2023-01-24 01:08:20.405420: step: 430/464, loss: 1.5995988845825195 2023-01-24 01:08:21.034340: step: 432/464, loss: 3.416703224182129 2023-01-24 01:08:21.741204: step: 434/464, loss: 0.3051130771636963 2023-01-24 01:08:22.349278: step: 436/464, loss: 2.9975223541259766 2023-01-24 01:08:22.974270: step: 438/464, loss: 0.33027902245521545 2023-01-24 01:08:23.604426: step: 440/464, loss: 0.8280506730079651 2023-01-24 01:08:24.257895: step: 442/464, loss: 0.4559107720851898 2023-01-24 01:08:24.846549: step: 444/464, loss: 1.1207265853881836 2023-01-24 01:08:25.500463: step: 446/464, loss: 0.6139945387840271 2023-01-24 01:08:26.105522: step: 448/464, loss: 1.029955267906189 2023-01-24 01:08:26.731822: step: 450/464, loss: 1.5795459747314453 2023-01-24 01:08:27.346259: step: 452/464, loss: 1.4873511791229248 2023-01-24 01:08:27.975125: step: 454/464, loss: 0.7497228384017944 2023-01-24 01:08:28.589174: step: 456/464, loss: 0.6450871229171753 2023-01-24 01:08:29.263114: step: 458/464, loss: 0.9673238396644592 2023-01-24 01:08:29.865986: step: 460/464, loss: 0.5542375445365906 2023-01-24 01:08:30.505608: step: 462/464, loss: 3.1521575450897217 2023-01-24 01:08:31.178946: step: 464/464, loss: 0.9212895631790161 2023-01-24 01:08:31.765686: step: 466/464, loss: 0.3986515402793884 2023-01-24 01:08:32.363266: step: 468/464, loss: 3.097533941268921 2023-01-24 01:08:32.934257: step: 470/464, loss: 1.3664813041687012 2023-01-24 01:08:33.530984: step: 472/464, loss: 0.2985018193721771 2023-01-24 01:08:34.154853: step: 474/464, loss: 0.9037873148918152 2023-01-24 01:08:34.780806: step: 476/464, loss: 0.7243385314941406 2023-01-24 01:08:35.383699: step: 478/464, loss: 0.7075210809707642 2023-01-24 01:08:35.981542: step: 480/464, loss: 0.609467089176178 2023-01-24 01:08:36.569110: step: 482/464, loss: 1.2596162557601929 2023-01-24 01:08:37.117038: step: 484/464, loss: 0.3639557361602783 2023-01-24 01:08:37.709835: step: 486/464, loss: 2.3972129821777344 2023-01-24 01:08:38.416815: step: 488/464, loss: 1.2693873643875122 2023-01-24 01:08:39.018854: step: 490/464, loss: 2.1535725593566895 2023-01-24 01:08:39.624042: step: 492/464, loss: 1.125908374786377 2023-01-24 01:08:40.247551: step: 494/464, loss: 0.8671747446060181 2023-01-24 01:08:40.906180: step: 496/464, loss: 0.9024983048439026 2023-01-24 01:08:41.609545: step: 498/464, loss: 3.1360628604888916 2023-01-24 01:08:42.204115: step: 500/464, loss: 1.1961506605148315 2023-01-24 01:08:42.807265: step: 502/464, loss: 1.9053184986114502 2023-01-24 01:08:43.438681: step: 504/464, loss: 1.222051739692688 2023-01-24 01:08:44.075079: step: 506/464, loss: 0.6592516303062439 2023-01-24 01:08:44.634221: step: 508/464, loss: 2.705397129058838 2023-01-24 01:08:45.244848: step: 510/464, loss: 1.3199049234390259 2023-01-24 01:08:45.849336: step: 512/464, loss: 0.7428873777389526 2023-01-24 01:08:46.480912: step: 514/464, loss: 0.8885185718536377 2023-01-24 01:08:47.107252: step: 516/464, loss: 1.146332859992981 2023-01-24 01:08:47.809463: step: 518/464, loss: 1.469440221786499 2023-01-24 01:08:48.413371: step: 520/464, loss: 1.943987488746643 2023-01-24 01:08:49.026290: step: 522/464, loss: 0.3035939037799835 2023-01-24 01:08:49.610724: step: 524/464, loss: 0.741900622844696 2023-01-24 01:08:50.260864: step: 526/464, loss: 0.20480138063430786 2023-01-24 01:08:50.840338: step: 528/464, loss: 0.4008975028991699 2023-01-24 01:08:51.392627: step: 530/464, loss: 0.5813942551612854 2023-01-24 01:08:51.931442: step: 532/464, loss: 0.5860927104949951 2023-01-24 01:08:52.499758: step: 534/464, loss: 0.44807976484298706 2023-01-24 01:08:53.181236: step: 536/464, loss: 0.6833713054656982 2023-01-24 01:08:53.828562: step: 538/464, loss: 0.35068416595458984 2023-01-24 01:08:54.440819: step: 540/464, loss: 0.6230434775352478 2023-01-24 01:08:55.103347: step: 542/464, loss: 1.5239896774291992 2023-01-24 01:08:55.639371: step: 544/464, loss: 2.0269064903259277 2023-01-24 01:08:56.248066: step: 546/464, loss: 1.0461208820343018 2023-01-24 01:08:56.878143: step: 548/464, loss: 0.7397414445877075 2023-01-24 01:08:57.582663: step: 550/464, loss: 9.873390197753906 2023-01-24 01:08:58.199577: step: 552/464, loss: 0.9010847806930542 2023-01-24 01:08:58.813599: step: 554/464, loss: 0.3523099422454834 2023-01-24 01:08:59.519568: step: 556/464, loss: 1.7873268127441406 2023-01-24 01:09:00.130860: step: 558/464, loss: 1.1752336025238037 2023-01-24 01:09:00.762547: step: 560/464, loss: 1.152069091796875 2023-01-24 01:09:01.375776: step: 562/464, loss: 2.558657646179199 2023-01-24 01:09:01.944283: step: 564/464, loss: 2.342944622039795 2023-01-24 01:09:02.523177: step: 566/464, loss: 0.7745879292488098 2023-01-24 01:09:03.152308: step: 568/464, loss: 1.1222927570343018 2023-01-24 01:09:03.692965: step: 570/464, loss: 0.8955410122871399 2023-01-24 01:09:04.343190: step: 572/464, loss: 0.38107162714004517 2023-01-24 01:09:04.950540: step: 574/464, loss: 1.0901833772659302 2023-01-24 01:09:05.551808: step: 576/464, loss: 5.22351598739624 2023-01-24 01:09:06.185691: step: 578/464, loss: 0.7503743171691895 2023-01-24 01:09:06.809246: step: 580/464, loss: 1.6508920192718506 2023-01-24 01:09:07.434920: step: 582/464, loss: 1.1056047677993774 2023-01-24 01:09:08.051734: step: 584/464, loss: 1.5326659679412842 2023-01-24 01:09:08.698625: step: 586/464, loss: 0.6887844800949097 2023-01-24 01:09:09.326403: step: 588/464, loss: 1.729138970375061 2023-01-24 01:09:09.897120: step: 590/464, loss: 0.5543575882911682 2023-01-24 01:09:10.531672: step: 592/464, loss: 0.8567236661911011 2023-01-24 01:09:11.204988: step: 594/464, loss: 2.516667366027832 2023-01-24 01:09:11.771137: step: 596/464, loss: 3.135136127471924 2023-01-24 01:09:12.380406: step: 598/464, loss: 0.6818070411682129 2023-01-24 01:09:13.005812: step: 600/464, loss: 1.3219012022018433 2023-01-24 01:09:13.594810: step: 602/464, loss: 0.669220507144928 2023-01-24 01:09:14.214615: step: 604/464, loss: 0.6591908931732178 2023-01-24 01:09:14.865403: step: 606/464, loss: 0.9920374751091003 2023-01-24 01:09:15.445573: step: 608/464, loss: 0.8480632305145264 2023-01-24 01:09:16.079430: step: 610/464, loss: 1.539106011390686 2023-01-24 01:09:16.701621: step: 612/464, loss: 2.172405242919922 2023-01-24 01:09:17.412299: step: 614/464, loss: 0.5282004475593567 2023-01-24 01:09:18.041723: step: 616/464, loss: 0.40630924701690674 2023-01-24 01:09:18.797656: step: 618/464, loss: 0.6823827028274536 2023-01-24 01:09:19.394129: step: 620/464, loss: 0.4420427083969116 2023-01-24 01:09:20.006758: step: 622/464, loss: 0.8073464035987854 2023-01-24 01:09:20.604326: step: 624/464, loss: 0.20690055191516876 2023-01-24 01:09:21.248950: step: 626/464, loss: 0.7722306847572327 2023-01-24 01:09:21.841851: step: 628/464, loss: 1.8317077159881592 2023-01-24 01:09:22.437640: step: 630/464, loss: 1.550157904624939 2023-01-24 01:09:23.028382: step: 632/464, loss: 0.8743520379066467 2023-01-24 01:09:23.702733: step: 634/464, loss: 1.5449718236923218 2023-01-24 01:09:24.339254: step: 636/464, loss: 1.3520747423171997 2023-01-24 01:09:24.951701: step: 638/464, loss: 2.3713107109069824 2023-01-24 01:09:25.566202: step: 640/464, loss: 0.19988097250461578 2023-01-24 01:09:26.211649: step: 642/464, loss: 1.0374170541763306 2023-01-24 01:09:26.830465: step: 644/464, loss: 1.6740143299102783 2023-01-24 01:09:27.497237: step: 646/464, loss: 0.5114726424217224 2023-01-24 01:09:28.102691: step: 648/464, loss: 0.7445206046104431 2023-01-24 01:09:28.684653: step: 650/464, loss: 1.1598302125930786 2023-01-24 01:09:29.268927: step: 652/464, loss: 0.36706048250198364 2023-01-24 01:09:29.829954: step: 654/464, loss: 3.273355007171631 2023-01-24 01:09:30.425270: step: 656/464, loss: 1.2574751377105713 2023-01-24 01:09:30.975766: step: 658/464, loss: 1.3063020706176758 2023-01-24 01:09:31.560705: step: 660/464, loss: 0.4659620225429535 2023-01-24 01:09:32.168998: step: 662/464, loss: 1.2157269716262817 2023-01-24 01:09:32.816125: step: 664/464, loss: 0.5862348675727844 2023-01-24 01:09:33.383999: step: 666/464, loss: 0.717773973941803 2023-01-24 01:09:34.042937: step: 668/464, loss: 0.49368932843208313 2023-01-24 01:09:34.822591: step: 670/464, loss: 0.4888678789138794 2023-01-24 01:09:35.384298: step: 672/464, loss: 0.3790941536426544 2023-01-24 01:09:36.026447: step: 674/464, loss: 1.465955138206482 2023-01-24 01:09:36.689836: step: 676/464, loss: 1.2423797845840454 2023-01-24 01:09:37.272425: step: 678/464, loss: 0.29232341051101685 2023-01-24 01:09:37.911616: step: 680/464, loss: 0.6101855039596558 2023-01-24 01:09:38.536559: step: 682/464, loss: 0.9923334717750549 2023-01-24 01:09:39.151557: step: 684/464, loss: 1.0098955631256104 2023-01-24 01:09:39.806956: step: 686/464, loss: 0.7436755299568176 2023-01-24 01:09:40.422188: step: 688/464, loss: 1.1025340557098389 2023-01-24 01:09:40.997105: step: 690/464, loss: 0.6141563653945923 2023-01-24 01:09:41.611361: step: 692/464, loss: 5.098584175109863 2023-01-24 01:09:42.231991: step: 694/464, loss: 0.9757270812988281 2023-01-24 01:09:42.901983: step: 696/464, loss: 3.4678096771240234 2023-01-24 01:09:43.483034: step: 698/464, loss: 0.35483089089393616 2023-01-24 01:09:44.061624: step: 700/464, loss: 0.9248161911964417 2023-01-24 01:09:44.644378: step: 702/464, loss: 0.3786623775959015 2023-01-24 01:09:45.311190: step: 704/464, loss: 1.2063881158828735 2023-01-24 01:09:45.923875: step: 706/464, loss: 0.7255852222442627 2023-01-24 01:09:46.479399: step: 708/464, loss: 0.9540723562240601 2023-01-24 01:09:47.078724: step: 710/464, loss: 0.387376993894577 2023-01-24 01:09:47.723590: step: 712/464, loss: 0.412129670381546 2023-01-24 01:09:48.336301: step: 714/464, loss: 4.866971969604492 2023-01-24 01:09:48.905833: step: 716/464, loss: 0.6999180912971497 2023-01-24 01:09:49.477211: step: 718/464, loss: 3.2527616024017334 2023-01-24 01:09:50.153135: step: 720/464, loss: 0.9839757680892944 2023-01-24 01:09:50.778185: step: 722/464, loss: 3.7680652141571045 2023-01-24 01:09:51.411458: step: 724/464, loss: 1.1528985500335693 2023-01-24 01:09:52.110036: step: 726/464, loss: 0.2059568166732788 2023-01-24 01:09:52.744990: step: 728/464, loss: 0.9154174327850342 2023-01-24 01:09:53.360989: step: 730/464, loss: 0.508955180644989 2023-01-24 01:09:53.977262: step: 732/464, loss: 0.45395326614379883 2023-01-24 01:09:54.609082: step: 734/464, loss: 1.9029622077941895 2023-01-24 01:09:55.198666: step: 736/464, loss: 0.24253416061401367 2023-01-24 01:09:55.788679: step: 738/464, loss: 3.9075145721435547 2023-01-24 01:09:56.369865: step: 740/464, loss: 1.4352221488952637 2023-01-24 01:09:56.963365: step: 742/464, loss: 1.3655582666397095 2023-01-24 01:09:57.556561: step: 744/464, loss: 0.7305575609207153 2023-01-24 01:09:58.208348: step: 746/464, loss: 0.31875115633010864 2023-01-24 01:09:58.812555: step: 748/464, loss: 0.8504392504692078 2023-01-24 01:09:59.481384: step: 750/464, loss: 1.7576044797897339 2023-01-24 01:10:00.161391: step: 752/464, loss: 1.8157269954681396 2023-01-24 01:10:00.786168: step: 754/464, loss: 0.5760119557380676 2023-01-24 01:10:01.434887: step: 756/464, loss: 0.46522757411003113 2023-01-24 01:10:02.036016: step: 758/464, loss: 0.4888097643852234 2023-01-24 01:10:02.600803: step: 760/464, loss: 0.8452578186988831 2023-01-24 01:10:03.305382: step: 762/464, loss: 0.5606443285942078 2023-01-24 01:10:03.974778: step: 764/464, loss: 1.1209548711776733 2023-01-24 01:10:04.619137: step: 766/464, loss: 0.8494024276733398 2023-01-24 01:10:05.199028: step: 768/464, loss: 2.736093521118164 2023-01-24 01:10:05.768092: step: 770/464, loss: 1.3247060775756836 2023-01-24 01:10:06.417636: step: 772/464, loss: 2.002469778060913 2023-01-24 01:10:06.994918: step: 774/464, loss: 3.2550501823425293 2023-01-24 01:10:07.580113: step: 776/464, loss: 0.8077048063278198 2023-01-24 01:10:08.166782: step: 778/464, loss: 0.4682812988758087 2023-01-24 01:10:08.712421: step: 780/464, loss: 1.0419408082962036 2023-01-24 01:10:09.406315: step: 782/464, loss: 1.3942831754684448 2023-01-24 01:10:09.933060: step: 784/464, loss: 4.118315696716309 2023-01-24 01:10:10.572740: step: 786/464, loss: 0.7110327482223511 2023-01-24 01:10:11.158157: step: 788/464, loss: 0.537163257598877 2023-01-24 01:10:11.825278: step: 790/464, loss: 3.044668436050415 2023-01-24 01:10:12.412399: step: 792/464, loss: 2.6408910751342773 2023-01-24 01:10:13.050862: step: 794/464, loss: 0.8422490358352661 2023-01-24 01:10:13.666011: step: 796/464, loss: 1.2086379528045654 2023-01-24 01:10:14.405063: step: 798/464, loss: 2.6411752700805664 2023-01-24 01:10:14.977621: step: 800/464, loss: 1.6432220935821533 2023-01-24 01:10:15.563148: step: 802/464, loss: 0.5683345794677734 2023-01-24 01:10:16.197161: step: 804/464, loss: 1.1452221870422363 2023-01-24 01:10:16.771587: step: 806/464, loss: 2.8372998237609863 2023-01-24 01:10:17.370527: step: 808/464, loss: 0.8040033578872681 2023-01-24 01:10:17.933769: step: 810/464, loss: 2.037003755569458 2023-01-24 01:10:18.571894: step: 812/464, loss: 1.8354355096817017 2023-01-24 01:10:19.211452: step: 814/464, loss: 0.59405517578125 2023-01-24 01:10:19.933848: step: 816/464, loss: 2.1018495559692383 2023-01-24 01:10:20.586919: step: 818/464, loss: 0.8677703142166138 2023-01-24 01:10:21.219279: step: 820/464, loss: 0.5010039806365967 2023-01-24 01:10:21.829488: step: 822/464, loss: 0.891311764717102 2023-01-24 01:10:22.430117: step: 824/464, loss: 0.5709667205810547 2023-01-24 01:10:23.060400: step: 826/464, loss: 0.16370205581188202 2023-01-24 01:10:23.640482: step: 828/464, loss: 0.8301252126693726 2023-01-24 01:10:24.267479: step: 830/464, loss: 0.792355477809906 2023-01-24 01:10:24.921998: step: 832/464, loss: 0.8665425777435303 2023-01-24 01:10:25.509385: step: 834/464, loss: 0.21131619811058044 2023-01-24 01:10:26.186815: step: 836/464, loss: 8.229231834411621 2023-01-24 01:10:26.812166: step: 838/464, loss: 0.24403637647628784 2023-01-24 01:10:27.459361: step: 840/464, loss: 1.594043493270874 2023-01-24 01:10:28.044900: step: 842/464, loss: 0.7965450882911682 2023-01-24 01:10:28.620854: step: 844/464, loss: 1.0780465602874756 2023-01-24 01:10:29.257160: step: 846/464, loss: 1.3620350360870361 2023-01-24 01:10:29.791595: step: 848/464, loss: 0.4115268886089325 2023-01-24 01:10:30.437135: step: 850/464, loss: 2.11138916015625 2023-01-24 01:10:31.103353: step: 852/464, loss: 0.945903480052948 2023-01-24 01:10:31.678303: step: 854/464, loss: 1.8096015453338623 2023-01-24 01:10:32.335027: step: 856/464, loss: 2.286001682281494 2023-01-24 01:10:32.945889: step: 858/464, loss: 0.39916497468948364 2023-01-24 01:10:33.523235: step: 860/464, loss: 1.914172649383545 2023-01-24 01:10:34.153274: step: 862/464, loss: 1.116877794265747 2023-01-24 01:10:34.790713: step: 864/464, loss: 0.7338138222694397 2023-01-24 01:10:35.393106: step: 866/464, loss: 1.5705674886703491 2023-01-24 01:10:36.061966: step: 868/464, loss: 3.6879262924194336 2023-01-24 01:10:36.681505: step: 870/464, loss: 0.6445427536964417 2023-01-24 01:10:37.279431: step: 872/464, loss: 1.250487208366394 2023-01-24 01:10:37.858267: step: 874/464, loss: 2.3814239501953125 2023-01-24 01:10:38.500327: step: 876/464, loss: 0.8483049869537354 2023-01-24 01:10:39.200172: step: 878/464, loss: 0.6505409479141235 2023-01-24 01:10:39.793616: step: 880/464, loss: 1.3916373252868652 2023-01-24 01:10:40.433744: step: 882/464, loss: 0.3472626805305481 2023-01-24 01:10:41.036446: step: 884/464, loss: 0.6810139417648315 2023-01-24 01:10:41.605571: step: 886/464, loss: 0.9531253576278687 2023-01-24 01:10:42.222649: step: 888/464, loss: 2.4580156803131104 2023-01-24 01:10:42.830956: step: 890/464, loss: 0.4591882824897766 2023-01-24 01:10:43.455850: step: 892/464, loss: 0.8312535285949707 2023-01-24 01:10:44.065459: step: 894/464, loss: 0.5686916708946228 2023-01-24 01:10:44.709627: step: 896/464, loss: 0.17750242352485657 2023-01-24 01:10:45.240931: step: 898/464, loss: 1.004361867904663 2023-01-24 01:10:45.833517: step: 900/464, loss: 2.430652618408203 2023-01-24 01:10:46.362535: step: 902/464, loss: 1.3346503973007202 2023-01-24 01:10:47.008493: step: 904/464, loss: 1.2361981868743896 2023-01-24 01:10:47.623102: step: 906/464, loss: 0.4384477436542511 2023-01-24 01:10:48.251513: step: 908/464, loss: 0.49424174427986145 2023-01-24 01:10:48.883164: step: 910/464, loss: 0.32670140266418457 2023-01-24 01:10:49.445537: step: 912/464, loss: 1.3756822347640991 2023-01-24 01:10:50.096998: step: 914/464, loss: 5.016775131225586 2023-01-24 01:10:50.740767: step: 916/464, loss: 0.8426229953765869 2023-01-24 01:10:51.361794: step: 918/464, loss: 5.336833953857422 2023-01-24 01:10:51.961105: step: 920/464, loss: 1.2759982347488403 2023-01-24 01:10:52.536462: step: 922/464, loss: 1.1790809631347656 2023-01-24 01:10:53.115113: step: 924/464, loss: 2.6815872192382812 2023-01-24 01:10:53.741455: step: 926/464, loss: 0.760179877281189 2023-01-24 01:10:54.331593: step: 928/464, loss: 0.6275213360786438 2023-01-24 01:10:54.829754: step: 930/464, loss: 0.2987763285636902 ================================================== Loss: 1.330 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3328811011526529, 'r': 0.2332689534592454, 'f1': 0.27431182054895675}, 'combined': 0.20212449935186286, 'epoch': 3} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.316740214583385, 'r': 0.22142756285554072, 'f1': 0.26064367549950257}, 'combined': 0.170161156025582, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33206966644466646, 'r': 0.24527873089662863, 'f1': 0.28215069697912837}, 'combined': 0.20790051356356826, 'epoch': 3} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32083684101787785, 'r': 0.22399709726110556, 'f1': 0.2638107358342572}, 'combined': 0.17222877054464458, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35398858135306016, 'r': 0.23264022297256037, 'f1': 0.2807635148103129}, 'combined': 0.20687837933391476, 'epoch': 3} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3294295745019255, 'r': 0.21348002673956126, 'f1': 0.2590730914047401}, 'combined': 0.1691358006062034, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3115942028985507, 'r': 0.20476190476190478, 'f1': 0.2471264367816092}, 'combined': 0.16475095785440613, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3645833333333333, 'r': 0.3804347826086957, 'f1': 0.3723404255319149}, 'combined': 0.18617021276595744, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5714285714285714, 'r': 0.13793103448275862, 'f1': 0.2222222222222222}, 'combined': 0.14814814814814814, 'epoch': 3} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31939895310085986, 'r': 0.24513416816436503, 'f1': 0.27738176461807296}, 'combined': 0.2043865634027906, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3219012744193015, 'r': 0.20731623361683454, 'f1': 0.2522039002704795}, 'combined': 0.16465125095378455, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2532175032175032, 'r': 0.267687074829932, 'f1': 0.26025132275132273}, 'combined': 0.17350088183421514, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33206966644466646, 'r': 0.24527873089662863, 'f1': 0.28215069697912837}, 'combined': 0.20790051356356826, 'epoch': 3} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32083684101787785, 'r': 0.22399709726110556, 'f1': 0.2638107358342572}, 'combined': 0.17222877054464458, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3645833333333333, 'r': 0.3804347826086957, 'f1': 0.3723404255319149}, 'combined': 0.18617021276595744, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32021533568947363, 'r': 0.24636605222233604, 'f1': 0.27847786672140123}, 'combined': 0.20519421758419037, 'epoch': 2} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33284071746005606, 'r': 0.19799599049640365, 'f1': 0.24829152371445556}, 'combined': 0.16209705693275336, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.575, 'r': 0.19827586206896552, 'f1': 0.2948717948717949}, 'combined': 0.19658119658119658, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:13:31.709368: step: 2/464, loss: 0.5093468427658081 2023-01-24 01:13:32.347540: step: 4/464, loss: 0.3172636330127716 2023-01-24 01:13:32.953467: step: 6/464, loss: 0.5387977361679077 2023-01-24 01:13:33.641546: step: 8/464, loss: 0.3183968663215637 2023-01-24 01:13:34.232361: step: 10/464, loss: 0.4791586399078369 2023-01-24 01:13:34.884825: step: 12/464, loss: 0.463077187538147 2023-01-24 01:13:35.548190: step: 14/464, loss: 0.6189804673194885 2023-01-24 01:13:36.163502: step: 16/464, loss: 1.5961620807647705 2023-01-24 01:13:36.770831: step: 18/464, loss: 1.0718073844909668 2023-01-24 01:13:37.383149: step: 20/464, loss: 2.087601661682129 2023-01-24 01:13:37.976698: step: 22/464, loss: 1.253535270690918 2023-01-24 01:13:38.596060: step: 24/464, loss: 0.6115002632141113 2023-01-24 01:13:39.193787: step: 26/464, loss: 1.6747785806655884 2023-01-24 01:13:39.713854: step: 28/464, loss: 0.09929452836513519 2023-01-24 01:13:40.334522: step: 30/464, loss: 0.4556482434272766 2023-01-24 01:13:40.960925: step: 32/464, loss: 0.3613569438457489 2023-01-24 01:13:41.599803: step: 34/464, loss: 0.9141637086868286 2023-01-24 01:13:42.268414: step: 36/464, loss: 0.960303544998169 2023-01-24 01:13:42.894793: step: 38/464, loss: 0.6171204447746277 2023-01-24 01:13:43.462647: step: 40/464, loss: 0.3142845630645752 2023-01-24 01:13:44.093990: step: 42/464, loss: 0.7349892258644104 2023-01-24 01:13:44.683039: step: 44/464, loss: 0.2745680510997772 2023-01-24 01:13:45.329589: step: 46/464, loss: 0.3958454728126526 2023-01-24 01:13:45.921231: step: 48/464, loss: 0.395162969827652 2023-01-24 01:13:46.560337: step: 50/464, loss: 0.5048075318336487 2023-01-24 01:13:47.165684: step: 52/464, loss: 0.4473804831504822 2023-01-24 01:13:47.775945: step: 54/464, loss: 0.28719592094421387 2023-01-24 01:13:48.372472: step: 56/464, loss: 1.0184855461120605 2023-01-24 01:13:49.012342: step: 58/464, loss: 0.29800641536712646 2023-01-24 01:13:49.613464: step: 60/464, loss: 2.3515331745147705 2023-01-24 01:13:50.175385: step: 62/464, loss: 9.85141658782959 2023-01-24 01:13:50.832918: step: 64/464, loss: 0.6098653674125671 2023-01-24 01:13:51.486482: step: 66/464, loss: 0.5678577423095703 2023-01-24 01:13:52.065789: step: 68/464, loss: 0.693811297416687 2023-01-24 01:13:52.701446: step: 70/464, loss: 0.8950579762458801 2023-01-24 01:13:53.231225: step: 72/464, loss: 1.755286693572998 2023-01-24 01:13:53.802836: step: 74/464, loss: 1.2793998718261719 2023-01-24 01:13:54.434468: step: 76/464, loss: 0.23721827566623688 2023-01-24 01:13:54.997996: step: 78/464, loss: 1.0371506214141846 2023-01-24 01:13:55.589624: step: 80/464, loss: 0.9888033866882324 2023-01-24 01:13:56.223825: step: 82/464, loss: 0.5146254301071167 2023-01-24 01:13:56.859968: step: 84/464, loss: 0.8823146820068359 2023-01-24 01:13:57.373993: step: 86/464, loss: 0.6790046095848083 2023-01-24 01:13:58.098427: step: 88/464, loss: 0.34398648142814636 2023-01-24 01:13:58.759107: step: 90/464, loss: 0.4917071759700775 2023-01-24 01:13:59.339443: step: 92/464, loss: 1.7098023891448975 2023-01-24 01:13:59.967487: step: 94/464, loss: 0.45337536931037903 2023-01-24 01:14:00.645803: step: 96/464, loss: 1.211728572845459 2023-01-24 01:14:01.285965: step: 98/464, loss: 1.021390676498413 2023-01-24 01:14:01.910996: step: 100/464, loss: 0.7689708471298218 2023-01-24 01:14:02.601316: step: 102/464, loss: 1.190798282623291 2023-01-24 01:14:03.181593: step: 104/464, loss: 0.3627750873565674 2023-01-24 01:14:03.783640: step: 106/464, loss: 0.26812222599983215 2023-01-24 01:14:04.467355: step: 108/464, loss: 0.6177099347114563 2023-01-24 01:14:05.120461: step: 110/464, loss: 0.4237760007381439 2023-01-24 01:14:05.706371: step: 112/464, loss: 0.2965877056121826 2023-01-24 01:14:06.319074: step: 114/464, loss: 0.3406103849411011 2023-01-24 01:14:06.907894: step: 116/464, loss: 1.0979301929473877 2023-01-24 01:14:07.518966: step: 118/464, loss: 0.29808324575424194 2023-01-24 01:14:08.116732: step: 120/464, loss: 0.48130905628204346 2023-01-24 01:14:08.720322: step: 122/464, loss: 0.3830515444278717 2023-01-24 01:14:09.343006: step: 124/464, loss: 1.2250969409942627 2023-01-24 01:14:10.000734: step: 126/464, loss: 1.5770454406738281 2023-01-24 01:14:10.628490: step: 128/464, loss: 0.8493466973304749 2023-01-24 01:14:11.233109: step: 130/464, loss: 0.262433797121048 2023-01-24 01:14:11.839254: step: 132/464, loss: 0.8228477239608765 2023-01-24 01:14:12.437859: step: 134/464, loss: 0.5960792303085327 2023-01-24 01:14:12.989731: step: 136/464, loss: 3.4240593910217285 2023-01-24 01:14:13.555083: step: 138/464, loss: 1.304851770401001 2023-01-24 01:14:14.222357: step: 140/464, loss: 0.8198272585868835 2023-01-24 01:14:14.846720: step: 142/464, loss: 0.650009036064148 2023-01-24 01:14:15.496948: step: 144/464, loss: 0.6824265718460083 2023-01-24 01:14:16.080479: step: 146/464, loss: 0.849699079990387 2023-01-24 01:14:16.731927: step: 148/464, loss: 0.5839065313339233 2023-01-24 01:14:17.329400: step: 150/464, loss: 0.509751558303833 2023-01-24 01:14:17.983487: step: 152/464, loss: 0.324587881565094 2023-01-24 01:14:18.646568: step: 154/464, loss: 0.4301684498786926 2023-01-24 01:14:19.284142: step: 156/464, loss: 0.7258855700492859 2023-01-24 01:14:19.937354: step: 158/464, loss: 0.43304145336151123 2023-01-24 01:14:20.575895: step: 160/464, loss: 0.4341854155063629 2023-01-24 01:14:21.207397: step: 162/464, loss: 1.0745781660079956 2023-01-24 01:14:21.798258: step: 164/464, loss: 2.8442575931549072 2023-01-24 01:14:22.391240: step: 166/464, loss: 0.29139086604118347 2023-01-24 01:14:23.059805: step: 168/464, loss: 2.0749263763427734 2023-01-24 01:14:23.672338: step: 170/464, loss: 1.3194022178649902 2023-01-24 01:14:24.242056: step: 172/464, loss: 3.8606531620025635 2023-01-24 01:14:24.844570: step: 174/464, loss: 0.9364582896232605 2023-01-24 01:14:25.428805: step: 176/464, loss: 1.4038944244384766 2023-01-24 01:14:26.011751: step: 178/464, loss: 0.5467737913131714 2023-01-24 01:14:26.574468: step: 180/464, loss: 0.7304741740226746 2023-01-24 01:14:27.177023: step: 182/464, loss: 1.0861698389053345 2023-01-24 01:14:27.769531: step: 184/464, loss: 0.2547362148761749 2023-01-24 01:14:28.394919: step: 186/464, loss: 0.12271208316087723 2023-01-24 01:14:29.049106: step: 188/464, loss: 0.7968823313713074 2023-01-24 01:14:29.706788: step: 190/464, loss: 0.2650519609451294 2023-01-24 01:14:30.268109: step: 192/464, loss: 0.4801476299762726 2023-01-24 01:14:30.874063: step: 194/464, loss: 2.0805039405822754 2023-01-24 01:14:31.545134: step: 196/464, loss: 0.9843195676803589 2023-01-24 01:14:32.262985: step: 198/464, loss: 1.461974024772644 2023-01-24 01:14:32.870521: step: 200/464, loss: 0.3458203971385956 2023-01-24 01:14:33.510826: step: 202/464, loss: 3.31097149848938 2023-01-24 01:14:34.081541: step: 204/464, loss: 0.3172132670879364 2023-01-24 01:14:34.705892: step: 206/464, loss: 0.8039501905441284 2023-01-24 01:14:35.317027: step: 208/464, loss: 1.0592734813690186 2023-01-24 01:14:35.909413: step: 210/464, loss: 0.6010752320289612 2023-01-24 01:14:36.512891: step: 212/464, loss: 3.5686473846435547 2023-01-24 01:14:37.128461: step: 214/464, loss: 0.6694086790084839 2023-01-24 01:14:37.712888: step: 216/464, loss: 0.46622198820114136 2023-01-24 01:14:38.356184: step: 218/464, loss: 0.5628550052642822 2023-01-24 01:14:38.998856: step: 220/464, loss: 0.3373853266239166 2023-01-24 01:14:39.572674: step: 222/464, loss: 0.2996734082698822 2023-01-24 01:14:40.204585: step: 224/464, loss: 0.7901242971420288 2023-01-24 01:14:40.796935: step: 226/464, loss: 1.8366661071777344 2023-01-24 01:14:41.397843: step: 228/464, loss: 0.2719947099685669 2023-01-24 01:14:41.967936: step: 230/464, loss: 0.48229557275772095 2023-01-24 01:14:42.693660: step: 232/464, loss: 1.682943344116211 2023-01-24 01:14:43.322011: step: 234/464, loss: 0.5389991402626038 2023-01-24 01:14:43.991914: step: 236/464, loss: 1.144383192062378 2023-01-24 01:14:44.627885: step: 238/464, loss: 1.1018505096435547 2023-01-24 01:14:45.221811: step: 240/464, loss: 0.7944072484970093 2023-01-24 01:14:45.843381: step: 242/464, loss: 0.5787885189056396 2023-01-24 01:14:46.444262: step: 244/464, loss: 0.7037353515625 2023-01-24 01:14:47.040398: step: 246/464, loss: 0.9548017978668213 2023-01-24 01:14:47.661015: step: 248/464, loss: 0.4501967132091522 2023-01-24 01:14:48.302647: step: 250/464, loss: 0.16243723034858704 2023-01-24 01:14:49.010546: step: 252/464, loss: 1.9847934246063232 2023-01-24 01:14:49.615289: step: 254/464, loss: 0.5167011022567749 2023-01-24 01:14:50.264192: step: 256/464, loss: 0.6174933910369873 2023-01-24 01:14:50.974933: step: 258/464, loss: 2.048726797103882 2023-01-24 01:14:51.627246: step: 260/464, loss: 2.179675579071045 2023-01-24 01:14:52.270125: step: 262/464, loss: 4.515833377838135 2023-01-24 01:14:52.904998: step: 264/464, loss: 0.4232323467731476 2023-01-24 01:14:53.497778: step: 266/464, loss: 2.2544357776641846 2023-01-24 01:14:54.105234: step: 268/464, loss: 2.305783748626709 2023-01-24 01:14:54.709573: step: 270/464, loss: 0.31464582681655884 2023-01-24 01:14:55.320026: step: 272/464, loss: 3.5395264625549316 2023-01-24 01:14:55.926933: step: 274/464, loss: 0.4365318715572357 2023-01-24 01:14:56.578026: step: 276/464, loss: 1.1285804510116577 2023-01-24 01:14:57.284909: step: 278/464, loss: 1.1635280847549438 2023-01-24 01:14:57.907614: step: 280/464, loss: 0.29192355275154114 2023-01-24 01:14:58.555921: step: 282/464, loss: 0.5533744096755981 2023-01-24 01:14:59.195546: step: 284/464, loss: 0.31066614389419556 2023-01-24 01:14:59.839842: step: 286/464, loss: 0.7738747596740723 2023-01-24 01:15:00.529761: step: 288/464, loss: 1.1949348449707031 2023-01-24 01:15:01.160484: step: 290/464, loss: 0.6176042556762695 2023-01-24 01:15:01.753846: step: 292/464, loss: 2.0230178833007812 2023-01-24 01:15:02.395011: step: 294/464, loss: 1.5586206912994385 2023-01-24 01:15:03.059073: step: 296/464, loss: 0.7693490982055664 2023-01-24 01:15:03.630035: step: 298/464, loss: 0.6012693643569946 2023-01-24 01:15:04.245236: step: 300/464, loss: 1.4037246704101562 2023-01-24 01:15:04.827009: step: 302/464, loss: 0.4873807430267334 2023-01-24 01:15:05.429395: step: 304/464, loss: 0.6122986078262329 2023-01-24 01:15:06.110624: step: 306/464, loss: 1.4132821559906006 2023-01-24 01:15:06.745774: step: 308/464, loss: 0.44583189487457275 2023-01-24 01:15:07.439663: step: 310/464, loss: 0.8842347860336304 2023-01-24 01:15:08.018409: step: 312/464, loss: 0.3857048749923706 2023-01-24 01:15:08.632100: step: 314/464, loss: 0.2515755295753479 2023-01-24 01:15:09.202207: step: 316/464, loss: 0.999019205570221 2023-01-24 01:15:09.820018: step: 318/464, loss: 0.33942168951034546 2023-01-24 01:15:10.416954: step: 320/464, loss: 0.5291414260864258 2023-01-24 01:15:11.107802: step: 322/464, loss: 0.3920437693595886 2023-01-24 01:15:11.733395: step: 324/464, loss: 1.3074945211410522 2023-01-24 01:15:12.415468: step: 326/464, loss: 0.23588593304157257 2023-01-24 01:15:13.078314: step: 328/464, loss: 0.4870832562446594 2023-01-24 01:15:13.681730: step: 330/464, loss: 1.0733529329299927 2023-01-24 01:15:14.296849: step: 332/464, loss: 0.29912006855010986 2023-01-24 01:15:14.984718: step: 334/464, loss: 1.0160815715789795 2023-01-24 01:15:15.623376: step: 336/464, loss: 0.8775022625923157 2023-01-24 01:15:16.218856: step: 338/464, loss: 1.2015305757522583 2023-01-24 01:15:16.857095: step: 340/464, loss: 0.6995279788970947 2023-01-24 01:15:17.462481: step: 342/464, loss: 0.4498124420642853 2023-01-24 01:15:18.044288: step: 344/464, loss: 0.1893487572669983 2023-01-24 01:15:18.674052: step: 346/464, loss: 0.6980522871017456 2023-01-24 01:15:19.240110: step: 348/464, loss: 1.2606297731399536 2023-01-24 01:15:19.983812: step: 350/464, loss: 3.804093599319458 2023-01-24 01:15:20.545623: step: 352/464, loss: 0.7864630222320557 2023-01-24 01:15:21.165938: step: 354/464, loss: 0.36259323358535767 2023-01-24 01:15:21.804030: step: 356/464, loss: 0.5102464556694031 2023-01-24 01:15:22.429044: step: 358/464, loss: 0.34992679953575134 2023-01-24 01:15:23.009054: step: 360/464, loss: 0.4517691731452942 2023-01-24 01:15:23.723484: step: 362/464, loss: 0.6049309968948364 2023-01-24 01:15:24.396923: step: 364/464, loss: 0.6882205605506897 2023-01-24 01:15:25.052151: step: 366/464, loss: 1.3858107328414917 2023-01-24 01:15:25.618161: step: 368/464, loss: 0.28278064727783203 2023-01-24 01:15:26.281244: step: 370/464, loss: 0.41853728890419006 2023-01-24 01:15:26.933540: step: 372/464, loss: 1.2912614345550537 2023-01-24 01:15:27.586966: step: 374/464, loss: 0.9934861063957214 2023-01-24 01:15:28.226860: step: 376/464, loss: 1.2262226343154907 2023-01-24 01:15:28.918943: step: 378/464, loss: 0.5085292458534241 2023-01-24 01:15:29.597138: step: 380/464, loss: 0.6032164096832275 2023-01-24 01:15:30.231779: step: 382/464, loss: 1.4110809564590454 2023-01-24 01:15:30.841426: step: 384/464, loss: 0.8371944427490234 2023-01-24 01:15:31.534144: step: 386/464, loss: 0.6973156332969666 2023-01-24 01:15:32.169081: step: 388/464, loss: 0.9601186513900757 2023-01-24 01:15:32.765342: step: 390/464, loss: 1.5634307861328125 2023-01-24 01:15:33.387893: step: 392/464, loss: 0.2142696976661682 2023-01-24 01:15:33.997219: step: 394/464, loss: 1.0404516458511353 2023-01-24 01:15:34.626605: step: 396/464, loss: 1.0398080348968506 2023-01-24 01:15:35.213667: step: 398/464, loss: 0.3107651472091675 2023-01-24 01:15:35.835360: step: 400/464, loss: 0.8754726648330688 2023-01-24 01:15:36.457799: step: 402/464, loss: 3.0080599784851074 2023-01-24 01:15:37.139541: step: 404/464, loss: 2.916818380355835 2023-01-24 01:15:37.732778: step: 406/464, loss: 0.6109200119972229 2023-01-24 01:15:38.295026: step: 408/464, loss: 1.4414656162261963 2023-01-24 01:15:38.960135: step: 410/464, loss: 1.5391944646835327 2023-01-24 01:15:39.591712: step: 412/464, loss: 1.1875689029693604 2023-01-24 01:15:40.222869: step: 414/464, loss: 0.6385482549667358 2023-01-24 01:15:40.830200: step: 416/464, loss: 0.31985437870025635 2023-01-24 01:15:41.425819: step: 418/464, loss: 1.223050594329834 2023-01-24 01:15:42.143424: step: 420/464, loss: 0.47759753465652466 2023-01-24 01:15:42.749330: step: 422/464, loss: 0.7822179794311523 2023-01-24 01:15:43.376513: step: 424/464, loss: 0.6694520115852356 2023-01-24 01:15:44.007107: step: 426/464, loss: 0.48283255100250244 2023-01-24 01:15:44.648185: step: 428/464, loss: 0.5288424491882324 2023-01-24 01:15:45.303659: step: 430/464, loss: 0.6541222333908081 2023-01-24 01:15:45.891197: step: 432/464, loss: 1.5048766136169434 2023-01-24 01:15:46.471893: step: 434/464, loss: 0.977016031742096 2023-01-24 01:15:47.022599: step: 436/464, loss: 1.579056978225708 2023-01-24 01:15:47.643197: step: 438/464, loss: 1.9080028533935547 2023-01-24 01:15:48.249859: step: 440/464, loss: 1.2760496139526367 2023-01-24 01:15:48.846404: step: 442/464, loss: 3.4610836505889893 2023-01-24 01:15:49.429623: step: 444/464, loss: 0.7282527089118958 2023-01-24 01:15:50.133199: step: 446/464, loss: 3.853248357772827 2023-01-24 01:15:50.727817: step: 448/464, loss: 0.40053898096084595 2023-01-24 01:15:51.367672: step: 450/464, loss: 0.9175394177436829 2023-01-24 01:15:52.076276: step: 452/464, loss: 0.48572227358818054 2023-01-24 01:15:52.665871: step: 454/464, loss: 0.22067205607891083 2023-01-24 01:15:53.333268: step: 456/464, loss: 0.6411260366439819 2023-01-24 01:15:53.960543: step: 458/464, loss: 1.13019859790802 2023-01-24 01:15:54.580190: step: 460/464, loss: 0.9006060361862183 2023-01-24 01:15:55.175539: step: 462/464, loss: 0.8046503067016602 2023-01-24 01:15:55.812268: step: 464/464, loss: 1.4703752994537354 2023-01-24 01:15:56.481137: step: 466/464, loss: 0.3552650809288025 2023-01-24 01:15:57.085891: step: 468/464, loss: 1.3312273025512695 2023-01-24 01:15:57.767506: step: 470/464, loss: 0.3936591148376465 2023-01-24 01:15:58.421065: step: 472/464, loss: 0.6073078513145447 2023-01-24 01:15:59.105683: step: 474/464, loss: 0.47256404161453247 2023-01-24 01:15:59.720389: step: 476/464, loss: 0.3240320384502411 2023-01-24 01:16:00.364648: step: 478/464, loss: 3.5507354736328125 2023-01-24 01:16:00.981109: step: 480/464, loss: 0.995995283126831 2023-01-24 01:16:01.606660: step: 482/464, loss: 0.6410576701164246 2023-01-24 01:16:02.209414: step: 484/464, loss: 0.8179224729537964 2023-01-24 01:16:02.779331: step: 486/464, loss: 0.30543074011802673 2023-01-24 01:16:03.385054: step: 488/464, loss: 0.6726969480514526 2023-01-24 01:16:04.076159: step: 490/464, loss: 0.44736555218696594 2023-01-24 01:16:04.722108: step: 492/464, loss: 0.7336754202842712 2023-01-24 01:16:05.414774: step: 494/464, loss: 0.8342028260231018 2023-01-24 01:16:05.997899: step: 496/464, loss: 2.2336502075195312 2023-01-24 01:16:06.618075: step: 498/464, loss: 0.5034320950508118 2023-01-24 01:16:07.326320: step: 500/464, loss: 1.1821582317352295 2023-01-24 01:16:07.988296: step: 502/464, loss: 0.4648742377758026 2023-01-24 01:16:08.589106: step: 504/464, loss: 3.733365774154663 2023-01-24 01:16:09.210541: step: 506/464, loss: 1.5545661449432373 2023-01-24 01:16:09.853799: step: 508/464, loss: 0.7909056544303894 2023-01-24 01:16:10.483928: step: 510/464, loss: 0.8181418180465698 2023-01-24 01:16:11.088440: step: 512/464, loss: 1.5901449918746948 2023-01-24 01:16:11.694309: step: 514/464, loss: 0.7300226092338562 2023-01-24 01:16:12.335988: step: 516/464, loss: 2.1893913745880127 2023-01-24 01:16:13.022023: step: 518/464, loss: 0.2126644104719162 2023-01-24 01:16:13.663174: step: 520/464, loss: 0.6104111671447754 2023-01-24 01:16:14.312570: step: 522/464, loss: 0.49161142110824585 2023-01-24 01:16:14.915164: step: 524/464, loss: 1.7882384061813354 2023-01-24 01:16:15.598116: step: 526/464, loss: 3.518047332763672 2023-01-24 01:16:16.245189: step: 528/464, loss: 0.8232895731925964 2023-01-24 01:16:16.922761: step: 530/464, loss: 0.8883892893791199 2023-01-24 01:16:17.588865: step: 532/464, loss: 0.3270670175552368 2023-01-24 01:16:18.219764: step: 534/464, loss: 2.1954493522644043 2023-01-24 01:16:18.826467: step: 536/464, loss: 0.18684567511081696 2023-01-24 01:16:19.540057: step: 538/464, loss: 2.936201333999634 2023-01-24 01:16:20.162409: step: 540/464, loss: 1.008811593055725 2023-01-24 01:16:20.755149: step: 542/464, loss: 0.8713910579681396 2023-01-24 01:16:21.450384: step: 544/464, loss: 1.9630047082901 2023-01-24 01:16:22.106079: step: 546/464, loss: 1.2913490533828735 2023-01-24 01:16:22.703653: step: 548/464, loss: 0.835235595703125 2023-01-24 01:16:23.342792: step: 550/464, loss: 1.656591534614563 2023-01-24 01:16:24.151032: step: 552/464, loss: 0.384236603975296 2023-01-24 01:16:24.753884: step: 554/464, loss: 1.2251346111297607 2023-01-24 01:16:25.380712: step: 556/464, loss: 0.3945186138153076 2023-01-24 01:16:26.051532: step: 558/464, loss: 0.5363366603851318 2023-01-24 01:16:26.641245: step: 560/464, loss: 0.7180920243263245 2023-01-24 01:16:27.239225: step: 562/464, loss: 8.346687316894531 2023-01-24 01:16:27.881815: step: 564/464, loss: 1.5861282348632812 2023-01-24 01:16:28.468722: step: 566/464, loss: 0.1805248260498047 2023-01-24 01:16:29.090308: step: 568/464, loss: 0.3826966881752014 2023-01-24 01:16:29.731718: step: 570/464, loss: 1.587929368019104 2023-01-24 01:16:30.313435: step: 572/464, loss: 0.8956756591796875 2023-01-24 01:16:31.008601: step: 574/464, loss: 0.4481387734413147 2023-01-24 01:16:31.713578: step: 576/464, loss: 1.6725045442581177 2023-01-24 01:16:32.376687: step: 578/464, loss: 0.6068788766860962 2023-01-24 01:16:33.071252: step: 580/464, loss: 1.715501308441162 2023-01-24 01:16:33.718489: step: 582/464, loss: 1.3745126724243164 2023-01-24 01:16:34.303643: step: 584/464, loss: 0.45005327463150024 2023-01-24 01:16:34.836215: step: 586/464, loss: 0.4800697863101959 2023-01-24 01:16:35.514617: step: 588/464, loss: 0.12299307435750961 2023-01-24 01:16:36.053749: step: 590/464, loss: 1.523522138595581 2023-01-24 01:16:36.639415: step: 592/464, loss: 0.9146558046340942 2023-01-24 01:16:37.271473: step: 594/464, loss: 1.3624142408370972 2023-01-24 01:16:37.867755: step: 596/464, loss: 0.13913631439208984 2023-01-24 01:16:38.497338: step: 598/464, loss: 0.4703514575958252 2023-01-24 01:16:39.123829: step: 600/464, loss: 1.6409592628479004 2023-01-24 01:16:39.772562: step: 602/464, loss: 0.6029088497161865 2023-01-24 01:16:40.341024: step: 604/464, loss: 0.4723614752292633 2023-01-24 01:16:41.017520: step: 606/464, loss: 1.2699775695800781 2023-01-24 01:16:41.702393: step: 608/464, loss: 1.0261311531066895 2023-01-24 01:16:42.321578: step: 610/464, loss: 1.0511482954025269 2023-01-24 01:16:42.980197: step: 612/464, loss: 0.19793467223644257 2023-01-24 01:16:43.592647: step: 614/464, loss: 2.3930282592773438 2023-01-24 01:16:44.164738: step: 616/464, loss: 0.9835858345031738 2023-01-24 01:16:44.758281: step: 618/464, loss: 0.8501909375190735 2023-01-24 01:16:45.383300: step: 620/464, loss: 0.7180901765823364 2023-01-24 01:16:46.083230: step: 622/464, loss: 0.4181664288043976 2023-01-24 01:16:46.710801: step: 624/464, loss: 0.4150451719760895 2023-01-24 01:16:47.268735: step: 626/464, loss: 0.8431905508041382 2023-01-24 01:16:47.923141: step: 628/464, loss: 0.3675621449947357 2023-01-24 01:16:48.485135: step: 630/464, loss: 0.41591790318489075 2023-01-24 01:16:49.054912: step: 632/464, loss: 0.8589701652526855 2023-01-24 01:16:49.623034: step: 634/464, loss: 1.0192818641662598 2023-01-24 01:16:50.231409: step: 636/464, loss: 2.393925428390503 2023-01-24 01:16:50.817933: step: 638/464, loss: 0.49664920568466187 2023-01-24 01:16:51.448101: step: 640/464, loss: 1.0956499576568604 2023-01-24 01:16:52.104686: step: 642/464, loss: 0.6687727570533752 2023-01-24 01:16:52.715654: step: 644/464, loss: 1.0667164325714111 2023-01-24 01:16:53.449400: step: 646/464, loss: 3.0677218437194824 2023-01-24 01:16:54.031334: step: 648/464, loss: 1.1712805032730103 2023-01-24 01:16:54.643979: step: 650/464, loss: 0.5602961182594299 2023-01-24 01:16:55.282655: step: 652/464, loss: 1.0304670333862305 2023-01-24 01:16:55.877294: step: 654/464, loss: 0.3069724440574646 2023-01-24 01:16:56.531403: step: 656/464, loss: 1.068381905555725 2023-01-24 01:16:57.097424: step: 658/464, loss: 1.361100673675537 2023-01-24 01:16:57.680326: step: 660/464, loss: 0.9713236689567566 2023-01-24 01:16:58.291853: step: 662/464, loss: 0.4035855233669281 2023-01-24 01:16:58.909547: step: 664/464, loss: 1.217246651649475 2023-01-24 01:16:59.586806: step: 666/464, loss: 1.0265988111495972 2023-01-24 01:17:00.211835: step: 668/464, loss: 1.0197653770446777 2023-01-24 01:17:00.834310: step: 670/464, loss: 0.36315852403640747 2023-01-24 01:17:01.431498: step: 672/464, loss: 0.7223759889602661 2023-01-24 01:17:02.070863: step: 674/464, loss: 1.1136391162872314 2023-01-24 01:17:02.696480: step: 676/464, loss: 0.37483763694763184 2023-01-24 01:17:03.388609: step: 678/464, loss: 0.7622231841087341 2023-01-24 01:17:04.026014: step: 680/464, loss: 1.3584575653076172 2023-01-24 01:17:04.605880: step: 682/464, loss: 3.232109546661377 2023-01-24 01:17:05.254988: step: 684/464, loss: 1.17849862575531 2023-01-24 01:17:05.892444: step: 686/464, loss: 0.5659350752830505 2023-01-24 01:17:06.528132: step: 688/464, loss: 0.8451026082038879 2023-01-24 01:17:07.119546: step: 690/464, loss: 8.128095626831055 2023-01-24 01:17:07.706531: step: 692/464, loss: 0.3531476557254791 2023-01-24 01:17:08.330060: step: 694/464, loss: 0.9488494396209717 2023-01-24 01:17:08.830685: step: 696/464, loss: 0.7125240564346313 2023-01-24 01:17:09.468611: step: 698/464, loss: 0.43225884437561035 2023-01-24 01:17:10.101501: step: 700/464, loss: 1.4727071523666382 2023-01-24 01:17:10.710066: step: 702/464, loss: 0.9909815788269043 2023-01-24 01:17:11.283544: step: 704/464, loss: 0.717685878276825 2023-01-24 01:17:11.985992: step: 706/464, loss: 0.8827025890350342 2023-01-24 01:17:12.657756: step: 708/464, loss: 0.8928143978118896 2023-01-24 01:17:13.330572: step: 710/464, loss: 0.45587989687919617 2023-01-24 01:17:13.982884: step: 712/464, loss: 0.3754402697086334 2023-01-24 01:17:14.600405: step: 714/464, loss: 6.441854000091553 2023-01-24 01:17:15.263764: step: 716/464, loss: 0.9459872245788574 2023-01-24 01:17:15.940666: step: 718/464, loss: 2.9799163341522217 2023-01-24 01:17:16.523358: step: 720/464, loss: 0.3675011098384857 2023-01-24 01:17:17.133628: step: 722/464, loss: 0.39418911933898926 2023-01-24 01:17:17.831538: step: 724/464, loss: 0.24418479204177856 2023-01-24 01:17:18.452930: step: 726/464, loss: 3.739204168319702 2023-01-24 01:17:19.120388: step: 728/464, loss: 0.9147809743881226 2023-01-24 01:17:19.758876: step: 730/464, loss: 0.7824108600616455 2023-01-24 01:17:20.300423: step: 732/464, loss: 0.20895031094551086 2023-01-24 01:17:20.902873: step: 734/464, loss: 4.588395118713379 2023-01-24 01:17:21.531783: step: 736/464, loss: 1.1733981370925903 2023-01-24 01:17:22.132004: step: 738/464, loss: 0.8675525784492493 2023-01-24 01:17:22.784723: step: 740/464, loss: 0.52099609375 2023-01-24 01:17:23.412289: step: 742/464, loss: 0.5001940727233887 2023-01-24 01:17:24.078435: step: 744/464, loss: 0.8723838329315186 2023-01-24 01:17:24.743677: step: 746/464, loss: 6.227485656738281 2023-01-24 01:17:25.355086: step: 748/464, loss: 1.8690444231033325 2023-01-24 01:17:26.007720: step: 750/464, loss: 0.33338722586631775 2023-01-24 01:17:26.622208: step: 752/464, loss: 0.8549349904060364 2023-01-24 01:17:27.187624: step: 754/464, loss: 0.18853016197681427 2023-01-24 01:17:27.794501: step: 756/464, loss: 0.5232789516448975 2023-01-24 01:17:28.423246: step: 758/464, loss: 0.5195374488830566 2023-01-24 01:17:29.080388: step: 760/464, loss: 1.1466377973556519 2023-01-24 01:17:29.683095: step: 762/464, loss: 2.1955342292785645 2023-01-24 01:17:30.317813: step: 764/464, loss: 1.1806930303573608 2023-01-24 01:17:30.918231: step: 766/464, loss: 0.9567260146141052 2023-01-24 01:17:31.529080: step: 768/464, loss: 0.49507617950439453 2023-01-24 01:17:32.145439: step: 770/464, loss: 0.4921751022338867 2023-01-24 01:17:32.699763: step: 772/464, loss: 0.3551686406135559 2023-01-24 01:17:33.296002: step: 774/464, loss: 4.9337310791015625 2023-01-24 01:17:33.883572: step: 776/464, loss: 1.8172357082366943 2023-01-24 01:17:34.495834: step: 778/464, loss: 0.5766383409500122 2023-01-24 01:17:35.339055: step: 780/464, loss: 0.9690201878547668 2023-01-24 01:17:35.968653: step: 782/464, loss: 0.39389437437057495 2023-01-24 01:17:36.621264: step: 784/464, loss: 0.7511005401611328 2023-01-24 01:17:37.371061: step: 786/464, loss: 0.2664196491241455 2023-01-24 01:17:37.998786: step: 788/464, loss: 5.700353622436523 2023-01-24 01:17:38.653670: step: 790/464, loss: 0.8287971615791321 2023-01-24 01:17:39.298387: step: 792/464, loss: 2.5748291015625 2023-01-24 01:17:39.912911: step: 794/464, loss: 0.9550393223762512 2023-01-24 01:17:40.476895: step: 796/464, loss: 0.7970628142356873 2023-01-24 01:17:41.117788: step: 798/464, loss: 3.856137752532959 2023-01-24 01:17:41.759399: step: 800/464, loss: 0.6149031519889832 2023-01-24 01:17:42.358892: step: 802/464, loss: 0.25857508182525635 2023-01-24 01:17:43.004119: step: 804/464, loss: 0.5109097361564636 2023-01-24 01:17:43.650896: step: 806/464, loss: 2.601020336151123 2023-01-24 01:17:44.221539: step: 808/464, loss: 0.8694771528244019 2023-01-24 01:17:44.842693: step: 810/464, loss: 2.0033280849456787 2023-01-24 01:17:45.496836: step: 812/464, loss: 0.6385647058486938 2023-01-24 01:17:46.132750: step: 814/464, loss: 0.7749675512313843 2023-01-24 01:17:46.766952: step: 816/464, loss: 2.016286849975586 2023-01-24 01:17:47.447589: step: 818/464, loss: 0.6855290532112122 2023-01-24 01:17:48.083980: step: 820/464, loss: 0.5522828102111816 2023-01-24 01:17:48.701390: step: 822/464, loss: 0.7204458713531494 2023-01-24 01:17:49.298984: step: 824/464, loss: 9.690481185913086 2023-01-24 01:17:49.893559: step: 826/464, loss: 5.666155815124512 2023-01-24 01:17:50.506312: step: 828/464, loss: 0.40514591336250305 2023-01-24 01:17:51.207288: step: 830/464, loss: 1.7112529277801514 2023-01-24 01:17:51.813255: step: 832/464, loss: 0.20129477977752686 2023-01-24 01:17:52.441067: step: 834/464, loss: 0.801225483417511 2023-01-24 01:17:53.220715: step: 836/464, loss: 0.6476247310638428 2023-01-24 01:17:53.891078: step: 838/464, loss: 0.4347991347312927 2023-01-24 01:17:54.514803: step: 840/464, loss: 1.338456153869629 2023-01-24 01:17:55.133951: step: 842/464, loss: 1.0563338994979858 2023-01-24 01:17:55.759793: step: 844/464, loss: 0.5551201105117798 2023-01-24 01:17:56.422816: step: 846/464, loss: 0.4709798991680145 2023-01-24 01:17:57.047942: step: 848/464, loss: 0.2790720462799072 2023-01-24 01:17:57.684626: step: 850/464, loss: 0.9369035363197327 2023-01-24 01:17:58.353417: step: 852/464, loss: 0.46123066544532776 2023-01-24 01:17:58.967052: step: 854/464, loss: 1.2389100790023804 2023-01-24 01:17:59.640380: step: 856/464, loss: 1.399316430091858 2023-01-24 01:18:00.278140: step: 858/464, loss: 1.239919662475586 2023-01-24 01:18:00.870061: step: 860/464, loss: 4.345308303833008 2023-01-24 01:18:01.544217: step: 862/464, loss: 0.33345890045166016 2023-01-24 01:18:02.140996: step: 864/464, loss: 0.2196376919746399 2023-01-24 01:18:02.755179: step: 866/464, loss: 0.7692931890487671 2023-01-24 01:18:03.410582: step: 868/464, loss: 1.874446988105774 2023-01-24 01:18:04.043881: step: 870/464, loss: 1.640389084815979 2023-01-24 01:18:04.593229: step: 872/464, loss: 0.6178362965583801 2023-01-24 01:18:05.183380: step: 874/464, loss: 1.367974042892456 2023-01-24 01:18:05.854346: step: 876/464, loss: 0.2461293339729309 2023-01-24 01:18:06.517677: step: 878/464, loss: 0.5997183918952942 2023-01-24 01:18:07.086870: step: 880/464, loss: 0.7863847017288208 2023-01-24 01:18:07.726819: step: 882/464, loss: 0.7832212448120117 2023-01-24 01:18:08.318845: step: 884/464, loss: 0.4306342899799347 2023-01-24 01:18:08.950746: step: 886/464, loss: 0.5308839678764343 2023-01-24 01:18:09.563489: step: 888/464, loss: 0.610247790813446 2023-01-24 01:18:10.171586: step: 890/464, loss: 0.8606748580932617 2023-01-24 01:18:10.834265: step: 892/464, loss: 1.7569546699523926 2023-01-24 01:18:11.437076: step: 894/464, loss: 0.2615882158279419 2023-01-24 01:18:12.097595: step: 896/464, loss: 0.26798972487449646 2023-01-24 01:18:12.704683: step: 898/464, loss: 1.191440224647522 2023-01-24 01:18:13.360423: step: 900/464, loss: 0.333391398191452 2023-01-24 01:18:13.952400: step: 902/464, loss: 0.4805077612400055 2023-01-24 01:18:14.533637: step: 904/464, loss: 1.0383741855621338 2023-01-24 01:18:15.243559: step: 906/464, loss: 1.8628451824188232 2023-01-24 01:18:15.861433: step: 908/464, loss: 1.6816130876541138 2023-01-24 01:18:16.507817: step: 910/464, loss: 0.8756980895996094 2023-01-24 01:18:17.229729: step: 912/464, loss: 0.6809302568435669 2023-01-24 01:18:17.849748: step: 914/464, loss: 0.9276522994041443 2023-01-24 01:18:18.497792: step: 916/464, loss: 4.850551128387451 2023-01-24 01:18:19.053626: step: 918/464, loss: 0.8563334941864014 2023-01-24 01:18:19.650089: step: 920/464, loss: 0.20570218563079834 2023-01-24 01:18:20.233345: step: 922/464, loss: 4.263764381408691 2023-01-24 01:18:20.983586: step: 924/464, loss: 2.06984281539917 2023-01-24 01:18:21.625081: step: 926/464, loss: 0.24161413311958313 2023-01-24 01:18:22.244898: step: 928/464, loss: 0.3930075764656067 2023-01-24 01:18:22.811044: step: 930/464, loss: 0.3355711102485657 ================================================== Loss: 1.110 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34536215816703625, 'r': 0.2955566097406705, 'f1': 0.3185241990456715}, 'combined': 0.2347020414020737, 'epoch': 4} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32197046682912944, 'r': 0.26638695189444045, 'f1': 0.2915531563950208}, 'combined': 0.1903404026205835, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3410999449339207, 'r': 0.2938508064516129, 'f1': 0.31571738022426094}, 'combined': 0.2326338591126133, 'epoch': 4} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3248566110008369, 'r': 0.26191377819902184, 'f1': 0.2900092572025773}, 'combined': 0.18933246843277068, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2745098039215686, 'r': 0.2666666666666666, 'f1': 0.27053140096618356}, 'combined': 0.18035426731078902, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 4} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34536215816703625, 'r': 0.2955566097406705, 'f1': 0.3185241990456715}, 'combined': 0.2347020414020737, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32197046682912944, 'r': 0.26638695189444045, 'f1': 0.2915531563950208}, 'combined': 0.1903404026205835, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2745098039215686, 'r': 0.2666666666666666, 'f1': 0.27053140096618356}, 'combined': 0.18035426731078902, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3410999449339207, 'r': 0.2938508064516129, 'f1': 0.31571738022426094}, 'combined': 0.2326338591126133, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3248566110008369, 'r': 0.26191377819902184, 'f1': 0.2900092572025773}, 'combined': 0.18933246843277068, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:21:18.296573: step: 2/464, loss: 0.3140789270401001 2023-01-24 01:21:19.029571: step: 4/464, loss: 0.5168225169181824 2023-01-24 01:21:19.643166: step: 6/464, loss: 0.41354134678840637 2023-01-24 01:21:20.222113: step: 8/464, loss: 0.481793075799942 2023-01-24 01:21:20.796491: step: 10/464, loss: 0.40146562457084656 2023-01-24 01:21:21.535596: step: 12/464, loss: 0.2971474528312683 2023-01-24 01:21:22.167247: step: 14/464, loss: 0.2346367985010147 2023-01-24 01:21:22.789992: step: 16/464, loss: 1.0591741800308228 2023-01-24 01:21:23.379025: step: 18/464, loss: 0.5679463744163513 2023-01-24 01:21:23.945532: step: 20/464, loss: 0.1803908348083496 2023-01-24 01:21:24.552239: step: 22/464, loss: 0.9101299047470093 2023-01-24 01:21:25.220509: step: 24/464, loss: 0.6895893812179565 2023-01-24 01:21:25.823545: step: 26/464, loss: 0.9380605220794678 2023-01-24 01:21:26.408332: step: 28/464, loss: 1.3286418914794922 2023-01-24 01:21:27.056908: step: 30/464, loss: 0.17582368850708008 2023-01-24 01:21:27.706894: step: 32/464, loss: 0.3735285699367523 2023-01-24 01:21:28.337295: step: 34/464, loss: 1.2204426527023315 2023-01-24 01:21:29.104235: step: 36/464, loss: 1.5764824151992798 2023-01-24 01:21:29.778541: step: 38/464, loss: 0.26649218797683716 2023-01-24 01:21:30.413606: step: 40/464, loss: 0.17335833609104156 2023-01-24 01:21:31.001983: step: 42/464, loss: 0.6150107979774475 2023-01-24 01:21:31.686056: step: 44/464, loss: 1.0510187149047852 2023-01-24 01:21:32.299788: step: 46/464, loss: 0.301383912563324 2023-01-24 01:21:32.980654: step: 48/464, loss: 0.20942439138889313 2023-01-24 01:21:33.544271: step: 50/464, loss: 1.81709623336792 2023-01-24 01:21:34.185363: step: 52/464, loss: 0.25536850094795227 2023-01-24 01:21:34.790223: step: 54/464, loss: 0.4041978716850281 2023-01-24 01:21:35.386922: step: 56/464, loss: 0.3635302186012268 2023-01-24 01:21:36.055714: step: 58/464, loss: 0.16067996621131897 2023-01-24 01:21:36.644548: step: 60/464, loss: 0.16115136444568634 2023-01-24 01:21:37.244437: step: 62/464, loss: 0.35865315794944763 2023-01-24 01:21:37.805686: step: 64/464, loss: 1.2715744972229004 2023-01-24 01:21:38.396652: step: 66/464, loss: 1.9927908182144165 2023-01-24 01:21:39.079230: step: 68/464, loss: 0.31118354201316833 2023-01-24 01:21:39.716949: step: 70/464, loss: 0.34601879119873047 2023-01-24 01:21:40.386595: step: 72/464, loss: 1.2204174995422363 2023-01-24 01:21:40.915190: step: 74/464, loss: 0.5587912201881409 2023-01-24 01:21:41.517208: step: 76/464, loss: 0.6392359137535095 2023-01-24 01:21:42.218225: step: 78/464, loss: 0.5543754696846008 2023-01-24 01:21:42.814017: step: 80/464, loss: 1.1867753267288208 2023-01-24 01:21:43.470437: step: 82/464, loss: 0.7671768665313721 2023-01-24 01:21:44.110881: step: 84/464, loss: 0.17810527980327606 2023-01-24 01:21:44.747978: step: 86/464, loss: 0.910143256187439 2023-01-24 01:21:45.377934: step: 88/464, loss: 0.7807407975196838 2023-01-24 01:21:45.991670: step: 90/464, loss: 1.1668219566345215 2023-01-24 01:21:46.563683: step: 92/464, loss: 0.11305569857358932 2023-01-24 01:21:47.253439: step: 94/464, loss: 0.4058632254600525 2023-01-24 01:21:47.930565: step: 96/464, loss: 0.36806875467300415 2023-01-24 01:21:48.589542: step: 98/464, loss: 0.4692281186580658 2023-01-24 01:21:49.150907: step: 100/464, loss: 0.42481958866119385 2023-01-24 01:21:49.749907: step: 102/464, loss: 0.2551076412200928 2023-01-24 01:21:50.340002: step: 104/464, loss: 0.24660855531692505 2023-01-24 01:21:51.028015: step: 106/464, loss: 0.47354790568351746 2023-01-24 01:21:51.675413: step: 108/464, loss: 0.47715210914611816 2023-01-24 01:21:52.386189: step: 110/464, loss: 0.22332307696342468 2023-01-24 01:21:53.005919: step: 112/464, loss: 0.1295851618051529 2023-01-24 01:21:53.679888: step: 114/464, loss: 0.808097779750824 2023-01-24 01:21:54.266114: step: 116/464, loss: 2.0270867347717285 2023-01-24 01:21:54.934664: step: 118/464, loss: 1.2771189212799072 2023-01-24 01:21:55.675710: step: 120/464, loss: 0.3275717496871948 2023-01-24 01:21:56.279778: step: 122/464, loss: 0.4346131682395935 2023-01-24 01:21:56.898427: step: 124/464, loss: 1.078847050666809 2023-01-24 01:21:57.518967: step: 126/464, loss: 1.6871562004089355 2023-01-24 01:21:58.137768: step: 128/464, loss: 0.5023834109306335 2023-01-24 01:21:58.787475: step: 130/464, loss: 0.4305103123188019 2023-01-24 01:21:59.448359: step: 132/464, loss: 0.19542160630226135 2023-01-24 01:22:00.073228: step: 134/464, loss: 0.7721800804138184 2023-01-24 01:22:00.674697: step: 136/464, loss: 1.2299044132232666 2023-01-24 01:22:01.194073: step: 138/464, loss: 0.883644163608551 2023-01-24 01:22:01.820637: step: 140/464, loss: 0.7875983715057373 2023-01-24 01:22:02.378364: step: 142/464, loss: 0.19403350353240967 2023-01-24 01:22:03.006709: step: 144/464, loss: 0.787096381187439 2023-01-24 01:22:03.694548: step: 146/464, loss: 0.6089295148849487 2023-01-24 01:22:04.314032: step: 148/464, loss: 0.6090214252471924 2023-01-24 01:22:04.874046: step: 150/464, loss: 1.256813645362854 2023-01-24 01:22:05.471005: step: 152/464, loss: 0.6920052170753479 2023-01-24 01:22:06.063324: step: 154/464, loss: 1.9848008155822754 2023-01-24 01:22:06.676944: step: 156/464, loss: 0.5117339491844177 2023-01-24 01:22:07.374341: step: 158/464, loss: 0.5870523452758789 2023-01-24 01:22:08.029271: step: 160/464, loss: 1.0715384483337402 2023-01-24 01:22:08.664517: step: 162/464, loss: 0.8279769420623779 2023-01-24 01:22:09.321001: step: 164/464, loss: 1.3789533376693726 2023-01-24 01:22:09.932424: step: 166/464, loss: 1.1194424629211426 2023-01-24 01:22:10.486865: step: 168/464, loss: 1.0870202779769897 2023-01-24 01:22:11.134448: step: 170/464, loss: 0.4928306043148041 2023-01-24 01:22:11.700074: step: 172/464, loss: 0.7663793563842773 2023-01-24 01:22:12.377619: step: 174/464, loss: 0.753650963306427 2023-01-24 01:22:12.997499: step: 176/464, loss: 0.362321674823761 2023-01-24 01:22:13.590708: step: 178/464, loss: 1.0201119184494019 2023-01-24 01:22:14.226611: step: 180/464, loss: 2.436824083328247 2023-01-24 01:22:14.874429: step: 182/464, loss: 0.6812862157821655 2023-01-24 01:22:15.522778: step: 184/464, loss: 0.7665892243385315 2023-01-24 01:22:16.151054: step: 186/464, loss: 3.6190996170043945 2023-01-24 01:22:16.750718: step: 188/464, loss: 1.214016318321228 2023-01-24 01:22:17.391105: step: 190/464, loss: 1.0860960483551025 2023-01-24 01:22:18.040203: step: 192/464, loss: 0.4875420928001404 2023-01-24 01:22:18.639230: step: 194/464, loss: 0.17811265587806702 2023-01-24 01:22:19.349697: step: 196/464, loss: 0.43976664543151855 2023-01-24 01:22:20.077099: step: 198/464, loss: 0.7189344763755798 2023-01-24 01:22:20.716789: step: 200/464, loss: 0.6835828423500061 2023-01-24 01:22:21.351760: step: 202/464, loss: 0.5588489174842834 2023-01-24 01:22:21.993794: step: 204/464, loss: 0.6121894717216492 2023-01-24 01:22:22.636232: step: 206/464, loss: 1.1947513818740845 2023-01-24 01:22:23.248455: step: 208/464, loss: 0.6017618775367737 2023-01-24 01:22:23.847649: step: 210/464, loss: 0.4701478183269501 2023-01-24 01:22:24.493248: step: 212/464, loss: 0.9148519039154053 2023-01-24 01:22:25.157030: step: 214/464, loss: 2.0866644382476807 2023-01-24 01:22:25.687341: step: 216/464, loss: 0.6283119320869446 2023-01-24 01:22:26.368803: step: 218/464, loss: 0.5661494731903076 2023-01-24 01:22:26.966195: step: 220/464, loss: 0.5721613764762878 2023-01-24 01:22:27.574989: step: 222/464, loss: 1.6528464555740356 2023-01-24 01:22:28.210502: step: 224/464, loss: 1.1147488355636597 2023-01-24 01:22:28.846358: step: 226/464, loss: 0.4663833677768707 2023-01-24 01:22:29.463264: step: 228/464, loss: 0.6291152834892273 2023-01-24 01:22:30.107805: step: 230/464, loss: 0.9880251884460449 2023-01-24 01:22:30.761527: step: 232/464, loss: 0.8147752285003662 2023-01-24 01:22:31.372669: step: 234/464, loss: 6.869061470031738 2023-01-24 01:22:32.084073: step: 236/464, loss: 0.6083580255508423 2023-01-24 01:22:32.732862: step: 238/464, loss: 0.4468768537044525 2023-01-24 01:22:33.426940: step: 240/464, loss: 0.6752504706382751 2023-01-24 01:22:34.062745: step: 242/464, loss: 0.5524731278419495 2023-01-24 01:22:34.722329: step: 244/464, loss: 1.4635412693023682 2023-01-24 01:22:35.331432: step: 246/464, loss: 0.24824781715869904 2023-01-24 01:22:35.983235: step: 248/464, loss: 0.8254262208938599 2023-01-24 01:22:36.567737: step: 250/464, loss: 1.0828063488006592 2023-01-24 01:22:37.183048: step: 252/464, loss: 1.3140785694122314 2023-01-24 01:22:37.811526: step: 254/464, loss: 0.3395722806453705 2023-01-24 01:22:38.432734: step: 256/464, loss: 0.39691609144210815 2023-01-24 01:22:39.090253: step: 258/464, loss: 1.0010672807693481 2023-01-24 01:22:39.795579: step: 260/464, loss: 0.33052146434783936 2023-01-24 01:22:40.418281: step: 262/464, loss: 2.2361369132995605 2023-01-24 01:22:41.001391: step: 264/464, loss: 0.15512794256210327 2023-01-24 01:22:41.626905: step: 266/464, loss: 0.42286238074302673 2023-01-24 01:22:42.186349: step: 268/464, loss: 0.4806721806526184 2023-01-24 01:22:42.883822: step: 270/464, loss: 1.3433846235275269 2023-01-24 01:22:43.601650: step: 272/464, loss: 0.27988773584365845 2023-01-24 01:22:44.245715: step: 274/464, loss: 0.4252055287361145 2023-01-24 01:22:44.893924: step: 276/464, loss: 0.4951595366001129 2023-01-24 01:22:45.455948: step: 278/464, loss: 0.18977151811122894 2023-01-24 01:22:46.056632: step: 280/464, loss: 0.44850343465805054 2023-01-24 01:22:46.735012: step: 282/464, loss: 1.0554685592651367 2023-01-24 01:22:47.399777: step: 284/464, loss: 1.7502734661102295 2023-01-24 01:22:47.998248: step: 286/464, loss: 0.28199800848960876 2023-01-24 01:22:48.586215: step: 288/464, loss: 0.3071030080318451 2023-01-24 01:22:49.184393: step: 290/464, loss: 1.5086228847503662 2023-01-24 01:22:49.833111: step: 292/464, loss: 0.7221007347106934 2023-01-24 01:22:50.416332: step: 294/464, loss: 0.6204640865325928 2023-01-24 01:22:51.080985: step: 296/464, loss: 0.6025323271751404 2023-01-24 01:22:51.632097: step: 298/464, loss: 0.6046009063720703 2023-01-24 01:22:52.272668: step: 300/464, loss: 0.8346729278564453 2023-01-24 01:22:52.934461: step: 302/464, loss: 1.0257431268692017 2023-01-24 01:22:53.675868: step: 304/464, loss: 2.040140151977539 2023-01-24 01:22:54.385627: step: 306/464, loss: 1.0125569105148315 2023-01-24 01:22:54.994149: step: 308/464, loss: 0.6352773904800415 2023-01-24 01:22:55.605197: step: 310/464, loss: 0.3539637625217438 2023-01-24 01:22:56.238035: step: 312/464, loss: 0.8425735831260681 2023-01-24 01:22:56.879053: step: 314/464, loss: 0.39707064628601074 2023-01-24 01:22:57.583292: step: 316/464, loss: 1.3782291412353516 2023-01-24 01:22:58.224342: step: 318/464, loss: 1.362913727760315 2023-01-24 01:22:58.864031: step: 320/464, loss: 0.5295073390007019 2023-01-24 01:22:59.482254: step: 322/464, loss: 1.2126305103302002 2023-01-24 01:23:00.155483: step: 324/464, loss: 0.2630016505718231 2023-01-24 01:23:00.802134: step: 326/464, loss: 1.073133945465088 2023-01-24 01:23:01.411967: step: 328/464, loss: 0.4515211880207062 2023-01-24 01:23:02.077452: step: 330/464, loss: 1.6365458965301514 2023-01-24 01:23:02.757996: step: 332/464, loss: 0.25209635496139526 2023-01-24 01:23:03.464869: step: 334/464, loss: 0.41742879152297974 2023-01-24 01:23:04.103053: step: 336/464, loss: 1.067553162574768 2023-01-24 01:23:04.723150: step: 338/464, loss: 0.4192078113555908 2023-01-24 01:23:05.323279: step: 340/464, loss: 1.813993215560913 2023-01-24 01:23:05.870983: step: 342/464, loss: 1.2661583423614502 2023-01-24 01:23:06.490700: step: 344/464, loss: 1.9221551418304443 2023-01-24 01:23:07.143387: step: 346/464, loss: 1.029069185256958 2023-01-24 01:23:07.757559: step: 348/464, loss: 0.9227486848831177 2023-01-24 01:23:08.336970: step: 350/464, loss: 0.9554811120033264 2023-01-24 01:23:09.088540: step: 352/464, loss: 0.9297544956207275 2023-01-24 01:23:09.695096: step: 354/464, loss: 0.6628623008728027 2023-01-24 01:23:10.229252: step: 356/464, loss: 0.4751133322715759 2023-01-24 01:23:10.869600: step: 358/464, loss: 0.9389272928237915 2023-01-24 01:23:11.494660: step: 360/464, loss: 1.0912551879882812 2023-01-24 01:23:12.167560: step: 362/464, loss: 0.8685484528541565 2023-01-24 01:23:12.858788: step: 364/464, loss: 1.7897229194641113 2023-01-24 01:23:13.483341: step: 366/464, loss: 0.3098595142364502 2023-01-24 01:23:14.080556: step: 368/464, loss: 0.6312590837478638 2023-01-24 01:23:14.710819: step: 370/464, loss: 0.5151140689849854 2023-01-24 01:23:15.286011: step: 372/464, loss: 0.6841112375259399 2023-01-24 01:23:15.910525: step: 374/464, loss: 1.679078459739685 2023-01-24 01:23:16.535492: step: 376/464, loss: 0.6533408164978027 2023-01-24 01:23:17.164774: step: 378/464, loss: 1.0430176258087158 2023-01-24 01:23:17.794078: step: 380/464, loss: 0.39996644854545593 2023-01-24 01:23:18.384396: step: 382/464, loss: 0.7073900699615479 2023-01-24 01:23:19.059186: step: 384/464, loss: 4.618206977844238 2023-01-24 01:23:19.653469: step: 386/464, loss: 0.7551769614219666 2023-01-24 01:23:20.232485: step: 388/464, loss: 0.7826072573661804 2023-01-24 01:23:20.905809: step: 390/464, loss: 0.7515690326690674 2023-01-24 01:23:21.521114: step: 392/464, loss: 0.999121367931366 2023-01-24 01:23:22.186719: step: 394/464, loss: 0.9337067604064941 2023-01-24 01:23:22.857298: step: 396/464, loss: 0.9681384563446045 2023-01-24 01:23:23.442828: step: 398/464, loss: 1.0782345533370972 2023-01-24 01:23:24.080011: step: 400/464, loss: 1.4214262962341309 2023-01-24 01:23:24.635351: step: 402/464, loss: 0.7020053863525391 2023-01-24 01:23:25.230237: step: 404/464, loss: 0.4647718071937561 2023-01-24 01:23:25.890262: step: 406/464, loss: 4.217225074768066 2023-01-24 01:23:26.472217: step: 408/464, loss: 1.2776316404342651 2023-01-24 01:23:27.108088: step: 410/464, loss: 0.26782044768333435 2023-01-24 01:23:27.674518: step: 412/464, loss: 0.6994425654411316 2023-01-24 01:23:28.320785: step: 414/464, loss: 0.5873758792877197 2023-01-24 01:23:28.879314: step: 416/464, loss: 0.48875847458839417 2023-01-24 01:23:29.479855: step: 418/464, loss: 0.8659118413925171 2023-01-24 01:23:30.083942: step: 420/464, loss: 1.0143709182739258 2023-01-24 01:23:30.679862: step: 422/464, loss: 1.1839038133621216 2023-01-24 01:23:31.303869: step: 424/464, loss: 0.23417912423610687 2023-01-24 01:23:31.955627: step: 426/464, loss: 0.3920256495475769 2023-01-24 01:23:32.548883: step: 428/464, loss: 0.27268993854522705 2023-01-24 01:23:33.164289: step: 430/464, loss: 0.26965102553367615 2023-01-24 01:23:33.780912: step: 432/464, loss: 1.9450130462646484 2023-01-24 01:23:34.419192: step: 434/464, loss: 0.28021562099456787 2023-01-24 01:23:34.959588: step: 436/464, loss: 1.2575656175613403 2023-01-24 01:23:35.582917: step: 438/464, loss: 0.8403143286705017 2023-01-24 01:23:36.197343: step: 440/464, loss: 4.10808801651001 2023-01-24 01:23:36.834010: step: 442/464, loss: 0.5153523683547974 2023-01-24 01:23:37.451153: step: 444/464, loss: 0.261572927236557 2023-01-24 01:23:38.061186: step: 446/464, loss: 0.25158095359802246 2023-01-24 01:23:38.736125: step: 448/464, loss: 0.8015758395195007 2023-01-24 01:23:39.399721: step: 450/464, loss: 1.1201467514038086 2023-01-24 01:23:40.017778: step: 452/464, loss: 1.856890320777893 2023-01-24 01:23:40.639501: step: 454/464, loss: 1.2516084909439087 2023-01-24 01:23:41.254803: step: 456/464, loss: 0.6944349408149719 2023-01-24 01:23:41.857124: step: 458/464, loss: 3.0015387535095215 2023-01-24 01:23:42.450644: step: 460/464, loss: 0.2708805501461029 2023-01-24 01:23:43.156517: step: 462/464, loss: 0.2695115804672241 2023-01-24 01:23:43.744229: step: 464/464, loss: 0.16175496578216553 2023-01-24 01:23:44.370765: step: 466/464, loss: 0.9598912000656128 2023-01-24 01:23:44.924767: step: 468/464, loss: 0.7443088293075562 2023-01-24 01:23:45.551594: step: 470/464, loss: 0.40656954050064087 2023-01-24 01:23:46.218415: step: 472/464, loss: 1.2153903245925903 2023-01-24 01:23:46.835371: step: 474/464, loss: 0.4018632173538208 2023-01-24 01:23:47.391008: step: 476/464, loss: 1.0047473907470703 2023-01-24 01:23:48.010384: step: 478/464, loss: 0.20961833000183105 2023-01-24 01:23:48.632115: step: 480/464, loss: 0.3342295289039612 2023-01-24 01:23:49.211928: step: 482/464, loss: 0.5498560667037964 2023-01-24 01:23:49.873566: step: 484/464, loss: 0.690646767616272 2023-01-24 01:23:50.545833: step: 486/464, loss: 0.10926651954650879 2023-01-24 01:23:51.195843: step: 488/464, loss: 0.8070119619369507 2023-01-24 01:23:51.925297: step: 490/464, loss: 0.2085690051317215 2023-01-24 01:23:52.565894: step: 492/464, loss: 1.0676285028457642 2023-01-24 01:23:53.167797: step: 494/464, loss: 0.781734049320221 2023-01-24 01:23:53.772297: step: 496/464, loss: 0.7987548112869263 2023-01-24 01:23:54.356250: step: 498/464, loss: 0.43733590841293335 2023-01-24 01:23:54.970255: step: 500/464, loss: 0.9267998933792114 2023-01-24 01:23:55.616982: step: 502/464, loss: 0.6473803520202637 2023-01-24 01:23:56.246956: step: 504/464, loss: 0.3214409053325653 2023-01-24 01:23:56.919581: step: 506/464, loss: 0.14971300959587097 2023-01-24 01:23:57.554038: step: 508/464, loss: 0.7291408777236938 2023-01-24 01:23:58.154434: step: 510/464, loss: 1.3302973508834839 2023-01-24 01:23:58.766298: step: 512/464, loss: 0.25704845786094666 2023-01-24 01:23:59.379999: step: 514/464, loss: 0.8286025524139404 2023-01-24 01:23:59.988344: step: 516/464, loss: 0.8494953513145447 2023-01-24 01:24:00.648793: step: 518/464, loss: 0.7644921541213989 2023-01-24 01:24:01.324242: step: 520/464, loss: 1.463140845298767 2023-01-24 01:24:01.958173: step: 522/464, loss: 0.38025563955307007 2023-01-24 01:24:02.583089: step: 524/464, loss: 0.5238245129585266 2023-01-24 01:24:03.218124: step: 526/464, loss: 0.9393330216407776 2023-01-24 01:24:03.844440: step: 528/464, loss: 0.7470495700836182 2023-01-24 01:24:04.496284: step: 530/464, loss: 2.6326611042022705 2023-01-24 01:24:05.074749: step: 532/464, loss: 0.3551231920719147 2023-01-24 01:24:05.659321: step: 534/464, loss: 0.4791729748249054 2023-01-24 01:24:06.340680: step: 536/464, loss: 3.0031039714813232 2023-01-24 01:24:06.949915: step: 538/464, loss: 0.38098952174186707 2023-01-24 01:24:07.558389: step: 540/464, loss: 0.855867862701416 2023-01-24 01:24:08.189218: step: 542/464, loss: 1.410219669342041 2023-01-24 01:24:08.856314: step: 544/464, loss: 0.7034099102020264 2023-01-24 01:24:09.518938: step: 546/464, loss: 1.93264901638031 2023-01-24 01:24:10.152416: step: 548/464, loss: 1.4344180822372437 2023-01-24 01:24:10.830999: step: 550/464, loss: 0.9101685285568237 2023-01-24 01:24:11.464965: step: 552/464, loss: 0.23653540015220642 2023-01-24 01:24:12.053240: step: 554/464, loss: 0.6157763600349426 2023-01-24 01:24:12.697795: step: 556/464, loss: 0.8293452262878418 2023-01-24 01:24:13.369421: step: 558/464, loss: 0.8490480780601501 2023-01-24 01:24:13.991961: step: 560/464, loss: 0.24026226997375488 2023-01-24 01:24:14.608833: step: 562/464, loss: 0.83760666847229 2023-01-24 01:24:15.243568: step: 564/464, loss: 0.6933444142341614 2023-01-24 01:24:15.792031: step: 566/464, loss: 0.6754300594329834 2023-01-24 01:24:16.448889: step: 568/464, loss: 1.0457748174667358 2023-01-24 01:24:17.131627: step: 570/464, loss: 2.0500330924987793 2023-01-24 01:24:17.814697: step: 572/464, loss: 0.4848044514656067 2023-01-24 01:24:18.456805: step: 574/464, loss: 0.38042065501213074 2023-01-24 01:24:19.129953: step: 576/464, loss: 0.8919004797935486 2023-01-24 01:24:19.779454: step: 578/464, loss: 0.5954698920249939 2023-01-24 01:24:20.406231: step: 580/464, loss: 0.5718483328819275 2023-01-24 01:24:20.984161: step: 582/464, loss: 0.4611566662788391 2023-01-24 01:24:21.614431: step: 584/464, loss: 0.8591204881668091 2023-01-24 01:24:22.188498: step: 586/464, loss: 0.7177731990814209 2023-01-24 01:24:22.788410: step: 588/464, loss: 2.1465821266174316 2023-01-24 01:24:23.446960: step: 590/464, loss: 3.7014565467834473 2023-01-24 01:24:24.101222: step: 592/464, loss: 1.2863422632217407 2023-01-24 01:24:24.722503: step: 594/464, loss: 1.9341545104980469 2023-01-24 01:24:25.339256: step: 596/464, loss: 0.5215336084365845 2023-01-24 01:24:25.960955: step: 598/464, loss: 0.18646299839019775 2023-01-24 01:24:26.704550: step: 600/464, loss: 2.8123488426208496 2023-01-24 01:24:27.341246: step: 602/464, loss: 0.28051960468292236 2023-01-24 01:24:28.012162: step: 604/464, loss: 1.1275231838226318 2023-01-24 01:24:28.658317: step: 606/464, loss: 0.9808369874954224 2023-01-24 01:24:29.325954: step: 608/464, loss: 0.6259108781814575 2023-01-24 01:24:29.922739: step: 610/464, loss: 0.3483581244945526 2023-01-24 01:24:30.538428: step: 612/464, loss: 0.32177531719207764 2023-01-24 01:24:31.192232: step: 614/464, loss: 0.5979961156845093 2023-01-24 01:24:31.760268: step: 616/464, loss: 0.26392918825149536 2023-01-24 01:24:32.363147: step: 618/464, loss: 0.785770058631897 2023-01-24 01:24:32.983938: step: 620/464, loss: 1.825857400894165 2023-01-24 01:24:33.644161: step: 622/464, loss: 0.4133148789405823 2023-01-24 01:24:34.267138: step: 624/464, loss: 0.19767507910728455 2023-01-24 01:24:34.954370: step: 626/464, loss: 0.2690700590610504 2023-01-24 01:24:35.596701: step: 628/464, loss: 0.8146762251853943 2023-01-24 01:24:36.221788: step: 630/464, loss: 1.2115219831466675 2023-01-24 01:24:36.931269: step: 632/464, loss: 1.1510913372039795 2023-01-24 01:24:37.570354: step: 634/464, loss: 0.7050117254257202 2023-01-24 01:24:38.153884: step: 636/464, loss: 0.6447600722312927 2023-01-24 01:24:38.785306: step: 638/464, loss: 0.751822292804718 2023-01-24 01:24:39.397514: step: 640/464, loss: 1.283268928527832 2023-01-24 01:24:40.016768: step: 642/464, loss: 2.2203149795532227 2023-01-24 01:24:40.682172: step: 644/464, loss: 0.4357527196407318 2023-01-24 01:24:41.315595: step: 646/464, loss: 0.6004408001899719 2023-01-24 01:24:41.891905: step: 648/464, loss: 0.4118346571922302 2023-01-24 01:24:42.502157: step: 650/464, loss: 1.280852198600769 2023-01-24 01:24:43.197423: step: 652/464, loss: 0.7061904668807983 2023-01-24 01:24:43.766025: step: 654/464, loss: 0.7301682233810425 2023-01-24 01:24:44.310496: step: 656/464, loss: 0.27377596497535706 2023-01-24 01:24:44.954371: step: 658/464, loss: 6.4098052978515625 2023-01-24 01:24:45.652092: step: 660/464, loss: 0.6805958151817322 2023-01-24 01:24:46.311723: step: 662/464, loss: 1.777627944946289 2023-01-24 01:24:46.955468: step: 664/464, loss: 0.647487223148346 2023-01-24 01:24:47.608573: step: 666/464, loss: 1.2705881595611572 2023-01-24 01:24:48.212306: step: 668/464, loss: 0.36145371198654175 2023-01-24 01:24:48.812680: step: 670/464, loss: 1.693847894668579 2023-01-24 01:24:49.441587: step: 672/464, loss: 2.836374044418335 2023-01-24 01:24:50.041665: step: 674/464, loss: 0.3379068672657013 2023-01-24 01:24:50.602519: step: 676/464, loss: 0.2719695270061493 2023-01-24 01:24:51.233186: step: 678/464, loss: 2.592514991760254 2023-01-24 01:24:51.980622: step: 680/464, loss: 0.5013712048530579 2023-01-24 01:24:52.612696: step: 682/464, loss: 1.502289056777954 2023-01-24 01:24:53.243094: step: 684/464, loss: 0.6350601315498352 2023-01-24 01:24:53.872412: step: 686/464, loss: 0.3374522924423218 2023-01-24 01:24:54.505909: step: 688/464, loss: 1.4543397426605225 2023-01-24 01:24:55.133713: step: 690/464, loss: 1.5196514129638672 2023-01-24 01:24:55.750167: step: 692/464, loss: 0.31878867745399475 2023-01-24 01:24:56.373309: step: 694/464, loss: 0.4354569911956787 2023-01-24 01:24:57.067535: step: 696/464, loss: 0.707033097743988 2023-01-24 01:24:57.706241: step: 698/464, loss: 0.7893030643463135 2023-01-24 01:24:58.316947: step: 700/464, loss: 1.016629695892334 2023-01-24 01:24:58.967904: step: 702/464, loss: 0.307487428188324 2023-01-24 01:24:59.594978: step: 704/464, loss: 1.902664303779602 2023-01-24 01:25:00.245047: step: 706/464, loss: 0.557648241519928 2023-01-24 01:25:00.814177: step: 708/464, loss: 0.23311075568199158 2023-01-24 01:25:01.410306: step: 710/464, loss: 0.310183584690094 2023-01-24 01:25:02.050616: step: 712/464, loss: 0.36607521772384644 2023-01-24 01:25:02.653676: step: 714/464, loss: 0.2555963397026062 2023-01-24 01:25:03.276732: step: 716/464, loss: 0.9427339434623718 2023-01-24 01:25:03.919949: step: 718/464, loss: 1.401888132095337 2023-01-24 01:25:04.556819: step: 720/464, loss: 0.6344363689422607 2023-01-24 01:25:05.226791: step: 722/464, loss: 1.5003769397735596 2023-01-24 01:25:05.846554: step: 724/464, loss: 1.5138165950775146 2023-01-24 01:25:06.434788: step: 726/464, loss: 0.7028983235359192 2023-01-24 01:25:07.096559: step: 728/464, loss: 0.4118358790874481 2023-01-24 01:25:07.699093: step: 730/464, loss: 0.7308695316314697 2023-01-24 01:25:08.265393: step: 732/464, loss: 0.8298178911209106 2023-01-24 01:25:08.862926: step: 734/464, loss: 0.1830146610736847 2023-01-24 01:25:09.529385: step: 736/464, loss: 0.5564534068107605 2023-01-24 01:25:10.127324: step: 738/464, loss: 0.6699367165565491 2023-01-24 01:25:10.746695: step: 740/464, loss: 0.6066685318946838 2023-01-24 01:25:11.402335: step: 742/464, loss: 0.4273430407047272 2023-01-24 01:25:12.053524: step: 744/464, loss: 0.9662275910377502 2023-01-24 01:25:12.594563: step: 746/464, loss: 0.7614527940750122 2023-01-24 01:25:13.239432: step: 748/464, loss: 0.4929681718349457 2023-01-24 01:25:13.851955: step: 750/464, loss: 0.6777883768081665 2023-01-24 01:25:14.486377: step: 752/464, loss: 0.7379733920097351 2023-01-24 01:25:15.124168: step: 754/464, loss: 0.9559131264686584 2023-01-24 01:25:15.728213: step: 756/464, loss: 1.5169222354888916 2023-01-24 01:25:16.366014: step: 758/464, loss: 0.5358415246009827 2023-01-24 01:25:17.022891: step: 760/464, loss: 0.8351231813430786 2023-01-24 01:25:17.640326: step: 762/464, loss: 0.649901807308197 2023-01-24 01:25:18.234744: step: 764/464, loss: 0.19484145939350128 2023-01-24 01:25:18.913189: step: 766/464, loss: 0.13287372887134552 2023-01-24 01:25:19.531008: step: 768/464, loss: 0.17175209522247314 2023-01-24 01:25:20.141607: step: 770/464, loss: 1.7275898456573486 2023-01-24 01:25:20.818280: step: 772/464, loss: 0.49211055040359497 2023-01-24 01:25:21.420337: step: 774/464, loss: 0.42846372723579407 2023-01-24 01:25:22.091529: step: 776/464, loss: 0.6717638969421387 2023-01-24 01:25:22.704746: step: 778/464, loss: 0.29499301314353943 2023-01-24 01:25:23.329295: step: 780/464, loss: 0.5855064988136292 2023-01-24 01:25:23.934184: step: 782/464, loss: 0.24147354066371918 2023-01-24 01:25:24.493758: step: 784/464, loss: 0.8271307945251465 2023-01-24 01:25:25.119963: step: 786/464, loss: 0.5050127506256104 2023-01-24 01:25:25.745532: step: 788/464, loss: 0.4337732195854187 2023-01-24 01:25:26.352781: step: 790/464, loss: 0.6392048001289368 2023-01-24 01:25:27.000290: step: 792/464, loss: 2.8629088401794434 2023-01-24 01:25:27.587707: step: 794/464, loss: 0.2869815528392792 2023-01-24 01:25:28.236315: step: 796/464, loss: 1.8846849203109741 2023-01-24 01:25:28.824042: step: 798/464, loss: 2.81234073638916 2023-01-24 01:25:29.372093: step: 800/464, loss: 0.3832673132419586 2023-01-24 01:25:29.998001: step: 802/464, loss: 0.5510661005973816 2023-01-24 01:25:30.626911: step: 804/464, loss: 0.5066255331039429 2023-01-24 01:25:31.327874: step: 806/464, loss: 0.6949787139892578 2023-01-24 01:25:31.944473: step: 808/464, loss: 0.3027384281158447 2023-01-24 01:25:32.560137: step: 810/464, loss: 0.33572134375572205 2023-01-24 01:25:33.161404: step: 812/464, loss: 0.22994890809059143 2023-01-24 01:25:33.813675: step: 814/464, loss: 0.2857647240161896 2023-01-24 01:25:34.478072: step: 816/464, loss: 1.0291653871536255 2023-01-24 01:25:35.108905: step: 818/464, loss: 0.9672958850860596 2023-01-24 01:25:35.723564: step: 820/464, loss: 0.20487524569034576 2023-01-24 01:25:36.348701: step: 822/464, loss: 0.283672958612442 2023-01-24 01:25:36.942676: step: 824/464, loss: 0.361631840467453 2023-01-24 01:25:37.599634: step: 826/464, loss: 1.1465498208999634 2023-01-24 01:25:38.255226: step: 828/464, loss: 0.4269474446773529 2023-01-24 01:25:38.840461: step: 830/464, loss: 0.15169720351696014 2023-01-24 01:25:39.532679: step: 832/464, loss: 0.7894524335861206 2023-01-24 01:25:40.163323: step: 834/464, loss: 0.31384778022766113 2023-01-24 01:25:40.794086: step: 836/464, loss: 0.25070080161094666 2023-01-24 01:25:41.397509: step: 838/464, loss: 1.0677363872528076 2023-01-24 01:25:42.041929: step: 840/464, loss: 0.20605960488319397 2023-01-24 01:25:42.709266: step: 842/464, loss: 0.5931063294410706 2023-01-24 01:25:43.377927: step: 844/464, loss: 0.7979549169540405 2023-01-24 01:25:44.015488: step: 846/464, loss: 0.42603933811187744 2023-01-24 01:25:44.667882: step: 848/464, loss: 0.4495619535446167 2023-01-24 01:25:45.257756: step: 850/464, loss: 0.15042515099048615 2023-01-24 01:25:45.923256: step: 852/464, loss: 0.997209906578064 2023-01-24 01:25:46.588585: step: 854/464, loss: 0.8001546859741211 2023-01-24 01:25:47.193809: step: 856/464, loss: 0.37791669368743896 2023-01-24 01:25:47.814915: step: 858/464, loss: 0.3678218722343445 2023-01-24 01:25:48.434470: step: 860/464, loss: 0.22284124791622162 2023-01-24 01:25:49.106891: step: 862/464, loss: 1.3511481285095215 2023-01-24 01:25:49.750657: step: 864/464, loss: 1.718711256980896 2023-01-24 01:25:50.392795: step: 866/464, loss: 0.7483352422714233 2023-01-24 01:25:51.107733: step: 868/464, loss: 0.3021269142627716 2023-01-24 01:25:51.721094: step: 870/464, loss: 0.4957874119281769 2023-01-24 01:25:52.267887: step: 872/464, loss: 0.546061635017395 2023-01-24 01:25:52.891170: step: 874/464, loss: 1.8420979976654053 2023-01-24 01:25:53.544755: step: 876/464, loss: 2.1327435970306396 2023-01-24 01:25:54.184408: step: 878/464, loss: 0.83139967918396 2023-01-24 01:25:54.797817: step: 880/464, loss: 0.2847437560558319 2023-01-24 01:25:55.459374: step: 882/464, loss: 1.017668604850769 2023-01-24 01:25:56.077784: step: 884/464, loss: 0.5310131311416626 2023-01-24 01:25:56.776058: step: 886/464, loss: 0.13240331411361694 2023-01-24 01:25:57.464162: step: 888/464, loss: 1.3001729249954224 2023-01-24 01:25:58.068076: step: 890/464, loss: 0.9886908531188965 2023-01-24 01:25:58.712123: step: 892/464, loss: 1.2750895023345947 2023-01-24 01:25:59.337997: step: 894/464, loss: 0.9979209303855896 2023-01-24 01:25:59.977800: step: 896/464, loss: 0.6268845796585083 2023-01-24 01:26:00.630349: step: 898/464, loss: 0.674816906452179 2023-01-24 01:26:01.259942: step: 900/464, loss: 0.2394643872976303 2023-01-24 01:26:01.871507: step: 902/464, loss: 1.133461356163025 2023-01-24 01:26:02.518776: step: 904/464, loss: 0.8986351490020752 2023-01-24 01:26:03.129516: step: 906/464, loss: 1.2165088653564453 2023-01-24 01:26:03.719422: step: 908/464, loss: 0.5075541734695435 2023-01-24 01:26:04.386860: step: 910/464, loss: 0.42147156596183777 2023-01-24 01:26:04.986432: step: 912/464, loss: 1.6376748085021973 2023-01-24 01:26:05.616178: step: 914/464, loss: 0.8599750995635986 2023-01-24 01:26:06.204089: step: 916/464, loss: 1.6517157554626465 2023-01-24 01:26:06.898108: step: 918/464, loss: 0.7676430344581604 2023-01-24 01:26:07.452945: step: 920/464, loss: 0.538670539855957 2023-01-24 01:26:08.058936: step: 922/464, loss: 1.3265511989593506 2023-01-24 01:26:08.714630: step: 924/464, loss: 0.19481389224529266 2023-01-24 01:26:09.330589: step: 926/464, loss: 0.24411243200302124 2023-01-24 01:26:09.923181: step: 928/464, loss: 0.18539859354496002 2023-01-24 01:26:10.405948: step: 930/464, loss: 0.4154171943664551 ================================================== Loss: 0.850 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3196822117276663, 'r': 0.29304202741702745, 'f1': 0.30578298513081126}, 'combined': 0.22531377851743986, 'epoch': 5} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2946801632430426, 'r': 0.2603458689936239, 'f1': 0.2764510445231856}, 'combined': 0.1804809927975201, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3185678994918125, 'r': 0.305294237012987, 'f1': 0.3117898590770931}, 'combined': 0.2297398961620686, 'epoch': 5} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3115770328894794, 'r': 0.2692711605338437, 'f1': 0.28888343010028505}, 'combined': 0.18859747250070422, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31993440233236153, 'r': 0.2969088203463203, 'f1': 0.3079918607914679}, 'combined': 0.22694137110950266, 'epoch': 5} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30404472815418065, 'r': 0.2555091476965408, 'f1': 0.27767195512385795}, 'combined': 0.181278063966871, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2609126984126984, 'r': 0.31309523809523804, 'f1': 0.28463203463203457}, 'combined': 0.1897546897546897, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2708333333333333, 'r': 0.2826086956521739, 'f1': 0.2765957446808511}, 'combined': 0.13829787234042554, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5192307692307693, 'r': 0.23275862068965517, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 5} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3196822117276663, 'r': 0.29304202741702745, 'f1': 0.30578298513081126}, 'combined': 0.22531377851743986, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.2946801632430426, 'r': 0.2603458689936239, 'f1': 0.2764510445231856}, 'combined': 0.1804809927975201, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2609126984126984, 'r': 0.31309523809523804, 'f1': 0.28463203463203457}, 'combined': 0.1897546897546897, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31993440233236153, 'r': 0.2969088203463203, 'f1': 0.3079918607914679}, 'combined': 0.22694137110950266, 'epoch': 5} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30404472815418065, 'r': 0.2555091476965408, 'f1': 0.27767195512385795}, 'combined': 0.181278063966871, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5192307692307693, 'r': 0.23275862068965517, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 5} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:29:00.330706: step: 2/464, loss: 0.3751084506511688 2023-01-24 01:29:00.950803: step: 4/464, loss: 0.8991535305976868 2023-01-24 01:29:01.569373: step: 6/464, loss: 0.8244770169258118 2023-01-24 01:29:02.180400: step: 8/464, loss: 0.34533822536468506 2023-01-24 01:29:02.816828: step: 10/464, loss: 0.24936716258525848 2023-01-24 01:29:03.446236: step: 12/464, loss: 0.6579227447509766 2023-01-24 01:29:04.046882: step: 14/464, loss: 0.45752012729644775 2023-01-24 01:29:04.637316: step: 16/464, loss: 0.6975422501564026 2023-01-24 01:29:05.172717: step: 18/464, loss: 0.2402292788028717 2023-01-24 01:29:05.769731: step: 20/464, loss: 0.5649269819259644 2023-01-24 01:29:06.378897: step: 22/464, loss: 1.5623259544372559 2023-01-24 01:29:07.039069: step: 24/464, loss: 0.45161202549934387 2023-01-24 01:29:07.723258: step: 26/464, loss: 0.4018263816833496 2023-01-24 01:29:08.296020: step: 28/464, loss: 0.5523669719696045 2023-01-24 01:29:08.936172: step: 30/464, loss: 3.6010148525238037 2023-01-24 01:29:09.550366: step: 32/464, loss: 0.7040104269981384 2023-01-24 01:29:10.191416: step: 34/464, loss: 1.5320963859558105 2023-01-24 01:29:10.795062: step: 36/464, loss: 0.5989647507667542 2023-01-24 01:29:11.496719: step: 38/464, loss: 0.3131082057952881 2023-01-24 01:29:12.122272: step: 40/464, loss: 2.0335254669189453 2023-01-24 01:29:12.723202: step: 42/464, loss: 1.0707502365112305 2023-01-24 01:29:13.459808: step: 44/464, loss: 0.6073818206787109 2023-01-24 01:29:14.115010: step: 46/464, loss: 0.2975615859031677 2023-01-24 01:29:14.746257: step: 48/464, loss: 0.6314176917076111 2023-01-24 01:29:15.353510: step: 50/464, loss: 0.39682334661483765 2023-01-24 01:29:15.965656: step: 52/464, loss: 1.36899733543396 2023-01-24 01:29:16.638662: step: 54/464, loss: 0.5601557493209839 2023-01-24 01:29:17.254385: step: 56/464, loss: 0.4270496368408203 2023-01-24 01:29:17.847381: step: 58/464, loss: 0.6895309090614319 2023-01-24 01:29:18.448096: step: 60/464, loss: 0.9848753809928894 2023-01-24 01:29:19.195732: step: 62/464, loss: 0.35673847794532776 2023-01-24 01:29:19.833035: step: 64/464, loss: 0.5678101778030396 2023-01-24 01:29:20.400165: step: 66/464, loss: 0.17788580060005188 2023-01-24 01:29:21.060118: step: 68/464, loss: 0.21318335831165314 2023-01-24 01:29:21.654114: step: 70/464, loss: 0.1260576993227005 2023-01-24 01:29:22.301954: step: 72/464, loss: 0.21788544952869415 2023-01-24 01:29:22.946650: step: 74/464, loss: 0.25507208704948425 2023-01-24 01:29:23.502695: step: 76/464, loss: 0.387909859418869 2023-01-24 01:29:24.065980: step: 78/464, loss: 6.739643096923828 2023-01-24 01:29:24.685983: step: 80/464, loss: 1.3248546123504639 2023-01-24 01:29:25.293487: step: 82/464, loss: 0.20794707536697388 2023-01-24 01:29:25.855615: step: 84/464, loss: 0.36006394028663635 2023-01-24 01:29:26.468882: step: 86/464, loss: 0.3022036552429199 2023-01-24 01:29:27.057016: step: 88/464, loss: 0.21739986538887024 2023-01-24 01:29:27.647072: step: 90/464, loss: 0.35528916120529175 2023-01-24 01:29:28.285188: step: 92/464, loss: 0.5038557052612305 2023-01-24 01:29:28.883844: step: 94/464, loss: 0.6198413372039795 2023-01-24 01:29:29.549871: step: 96/464, loss: 0.5114516615867615 2023-01-24 01:29:30.132360: step: 98/464, loss: 1.2284324169158936 2023-01-24 01:29:30.805137: step: 100/464, loss: 1.3396265506744385 2023-01-24 01:29:31.557156: step: 102/464, loss: 0.38964733481407166 2023-01-24 01:29:32.218735: step: 104/464, loss: 1.7171967029571533 2023-01-24 01:29:32.832890: step: 106/464, loss: 0.1736004650592804 2023-01-24 01:29:33.464179: step: 108/464, loss: 0.13742183148860931 2023-01-24 01:29:34.006225: step: 110/464, loss: 0.6270983815193176 2023-01-24 01:29:34.645293: step: 112/464, loss: 0.7468340396881104 2023-01-24 01:29:35.318220: step: 114/464, loss: 0.22080263495445251 2023-01-24 01:29:35.916626: step: 116/464, loss: 0.24238267540931702 2023-01-24 01:29:36.577631: step: 118/464, loss: 0.4134461283683777 2023-01-24 01:29:37.215178: step: 120/464, loss: 0.24008819460868835 2023-01-24 01:29:37.825256: step: 122/464, loss: 0.34048280119895935 2023-01-24 01:29:38.420431: step: 124/464, loss: 0.2101212739944458 2023-01-24 01:29:39.042073: step: 126/464, loss: 0.31376171112060547 2023-01-24 01:29:39.705818: step: 128/464, loss: 0.6731464862823486 2023-01-24 01:29:40.325612: step: 130/464, loss: 0.5460286140441895 2023-01-24 01:29:40.948420: step: 132/464, loss: 0.8231890797615051 2023-01-24 01:29:41.597060: step: 134/464, loss: 0.27242153882980347 2023-01-24 01:29:42.211167: step: 136/464, loss: 0.16866791248321533 2023-01-24 01:29:42.896008: step: 138/464, loss: 0.11519985646009445 2023-01-24 01:29:43.539679: step: 140/464, loss: 0.4271545112133026 2023-01-24 01:29:44.213144: step: 142/464, loss: 0.34215304255485535 2023-01-24 01:29:44.874817: step: 144/464, loss: 0.24908070266246796 2023-01-24 01:29:45.546948: step: 146/464, loss: 0.572262167930603 2023-01-24 01:29:46.132967: step: 148/464, loss: 0.8973062038421631 2023-01-24 01:29:46.726633: step: 150/464, loss: 0.3133440315723419 2023-01-24 01:29:47.502399: step: 152/464, loss: 0.24786102771759033 2023-01-24 01:29:48.145062: step: 154/464, loss: 0.298935204744339 2023-01-24 01:29:48.776494: step: 156/464, loss: 0.28106117248535156 2023-01-24 01:29:49.401403: step: 158/464, loss: 0.4187696874141693 2023-01-24 01:29:50.006694: step: 160/464, loss: 2.7460684776306152 2023-01-24 01:29:50.637673: step: 162/464, loss: 1.8586090803146362 2023-01-24 01:29:51.300694: step: 164/464, loss: 1.2428754568099976 2023-01-24 01:29:51.924271: step: 166/464, loss: 2.1811106204986572 2023-01-24 01:29:52.583979: step: 168/464, loss: 1.0358861684799194 2023-01-24 01:29:53.227793: step: 170/464, loss: 0.3583020865917206 2023-01-24 01:29:53.838447: step: 172/464, loss: 0.6705204248428345 2023-01-24 01:29:54.390414: step: 174/464, loss: 0.3304373621940613 2023-01-24 01:29:55.014967: step: 176/464, loss: 0.9637370705604553 2023-01-24 01:29:55.645891: step: 178/464, loss: 0.09790074080228806 2023-01-24 01:29:56.295058: step: 180/464, loss: 0.2238306850194931 2023-01-24 01:29:56.883101: step: 182/464, loss: 1.275614619255066 2023-01-24 01:29:57.480556: step: 184/464, loss: 0.7819212079048157 2023-01-24 01:29:58.080852: step: 186/464, loss: 0.9053548574447632 2023-01-24 01:29:58.701269: step: 188/464, loss: 0.3270997107028961 2023-01-24 01:29:59.368351: step: 190/464, loss: 0.5571175813674927 2023-01-24 01:29:59.940584: step: 192/464, loss: 1.6119208335876465 2023-01-24 01:30:00.522975: step: 194/464, loss: 0.13971984386444092 2023-01-24 01:30:01.133008: step: 196/464, loss: 0.4664154648780823 2023-01-24 01:30:01.758327: step: 198/464, loss: 0.6606048941612244 2023-01-24 01:30:02.291409: step: 200/464, loss: 0.46219271421432495 2023-01-24 01:30:02.869829: step: 202/464, loss: 0.8494325876235962 2023-01-24 01:30:03.518239: step: 204/464, loss: 1.4003181457519531 2023-01-24 01:30:04.144448: step: 206/464, loss: 0.7771140933036804 2023-01-24 01:30:04.813641: step: 208/464, loss: 0.4586014151573181 2023-01-24 01:30:05.376499: step: 210/464, loss: 0.303373247385025 2023-01-24 01:30:06.019866: step: 212/464, loss: 0.2954310178756714 2023-01-24 01:30:06.738948: step: 214/464, loss: 0.1799723505973816 2023-01-24 01:30:07.341688: step: 216/464, loss: 0.6660267114639282 2023-01-24 01:30:07.940684: step: 218/464, loss: 1.0787197351455688 2023-01-24 01:30:08.605667: step: 220/464, loss: 0.3768312633037567 2023-01-24 01:30:09.210732: step: 222/464, loss: 0.40435791015625 2023-01-24 01:30:09.794812: step: 224/464, loss: 3.5142626762390137 2023-01-24 01:30:10.408101: step: 226/464, loss: 0.37316417694091797 2023-01-24 01:30:11.037969: step: 228/464, loss: 0.5245544910430908 2023-01-24 01:30:11.706011: step: 230/464, loss: 0.6997588276863098 2023-01-24 01:30:12.341766: step: 232/464, loss: 2.2032132148742676 2023-01-24 01:30:13.148714: step: 234/464, loss: 0.3766220510005951 2023-01-24 01:30:13.741650: step: 236/464, loss: 0.5885345339775085 2023-01-24 01:30:14.383229: step: 238/464, loss: 0.5886207818984985 2023-01-24 01:30:15.019982: step: 240/464, loss: 1.0273098945617676 2023-01-24 01:30:15.644859: step: 242/464, loss: 1.1725513935089111 2023-01-24 01:30:16.290321: step: 244/464, loss: 1.0625218152999878 2023-01-24 01:30:16.849982: step: 246/464, loss: 0.33822906017303467 2023-01-24 01:30:17.504135: step: 248/464, loss: 0.5773215293884277 2023-01-24 01:30:18.096632: step: 250/464, loss: 0.3922623097896576 2023-01-24 01:30:18.714724: step: 252/464, loss: 3.0866641998291016 2023-01-24 01:30:19.363801: step: 254/464, loss: 0.3755728006362915 2023-01-24 01:30:19.984649: step: 256/464, loss: 0.7650343179702759 2023-01-24 01:30:20.587644: step: 258/464, loss: 0.58442223072052 2023-01-24 01:30:21.182775: step: 260/464, loss: 0.4950478672981262 2023-01-24 01:30:21.869750: step: 262/464, loss: 0.7851177453994751 2023-01-24 01:30:22.568054: step: 264/464, loss: 0.22636084258556366 2023-01-24 01:30:23.197580: step: 266/464, loss: 0.2058066874742508 2023-01-24 01:30:23.797588: step: 268/464, loss: 0.27423572540283203 2023-01-24 01:30:24.407085: step: 270/464, loss: 0.6314135789871216 2023-01-24 01:30:25.114828: step: 272/464, loss: 1.1377860307693481 2023-01-24 01:30:25.725072: step: 274/464, loss: 1.4917114973068237 2023-01-24 01:30:26.382829: step: 276/464, loss: 0.4098925292491913 2023-01-24 01:30:27.003097: step: 278/464, loss: 0.2656930685043335 2023-01-24 01:30:27.603478: step: 280/464, loss: 0.7664421796798706 2023-01-24 01:30:28.193273: step: 282/464, loss: 1.195511817932129 2023-01-24 01:30:28.875761: step: 284/464, loss: 0.8020053505897522 2023-01-24 01:30:29.468216: step: 286/464, loss: 0.9370518922805786 2023-01-24 01:30:30.117108: step: 288/464, loss: 0.5342237949371338 2023-01-24 01:30:30.758044: step: 290/464, loss: 0.8717308044433594 2023-01-24 01:30:31.364464: step: 292/464, loss: 0.27026355266571045 2023-01-24 01:30:32.087487: step: 294/464, loss: 7.652609825134277 2023-01-24 01:30:32.783028: step: 296/464, loss: 1.2849352359771729 2023-01-24 01:30:33.452155: step: 298/464, loss: 0.43978744745254517 2023-01-24 01:30:34.164017: step: 300/464, loss: 0.5479200482368469 2023-01-24 01:30:34.820190: step: 302/464, loss: 0.3849724233150482 2023-01-24 01:30:35.436194: step: 304/464, loss: 0.41675111651420593 2023-01-24 01:30:36.061308: step: 306/464, loss: 0.8829919099807739 2023-01-24 01:30:36.716923: step: 308/464, loss: 0.2806163728237152 2023-01-24 01:30:37.344612: step: 310/464, loss: 0.23910051584243774 2023-01-24 01:30:37.989448: step: 312/464, loss: 0.46648287773132324 2023-01-24 01:30:38.646894: step: 314/464, loss: 0.4761250615119934 2023-01-24 01:30:39.309065: step: 316/464, loss: 0.15457160770893097 2023-01-24 01:30:39.964545: step: 318/464, loss: 0.7718314528465271 2023-01-24 01:30:40.547366: step: 320/464, loss: 0.16734570264816284 2023-01-24 01:30:41.217200: step: 322/464, loss: 1.4428234100341797 2023-01-24 01:30:41.821793: step: 324/464, loss: 0.5153453946113586 2023-01-24 01:30:42.415619: step: 326/464, loss: 0.41749900579452515 2023-01-24 01:30:43.071829: step: 328/464, loss: 3.295747756958008 2023-01-24 01:30:43.688700: step: 330/464, loss: 0.4043913185596466 2023-01-24 01:30:44.289058: step: 332/464, loss: 4.637999534606934 2023-01-24 01:30:44.905299: step: 334/464, loss: 1.2028005123138428 2023-01-24 01:30:45.553252: step: 336/464, loss: 0.6247386932373047 2023-01-24 01:30:46.177913: step: 338/464, loss: 0.3917686343193054 2023-01-24 01:30:46.792089: step: 340/464, loss: 0.8415156602859497 2023-01-24 01:30:47.387822: step: 342/464, loss: 0.6987343430519104 2023-01-24 01:30:48.034913: step: 344/464, loss: 0.7762897610664368 2023-01-24 01:30:48.682825: step: 346/464, loss: 1.5636210441589355 2023-01-24 01:30:49.319889: step: 348/464, loss: 1.3135454654693604 2023-01-24 01:30:49.901033: step: 350/464, loss: 0.2784609794616699 2023-01-24 01:30:50.497570: step: 352/464, loss: 0.46578752994537354 2023-01-24 01:30:51.087157: step: 354/464, loss: 0.8856964111328125 2023-01-24 01:30:51.738488: step: 356/464, loss: 1.1614477634429932 2023-01-24 01:30:52.352441: step: 358/464, loss: 0.5184161067008972 2023-01-24 01:30:53.007242: step: 360/464, loss: 0.844288170337677 2023-01-24 01:30:53.610193: step: 362/464, loss: 0.4373282194137573 2023-01-24 01:30:54.204490: step: 364/464, loss: 1.5751210451126099 2023-01-24 01:30:54.878785: step: 366/464, loss: 0.9275869131088257 2023-01-24 01:30:55.413781: step: 368/464, loss: 0.567407488822937 2023-01-24 01:30:56.016765: step: 370/464, loss: 0.40654146671295166 2023-01-24 01:30:56.719500: step: 372/464, loss: 1.1315515041351318 2023-01-24 01:30:57.355659: step: 374/464, loss: 3.533780336380005 2023-01-24 01:30:58.075357: step: 376/464, loss: 0.2930464744567871 2023-01-24 01:30:58.698793: step: 378/464, loss: 0.4550985097885132 2023-01-24 01:30:59.299873: step: 380/464, loss: 0.45705240964889526 2023-01-24 01:30:59.906926: step: 382/464, loss: 0.24261905252933502 2023-01-24 01:31:00.512682: step: 384/464, loss: 0.7945497632026672 2023-01-24 01:31:01.091121: step: 386/464, loss: 0.4450487792491913 2023-01-24 01:31:01.690289: step: 388/464, loss: 0.2671772241592407 2023-01-24 01:31:02.331750: step: 390/464, loss: 0.27833473682403564 2023-01-24 01:31:02.991703: step: 392/464, loss: 0.7007308602333069 2023-01-24 01:31:03.640336: step: 394/464, loss: 1.0011887550354004 2023-01-24 01:31:04.312894: step: 396/464, loss: 0.3089820444583893 2023-01-24 01:31:04.949428: step: 398/464, loss: 0.991563081741333 2023-01-24 01:31:05.578415: step: 400/464, loss: 1.2187684774398804 2023-01-24 01:31:06.136186: step: 402/464, loss: 0.5915836095809937 2023-01-24 01:31:06.800255: step: 404/464, loss: 0.9523316025733948 2023-01-24 01:31:07.383142: step: 406/464, loss: 1.4720518589019775 2023-01-24 01:31:07.988964: step: 408/464, loss: 0.2306549847126007 2023-01-24 01:31:08.658747: step: 410/464, loss: 0.5047092437744141 2023-01-24 01:31:09.246584: step: 412/464, loss: 0.4012000858783722 2023-01-24 01:31:09.851478: step: 414/464, loss: 1.0264873504638672 2023-01-24 01:31:10.467217: step: 416/464, loss: 0.4799194037914276 2023-01-24 01:31:11.125607: step: 418/464, loss: 0.6494021415710449 2023-01-24 01:31:11.764237: step: 420/464, loss: 1.1561380624771118 2023-01-24 01:31:12.367595: step: 422/464, loss: 0.3921971321105957 2023-01-24 01:31:12.974385: step: 424/464, loss: 0.6223727464675903 2023-01-24 01:31:13.612989: step: 426/464, loss: 0.4728529751300812 2023-01-24 01:31:14.361356: step: 428/464, loss: 0.3983856439590454 2023-01-24 01:31:14.969998: step: 430/464, loss: 0.37754401564598083 2023-01-24 01:31:15.584697: step: 432/464, loss: 0.5962048768997192 2023-01-24 01:31:16.208343: step: 434/464, loss: 0.258344441652298 2023-01-24 01:31:16.951957: step: 436/464, loss: 0.604759156703949 2023-01-24 01:31:17.597561: step: 438/464, loss: 0.5698238611221313 2023-01-24 01:31:18.259022: step: 440/464, loss: 0.9591268301010132 2023-01-24 01:31:18.854289: step: 442/464, loss: 0.13089722394943237 2023-01-24 01:31:19.502812: step: 444/464, loss: 0.37283504009246826 2023-01-24 01:31:20.160215: step: 446/464, loss: 0.8682311773300171 2023-01-24 01:31:20.756851: step: 448/464, loss: 0.28524962067604065 2023-01-24 01:31:21.367182: step: 450/464, loss: 1.1062726974487305 2023-01-24 01:31:21.968546: step: 452/464, loss: 4.481321334838867 2023-01-24 01:31:22.637422: step: 454/464, loss: 1.0563178062438965 2023-01-24 01:31:23.283743: step: 456/464, loss: 0.2760435938835144 2023-01-24 01:31:23.924652: step: 458/464, loss: 0.24600005149841309 2023-01-24 01:31:24.532795: step: 460/464, loss: 0.2210104614496231 2023-01-24 01:31:25.152639: step: 462/464, loss: 1.7358520030975342 2023-01-24 01:31:25.740976: step: 464/464, loss: 4.569807529449463 2023-01-24 01:31:26.350538: step: 466/464, loss: 0.37099695205688477 2023-01-24 01:31:27.026711: step: 468/464, loss: 0.5953925251960754 2023-01-24 01:31:27.641130: step: 470/464, loss: 0.3241918385028839 2023-01-24 01:31:28.217316: step: 472/464, loss: 0.5551413893699646 2023-01-24 01:31:28.869406: step: 474/464, loss: 0.4248945116996765 2023-01-24 01:31:29.458990: step: 476/464, loss: 0.6409351825714111 2023-01-24 01:31:30.093096: step: 478/464, loss: 0.18332374095916748 2023-01-24 01:31:30.713837: step: 480/464, loss: 0.6044588685035706 2023-01-24 01:31:31.411820: step: 482/464, loss: 0.5401633977890015 2023-01-24 01:31:32.035081: step: 484/464, loss: 0.6177085638046265 2023-01-24 01:31:32.697929: step: 486/464, loss: 0.361588716506958 2023-01-24 01:31:33.356930: step: 488/464, loss: 0.4207829535007477 2023-01-24 01:31:33.958714: step: 490/464, loss: 0.5759197473526001 2023-01-24 01:31:34.571085: step: 492/464, loss: 0.22131508588790894 2023-01-24 01:31:35.190851: step: 494/464, loss: 1.4635684490203857 2023-01-24 01:31:35.804105: step: 496/464, loss: 0.40518543124198914 2023-01-24 01:31:36.386427: step: 498/464, loss: 1.0234875679016113 2023-01-24 01:31:37.039469: step: 500/464, loss: 2.552335739135742 2023-01-24 01:31:37.672166: step: 502/464, loss: 1.8648881912231445 2023-01-24 01:31:38.324829: step: 504/464, loss: 0.7620370388031006 2023-01-24 01:31:38.942865: step: 506/464, loss: 0.42183205485343933 2023-01-24 01:31:39.562773: step: 508/464, loss: 0.18581755459308624 2023-01-24 01:31:40.133904: step: 510/464, loss: 0.7951881289482117 2023-01-24 01:31:40.841335: step: 512/464, loss: 0.6534091234207153 2023-01-24 01:31:41.429794: step: 514/464, loss: 0.37720787525177 2023-01-24 01:31:42.124507: step: 516/464, loss: 0.39885827898979187 2023-01-24 01:31:42.745350: step: 518/464, loss: 1.3286685943603516 2023-01-24 01:31:43.447309: step: 520/464, loss: 0.5616076588630676 2023-01-24 01:31:44.096990: step: 522/464, loss: 0.16582699120044708 2023-01-24 01:31:44.746525: step: 524/464, loss: 0.18395079672336578 2023-01-24 01:31:45.340247: step: 526/464, loss: 0.9664916396141052 2023-01-24 01:31:45.923395: step: 528/464, loss: 0.26636892557144165 2023-01-24 01:31:46.504878: step: 530/464, loss: 1.677715539932251 2023-01-24 01:31:47.061999: step: 532/464, loss: 0.21337102353572845 2023-01-24 01:31:47.708273: step: 534/464, loss: 0.9010314345359802 2023-01-24 01:31:48.342421: step: 536/464, loss: 0.503167450428009 2023-01-24 01:31:48.941614: step: 538/464, loss: 0.18851390480995178 2023-01-24 01:31:49.506860: step: 540/464, loss: 0.16893097758293152 2023-01-24 01:31:50.132836: step: 542/464, loss: 0.21447324752807617 2023-01-24 01:31:50.766940: step: 544/464, loss: 0.4860786199569702 2023-01-24 01:31:51.456866: step: 546/464, loss: 0.9197517037391663 2023-01-24 01:31:52.160444: step: 548/464, loss: 0.9261701107025146 2023-01-24 01:31:52.862032: step: 550/464, loss: 1.1221946477890015 2023-01-24 01:31:53.485624: step: 552/464, loss: 0.8768857717514038 2023-01-24 01:31:54.159561: step: 554/464, loss: 0.7108152508735657 2023-01-24 01:31:54.851040: step: 556/464, loss: 0.24862165749073029 2023-01-24 01:31:55.505431: step: 558/464, loss: 0.7068761587142944 2023-01-24 01:31:56.119478: step: 560/464, loss: 1.691831111907959 2023-01-24 01:31:56.811631: step: 562/464, loss: 0.4378052353858948 2023-01-24 01:31:57.489958: step: 564/464, loss: 0.2569923400878906 2023-01-24 01:31:58.107023: step: 566/464, loss: 0.7471778988838196 2023-01-24 01:31:58.786405: step: 568/464, loss: 1.2964478731155396 2023-01-24 01:31:59.418199: step: 570/464, loss: 0.366839736700058 2023-01-24 01:32:00.044158: step: 572/464, loss: 0.4140085279941559 2023-01-24 01:32:00.602834: step: 574/464, loss: 0.33276864886283875 2023-01-24 01:32:01.305796: step: 576/464, loss: 0.5633888840675354 2023-01-24 01:32:01.923338: step: 578/464, loss: 0.847203254699707 2023-01-24 01:32:02.491768: step: 580/464, loss: 0.5664862394332886 2023-01-24 01:32:03.160569: step: 582/464, loss: 0.9577569365501404 2023-01-24 01:32:03.779887: step: 584/464, loss: 0.4908193051815033 2023-01-24 01:32:04.510994: step: 586/464, loss: 0.28701263666152954 2023-01-24 01:32:05.132450: step: 588/464, loss: 0.18440507352352142 2023-01-24 01:32:05.759680: step: 590/464, loss: 2.0263559818267822 2023-01-24 01:32:06.369807: step: 592/464, loss: 0.5223295092582703 2023-01-24 01:32:07.007922: step: 594/464, loss: 0.6685274839401245 2023-01-24 01:32:07.666327: step: 596/464, loss: 0.8350842595100403 2023-01-24 01:32:08.248710: step: 598/464, loss: 0.297424852848053 2023-01-24 01:32:08.896673: step: 600/464, loss: 0.24484041333198547 2023-01-24 01:32:09.489617: step: 602/464, loss: 1.6383540630340576 2023-01-24 01:32:10.163667: step: 604/464, loss: 1.1496939659118652 2023-01-24 01:32:10.823411: step: 606/464, loss: 1.071155309677124 2023-01-24 01:32:11.441643: step: 608/464, loss: 0.5096967220306396 2023-01-24 01:32:12.042936: step: 610/464, loss: 1.3994693756103516 2023-01-24 01:32:12.704451: step: 612/464, loss: 1.5042353868484497 2023-01-24 01:32:13.308511: step: 614/464, loss: 0.18285751342773438 2023-01-24 01:32:13.899539: step: 616/464, loss: 0.4884994328022003 2023-01-24 01:32:14.531615: step: 618/464, loss: 0.25369390845298767 2023-01-24 01:32:15.101692: step: 620/464, loss: 0.2683863639831543 2023-01-24 01:32:15.723637: step: 622/464, loss: 0.5101136565208435 2023-01-24 01:32:16.286552: step: 624/464, loss: 0.21888434886932373 2023-01-24 01:32:16.918340: step: 626/464, loss: 0.7710225582122803 2023-01-24 01:32:17.491165: step: 628/464, loss: 0.8689543604850769 2023-01-24 01:32:18.086935: step: 630/464, loss: 5.544312000274658 2023-01-24 01:32:18.707795: step: 632/464, loss: 0.41301852464675903 2023-01-24 01:32:19.331697: step: 634/464, loss: 0.23599055409431458 2023-01-24 01:32:20.020012: step: 636/464, loss: 0.3407132923603058 2023-01-24 01:32:20.627026: step: 638/464, loss: 1.0151219367980957 2023-01-24 01:32:21.248408: step: 640/464, loss: 0.36375269293785095 2023-01-24 01:32:21.827268: step: 642/464, loss: 1.4476675987243652 2023-01-24 01:32:22.415589: step: 644/464, loss: 0.49768805503845215 2023-01-24 01:32:23.016297: step: 646/464, loss: 1.441270351409912 2023-01-24 01:32:23.760130: step: 648/464, loss: 0.802528440952301 2023-01-24 01:32:24.384559: step: 650/464, loss: 0.31411662697792053 2023-01-24 01:32:24.956333: step: 652/464, loss: 0.21255828440189362 2023-01-24 01:32:25.537162: step: 654/464, loss: 0.6642628312110901 2023-01-24 01:32:26.230044: step: 656/464, loss: 0.27438193559646606 2023-01-24 01:32:26.888594: step: 658/464, loss: 0.16417086124420166 2023-01-24 01:32:27.524061: step: 660/464, loss: 0.2855834364891052 2023-01-24 01:32:28.131971: step: 662/464, loss: 0.450242280960083 2023-01-24 01:32:28.729705: step: 664/464, loss: 0.5720327496528625 2023-01-24 01:32:29.358158: step: 666/464, loss: 1.0651787519454956 2023-01-24 01:32:29.962853: step: 668/464, loss: 0.4257969558238983 2023-01-24 01:32:30.641155: step: 670/464, loss: 0.6946537494659424 2023-01-24 01:32:31.277758: step: 672/464, loss: 0.3971732258796692 2023-01-24 01:32:31.980032: step: 674/464, loss: 0.17086222767829895 2023-01-24 01:32:32.509088: step: 676/464, loss: 0.6342722177505493 2023-01-24 01:32:33.151900: step: 678/464, loss: 0.3377092480659485 2023-01-24 01:32:33.743880: step: 680/464, loss: 0.13712920248508453 2023-01-24 01:32:34.370405: step: 682/464, loss: 0.5758203268051147 2023-01-24 01:32:34.974754: step: 684/464, loss: 0.5807439088821411 2023-01-24 01:32:35.666477: step: 686/464, loss: 0.9199244379997253 2023-01-24 01:32:36.292682: step: 688/464, loss: 0.15518754720687866 2023-01-24 01:32:36.933894: step: 690/464, loss: 0.343379408121109 2023-01-24 01:32:37.522880: step: 692/464, loss: 0.5740495920181274 2023-01-24 01:32:38.130287: step: 694/464, loss: 0.15535210072994232 2023-01-24 01:32:38.701000: step: 696/464, loss: 0.45480844378471375 2023-01-24 01:32:39.363401: step: 698/464, loss: 0.41847825050354004 2023-01-24 01:32:39.953521: step: 700/464, loss: 0.4854355454444885 2023-01-24 01:32:40.612582: step: 702/464, loss: 0.3150523900985718 2023-01-24 01:32:41.281635: step: 704/464, loss: 0.6841596961021423 2023-01-24 01:32:41.890926: step: 706/464, loss: 2.1344518661499023 2023-01-24 01:32:42.583011: step: 708/464, loss: 0.7121348977088928 2023-01-24 01:32:43.120508: step: 710/464, loss: 0.12060072273015976 2023-01-24 01:32:43.766432: step: 712/464, loss: 1.5457645654678345 2023-01-24 01:32:44.432296: step: 714/464, loss: 0.530612587928772 2023-01-24 01:32:45.062611: step: 716/464, loss: 0.3051770329475403 2023-01-24 01:32:45.794694: step: 718/464, loss: 0.6609290838241577 2023-01-24 01:32:46.494346: step: 720/464, loss: 0.9934588670730591 2023-01-24 01:32:47.134325: step: 722/464, loss: 1.0542535781860352 2023-01-24 01:32:47.686651: step: 724/464, loss: 0.2150609791278839 2023-01-24 01:32:48.297077: step: 726/464, loss: 0.07730886340141296 2023-01-24 01:32:48.842013: step: 728/464, loss: 2.659024715423584 2023-01-24 01:32:49.499370: step: 730/464, loss: 0.14896048605442047 2023-01-24 01:32:50.110585: step: 732/464, loss: 0.49377694725990295 2023-01-24 01:32:50.712138: step: 734/464, loss: 0.36152413487434387 2023-01-24 01:32:51.341226: step: 736/464, loss: 0.44300612807273865 2023-01-24 01:32:51.974152: step: 738/464, loss: 0.38425812125205994 2023-01-24 01:32:52.630971: step: 740/464, loss: 0.18084846436977386 2023-01-24 01:32:53.230035: step: 742/464, loss: 0.7159069180488586 2023-01-24 01:32:53.912877: step: 744/464, loss: 0.2239290177822113 2023-01-24 01:32:54.598296: step: 746/464, loss: 0.21971198916435242 2023-01-24 01:32:55.250721: step: 748/464, loss: 0.1720772087574005 2023-01-24 01:32:55.895126: step: 750/464, loss: 1.9814131259918213 2023-01-24 01:32:56.514617: step: 752/464, loss: 0.7665748596191406 2023-01-24 01:32:57.180307: step: 754/464, loss: 0.5029324293136597 2023-01-24 01:32:57.840078: step: 756/464, loss: 0.35377073287963867 2023-01-24 01:32:58.542713: step: 758/464, loss: 0.6000191569328308 2023-01-24 01:32:59.177567: step: 760/464, loss: 0.7425803542137146 2023-01-24 01:32:59.848228: step: 762/464, loss: 0.19316062331199646 2023-01-24 01:33:00.485886: step: 764/464, loss: 0.5338562726974487 2023-01-24 01:33:01.068302: step: 766/464, loss: 0.33601799607276917 2023-01-24 01:33:01.710108: step: 768/464, loss: 0.46154558658599854 2023-01-24 01:33:02.347043: step: 770/464, loss: 0.3042210042476654 2023-01-24 01:33:03.010692: step: 772/464, loss: 0.4761200249195099 2023-01-24 01:33:03.771003: step: 774/464, loss: 0.559170126914978 2023-01-24 01:33:04.362074: step: 776/464, loss: 0.5083188414573669 2023-01-24 01:33:04.921346: step: 778/464, loss: 0.13107992708683014 2023-01-24 01:33:05.547129: step: 780/464, loss: 0.8679192662239075 2023-01-24 01:33:06.098073: step: 782/464, loss: 4.753159523010254 2023-01-24 01:33:06.683560: step: 784/464, loss: 0.3700558543205261 2023-01-24 01:33:07.273132: step: 786/464, loss: 0.7138491272926331 2023-01-24 01:33:07.866146: step: 788/464, loss: 0.270452082157135 2023-01-24 01:33:08.472563: step: 790/464, loss: 0.15871131420135498 2023-01-24 01:33:09.116969: step: 792/464, loss: 0.501331090927124 2023-01-24 01:33:09.691932: step: 794/464, loss: 0.5413829684257507 2023-01-24 01:33:10.363876: step: 796/464, loss: 0.3064698874950409 2023-01-24 01:33:10.977098: step: 798/464, loss: 0.557877779006958 2023-01-24 01:33:11.569796: step: 800/464, loss: 0.45851510763168335 2023-01-24 01:33:12.205535: step: 802/464, loss: 0.6986446976661682 2023-01-24 01:33:12.827225: step: 804/464, loss: 0.48981723189353943 2023-01-24 01:33:13.438146: step: 806/464, loss: 0.5766662359237671 2023-01-24 01:33:14.015843: step: 808/464, loss: 0.21251314878463745 2023-01-24 01:33:14.576836: step: 810/464, loss: 0.16906926035881042 2023-01-24 01:33:15.191326: step: 812/464, loss: 0.42454585433006287 2023-01-24 01:33:15.849856: step: 814/464, loss: 0.5092841982841492 2023-01-24 01:33:16.390928: step: 816/464, loss: 0.31051722168922424 2023-01-24 01:33:17.009531: step: 818/464, loss: 1.3641095161437988 2023-01-24 01:33:17.694478: step: 820/464, loss: 13.601166725158691 2023-01-24 01:33:18.318237: step: 822/464, loss: 3.046140670776367 2023-01-24 01:33:18.987267: step: 824/464, loss: 0.29892173409461975 2023-01-24 01:33:19.616842: step: 826/464, loss: 0.2666424810886383 2023-01-24 01:33:20.222617: step: 828/464, loss: 0.5248275995254517 2023-01-24 01:33:20.842337: step: 830/464, loss: 0.6012407541275024 2023-01-24 01:33:21.511228: step: 832/464, loss: 1.2032874822616577 2023-01-24 01:33:22.213854: step: 834/464, loss: 2.7928154468536377 2023-01-24 01:33:22.901838: step: 836/464, loss: 0.5200483798980713 2023-01-24 01:33:23.533039: step: 838/464, loss: 1.0235695838928223 2023-01-24 01:33:24.092856: step: 840/464, loss: 1.1554210186004639 2023-01-24 01:33:24.743699: step: 842/464, loss: 0.6198804378509521 2023-01-24 01:33:25.390279: step: 844/464, loss: 0.7470586895942688 2023-01-24 01:33:26.018842: step: 846/464, loss: 0.3621271848678589 2023-01-24 01:33:26.723888: step: 848/464, loss: 0.42628931999206543 2023-01-24 01:33:27.320122: step: 850/464, loss: 0.5762926936149597 2023-01-24 01:33:27.923406: step: 852/464, loss: 1.4255084991455078 2023-01-24 01:33:28.655163: step: 854/464, loss: 0.3030523955821991 2023-01-24 01:33:29.283659: step: 856/464, loss: 0.26940327882766724 2023-01-24 01:33:29.892081: step: 858/464, loss: 0.9834439158439636 2023-01-24 01:33:30.554632: step: 860/464, loss: 0.8142734169960022 2023-01-24 01:33:31.166852: step: 862/464, loss: 0.23037023842334747 2023-01-24 01:33:31.786550: step: 864/464, loss: 1.6187280416488647 2023-01-24 01:33:32.467381: step: 866/464, loss: 1.5179880857467651 2023-01-24 01:33:33.092457: step: 868/464, loss: 0.785308837890625 2023-01-24 01:33:33.705495: step: 870/464, loss: 0.8136089444160461 2023-01-24 01:33:34.438768: step: 872/464, loss: 2.153433322906494 2023-01-24 01:33:35.072063: step: 874/464, loss: 0.34129735827445984 2023-01-24 01:33:35.690908: step: 876/464, loss: 0.4100603759288788 2023-01-24 01:33:36.298291: step: 878/464, loss: 1.1842362880706787 2023-01-24 01:33:36.944628: step: 880/464, loss: 0.5711566805839539 2023-01-24 01:33:37.537513: step: 882/464, loss: 0.3342677652835846 2023-01-24 01:33:38.138539: step: 884/464, loss: 0.9091795682907104 2023-01-24 01:33:38.790608: step: 886/464, loss: 0.49165284633636475 2023-01-24 01:33:39.477080: step: 888/464, loss: 0.9058147072792053 2023-01-24 01:33:40.096703: step: 890/464, loss: 0.14935161173343658 2023-01-24 01:33:40.715314: step: 892/464, loss: 0.3612709939479828 2023-01-24 01:33:41.323700: step: 894/464, loss: 0.23926648497581482 2023-01-24 01:33:41.905702: step: 896/464, loss: 0.18639202415943146 2023-01-24 01:33:42.524115: step: 898/464, loss: 0.6763578653335571 2023-01-24 01:33:43.181789: step: 900/464, loss: 1.2428151369094849 2023-01-24 01:33:43.850420: step: 902/464, loss: 0.260333389043808 2023-01-24 01:33:44.459415: step: 904/464, loss: 0.40388208627700806 2023-01-24 01:33:45.085264: step: 906/464, loss: 1.1095035076141357 2023-01-24 01:33:45.696783: step: 908/464, loss: 1.2671597003936768 2023-01-24 01:33:46.235529: step: 910/464, loss: 0.5920587778091431 2023-01-24 01:33:46.838155: step: 912/464, loss: 1.0389655828475952 2023-01-24 01:33:47.446646: step: 914/464, loss: 0.8073325157165527 2023-01-24 01:33:48.022709: step: 916/464, loss: 0.3246050179004669 2023-01-24 01:33:48.647542: step: 918/464, loss: 0.20470765233039856 2023-01-24 01:33:49.287403: step: 920/464, loss: 0.7123910188674927 2023-01-24 01:33:49.920850: step: 922/464, loss: 0.13054336607456207 2023-01-24 01:33:50.553094: step: 924/464, loss: 0.4696577191352844 2023-01-24 01:33:51.202264: step: 926/464, loss: 1.0713528394699097 2023-01-24 01:33:51.762847: step: 928/464, loss: 0.5203782916069031 2023-01-24 01:33:52.259945: step: 930/464, loss: 0.20702479779720306 ================================================== Loss: 0.777 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3092048534956505, 'r': 0.3092048534956505, 'f1': 0.3092048534956505}, 'combined': 0.22783515520732142, 'epoch': 6} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.317877160674492, 'r': 0.28781164410013693, 'f1': 0.3020981980000474}, 'combined': 0.19722473030054907, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32106176638992273, 'r': 0.3040034562211982, 'f1': 0.31229984683932055}, 'combined': 0.2301156766184467, 'epoch': 6} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3050576757105693, 'r': 0.27958955783014566, 'f1': 0.29176890190029564}, 'combined': 0.19048125201780958, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29464285714285715, 'r': 0.358695652173913, 'f1': 0.3235294117647059}, 'combined': 0.16176470588235295, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4791666666666667, 'r': 0.19827586206896552, 'f1': 0.28048780487804875}, 'combined': 0.18699186991869915, 'epoch': 6} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31993440233236153, 'r': 0.2969088203463203, 'f1': 0.3079918607914679}, 'combined': 0.22694137110950266, 'epoch': 5} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30404472815418065, 'r': 0.2555091476965408, 'f1': 0.27767195512385795}, 'combined': 0.181278063966871, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5192307692307693, 'r': 0.23275862068965517, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 5} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:36:37.088245: step: 2/464, loss: 0.5017930269241333 2023-01-24 01:36:37.724462: step: 4/464, loss: 0.126792773604393 2023-01-24 01:36:38.353407: step: 6/464, loss: 0.6471153497695923 2023-01-24 01:36:39.042699: step: 8/464, loss: 0.17067140340805054 2023-01-24 01:36:39.625083: step: 10/464, loss: 0.15195642411708832 2023-01-24 01:36:40.270063: step: 12/464, loss: 0.5397238731384277 2023-01-24 01:36:40.882628: step: 14/464, loss: 0.2516040802001953 2023-01-24 01:36:41.516844: step: 16/464, loss: 0.3369365334510803 2023-01-24 01:36:42.112066: step: 18/464, loss: 0.5769916772842407 2023-01-24 01:36:42.736875: step: 20/464, loss: 0.2530610263347626 2023-01-24 01:36:43.508213: step: 22/464, loss: 0.7644116878509521 2023-01-24 01:36:44.138818: step: 24/464, loss: 0.662712812423706 2023-01-24 01:36:44.757951: step: 26/464, loss: 0.6387485265731812 2023-01-24 01:36:45.366441: step: 28/464, loss: 0.19953793287277222 2023-01-24 01:36:45.980503: step: 30/464, loss: 0.3266963064670563 2023-01-24 01:36:46.587536: step: 32/464, loss: 0.3829626142978668 2023-01-24 01:36:47.274100: step: 34/464, loss: 0.29524633288383484 2023-01-24 01:36:47.907198: step: 36/464, loss: 0.5962638258934021 2023-01-24 01:36:48.581502: step: 38/464, loss: 0.09318985044956207 2023-01-24 01:36:49.226369: step: 40/464, loss: 0.09047455340623856 2023-01-24 01:36:49.816189: step: 42/464, loss: 0.23816466331481934 2023-01-24 01:36:50.453664: step: 44/464, loss: 0.22475215792655945 2023-01-24 01:36:51.064154: step: 46/464, loss: 3.0335137844085693 2023-01-24 01:36:51.681624: step: 48/464, loss: 1.1206142902374268 2023-01-24 01:36:52.375596: step: 50/464, loss: 0.2682996094226837 2023-01-24 01:36:52.961782: step: 52/464, loss: 0.4726658761501312 2023-01-24 01:36:53.557254: step: 54/464, loss: 0.5618013143539429 2023-01-24 01:36:54.169011: step: 56/464, loss: 1.9542077779769897 2023-01-24 01:36:54.802328: step: 58/464, loss: 0.4544164836406708 2023-01-24 01:36:55.375283: step: 60/464, loss: 1.5395740270614624 2023-01-24 01:36:55.980646: step: 62/464, loss: 1.0734786987304688 2023-01-24 01:36:56.556823: step: 64/464, loss: 0.20159535109996796 2023-01-24 01:36:57.236981: step: 66/464, loss: 0.8156777024269104 2023-01-24 01:36:57.843186: step: 68/464, loss: 2.720673084259033 2023-01-24 01:36:58.465703: step: 70/464, loss: 0.6011072397232056 2023-01-24 01:36:59.239120: step: 72/464, loss: 0.6634661555290222 2023-01-24 01:36:59.841528: step: 74/464, loss: 1.25831937789917 2023-01-24 01:37:00.646443: step: 76/464, loss: 0.3787534534931183 2023-01-24 01:37:01.278077: step: 78/464, loss: 0.45780324935913086 2023-01-24 01:37:01.878118: step: 80/464, loss: 0.15130579471588135 2023-01-24 01:37:02.581106: step: 82/464, loss: 0.1392175853252411 2023-01-24 01:37:03.132340: step: 84/464, loss: 0.6962271332740784 2023-01-24 01:37:03.794390: step: 86/464, loss: 0.7196193933486938 2023-01-24 01:37:04.465417: step: 88/464, loss: 0.30974024534225464 2023-01-24 01:37:05.089909: step: 90/464, loss: 0.5273309946060181 2023-01-24 01:37:05.757464: step: 92/464, loss: 1.1774016618728638 2023-01-24 01:37:06.417353: step: 94/464, loss: 0.23848721385002136 2023-01-24 01:37:07.032144: step: 96/464, loss: 0.4920649230480194 2023-01-24 01:37:07.680261: step: 98/464, loss: 0.4335693418979645 2023-01-24 01:37:08.278583: step: 100/464, loss: 0.682791531085968 2023-01-24 01:37:08.959871: step: 102/464, loss: 0.5497469902038574 2023-01-24 01:37:09.599537: step: 104/464, loss: 0.4676864743232727 2023-01-24 01:37:10.155084: step: 106/464, loss: 0.16365735232830048 2023-01-24 01:37:10.807686: step: 108/464, loss: 0.5850328207015991 2023-01-24 01:37:11.399746: step: 110/464, loss: 0.2704683840274811 2023-01-24 01:37:12.056877: step: 112/464, loss: 0.09844101220369339 2023-01-24 01:37:12.757124: step: 114/464, loss: 0.2614676058292389 2023-01-24 01:37:13.444411: step: 116/464, loss: 0.1152581125497818 2023-01-24 01:37:14.072432: step: 118/464, loss: 0.8495705723762512 2023-01-24 01:37:14.739641: step: 120/464, loss: 0.5793513059616089 2023-01-24 01:37:15.377495: step: 122/464, loss: 0.21496832370758057 2023-01-24 01:37:15.935333: step: 124/464, loss: 0.3885643780231476 2023-01-24 01:37:16.592810: step: 126/464, loss: 1.2978103160858154 2023-01-24 01:37:17.191985: step: 128/464, loss: 0.6677225828170776 2023-01-24 01:37:17.831134: step: 130/464, loss: 1.0965478420257568 2023-01-24 01:37:18.481145: step: 132/464, loss: 0.2660011351108551 2023-01-24 01:37:19.096134: step: 134/464, loss: 0.17295731604099274 2023-01-24 01:37:19.751765: step: 136/464, loss: 0.13129302859306335 2023-01-24 01:37:20.359335: step: 138/464, loss: 0.24394181370735168 2023-01-24 01:37:20.983666: step: 140/464, loss: 0.4994749426841736 2023-01-24 01:37:21.604060: step: 142/464, loss: 0.878116250038147 2023-01-24 01:37:22.168126: step: 144/464, loss: 0.31296879053115845 2023-01-24 01:37:22.766585: step: 146/464, loss: 0.15763776004314423 2023-01-24 01:37:23.396135: step: 148/464, loss: 0.25190410017967224 2023-01-24 01:37:24.057833: step: 150/464, loss: 1.953006386756897 2023-01-24 01:37:24.696690: step: 152/464, loss: 0.582210123538971 2023-01-24 01:37:25.357341: step: 154/464, loss: 4.223416328430176 2023-01-24 01:37:26.038253: step: 156/464, loss: 0.4259297549724579 2023-01-24 01:37:26.668460: step: 158/464, loss: 1.2600336074829102 2023-01-24 01:37:27.303871: step: 160/464, loss: 0.362914502620697 2023-01-24 01:37:27.892441: step: 162/464, loss: 0.26163020730018616 2023-01-24 01:37:28.565739: step: 164/464, loss: 0.45940202474594116 2023-01-24 01:37:29.222361: step: 166/464, loss: 0.4737444221973419 2023-01-24 01:37:29.897241: step: 168/464, loss: 0.425026535987854 2023-01-24 01:37:30.574432: step: 170/464, loss: 0.42416107654571533 2023-01-24 01:37:31.227483: step: 172/464, loss: 0.32647231221199036 2023-01-24 01:37:31.926717: step: 174/464, loss: 0.21015892922878265 2023-01-24 01:37:32.559480: step: 176/464, loss: 0.19564324617385864 2023-01-24 01:37:33.264362: step: 178/464, loss: 1.4059756994247437 2023-01-24 01:37:34.020306: step: 180/464, loss: 0.26178687810897827 2023-01-24 01:37:34.665065: step: 182/464, loss: 0.19771495461463928 2023-01-24 01:37:35.303168: step: 184/464, loss: 0.6399397850036621 2023-01-24 01:37:35.872953: step: 186/464, loss: 0.283091276884079 2023-01-24 01:37:36.552684: step: 188/464, loss: 0.25623446702957153 2023-01-24 01:37:37.275692: step: 190/464, loss: 0.27372244000434875 2023-01-24 01:37:37.915645: step: 192/464, loss: 0.18834808468818665 2023-01-24 01:37:38.517284: step: 194/464, loss: 0.09465673565864563 2023-01-24 01:37:39.104899: step: 196/464, loss: 0.4658953845500946 2023-01-24 01:37:39.753893: step: 198/464, loss: 0.32783472537994385 2023-01-24 01:37:40.398084: step: 200/464, loss: 0.7937031984329224 2023-01-24 01:37:41.023178: step: 202/464, loss: 0.13799446821212769 2023-01-24 01:37:41.657316: step: 204/464, loss: 0.3625771403312683 2023-01-24 01:37:42.291603: step: 206/464, loss: 0.3306332528591156 2023-01-24 01:37:42.976293: step: 208/464, loss: 0.2596589922904968 2023-01-24 01:37:43.568318: step: 210/464, loss: 0.2501373589038849 2023-01-24 01:37:44.176588: step: 212/464, loss: 0.42752233147621155 2023-01-24 01:37:44.817297: step: 214/464, loss: 0.2059427797794342 2023-01-24 01:37:45.432537: step: 216/464, loss: 1.4278908967971802 2023-01-24 01:37:46.179250: step: 218/464, loss: 0.21744832396507263 2023-01-24 01:37:46.800362: step: 220/464, loss: 0.6722741723060608 2023-01-24 01:37:47.373360: step: 222/464, loss: 0.7491254210472107 2023-01-24 01:37:48.067660: step: 224/464, loss: 0.4034697711467743 2023-01-24 01:37:48.671353: step: 226/464, loss: 0.5779275894165039 2023-01-24 01:37:49.289402: step: 228/464, loss: 0.1363086998462677 2023-01-24 01:37:49.948331: step: 230/464, loss: 0.7873183488845825 2023-01-24 01:37:50.528122: step: 232/464, loss: 0.16622385382652283 2023-01-24 01:37:51.124920: step: 234/464, loss: 0.5206785798072815 2023-01-24 01:37:51.729278: step: 236/464, loss: 0.15616653859615326 2023-01-24 01:37:52.314162: step: 238/464, loss: 0.2109265774488449 2023-01-24 01:37:52.921007: step: 240/464, loss: 0.5359236598014832 2023-01-24 01:37:53.541858: step: 242/464, loss: 2.9070255756378174 2023-01-24 01:37:54.171575: step: 244/464, loss: 0.3161059617996216 2023-01-24 01:37:54.733909: step: 246/464, loss: 0.27038678526878357 2023-01-24 01:37:55.384307: step: 248/464, loss: 3.062499761581421 2023-01-24 01:37:55.997860: step: 250/464, loss: 0.4249669015407562 2023-01-24 01:37:56.572140: step: 252/464, loss: 1.4178982973098755 2023-01-24 01:37:57.129029: step: 254/464, loss: 0.5252469778060913 2023-01-24 01:37:57.817564: step: 256/464, loss: 0.22781912982463837 2023-01-24 01:37:58.425250: step: 258/464, loss: 0.12805120646953583 2023-01-24 01:37:59.057527: step: 260/464, loss: 0.7952200174331665 2023-01-24 01:37:59.704769: step: 262/464, loss: 4.063023567199707 2023-01-24 01:38:00.389606: step: 264/464, loss: 0.8507735133171082 2023-01-24 01:38:01.067925: step: 266/464, loss: 0.31542882323265076 2023-01-24 01:38:01.693395: step: 268/464, loss: 0.13520748913288116 2023-01-24 01:38:02.343219: step: 270/464, loss: 0.3633846938610077 2023-01-24 01:38:02.958149: step: 272/464, loss: 0.47487807273864746 2023-01-24 01:38:03.565869: step: 274/464, loss: 0.3144719898700714 2023-01-24 01:38:04.230492: step: 276/464, loss: 0.1806526780128479 2023-01-24 01:38:04.950889: step: 278/464, loss: 0.490356981754303 2023-01-24 01:38:05.656224: step: 280/464, loss: 0.217569962143898 2023-01-24 01:38:06.321013: step: 282/464, loss: 0.1519968956708908 2023-01-24 01:38:06.946154: step: 284/464, loss: 0.24375273287296295 2023-01-24 01:38:07.487250: step: 286/464, loss: 0.24348284304141998 2023-01-24 01:38:08.140108: step: 288/464, loss: 0.5655607581138611 2023-01-24 01:38:08.732650: step: 290/464, loss: 0.25451746582984924 2023-01-24 01:38:09.368665: step: 292/464, loss: 0.3051149249076843 2023-01-24 01:38:09.949722: step: 294/464, loss: 0.3064817190170288 2023-01-24 01:38:10.559236: step: 296/464, loss: 0.1428401917219162 2023-01-24 01:38:11.172365: step: 298/464, loss: 1.3966853618621826 2023-01-24 01:38:11.862965: step: 300/464, loss: 0.22358113527297974 2023-01-24 01:38:12.475112: step: 302/464, loss: 0.09851841628551483 2023-01-24 01:38:13.154270: step: 304/464, loss: 0.10326429456472397 2023-01-24 01:38:13.749569: step: 306/464, loss: 0.25323233008384705 2023-01-24 01:38:14.408174: step: 308/464, loss: 0.9947264790534973 2023-01-24 01:38:15.022645: step: 310/464, loss: 0.516456663608551 2023-01-24 01:38:15.655833: step: 312/464, loss: 2.4057650566101074 2023-01-24 01:38:16.331340: step: 314/464, loss: 0.6443356871604919 2023-01-24 01:38:16.992058: step: 316/464, loss: 0.5381366014480591 2023-01-24 01:38:17.637872: step: 318/464, loss: 0.8102694749832153 2023-01-24 01:38:18.259467: step: 320/464, loss: 2.134423017501831 2023-01-24 01:38:18.852839: step: 322/464, loss: 0.17424273490905762 2023-01-24 01:38:19.557342: step: 324/464, loss: 0.2220447063446045 2023-01-24 01:38:20.171132: step: 326/464, loss: 0.3784591853618622 2023-01-24 01:38:20.724755: step: 328/464, loss: 1.0891444683074951 2023-01-24 01:38:21.348553: step: 330/464, loss: 0.3071114718914032 2023-01-24 01:38:21.935442: step: 332/464, loss: 0.33946919441223145 2023-01-24 01:38:22.571761: step: 334/464, loss: 0.7581555247306824 2023-01-24 01:38:23.137997: step: 336/464, loss: 0.3212588429450989 2023-01-24 01:38:23.740826: step: 338/464, loss: 2.019733428955078 2023-01-24 01:38:24.382596: step: 340/464, loss: 0.4056606590747833 2023-01-24 01:38:25.004464: step: 342/464, loss: 0.6060588955879211 2023-01-24 01:38:25.629313: step: 344/464, loss: 0.4233732521533966 2023-01-24 01:38:26.299040: step: 346/464, loss: 0.4451332986354828 2023-01-24 01:38:26.899427: step: 348/464, loss: 1.2351199388504028 2023-01-24 01:38:27.559071: step: 350/464, loss: 0.27783745527267456 2023-01-24 01:38:28.220548: step: 352/464, loss: 0.43464934825897217 2023-01-24 01:38:28.840427: step: 354/464, loss: 0.17238818109035492 2023-01-24 01:38:29.529192: step: 356/464, loss: 0.7823545336723328 2023-01-24 01:38:30.191401: step: 358/464, loss: 0.7749119997024536 2023-01-24 01:38:30.788774: step: 360/464, loss: 0.758780837059021 2023-01-24 01:38:31.416558: step: 362/464, loss: 0.22388920187950134 2023-01-24 01:38:32.008641: step: 364/464, loss: 0.2760586738586426 2023-01-24 01:38:32.692301: step: 366/464, loss: 0.3953625559806824 2023-01-24 01:38:33.363336: step: 368/464, loss: 0.6412069201469421 2023-01-24 01:38:34.077193: step: 370/464, loss: 1.1693072319030762 2023-01-24 01:38:34.750796: step: 372/464, loss: 0.2744199335575104 2023-01-24 01:38:35.367748: step: 374/464, loss: 0.8128865957260132 2023-01-24 01:38:36.004527: step: 376/464, loss: 0.328846275806427 2023-01-24 01:38:36.691288: step: 378/464, loss: 0.9959462881088257 2023-01-24 01:38:37.234675: step: 380/464, loss: 0.6619890928268433 2023-01-24 01:38:37.787065: step: 382/464, loss: 0.21763145923614502 2023-01-24 01:38:38.410167: step: 384/464, loss: 0.30981579422950745 2023-01-24 01:38:39.095939: step: 386/464, loss: 0.24547596275806427 2023-01-24 01:38:39.734673: step: 388/464, loss: 0.48541855812072754 2023-01-24 01:38:40.359473: step: 390/464, loss: 0.2570076584815979 2023-01-24 01:38:40.991781: step: 392/464, loss: 0.43805766105651855 2023-01-24 01:38:41.653562: step: 394/464, loss: 0.3368385434150696 2023-01-24 01:38:42.211954: step: 396/464, loss: 0.31896984577178955 2023-01-24 01:38:42.904438: step: 398/464, loss: 0.4800797998905182 2023-01-24 01:38:43.510333: step: 400/464, loss: 0.4680897295475006 2023-01-24 01:38:44.175433: step: 402/464, loss: 0.4075247049331665 2023-01-24 01:38:44.805931: step: 404/464, loss: 0.3742852807044983 2023-01-24 01:38:45.493937: step: 406/464, loss: 0.6806045174598694 2023-01-24 01:38:46.059830: step: 408/464, loss: 0.34732770919799805 2023-01-24 01:38:46.745960: step: 410/464, loss: 0.5915437936782837 2023-01-24 01:38:47.431698: step: 412/464, loss: 0.5556359887123108 2023-01-24 01:38:48.094327: step: 414/464, loss: 0.676139771938324 2023-01-24 01:38:48.697549: step: 416/464, loss: 0.20332291722297668 2023-01-24 01:38:49.309592: step: 418/464, loss: 0.34385713934898376 2023-01-24 01:38:49.984144: step: 420/464, loss: 0.9345203042030334 2023-01-24 01:38:50.519888: step: 422/464, loss: 0.31550469994544983 2023-01-24 01:38:51.210336: step: 424/464, loss: 0.47289949655532837 2023-01-24 01:38:51.820282: step: 426/464, loss: 0.098211370408535 2023-01-24 01:38:52.408564: step: 428/464, loss: 0.1142539530992508 2023-01-24 01:38:52.938864: step: 430/464, loss: 0.4738588035106659 2023-01-24 01:38:53.560934: step: 432/464, loss: 0.3524607717990875 2023-01-24 01:38:54.146351: step: 434/464, loss: 0.0996241495013237 2023-01-24 01:38:54.685963: step: 436/464, loss: 0.22331421077251434 2023-01-24 01:38:55.319965: step: 438/464, loss: 0.9253294467926025 2023-01-24 01:38:55.969420: step: 440/464, loss: 0.3071550130844116 2023-01-24 01:38:56.545751: step: 442/464, loss: 0.44775813817977905 2023-01-24 01:38:57.155691: step: 444/464, loss: 5.230406761169434 2023-01-24 01:38:57.774584: step: 446/464, loss: 0.38780683279037476 2023-01-24 01:38:58.431651: step: 448/464, loss: 0.16223810613155365 2023-01-24 01:38:59.152130: step: 450/464, loss: 0.394161194562912 2023-01-24 01:38:59.741061: step: 452/464, loss: 1.3057899475097656 2023-01-24 01:39:00.318788: step: 454/464, loss: 2.0477466583251953 2023-01-24 01:39:00.992103: step: 456/464, loss: 0.2690102756023407 2023-01-24 01:39:01.679277: step: 458/464, loss: 0.4495788812637329 2023-01-24 01:39:02.276413: step: 460/464, loss: 2.760188341140747 2023-01-24 01:39:02.949423: step: 462/464, loss: 0.1652659773826599 2023-01-24 01:39:03.654126: step: 464/464, loss: 0.8136246204376221 2023-01-24 01:39:04.245199: step: 466/464, loss: 0.64471435546875 2023-01-24 01:39:04.878939: step: 468/464, loss: 1.094394564628601 2023-01-24 01:39:05.535263: step: 470/464, loss: 0.3615909814834595 2023-01-24 01:39:06.209832: step: 472/464, loss: 0.12893834710121155 2023-01-24 01:39:06.877123: step: 474/464, loss: 0.3952723443508148 2023-01-24 01:39:07.501629: step: 476/464, loss: 0.6580344438552856 2023-01-24 01:39:08.213052: step: 478/464, loss: 0.6497165560722351 2023-01-24 01:39:08.795261: step: 480/464, loss: 0.45428207516670227 2023-01-24 01:39:09.372371: step: 482/464, loss: 0.20757251977920532 2023-01-24 01:39:09.938810: step: 484/464, loss: 0.4601346552371979 2023-01-24 01:39:10.570270: step: 486/464, loss: 0.8326131701469421 2023-01-24 01:39:11.256015: step: 488/464, loss: 0.3128070831298828 2023-01-24 01:39:11.911330: step: 490/464, loss: 0.519420862197876 2023-01-24 01:39:12.547935: step: 492/464, loss: 0.44036027789115906 2023-01-24 01:39:13.180665: step: 494/464, loss: 0.20831894874572754 2023-01-24 01:39:13.817276: step: 496/464, loss: 0.6344039440155029 2023-01-24 01:39:14.443691: step: 498/464, loss: 0.98247891664505 2023-01-24 01:39:15.037807: step: 500/464, loss: 0.3340934216976166 2023-01-24 01:39:15.678594: step: 502/464, loss: 0.3000660538673401 2023-01-24 01:39:16.326232: step: 504/464, loss: 0.2559548616409302 2023-01-24 01:39:16.997080: step: 506/464, loss: 0.9728012084960938 2023-01-24 01:39:17.638624: step: 508/464, loss: 1.2932955026626587 2023-01-24 01:39:18.302680: step: 510/464, loss: 0.6466187834739685 2023-01-24 01:39:18.819673: step: 512/464, loss: 0.1755853146314621 2023-01-24 01:39:19.412585: step: 514/464, loss: 0.22444133460521698 2023-01-24 01:39:19.996896: step: 516/464, loss: 0.14731839299201965 2023-01-24 01:39:20.605848: step: 518/464, loss: 0.32209834456443787 2023-01-24 01:39:21.242574: step: 520/464, loss: 0.19032403826713562 2023-01-24 01:39:21.779771: step: 522/464, loss: 0.3677624464035034 2023-01-24 01:39:22.437171: step: 524/464, loss: 1.061833143234253 2023-01-24 01:39:23.035027: step: 526/464, loss: 0.13024339079856873 2023-01-24 01:39:23.649378: step: 528/464, loss: 0.40342581272125244 2023-01-24 01:39:24.236388: step: 530/464, loss: 0.12615080177783966 2023-01-24 01:39:24.821123: step: 532/464, loss: 0.3409889340400696 2023-01-24 01:39:25.513753: step: 534/464, loss: 0.3384639024734497 2023-01-24 01:39:26.175062: step: 536/464, loss: 0.2110147923231125 2023-01-24 01:39:26.778329: step: 538/464, loss: 2.6247472763061523 2023-01-24 01:39:27.392058: step: 540/464, loss: 0.30893072485923767 2023-01-24 01:39:28.002169: step: 542/464, loss: 0.2871909439563751 2023-01-24 01:39:28.622817: step: 544/464, loss: 0.7530099153518677 2023-01-24 01:39:29.269247: step: 546/464, loss: 0.7604941129684448 2023-01-24 01:39:29.907896: step: 548/464, loss: 0.5688288807868958 2023-01-24 01:39:30.526596: step: 550/464, loss: 0.5931585431098938 2023-01-24 01:39:31.158210: step: 552/464, loss: 0.3792456388473511 2023-01-24 01:39:31.796834: step: 554/464, loss: 0.5893974304199219 2023-01-24 01:39:32.435422: step: 556/464, loss: 0.23130621016025543 2023-01-24 01:39:33.085061: step: 558/464, loss: 1.0410833358764648 2023-01-24 01:39:33.768889: step: 560/464, loss: 0.344408243894577 2023-01-24 01:39:34.504307: step: 562/464, loss: 0.7938024401664734 2023-01-24 01:39:35.127719: step: 564/464, loss: 1.2642261981964111 2023-01-24 01:39:35.784893: step: 566/464, loss: 0.3455996811389923 2023-01-24 01:39:36.383124: step: 568/464, loss: 0.5413110256195068 2023-01-24 01:39:37.050447: step: 570/464, loss: 0.1074780598282814 2023-01-24 01:39:37.620059: step: 572/464, loss: 0.6459642648696899 2023-01-24 01:39:38.236036: step: 574/464, loss: 0.5658280849456787 2023-01-24 01:39:38.925909: step: 576/464, loss: 0.42404305934906006 2023-01-24 01:39:39.549964: step: 578/464, loss: 0.18226858973503113 2023-01-24 01:39:40.220466: step: 580/464, loss: 2.8185646533966064 2023-01-24 01:39:40.848094: step: 582/464, loss: 0.381437748670578 2023-01-24 01:39:41.569307: step: 584/464, loss: 1.764461636543274 2023-01-24 01:39:42.220291: step: 586/464, loss: 0.28118273615837097 2023-01-24 01:39:42.877990: step: 588/464, loss: 0.4842296838760376 2023-01-24 01:39:43.484247: step: 590/464, loss: 0.3451806604862213 2023-01-24 01:39:44.139094: step: 592/464, loss: 0.38013893365859985 2023-01-24 01:39:44.652013: step: 594/464, loss: 0.2910463213920593 2023-01-24 01:39:45.315539: step: 596/464, loss: 0.2851117253303528 2023-01-24 01:39:45.926926: step: 598/464, loss: 0.8281416893005371 2023-01-24 01:39:46.572399: step: 600/464, loss: 1.1513570547103882 2023-01-24 01:39:47.200412: step: 602/464, loss: 0.38009434938430786 2023-01-24 01:39:47.884131: step: 604/464, loss: 0.43365761637687683 2023-01-24 01:39:48.522493: step: 606/464, loss: 0.8094660043716431 2023-01-24 01:39:49.137686: step: 608/464, loss: 0.24837060272693634 2023-01-24 01:39:49.771851: step: 610/464, loss: 0.9438183307647705 2023-01-24 01:39:50.374329: step: 612/464, loss: 0.4907517731189728 2023-01-24 01:39:51.001229: step: 614/464, loss: 0.30295583605766296 2023-01-24 01:39:51.592818: step: 616/464, loss: 0.1532827913761139 2023-01-24 01:39:52.246648: step: 618/464, loss: 0.47530195116996765 2023-01-24 01:39:52.827027: step: 620/464, loss: 0.32910770177841187 2023-01-24 01:39:53.446448: step: 622/464, loss: 0.25522682070732117 2023-01-24 01:39:54.104769: step: 624/464, loss: 0.5116230249404907 2023-01-24 01:39:54.665514: step: 626/464, loss: 0.3476697504520416 2023-01-24 01:39:55.388670: step: 628/464, loss: 0.1793689727783203 2023-01-24 01:39:56.015446: step: 630/464, loss: 0.7922105193138123 2023-01-24 01:39:56.627923: step: 632/464, loss: 0.3281751275062561 2023-01-24 01:39:57.259366: step: 634/464, loss: 0.13465668261051178 2023-01-24 01:39:57.902273: step: 636/464, loss: 0.7905817031860352 2023-01-24 01:39:58.459773: step: 638/464, loss: 1.7722148895263672 2023-01-24 01:39:59.102347: step: 640/464, loss: 0.4341278076171875 2023-01-24 01:39:59.679176: step: 642/464, loss: 0.5807147026062012 2023-01-24 01:40:00.352935: step: 644/464, loss: 0.25977441668510437 2023-01-24 01:40:00.974570: step: 646/464, loss: 0.6894766092300415 2023-01-24 01:40:01.564345: step: 648/464, loss: 0.36047056317329407 2023-01-24 01:40:02.102953: step: 650/464, loss: 0.6689058542251587 2023-01-24 01:40:02.746873: step: 652/464, loss: 0.8208218216896057 2023-01-24 01:40:03.396046: step: 654/464, loss: 0.21951250731945038 2023-01-24 01:40:04.058829: step: 656/464, loss: 0.6928043365478516 2023-01-24 01:40:04.677425: step: 658/464, loss: 0.7265662550926208 2023-01-24 01:40:05.313447: step: 660/464, loss: 0.390576034784317 2023-01-24 01:40:05.987351: step: 662/464, loss: 0.728776752948761 2023-01-24 01:40:06.573048: step: 664/464, loss: 0.3489152193069458 2023-01-24 01:40:07.196488: step: 666/464, loss: 0.29477861523628235 2023-01-24 01:40:07.734653: step: 668/464, loss: 0.95859295129776 2023-01-24 01:40:08.321459: step: 670/464, loss: 0.4374726116657257 2023-01-24 01:40:08.907745: step: 672/464, loss: 0.29404279589653015 2023-01-24 01:40:09.493942: step: 674/464, loss: 0.20971252024173737 2023-01-24 01:40:10.102920: step: 676/464, loss: 0.49006038904190063 2023-01-24 01:40:10.791457: step: 678/464, loss: 0.49993690848350525 2023-01-24 01:40:11.462586: step: 680/464, loss: 0.0939859002828598 2023-01-24 01:40:12.040632: step: 682/464, loss: 0.21504995226860046 2023-01-24 01:40:12.640273: step: 684/464, loss: 0.27052539587020874 2023-01-24 01:40:13.301076: step: 686/464, loss: 0.4174782335758209 2023-01-24 01:40:13.918870: step: 688/464, loss: 0.28174564242362976 2023-01-24 01:40:14.506377: step: 690/464, loss: 0.7273067235946655 2023-01-24 01:40:15.110239: step: 692/464, loss: 0.2717307209968567 2023-01-24 01:40:15.756116: step: 694/464, loss: 0.5884332060813904 2023-01-24 01:40:16.345469: step: 696/464, loss: 0.15132024884223938 2023-01-24 01:40:17.038746: step: 698/464, loss: 1.1313226222991943 2023-01-24 01:40:17.697019: step: 700/464, loss: 0.2777230143547058 2023-01-24 01:40:18.379377: step: 702/464, loss: 0.385998010635376 2023-01-24 01:40:18.989328: step: 704/464, loss: 0.6033426523208618 2023-01-24 01:40:19.574377: step: 706/464, loss: 0.635158896446228 2023-01-24 01:40:20.194118: step: 708/464, loss: 0.6942959427833557 2023-01-24 01:40:20.825794: step: 710/464, loss: 0.2962024211883545 2023-01-24 01:40:21.362555: step: 712/464, loss: 0.5487346053123474 2023-01-24 01:40:21.987534: step: 714/464, loss: 1.2508326768875122 2023-01-24 01:40:22.546353: step: 716/464, loss: 0.24779725074768066 2023-01-24 01:40:23.160265: step: 718/464, loss: 0.41226813197135925 2023-01-24 01:40:23.835894: step: 720/464, loss: 0.23897162079811096 2023-01-24 01:40:24.446163: step: 722/464, loss: 2.119680404663086 2023-01-24 01:40:25.085899: step: 724/464, loss: 5.4505295753479 2023-01-24 01:40:25.683890: step: 726/464, loss: 0.42558956146240234 2023-01-24 01:40:26.353243: step: 728/464, loss: 1.017122507095337 2023-01-24 01:40:26.993574: step: 730/464, loss: 1.044908881187439 2023-01-24 01:40:27.709327: step: 732/464, loss: 0.33385512232780457 2023-01-24 01:40:28.353012: step: 734/464, loss: 0.29980790615081787 2023-01-24 01:40:28.949023: step: 736/464, loss: 0.31169456243515015 2023-01-24 01:40:29.622246: step: 738/464, loss: 0.1868811845779419 2023-01-24 01:40:30.267000: step: 740/464, loss: 0.3444088101387024 2023-01-24 01:40:30.931704: step: 742/464, loss: 0.8411497473716736 2023-01-24 01:40:31.515651: step: 744/464, loss: 0.2264484465122223 2023-01-24 01:40:32.150039: step: 746/464, loss: 0.9234243631362915 2023-01-24 01:40:32.775700: step: 748/464, loss: 0.4514959454536438 2023-01-24 01:40:33.356256: step: 750/464, loss: 0.6468408107757568 2023-01-24 01:40:33.959287: step: 752/464, loss: 1.5808587074279785 2023-01-24 01:40:34.547396: step: 754/464, loss: 0.9604551792144775 2023-01-24 01:40:35.147922: step: 756/464, loss: 0.7801356315612793 2023-01-24 01:40:35.760761: step: 758/464, loss: 0.9812237024307251 2023-01-24 01:40:36.355662: step: 760/464, loss: 1.0901546478271484 2023-01-24 01:40:36.966498: step: 762/464, loss: 0.48776379227638245 2023-01-24 01:40:37.585243: step: 764/464, loss: 0.34306395053863525 2023-01-24 01:40:38.215679: step: 766/464, loss: 0.42013850808143616 2023-01-24 01:40:38.835360: step: 768/464, loss: 0.15818996727466583 2023-01-24 01:40:39.617646: step: 770/464, loss: 0.7389655709266663 2023-01-24 01:40:40.298022: step: 772/464, loss: 0.5518840551376343 2023-01-24 01:40:40.962539: step: 774/464, loss: 0.4506901800632477 2023-01-24 01:40:41.603793: step: 776/464, loss: 0.24919630587100983 2023-01-24 01:40:42.204890: step: 778/464, loss: 0.33095237612724304 2023-01-24 01:40:42.812707: step: 780/464, loss: 0.397079735994339 2023-01-24 01:40:43.436508: step: 782/464, loss: 0.14219141006469727 2023-01-24 01:40:44.080643: step: 784/464, loss: 0.7468022108078003 2023-01-24 01:40:44.843053: step: 786/464, loss: 0.26718056201934814 2023-01-24 01:40:45.521763: step: 788/464, loss: 0.5908616781234741 2023-01-24 01:40:46.175948: step: 790/464, loss: 0.34508705139160156 2023-01-24 01:40:46.837596: step: 792/464, loss: 0.2724108099937439 2023-01-24 01:40:47.423956: step: 794/464, loss: 0.2742052376270294 2023-01-24 01:40:47.959378: step: 796/464, loss: 0.1287633329629898 2023-01-24 01:40:48.578314: step: 798/464, loss: 0.8055970668792725 2023-01-24 01:40:49.212884: step: 800/464, loss: 0.2703949511051178 2023-01-24 01:40:49.842322: step: 802/464, loss: 0.17353391647338867 2023-01-24 01:40:50.497903: step: 804/464, loss: 0.8829239010810852 2023-01-24 01:40:51.088331: step: 806/464, loss: 0.2704838216304779 2023-01-24 01:40:51.708340: step: 808/464, loss: 0.20319268107414246 2023-01-24 01:40:52.432454: step: 810/464, loss: 0.2762671709060669 2023-01-24 01:40:53.143027: step: 812/464, loss: 0.8066801428794861 2023-01-24 01:40:53.775385: step: 814/464, loss: 0.4635451138019562 2023-01-24 01:40:54.394962: step: 816/464, loss: 0.48344099521636963 2023-01-24 01:40:55.048164: step: 818/464, loss: 1.341935396194458 2023-01-24 01:40:55.638425: step: 820/464, loss: 0.2656901180744171 2023-01-24 01:40:56.237566: step: 822/464, loss: 0.21394850313663483 2023-01-24 01:40:56.871574: step: 824/464, loss: 0.6438600420951843 2023-01-24 01:40:57.495567: step: 826/464, loss: 0.5125842690467834 2023-01-24 01:40:58.146696: step: 828/464, loss: 0.7177165746688843 2023-01-24 01:40:58.773560: step: 830/464, loss: 1.0645838975906372 2023-01-24 01:40:59.411854: step: 832/464, loss: 0.374090313911438 2023-01-24 01:41:00.022570: step: 834/464, loss: 0.9594247341156006 2023-01-24 01:41:00.600860: step: 836/464, loss: 1.5267280340194702 2023-01-24 01:41:01.225735: step: 838/464, loss: 0.12863865494728088 2023-01-24 01:41:01.856278: step: 840/464, loss: 0.5739308595657349 2023-01-24 01:41:02.487771: step: 842/464, loss: 0.41546162962913513 2023-01-24 01:41:03.115021: step: 844/464, loss: 0.4548647105693817 2023-01-24 01:41:03.776308: step: 846/464, loss: 0.3953627049922943 2023-01-24 01:41:04.380991: step: 848/464, loss: 0.6522714495658875 2023-01-24 01:41:05.031067: step: 850/464, loss: 0.42764395475387573 2023-01-24 01:41:05.692405: step: 852/464, loss: 0.47073379158973694 2023-01-24 01:41:06.270057: step: 854/464, loss: 0.48062995076179504 2023-01-24 01:41:06.915773: step: 856/464, loss: 0.22322116792201996 2023-01-24 01:41:07.445451: step: 858/464, loss: 0.31415146589279175 2023-01-24 01:41:08.046036: step: 860/464, loss: 0.3948202133178711 2023-01-24 01:41:08.642602: step: 862/464, loss: 0.3536752462387085 2023-01-24 01:41:09.264501: step: 864/464, loss: 0.4739671051502228 2023-01-24 01:41:09.822644: step: 866/464, loss: 0.8376191854476929 2023-01-24 01:41:10.436889: step: 868/464, loss: 1.064820408821106 2023-01-24 01:41:11.086648: step: 870/464, loss: 0.45677515864372253 2023-01-24 01:41:11.726271: step: 872/464, loss: 0.4026552140712738 2023-01-24 01:41:12.352602: step: 874/464, loss: 0.8032701015472412 2023-01-24 01:41:12.976077: step: 876/464, loss: 0.14895497262477875 2023-01-24 01:41:13.599581: step: 878/464, loss: 1.5880987644195557 2023-01-24 01:41:14.241570: step: 880/464, loss: 0.3673778176307678 2023-01-24 01:41:14.813523: step: 882/464, loss: 0.43994849920272827 2023-01-24 01:41:15.482363: step: 884/464, loss: 1.6903603076934814 2023-01-24 01:41:16.091792: step: 886/464, loss: 0.43105217814445496 2023-01-24 01:41:16.740175: step: 888/464, loss: 0.4356974959373474 2023-01-24 01:41:17.374378: step: 890/464, loss: 0.5950191617012024 2023-01-24 01:41:17.982204: step: 892/464, loss: 0.3932482898235321 2023-01-24 01:41:18.577686: step: 894/464, loss: 0.19387339055538177 2023-01-24 01:41:19.186534: step: 896/464, loss: 0.3198925256729126 2023-01-24 01:41:19.792613: step: 898/464, loss: 0.4456925392150879 2023-01-24 01:41:20.520233: step: 900/464, loss: 0.17775744199752808 2023-01-24 01:41:21.127737: step: 902/464, loss: 0.40187326073646545 2023-01-24 01:41:21.695510: step: 904/464, loss: 0.18394802510738373 2023-01-24 01:41:22.311615: step: 906/464, loss: 0.38583412766456604 2023-01-24 01:41:22.945800: step: 908/464, loss: 0.2537674903869629 2023-01-24 01:41:23.607964: step: 910/464, loss: 0.318142294883728 2023-01-24 01:41:24.334276: step: 912/464, loss: 0.6849666833877563 2023-01-24 01:41:24.969936: step: 914/464, loss: 0.23542797565460205 2023-01-24 01:41:25.649960: step: 916/464, loss: 1.5675033330917358 2023-01-24 01:41:26.274652: step: 918/464, loss: 1.523693561553955 2023-01-24 01:41:26.849220: step: 920/464, loss: 0.32572609186172485 2023-01-24 01:41:27.451481: step: 922/464, loss: 0.35879573225975037 2023-01-24 01:41:28.075859: step: 924/464, loss: 0.34379321336746216 2023-01-24 01:41:28.728703: step: 926/464, loss: 0.3060515522956848 2023-01-24 01:41:29.343919: step: 928/464, loss: 0.1787095069885254 2023-01-24 01:41:29.848812: step: 930/464, loss: 0.1375858634710312 ================================================== Loss: 0.586 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31658952866861034, 'r': 0.2943621803560134, 'f1': 0.30507152221754}, 'combined': 0.22478954268660842, 'epoch': 7} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3343227038625394, 'r': 0.26524776504796516, 'f1': 0.2958062633253805}, 'combined': 0.1931170423782277, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31676964169456845, 'r': 0.3077534279840969, 'f1': 0.3121964514872359}, 'combined': 0.23003949056954223, 'epoch': 7} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.34612105562425766, 'r': 0.26698042858069465, 'f1': 0.30144291003045776}, 'combined': 0.1967969257193662, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32600247524752474, 'r': 0.31239326375711574, 'f1': 0.3190528100775194}, 'combined': 0.23509154426764586, 'epoch': 7} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33851282874668454, 'r': 0.25489487564029506, 'f1': 0.29081248776561686}, 'combined': 0.18985685729776022, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3212365591397849, 'r': 0.2845238095238095, 'f1': 0.30176767676767674}, 'combined': 0.20117845117845115, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2777777777777778, 'r': 0.32608695652173914, 'f1': 0.3}, 'combined': 0.15, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31993440233236153, 'r': 0.2969088203463203, 'f1': 0.3079918607914679}, 'combined': 0.22694137110950266, 'epoch': 5} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30404472815418065, 'r': 0.2555091476965408, 'f1': 0.27767195512385795}, 'combined': 0.181278063966871, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5192307692307693, 'r': 0.23275862068965517, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 5} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:44:05.882523: step: 2/464, loss: 0.5256147980690002 2023-01-24 01:44:06.519897: step: 4/464, loss: 0.16271987557411194 2023-01-24 01:44:07.101584: step: 6/464, loss: 0.24483849108219147 2023-01-24 01:44:07.720921: step: 8/464, loss: 0.11545782536268234 2023-01-24 01:44:08.354651: step: 10/464, loss: 0.4342447817325592 2023-01-24 01:44:08.990863: step: 12/464, loss: 0.13910457491874695 2023-01-24 01:44:09.530281: step: 14/464, loss: 0.20827490091323853 2023-01-24 01:44:10.127045: step: 16/464, loss: 0.2899113893508911 2023-01-24 01:44:10.749089: step: 18/464, loss: 1.2569013833999634 2023-01-24 01:44:11.361166: step: 20/464, loss: 0.6739532947540283 2023-01-24 01:44:11.966067: step: 22/464, loss: 0.6435673832893372 2023-01-24 01:44:12.519964: step: 24/464, loss: 0.2875503897666931 2023-01-24 01:44:13.180929: step: 26/464, loss: 0.6059324145317078 2023-01-24 01:44:13.857235: step: 28/464, loss: 0.37544775009155273 2023-01-24 01:44:14.507508: step: 30/464, loss: 0.6912835836410522 2023-01-24 01:44:15.104248: step: 32/464, loss: 0.20572799444198608 2023-01-24 01:44:15.625913: step: 34/464, loss: 0.49429890513420105 2023-01-24 01:44:16.299996: step: 36/464, loss: 0.45233234763145447 2023-01-24 01:44:16.969562: step: 38/464, loss: 0.16969938576221466 2023-01-24 01:44:17.631714: step: 40/464, loss: 0.11263077706098557 2023-01-24 01:44:18.257028: step: 42/464, loss: 0.5201073288917542 2023-01-24 01:44:18.916632: step: 44/464, loss: 0.3596522808074951 2023-01-24 01:44:19.501938: step: 46/464, loss: 0.4956699013710022 2023-01-24 01:44:20.178490: step: 48/464, loss: 0.17830289900302887 2023-01-24 01:44:20.796475: step: 50/464, loss: 0.30615612864494324 2023-01-24 01:44:21.387502: step: 52/464, loss: 0.20722505450248718 2023-01-24 01:44:22.023202: step: 54/464, loss: 0.13355082273483276 2023-01-24 01:44:22.629175: step: 56/464, loss: 0.3708045184612274 2023-01-24 01:44:23.270703: step: 58/464, loss: 0.3382740020751953 2023-01-24 01:44:23.862472: step: 60/464, loss: 0.2672390639781952 2023-01-24 01:44:24.526721: step: 62/464, loss: 0.3419777750968933 2023-01-24 01:44:25.148241: step: 64/464, loss: 0.22744600474834442 2023-01-24 01:44:25.756210: step: 66/464, loss: 0.5304995179176331 2023-01-24 01:44:26.362648: step: 68/464, loss: 0.5001859068870544 2023-01-24 01:44:26.969457: step: 70/464, loss: 1.7130024433135986 2023-01-24 01:44:27.547336: step: 72/464, loss: 0.48129764199256897 2023-01-24 01:44:28.150169: step: 74/464, loss: 0.40763619542121887 2023-01-24 01:44:28.791784: step: 76/464, loss: 0.3825840950012207 2023-01-24 01:44:29.438949: step: 78/464, loss: 0.35622477531433105 2023-01-24 01:44:29.990340: step: 80/464, loss: 3.721479654312134 2023-01-24 01:44:30.613167: step: 82/464, loss: 1.761260986328125 2023-01-24 01:44:31.327486: step: 84/464, loss: 0.5433917045593262 2023-01-24 01:44:31.935607: step: 86/464, loss: 3.278985023498535 2023-01-24 01:44:32.518382: step: 88/464, loss: 0.36821499466896057 2023-01-24 01:44:33.137431: step: 90/464, loss: 0.6947041749954224 2023-01-24 01:44:33.795935: step: 92/464, loss: 0.18669536709785461 2023-01-24 01:44:34.375453: step: 94/464, loss: 0.603237509727478 2023-01-24 01:44:35.013989: step: 96/464, loss: 0.5378149747848511 2023-01-24 01:44:35.629185: step: 98/464, loss: 0.9626664519309998 2023-01-24 01:44:36.221008: step: 100/464, loss: 0.14888140559196472 2023-01-24 01:44:36.853132: step: 102/464, loss: 0.12726211547851562 2023-01-24 01:44:37.447164: step: 104/464, loss: 0.18909582495689392 2023-01-24 01:44:38.045565: step: 106/464, loss: 0.2111038863658905 2023-01-24 01:44:38.630193: step: 108/464, loss: 0.28433895111083984 2023-01-24 01:44:39.220612: step: 110/464, loss: 0.1759897619485855 2023-01-24 01:44:39.744150: step: 112/464, loss: 1.5583981275558472 2023-01-24 01:44:40.348048: step: 114/464, loss: 6.815537929534912 2023-01-24 01:44:40.931413: step: 116/464, loss: 1.2100276947021484 2023-01-24 01:44:41.524602: step: 118/464, loss: 0.5964804887771606 2023-01-24 01:44:42.177185: step: 120/464, loss: 0.2303905189037323 2023-01-24 01:44:42.741919: step: 122/464, loss: 0.14260776340961456 2023-01-24 01:44:43.334157: step: 124/464, loss: 0.147199809551239 2023-01-24 01:44:43.898946: step: 126/464, loss: 0.3138817846775055 2023-01-24 01:44:44.520208: step: 128/464, loss: 0.6843975186347961 2023-01-24 01:44:45.137470: step: 130/464, loss: 0.1680045872926712 2023-01-24 01:44:45.782843: step: 132/464, loss: 0.20160116255283356 2023-01-24 01:44:46.488928: step: 134/464, loss: 0.4476246237754822 2023-01-24 01:44:47.156071: step: 136/464, loss: 0.7357347011566162 2023-01-24 01:44:47.787308: step: 138/464, loss: 0.2661999464035034 2023-01-24 01:44:48.413077: step: 140/464, loss: 0.5534363985061646 2023-01-24 01:44:49.064782: step: 142/464, loss: 0.1794956773519516 2023-01-24 01:44:49.673323: step: 144/464, loss: 0.22185489535331726 2023-01-24 01:44:50.375936: step: 146/464, loss: 0.771228551864624 2023-01-24 01:44:51.003273: step: 148/464, loss: 1.5506646633148193 2023-01-24 01:44:51.661955: step: 150/464, loss: 0.5256957411766052 2023-01-24 01:44:52.336548: step: 152/464, loss: 0.30285948514938354 2023-01-24 01:44:53.012673: step: 154/464, loss: 0.6013203263282776 2023-01-24 01:44:53.654321: step: 156/464, loss: 0.38056620955467224 2023-01-24 01:44:54.286646: step: 158/464, loss: 0.1869347244501114 2023-01-24 01:44:54.939895: step: 160/464, loss: 0.6900219321250916 2023-01-24 01:44:55.593983: step: 162/464, loss: 0.124812051653862 2023-01-24 01:44:56.160647: step: 164/464, loss: 0.5109988451004028 2023-01-24 01:44:56.789256: step: 166/464, loss: 0.2805618643760681 2023-01-24 01:44:57.395945: step: 168/464, loss: 0.6953806281089783 2023-01-24 01:44:57.983682: step: 170/464, loss: 1.1952989101409912 2023-01-24 01:44:58.605046: step: 172/464, loss: 0.3715342581272125 2023-01-24 01:44:59.188419: step: 174/464, loss: 0.294126957654953 2023-01-24 01:44:59.891932: step: 176/464, loss: 0.9353902339935303 2023-01-24 01:45:00.540741: step: 178/464, loss: 0.13832412660121918 2023-01-24 01:45:01.259666: step: 180/464, loss: 0.29111751914024353 2023-01-24 01:45:01.839969: step: 182/464, loss: 0.9309651851654053 2023-01-24 01:45:02.497399: step: 184/464, loss: 0.5732741355895996 2023-01-24 01:45:03.126094: step: 186/464, loss: 2.244755744934082 2023-01-24 01:45:03.711262: step: 188/464, loss: 0.6004652976989746 2023-01-24 01:45:04.347832: step: 190/464, loss: 0.3898637592792511 2023-01-24 01:45:05.148005: step: 192/464, loss: 0.27575916051864624 2023-01-24 01:45:05.792911: step: 194/464, loss: 0.189696803689003 2023-01-24 01:45:06.473579: step: 196/464, loss: 0.34838491678237915 2023-01-24 01:45:07.097205: step: 198/464, loss: 0.3968013525009155 2023-01-24 01:45:07.722583: step: 200/464, loss: 0.3276688754558563 2023-01-24 01:45:08.322825: step: 202/464, loss: 0.22431330382823944 2023-01-24 01:45:09.027430: step: 204/464, loss: 0.18088360130786896 2023-01-24 01:45:09.614980: step: 206/464, loss: 0.3663378953933716 2023-01-24 01:45:10.242458: step: 208/464, loss: 0.7812042832374573 2023-01-24 01:45:10.910033: step: 210/464, loss: 0.11790280789136887 2023-01-24 01:45:11.521543: step: 212/464, loss: 0.6330567002296448 2023-01-24 01:45:12.095318: step: 214/464, loss: 0.20008674263954163 2023-01-24 01:45:12.698493: step: 216/464, loss: 0.29765304923057556 2023-01-24 01:45:13.359148: step: 218/464, loss: 0.4665645658969879 2023-01-24 01:45:14.053952: step: 220/464, loss: 1.2618739604949951 2023-01-24 01:45:14.691408: step: 222/464, loss: 0.2906162142753601 2023-01-24 01:45:15.292416: step: 224/464, loss: 0.20724472403526306 2023-01-24 01:45:15.887550: step: 226/464, loss: 0.3578889071941376 2023-01-24 01:45:16.483319: step: 228/464, loss: 0.4452539086341858 2023-01-24 01:45:17.145766: step: 230/464, loss: 0.19588086009025574 2023-01-24 01:45:17.791774: step: 232/464, loss: 0.6037926077842712 2023-01-24 01:45:18.469520: step: 234/464, loss: 4.56406307220459 2023-01-24 01:45:19.181708: step: 236/464, loss: 0.6469090580940247 2023-01-24 01:45:19.799437: step: 238/464, loss: 0.6941478848457336 2023-01-24 01:45:20.460351: step: 240/464, loss: 0.3625757694244385 2023-01-24 01:45:21.091017: step: 242/464, loss: 0.6837671399116516 2023-01-24 01:45:21.788613: step: 244/464, loss: 0.07192370295524597 2023-01-24 01:45:22.430473: step: 246/464, loss: 0.5115048289299011 2023-01-24 01:45:23.015143: step: 248/464, loss: 0.26653966307640076 2023-01-24 01:45:23.639618: step: 250/464, loss: 0.12437698245048523 2023-01-24 01:45:24.258794: step: 252/464, loss: 0.18795190751552582 2023-01-24 01:45:24.858402: step: 254/464, loss: 0.47079092264175415 2023-01-24 01:45:25.438807: step: 256/464, loss: 0.10718715935945511 2023-01-24 01:45:26.111025: step: 258/464, loss: 0.1507108211517334 2023-01-24 01:45:26.707976: step: 260/464, loss: 2.347313642501831 2023-01-24 01:45:27.405101: step: 262/464, loss: 1.1006702184677124 2023-01-24 01:45:27.992667: step: 264/464, loss: 0.4398345947265625 2023-01-24 01:45:28.650481: step: 266/464, loss: 0.6085986495018005 2023-01-24 01:45:29.330687: step: 268/464, loss: 0.2565450966358185 2023-01-24 01:45:29.965517: step: 270/464, loss: 0.5254449844360352 2023-01-24 01:45:30.666412: step: 272/464, loss: 0.1736554354429245 2023-01-24 01:45:31.376481: step: 274/464, loss: 0.44962701201438904 2023-01-24 01:45:32.028480: step: 276/464, loss: 0.3792150914669037 2023-01-24 01:45:32.636582: step: 278/464, loss: 0.26783522963523865 2023-01-24 01:45:33.251988: step: 280/464, loss: 0.34746065735816956 2023-01-24 01:45:33.850400: step: 282/464, loss: 0.8172089457511902 2023-01-24 01:45:34.501423: step: 284/464, loss: 0.23747019469738007 2023-01-24 01:45:35.143147: step: 286/464, loss: 0.9747133255004883 2023-01-24 01:45:35.763246: step: 288/464, loss: 0.45197030901908875 2023-01-24 01:45:36.403357: step: 290/464, loss: 0.4176959693431854 2023-01-24 01:45:37.025391: step: 292/464, loss: 0.1349785178899765 2023-01-24 01:45:37.570682: step: 294/464, loss: 0.23899558186531067 2023-01-24 01:45:38.150977: step: 296/464, loss: 0.6888447999954224 2023-01-24 01:45:38.757065: step: 298/464, loss: 0.30062979459762573 2023-01-24 01:45:39.468151: step: 300/464, loss: 0.16735124588012695 2023-01-24 01:45:40.027262: step: 302/464, loss: 0.4858199954032898 2023-01-24 01:45:40.634656: step: 304/464, loss: 0.8488729000091553 2023-01-24 01:45:41.242880: step: 306/464, loss: 0.36384105682373047 2023-01-24 01:45:41.878040: step: 308/464, loss: 0.23121513426303864 2023-01-24 01:45:42.513090: step: 310/464, loss: 0.2734513282775879 2023-01-24 01:45:43.152551: step: 312/464, loss: 0.17578211426734924 2023-01-24 01:45:43.759741: step: 314/464, loss: 0.3193581998348236 2023-01-24 01:45:44.482409: step: 316/464, loss: 0.4633854031562805 2023-01-24 01:45:45.091248: step: 318/464, loss: 0.3597419857978821 2023-01-24 01:45:45.689346: step: 320/464, loss: 0.1768357753753662 2023-01-24 01:45:46.285868: step: 322/464, loss: 0.9580966234207153 2023-01-24 01:45:46.880212: step: 324/464, loss: 0.8831745386123657 2023-01-24 01:45:47.477335: step: 326/464, loss: 0.656175971031189 2023-01-24 01:45:48.133358: step: 328/464, loss: 0.12558133900165558 2023-01-24 01:45:48.797230: step: 330/464, loss: 0.2244042605161667 2023-01-24 01:45:49.376397: step: 332/464, loss: 0.1423230618238449 2023-01-24 01:45:49.990691: step: 334/464, loss: 0.2261742502450943 2023-01-24 01:45:50.653630: step: 336/464, loss: 0.16952291131019592 2023-01-24 01:45:51.322234: step: 338/464, loss: 0.2626737654209137 2023-01-24 01:45:51.948292: step: 340/464, loss: 0.9168149828910828 2023-01-24 01:45:52.539409: step: 342/464, loss: 0.3371291756629944 2023-01-24 01:45:53.149625: step: 344/464, loss: 0.5078762769699097 2023-01-24 01:45:53.778398: step: 346/464, loss: 0.5057971477508545 2023-01-24 01:45:54.373417: step: 348/464, loss: 0.5341809391975403 2023-01-24 01:45:54.977772: step: 350/464, loss: 0.07459886372089386 2023-01-24 01:45:55.548440: step: 352/464, loss: 0.1680305004119873 2023-01-24 01:45:56.160979: step: 354/464, loss: 0.21369802951812744 2023-01-24 01:45:56.778087: step: 356/464, loss: 0.4864957630634308 2023-01-24 01:45:57.468121: step: 358/464, loss: 0.8802257180213928 2023-01-24 01:45:58.080188: step: 360/464, loss: 0.188669815659523 2023-01-24 01:45:58.755898: step: 362/464, loss: 0.14713504910469055 2023-01-24 01:45:59.398611: step: 364/464, loss: 0.2680954337120056 2023-01-24 01:46:00.027179: step: 366/464, loss: 0.4230412542819977 2023-01-24 01:46:00.652379: step: 368/464, loss: 0.41321080923080444 2023-01-24 01:46:01.334793: step: 370/464, loss: 0.3964707851409912 2023-01-24 01:46:01.930337: step: 372/464, loss: 0.5696877837181091 2023-01-24 01:46:02.573803: step: 374/464, loss: 0.30090492963790894 2023-01-24 01:46:03.154082: step: 376/464, loss: 1.4616659879684448 2023-01-24 01:46:03.775915: step: 378/464, loss: 0.12314710021018982 2023-01-24 01:46:04.433648: step: 380/464, loss: 0.4488513171672821 2023-01-24 01:46:05.116960: step: 382/464, loss: 0.23523855209350586 2023-01-24 01:46:05.733490: step: 384/464, loss: 0.3382420241832733 2023-01-24 01:46:06.423906: step: 386/464, loss: 0.24432437121868134 2023-01-24 01:46:07.081251: step: 388/464, loss: 0.6148150563240051 2023-01-24 01:46:07.685748: step: 390/464, loss: 0.11040592193603516 2023-01-24 01:46:08.294826: step: 392/464, loss: 0.5171048045158386 2023-01-24 01:46:08.935017: step: 394/464, loss: 0.631478488445282 2023-01-24 01:46:09.562774: step: 396/464, loss: 1.022544264793396 2023-01-24 01:46:10.238866: step: 398/464, loss: 0.0752885639667511 2023-01-24 01:46:10.851517: step: 400/464, loss: 0.37636426091194153 2023-01-24 01:46:11.439090: step: 402/464, loss: 0.8598151206970215 2023-01-24 01:46:12.074204: step: 404/464, loss: 0.5534162521362305 2023-01-24 01:46:12.719181: step: 406/464, loss: 0.7628689408302307 2023-01-24 01:46:13.352401: step: 408/464, loss: 0.41472098231315613 2023-01-24 01:46:13.972559: step: 410/464, loss: 0.31681686639785767 2023-01-24 01:46:14.552090: step: 412/464, loss: 0.6400054097175598 2023-01-24 01:46:15.185330: step: 414/464, loss: 0.48053276538848877 2023-01-24 01:46:15.848033: step: 416/464, loss: 0.07062732428312302 2023-01-24 01:46:16.462077: step: 418/464, loss: 0.37162572145462036 2023-01-24 01:46:17.076590: step: 420/464, loss: 0.10916796326637268 2023-01-24 01:46:17.708420: step: 422/464, loss: 0.29724493622779846 2023-01-24 01:46:18.353428: step: 424/464, loss: 0.16576127707958221 2023-01-24 01:46:19.114637: step: 426/464, loss: 0.15553560853004456 2023-01-24 01:46:19.698373: step: 428/464, loss: 0.4601963460445404 2023-01-24 01:46:20.274139: step: 430/464, loss: 0.9218921065330505 2023-01-24 01:46:20.899441: step: 432/464, loss: 1.5235856771469116 2023-01-24 01:46:21.555372: step: 434/464, loss: 0.3329862654209137 2023-01-24 01:46:22.196881: step: 436/464, loss: 0.2956153154373169 2023-01-24 01:46:22.828947: step: 438/464, loss: 0.7929648756980896 2023-01-24 01:46:23.481817: step: 440/464, loss: 0.24007189273834229 2023-01-24 01:46:24.142651: step: 442/464, loss: 0.4005393981933594 2023-01-24 01:46:24.796034: step: 444/464, loss: 1.0202969312667847 2023-01-24 01:46:25.453475: step: 446/464, loss: 0.5497901439666748 2023-01-24 01:46:26.085925: step: 448/464, loss: 0.406026691198349 2023-01-24 01:46:26.711663: step: 450/464, loss: 0.4039490818977356 2023-01-24 01:46:27.377571: step: 452/464, loss: 0.6189019083976746 2023-01-24 01:46:27.967828: step: 454/464, loss: 0.8497124910354614 2023-01-24 01:46:28.592080: step: 456/464, loss: 0.23253557085990906 2023-01-24 01:46:29.139916: step: 458/464, loss: 0.254323273897171 2023-01-24 01:46:29.791180: step: 460/464, loss: 0.2543869912624359 2023-01-24 01:46:30.458054: step: 462/464, loss: 1.3793638944625854 2023-01-24 01:46:31.030056: step: 464/464, loss: 0.15979808568954468 2023-01-24 01:46:31.631457: step: 466/464, loss: 0.3608567714691162 2023-01-24 01:46:32.282015: step: 468/464, loss: 0.2662655711174011 2023-01-24 01:46:32.977458: step: 470/464, loss: 0.15268206596374512 2023-01-24 01:46:33.631903: step: 472/464, loss: 0.4905347526073456 2023-01-24 01:46:34.260682: step: 474/464, loss: 1.036557912826538 2023-01-24 01:46:34.883229: step: 476/464, loss: 0.8116432428359985 2023-01-24 01:46:35.541052: step: 478/464, loss: 0.47300270199775696 2023-01-24 01:46:36.396077: step: 480/464, loss: 0.6200789213180542 2023-01-24 01:46:37.022007: step: 482/464, loss: 0.12582731246948242 2023-01-24 01:46:37.712415: step: 484/464, loss: 0.2855589985847473 2023-01-24 01:46:38.406645: step: 486/464, loss: 1.0768077373504639 2023-01-24 01:46:39.119026: step: 488/464, loss: 0.8582307696342468 2023-01-24 01:46:39.807212: step: 490/464, loss: 0.1929677426815033 2023-01-24 01:46:40.457964: step: 492/464, loss: 0.7221046090126038 2023-01-24 01:46:41.051415: step: 494/464, loss: 0.2424413561820984 2023-01-24 01:46:41.745394: step: 496/464, loss: 0.3820532560348511 2023-01-24 01:46:42.400623: step: 498/464, loss: 0.6332082152366638 2023-01-24 01:46:43.009944: step: 500/464, loss: 0.4443873465061188 2023-01-24 01:46:43.647183: step: 502/464, loss: 0.9196009039878845 2023-01-24 01:46:44.176339: step: 504/464, loss: 0.22076547145843506 2023-01-24 01:46:44.833835: step: 506/464, loss: 0.778414249420166 2023-01-24 01:46:45.465237: step: 508/464, loss: 0.3775312900543213 2023-01-24 01:46:46.057312: step: 510/464, loss: 0.14615599811077118 2023-01-24 01:46:46.672642: step: 512/464, loss: 0.1535091996192932 2023-01-24 01:46:47.300070: step: 514/464, loss: 0.9353958368301392 2023-01-24 01:46:48.017602: step: 516/464, loss: 0.7229629158973694 2023-01-24 01:46:48.755829: step: 518/464, loss: 1.0084935426712036 2023-01-24 01:46:49.365700: step: 520/464, loss: 0.3375189006328583 2023-01-24 01:46:50.037720: step: 522/464, loss: 0.3442927896976471 2023-01-24 01:46:50.725157: step: 524/464, loss: 0.19687649607658386 2023-01-24 01:46:51.389939: step: 526/464, loss: 0.8136903047561646 2023-01-24 01:46:52.049718: step: 528/464, loss: 1.1159623861312866 2023-01-24 01:46:52.726742: step: 530/464, loss: 0.4194756746292114 2023-01-24 01:46:53.382184: step: 532/464, loss: 0.1660752147436142 2023-01-24 01:46:54.003444: step: 534/464, loss: 0.29503533244132996 2023-01-24 01:46:54.646389: step: 536/464, loss: 0.4381676912307739 2023-01-24 01:46:55.306335: step: 538/464, loss: 0.46901679039001465 2023-01-24 01:46:56.068886: step: 540/464, loss: 0.5324099063873291 2023-01-24 01:46:56.682766: step: 542/464, loss: 0.4037749469280243 2023-01-24 01:46:57.407604: step: 544/464, loss: 0.383460134267807 2023-01-24 01:46:58.094801: step: 546/464, loss: 0.6488150954246521 2023-01-24 01:46:58.691803: step: 548/464, loss: 0.15714922547340393 2023-01-24 01:46:59.378482: step: 550/464, loss: 2.6088953018188477 2023-01-24 01:47:00.006226: step: 552/464, loss: 0.14634662866592407 2023-01-24 01:47:00.607631: step: 554/464, loss: 0.2663712501525879 2023-01-24 01:47:01.237896: step: 556/464, loss: 0.31006959080696106 2023-01-24 01:47:01.865360: step: 558/464, loss: 0.552808403968811 2023-01-24 01:47:02.448380: step: 560/464, loss: 0.6006173491477966 2023-01-24 01:47:03.046699: step: 562/464, loss: 0.5467162132263184 2023-01-24 01:47:03.629580: step: 564/464, loss: 0.3103739023208618 2023-01-24 01:47:04.308475: step: 566/464, loss: 0.3589913249015808 2023-01-24 01:47:04.929309: step: 568/464, loss: 0.2639947831630707 2023-01-24 01:47:05.560591: step: 570/464, loss: 1.988520622253418 2023-01-24 01:47:06.158781: step: 572/464, loss: 0.44152554869651794 2023-01-24 01:47:06.782922: step: 574/464, loss: 0.4914637804031372 2023-01-24 01:47:07.418440: step: 576/464, loss: 0.22426076233386993 2023-01-24 01:47:08.040396: step: 578/464, loss: 0.79241943359375 2023-01-24 01:47:08.670880: step: 580/464, loss: 0.28034767508506775 2023-01-24 01:47:09.345400: step: 582/464, loss: 0.535868227481842 2023-01-24 01:47:10.013636: step: 584/464, loss: 0.3306492567062378 2023-01-24 01:47:10.634205: step: 586/464, loss: 0.3533237874507904 2023-01-24 01:47:11.278731: step: 588/464, loss: 0.10611508041620255 2023-01-24 01:47:11.872696: step: 590/464, loss: 0.26176005601882935 2023-01-24 01:47:12.525602: step: 592/464, loss: 0.20076461136341095 2023-01-24 01:47:13.196105: step: 594/464, loss: 0.46523594856262207 2023-01-24 01:47:13.900770: step: 596/464, loss: 0.32261672616004944 2023-01-24 01:47:14.546871: step: 598/464, loss: 0.2716301381587982 2023-01-24 01:47:15.140326: step: 600/464, loss: 0.3709212839603424 2023-01-24 01:47:15.748741: step: 602/464, loss: 0.5088427662849426 2023-01-24 01:47:16.382222: step: 604/464, loss: 2.6977601051330566 2023-01-24 01:47:16.935126: step: 606/464, loss: 0.16669407486915588 2023-01-24 01:47:17.599299: step: 608/464, loss: 0.5212874412536621 2023-01-24 01:47:18.224677: step: 610/464, loss: 0.37637704610824585 2023-01-24 01:47:18.860732: step: 612/464, loss: 1.7240705490112305 2023-01-24 01:47:19.454755: step: 614/464, loss: 0.829563558101654 2023-01-24 01:47:20.051206: step: 616/464, loss: 0.166648268699646 2023-01-24 01:47:20.734005: step: 618/464, loss: 0.1423303782939911 2023-01-24 01:47:21.422675: step: 620/464, loss: 0.16027969121932983 2023-01-24 01:47:22.066318: step: 622/464, loss: 0.24504999816417694 2023-01-24 01:47:22.640165: step: 624/464, loss: 0.2669730484485626 2023-01-24 01:47:23.284462: step: 626/464, loss: 0.367501437664032 2023-01-24 01:47:23.884779: step: 628/464, loss: 0.1951684206724167 2023-01-24 01:47:24.504827: step: 630/464, loss: 0.29548734426498413 2023-01-24 01:47:25.110597: step: 632/464, loss: 0.10060364007949829 2023-01-24 01:47:25.762674: step: 634/464, loss: 0.2757560610771179 2023-01-24 01:47:26.460412: step: 636/464, loss: 0.1910465508699417 2023-01-24 01:47:27.047858: step: 638/464, loss: 0.4311179220676422 2023-01-24 01:47:27.684943: step: 640/464, loss: 0.45955127477645874 2023-01-24 01:47:28.282836: step: 642/464, loss: 0.5652684569358826 2023-01-24 01:47:28.940555: step: 644/464, loss: 1.5861278772354126 2023-01-24 01:47:29.515824: step: 646/464, loss: 1.1334205865859985 2023-01-24 01:47:30.138652: step: 648/464, loss: 0.27795305848121643 2023-01-24 01:47:30.723481: step: 650/464, loss: 0.3862375319004059 2023-01-24 01:47:31.351866: step: 652/464, loss: 1.0548150539398193 2023-01-24 01:47:31.975426: step: 654/464, loss: 0.6420807242393494 2023-01-24 01:47:32.634502: step: 656/464, loss: 0.45009008049964905 2023-01-24 01:47:33.284868: step: 658/464, loss: 0.5131018161773682 2023-01-24 01:47:33.917344: step: 660/464, loss: 0.24536296725273132 2023-01-24 01:47:34.551358: step: 662/464, loss: 0.5035173296928406 2023-01-24 01:47:35.172158: step: 664/464, loss: 0.7572827935218811 2023-01-24 01:47:35.801411: step: 666/464, loss: 0.17303897440433502 2023-01-24 01:47:36.390504: step: 668/464, loss: 0.5711219310760498 2023-01-24 01:47:37.005086: step: 670/464, loss: 0.6885030269622803 2023-01-24 01:47:37.620840: step: 672/464, loss: 0.770183801651001 2023-01-24 01:47:38.291864: step: 674/464, loss: 0.20264945924282074 2023-01-24 01:47:38.921381: step: 676/464, loss: 0.25271549820899963 2023-01-24 01:47:39.551306: step: 678/464, loss: 0.26536649465560913 2023-01-24 01:47:40.150417: step: 680/464, loss: 0.1744324117898941 2023-01-24 01:47:40.758785: step: 682/464, loss: 0.25122275948524475 2023-01-24 01:47:41.403273: step: 684/464, loss: 0.5525637269020081 2023-01-24 01:47:42.034466: step: 686/464, loss: 0.37694841623306274 2023-01-24 01:47:42.619911: step: 688/464, loss: 1.0671063661575317 2023-01-24 01:47:43.267839: step: 690/464, loss: 0.2689855098724365 2023-01-24 01:47:43.928813: step: 692/464, loss: 0.2012881338596344 2023-01-24 01:47:44.515147: step: 694/464, loss: 0.6586238145828247 2023-01-24 01:47:45.112098: step: 696/464, loss: 2.0456178188323975 2023-01-24 01:47:45.724550: step: 698/464, loss: 0.3495972454547882 2023-01-24 01:47:46.319027: step: 700/464, loss: 0.22533591091632843 2023-01-24 01:47:46.971428: step: 702/464, loss: 0.6673823595046997 2023-01-24 01:47:47.553348: step: 704/464, loss: 0.31505724787712097 2023-01-24 01:47:48.120393: step: 706/464, loss: 0.17165252566337585 2023-01-24 01:47:48.684065: step: 708/464, loss: 1.0771108865737915 2023-01-24 01:47:49.357347: step: 710/464, loss: 0.32624274492263794 2023-01-24 01:47:49.943779: step: 712/464, loss: 0.19555053114891052 2023-01-24 01:47:50.673946: step: 714/464, loss: 0.499306321144104 2023-01-24 01:47:51.261711: step: 716/464, loss: 0.431405246257782 2023-01-24 01:47:51.909441: step: 718/464, loss: 0.19072787463665009 2023-01-24 01:47:52.530414: step: 720/464, loss: 0.16556185483932495 2023-01-24 01:47:53.226578: step: 722/464, loss: 0.5962976217269897 2023-01-24 01:47:53.890471: step: 724/464, loss: 0.297981321811676 2023-01-24 01:47:54.471939: step: 726/464, loss: 0.13247878849506378 2023-01-24 01:47:55.093036: step: 728/464, loss: 0.3829472064971924 2023-01-24 01:47:55.727646: step: 730/464, loss: 0.09066735953092575 2023-01-24 01:47:56.414539: step: 732/464, loss: 0.6439650058746338 2023-01-24 01:47:57.045496: step: 734/464, loss: 0.20518773794174194 2023-01-24 01:47:57.696141: step: 736/464, loss: 0.6721473336219788 2023-01-24 01:47:58.271031: step: 738/464, loss: 1.1977266073226929 2023-01-24 01:47:58.873716: step: 740/464, loss: 0.23774825036525726 2023-01-24 01:47:59.521003: step: 742/464, loss: 0.28592649102211 2023-01-24 01:48:00.181616: step: 744/464, loss: 0.3035857081413269 2023-01-24 01:48:00.856752: step: 746/464, loss: 0.8129310011863708 2023-01-24 01:48:01.517084: step: 748/464, loss: 0.3011573851108551 2023-01-24 01:48:02.145005: step: 750/464, loss: 0.3996354043483734 2023-01-24 01:48:02.801321: step: 752/464, loss: 0.3707866668701172 2023-01-24 01:48:03.482303: step: 754/464, loss: 0.1652347296476364 2023-01-24 01:48:04.097699: step: 756/464, loss: 0.18049731850624084 2023-01-24 01:48:04.779179: step: 758/464, loss: 1.1612261533737183 2023-01-24 01:48:05.389007: step: 760/464, loss: 0.2037789523601532 2023-01-24 01:48:06.046625: step: 762/464, loss: 0.2417970597743988 2023-01-24 01:48:06.689672: step: 764/464, loss: 0.2190207988023758 2023-01-24 01:48:07.415694: step: 766/464, loss: 0.7531817555427551 2023-01-24 01:48:08.017662: step: 768/464, loss: 0.36380305886268616 2023-01-24 01:48:08.651462: step: 770/464, loss: 0.15014781057834625 2023-01-24 01:48:09.290853: step: 772/464, loss: 0.1852748692035675 2023-01-24 01:48:09.938653: step: 774/464, loss: 0.6573358178138733 2023-01-24 01:48:10.618201: step: 776/464, loss: 0.5150532126426697 2023-01-24 01:48:11.274187: step: 778/464, loss: 0.539428174495697 2023-01-24 01:48:11.868908: step: 780/464, loss: 0.07932940870523453 2023-01-24 01:48:12.467108: step: 782/464, loss: 0.27510306239128113 2023-01-24 01:48:13.143568: step: 784/464, loss: 0.2898877263069153 2023-01-24 01:48:13.728695: step: 786/464, loss: 0.16442044079303741 2023-01-24 01:48:14.386905: step: 788/464, loss: 0.24723902344703674 2023-01-24 01:48:14.996686: step: 790/464, loss: 0.6519625782966614 2023-01-24 01:48:15.620981: step: 792/464, loss: 0.451835036277771 2023-01-24 01:48:16.244462: step: 794/464, loss: 0.7727434635162354 2023-01-24 01:48:16.879961: step: 796/464, loss: 4.483155250549316 2023-01-24 01:48:17.496986: step: 798/464, loss: 0.3991261124610901 2023-01-24 01:48:18.112953: step: 800/464, loss: 1.022581696510315 2023-01-24 01:48:18.734252: step: 802/464, loss: 0.5598032474517822 2023-01-24 01:48:19.364307: step: 804/464, loss: 0.21302887797355652 2023-01-24 01:48:20.023578: step: 806/464, loss: 0.4852057099342346 2023-01-24 01:48:20.601293: step: 808/464, loss: 0.5502801537513733 2023-01-24 01:48:21.271674: step: 810/464, loss: 0.3856951892375946 2023-01-24 01:48:21.890294: step: 812/464, loss: 0.41724491119384766 2023-01-24 01:48:22.508526: step: 814/464, loss: 0.4339942932128906 2023-01-24 01:48:23.141783: step: 816/464, loss: 0.19563280045986176 2023-01-24 01:48:23.810593: step: 818/464, loss: 0.13324302434921265 2023-01-24 01:48:24.461851: step: 820/464, loss: 0.41319727897644043 2023-01-24 01:48:25.047687: step: 822/464, loss: 0.867550253868103 2023-01-24 01:48:25.628818: step: 824/464, loss: 0.7590504884719849 2023-01-24 01:48:26.255220: step: 826/464, loss: 1.368444800376892 2023-01-24 01:48:26.854207: step: 828/464, loss: 0.4613424241542816 2023-01-24 01:48:27.585781: step: 830/464, loss: 0.18761426210403442 2023-01-24 01:48:28.160485: step: 832/464, loss: 0.1387253850698471 2023-01-24 01:48:28.793088: step: 834/464, loss: 0.35486987233161926 2023-01-24 01:48:29.408835: step: 836/464, loss: 0.6135989427566528 2023-01-24 01:48:30.037458: step: 838/464, loss: 0.1506817638874054 2023-01-24 01:48:30.650417: step: 840/464, loss: 0.6843259334564209 2023-01-24 01:48:31.313353: step: 842/464, loss: 0.41133636236190796 2023-01-24 01:48:31.970980: step: 844/464, loss: 0.3200354278087616 2023-01-24 01:48:32.560998: step: 846/464, loss: 1.0006072521209717 2023-01-24 01:48:33.233123: step: 848/464, loss: 0.16702058911323547 2023-01-24 01:48:33.806716: step: 850/464, loss: 0.9346050024032593 2023-01-24 01:48:34.448823: step: 852/464, loss: 0.42332419753074646 2023-01-24 01:48:35.077687: step: 854/464, loss: 0.4530613422393799 2023-01-24 01:48:35.750366: step: 856/464, loss: 0.6461949348449707 2023-01-24 01:48:36.468621: step: 858/464, loss: 0.7953711748123169 2023-01-24 01:48:37.034865: step: 860/464, loss: 0.26844945549964905 2023-01-24 01:48:37.612581: step: 862/464, loss: 0.5950400233268738 2023-01-24 01:48:38.208944: step: 864/464, loss: 0.17732596397399902 2023-01-24 01:48:38.831667: step: 866/464, loss: 0.12965376675128937 2023-01-24 01:48:39.419190: step: 868/464, loss: 0.29925432801246643 2023-01-24 01:48:39.990640: step: 870/464, loss: 0.30027610063552856 2023-01-24 01:48:40.693650: step: 872/464, loss: 0.12318747490644455 2023-01-24 01:48:41.296095: step: 874/464, loss: 0.768347442150116 2023-01-24 01:48:41.954583: step: 876/464, loss: 0.46050161123275757 2023-01-24 01:48:42.573818: step: 878/464, loss: 0.492687463760376 2023-01-24 01:48:43.221170: step: 880/464, loss: 0.7648109793663025 2023-01-24 01:48:43.815614: step: 882/464, loss: 0.16960103809833527 2023-01-24 01:48:44.398942: step: 884/464, loss: 0.229720339179039 2023-01-24 01:48:44.992330: step: 886/464, loss: 0.21732476353645325 2023-01-24 01:48:45.590609: step: 888/464, loss: 0.9167367815971375 2023-01-24 01:48:46.139588: step: 890/464, loss: 0.23118619620800018 2023-01-24 01:48:46.723153: step: 892/464, loss: 0.5303144454956055 2023-01-24 01:48:47.395099: step: 894/464, loss: 0.4033811092376709 2023-01-24 01:48:48.039398: step: 896/464, loss: 0.0627986267209053 2023-01-24 01:48:48.655925: step: 898/464, loss: 8.275155067443848 2023-01-24 01:48:49.348334: step: 900/464, loss: 0.125852569937706 2023-01-24 01:48:49.949040: step: 902/464, loss: 0.6692149043083191 2023-01-24 01:48:50.618227: step: 904/464, loss: 0.16022256016731262 2023-01-24 01:48:51.204289: step: 906/464, loss: 0.6760783791542053 2023-01-24 01:48:51.779660: step: 908/464, loss: 0.3508135676383972 2023-01-24 01:48:52.456746: step: 910/464, loss: 1.5788806676864624 2023-01-24 01:48:53.085552: step: 912/464, loss: 0.07519754767417908 2023-01-24 01:48:53.690363: step: 914/464, loss: 0.4660152494907379 2023-01-24 01:48:54.338118: step: 916/464, loss: 0.8508497476577759 2023-01-24 01:48:54.889109: step: 918/464, loss: 0.10930764675140381 2023-01-24 01:48:55.540689: step: 920/464, loss: 0.6356523633003235 2023-01-24 01:48:56.180862: step: 922/464, loss: 0.36704057455062866 2023-01-24 01:48:56.812814: step: 924/464, loss: 1.3801870346069336 2023-01-24 01:48:57.421325: step: 926/464, loss: 0.44619035720825195 2023-01-24 01:48:58.048657: step: 928/464, loss: 0.3960069715976715 2023-01-24 01:48:58.556192: step: 930/464, loss: 0.07545240968465805 ================================================== Loss: 0.537 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2984013733288329, 'r': 0.323881566497329, 'f1': 0.3106198099073565}, 'combined': 0.22887775466857846, 'epoch': 8} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31432387112441545, 'r': 0.29327282287479867, 'f1': 0.3034336753047276}, 'combined': 0.19809659631293097, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3049803022819904, 'r': 0.34086033784457753, 'f1': 0.3219236524087677}, 'combined': 0.23720690177488143, 'epoch': 8} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32331629845421916, 'r': 0.2962742183445372, 'f1': 0.30920513147132517}, 'combined': 0.2018644899760983, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3081667751632722, 'r': 0.33272655610607565, 'f1': 0.31997608589033183}, 'combined': 0.2357718527612971, 'epoch': 8} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3278785592032937, 'r': 0.2881357035422884, 'f1': 0.3067251037708232}, 'combined': 0.2002454045343198, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2568027210884353, 'r': 0.35952380952380947, 'f1': 0.29960317460317454}, 'combined': 0.19973544973544968, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2894736842105263, 'r': 0.4782608695652174, 'f1': 0.360655737704918}, 'combined': 0.180327868852459, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31993440233236153, 'r': 0.2969088203463203, 'f1': 0.3079918607914679}, 'combined': 0.22694137110950266, 'epoch': 5} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30404472815418065, 'r': 0.2555091476965408, 'f1': 0.27767195512385795}, 'combined': 0.181278063966871, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5192307692307693, 'r': 0.23275862068965517, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 5} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:51:35.339136: step: 2/464, loss: 0.19961535930633545 2023-01-24 01:51:35.927634: step: 4/464, loss: 0.19874349236488342 2023-01-24 01:51:36.580905: step: 6/464, loss: 0.7833148241043091 2023-01-24 01:51:37.189184: step: 8/464, loss: 0.2644481658935547 2023-01-24 01:51:37.829763: step: 10/464, loss: 0.520695686340332 2023-01-24 01:51:38.502294: step: 12/464, loss: 0.38046133518218994 2023-01-24 01:51:39.174674: step: 14/464, loss: 0.16472403705120087 2023-01-24 01:51:39.778325: step: 16/464, loss: 0.27650564908981323 2023-01-24 01:51:40.423959: step: 18/464, loss: 0.1332404911518097 2023-01-24 01:51:41.113106: step: 20/464, loss: 0.3479761481285095 2023-01-24 01:51:41.713915: step: 22/464, loss: 0.33198633790016174 2023-01-24 01:51:42.322212: step: 24/464, loss: 0.20194487273693085 2023-01-24 01:51:43.087094: step: 26/464, loss: 0.1725732982158661 2023-01-24 01:51:43.720150: step: 28/464, loss: 0.7356686592102051 2023-01-24 01:51:44.327917: step: 30/464, loss: 0.3056510090827942 2023-01-24 01:51:44.949042: step: 32/464, loss: 0.629055917263031 2023-01-24 01:51:45.592173: step: 34/464, loss: 0.09917306154966354 2023-01-24 01:51:46.227429: step: 36/464, loss: 0.4345434308052063 2023-01-24 01:51:46.877214: step: 38/464, loss: 0.2141372114419937 2023-01-24 01:51:47.430070: step: 40/464, loss: 0.20127037167549133 2023-01-24 01:51:48.104595: step: 42/464, loss: 0.08401795476675034 2023-01-24 01:51:48.718922: step: 44/464, loss: 0.5465341806411743 2023-01-24 01:51:49.312248: step: 46/464, loss: 0.10158457607030869 2023-01-24 01:51:49.884780: step: 48/464, loss: 0.4749198853969574 2023-01-24 01:51:50.538940: step: 50/464, loss: 0.5016505718231201 2023-01-24 01:51:51.229204: step: 52/464, loss: 0.0881711021065712 2023-01-24 01:51:51.879741: step: 54/464, loss: 0.24765042960643768 2023-01-24 01:51:52.484032: step: 56/464, loss: 0.11251507699489594 2023-01-24 01:51:53.146045: step: 58/464, loss: 0.2939249873161316 2023-01-24 01:51:53.799767: step: 60/464, loss: 0.4889867305755615 2023-01-24 01:51:54.375651: step: 62/464, loss: 1.0417712926864624 2023-01-24 01:51:54.988999: step: 64/464, loss: 0.14317934215068817 2023-01-24 01:51:55.586915: step: 66/464, loss: 0.1017264723777771 2023-01-24 01:51:56.262333: step: 68/464, loss: 0.1042710468173027 2023-01-24 01:51:56.873851: step: 70/464, loss: 0.442725270986557 2023-01-24 01:51:57.457583: step: 72/464, loss: 0.14025932550430298 2023-01-24 01:51:58.059287: step: 74/464, loss: 0.3430282771587372 2023-01-24 01:51:58.664202: step: 76/464, loss: 1.3112821578979492 2023-01-24 01:51:59.345801: step: 78/464, loss: 0.4583187699317932 2023-01-24 01:51:59.972343: step: 80/464, loss: 0.319332093000412 2023-01-24 01:52:00.646068: step: 82/464, loss: 0.6261417865753174 2023-01-24 01:52:01.370363: step: 84/464, loss: 0.2486996352672577 2023-01-24 01:52:01.970617: step: 86/464, loss: 0.09624868631362915 2023-01-24 01:52:02.541995: step: 88/464, loss: 0.2241324484348297 2023-01-24 01:52:03.135796: step: 90/464, loss: 0.8514329791069031 2023-01-24 01:52:03.757299: step: 92/464, loss: 0.12544073164463043 2023-01-24 01:52:04.358619: step: 94/464, loss: 0.20031552016735077 2023-01-24 01:52:04.993472: step: 96/464, loss: 0.1628793179988861 2023-01-24 01:52:05.677168: step: 98/464, loss: 0.3327937424182892 2023-01-24 01:52:06.276684: step: 100/464, loss: 0.10830485820770264 2023-01-24 01:52:06.848009: step: 102/464, loss: 0.16933751106262207 2023-01-24 01:52:07.411437: step: 104/464, loss: 0.3470763564109802 2023-01-24 01:52:08.040901: step: 106/464, loss: 0.0833381935954094 2023-01-24 01:52:08.692893: step: 108/464, loss: 0.1391914188861847 2023-01-24 01:52:09.291558: step: 110/464, loss: 0.1907893568277359 2023-01-24 01:52:09.933241: step: 112/464, loss: 0.11749202758073807 2023-01-24 01:52:10.561974: step: 114/464, loss: 0.5847851037979126 2023-01-24 01:52:11.197152: step: 116/464, loss: 0.13662198185920715 2023-01-24 01:52:11.817504: step: 118/464, loss: 0.679173469543457 2023-01-24 01:52:12.486708: step: 120/464, loss: 0.1345047503709793 2023-01-24 01:52:13.173761: step: 122/464, loss: 0.07206592708826065 2023-01-24 01:52:13.888765: step: 124/464, loss: 0.1326543688774109 2023-01-24 01:52:14.480643: step: 126/464, loss: 0.16537462174892426 2023-01-24 01:52:15.076604: step: 128/464, loss: 0.08092430233955383 2023-01-24 01:52:15.706489: step: 130/464, loss: 0.18472331762313843 2023-01-24 01:52:16.335732: step: 132/464, loss: 0.42979902029037476 2023-01-24 01:52:16.924416: step: 134/464, loss: 2.8610308170318604 2023-01-24 01:52:17.565787: step: 136/464, loss: 0.5815833210945129 2023-01-24 01:52:18.204891: step: 138/464, loss: 0.41417253017425537 2023-01-24 01:52:18.819303: step: 140/464, loss: 0.16000278294086456 2023-01-24 01:52:19.472854: step: 142/464, loss: 0.1544022113084793 2023-01-24 01:52:20.092680: step: 144/464, loss: 0.2434360384941101 2023-01-24 01:52:20.709363: step: 146/464, loss: 0.1725931018590927 2023-01-24 01:52:21.353083: step: 148/464, loss: 0.2619199752807617 2023-01-24 01:52:21.974407: step: 150/464, loss: 0.2151547074317932 2023-01-24 01:52:22.621591: step: 152/464, loss: 0.389448881149292 2023-01-24 01:52:23.270024: step: 154/464, loss: 0.1410638689994812 2023-01-24 01:52:23.892422: step: 156/464, loss: 2.4310078620910645 2023-01-24 01:52:24.537110: step: 158/464, loss: 0.2114315629005432 2023-01-24 01:52:25.195370: step: 160/464, loss: 0.3406426012516022 2023-01-24 01:52:25.824062: step: 162/464, loss: 0.10214785486459732 2023-01-24 01:52:26.454880: step: 164/464, loss: 0.31256258487701416 2023-01-24 01:52:27.046240: step: 166/464, loss: 0.1485060155391693 2023-01-24 01:52:27.607311: step: 168/464, loss: 0.15247027575969696 2023-01-24 01:52:28.227826: step: 170/464, loss: 0.12883232533931732 2023-01-24 01:52:28.876313: step: 172/464, loss: 0.2534656226634979 2023-01-24 01:52:29.542545: step: 174/464, loss: 0.4512403905391693 2023-01-24 01:52:30.142801: step: 176/464, loss: 0.5365868806838989 2023-01-24 01:52:30.788399: step: 178/464, loss: 0.3706887364387512 2023-01-24 01:52:31.410894: step: 180/464, loss: 0.18491411209106445 2023-01-24 01:52:31.986579: step: 182/464, loss: 1.0071208477020264 2023-01-24 01:52:32.624301: step: 184/464, loss: 0.2296251654624939 2023-01-24 01:52:33.229152: step: 186/464, loss: 0.31761446595191956 2023-01-24 01:52:33.866906: step: 188/464, loss: 0.41353002190589905 2023-01-24 01:52:34.480407: step: 190/464, loss: 0.22576896846294403 2023-01-24 01:52:35.118932: step: 192/464, loss: 0.8026258945465088 2023-01-24 01:52:35.788229: step: 194/464, loss: 0.8088206052780151 2023-01-24 01:52:36.419211: step: 196/464, loss: 0.1576479822397232 2023-01-24 01:52:37.045724: step: 198/464, loss: 0.9424635171890259 2023-01-24 01:52:37.628434: step: 200/464, loss: 0.14036808907985687 2023-01-24 01:52:38.252662: step: 202/464, loss: 0.13937592506408691 2023-01-24 01:52:38.888809: step: 204/464, loss: 0.2324567437171936 2023-01-24 01:52:39.513768: step: 206/464, loss: 0.8766568303108215 2023-01-24 01:52:40.135639: step: 208/464, loss: 0.3298742175102234 2023-01-24 01:52:40.778741: step: 210/464, loss: 0.7993811368942261 2023-01-24 01:52:41.416029: step: 212/464, loss: 0.9075860381126404 2023-01-24 01:52:42.061938: step: 214/464, loss: 0.12574732303619385 2023-01-24 01:52:42.658922: step: 216/464, loss: 0.17546413838863373 2023-01-24 01:52:43.368068: step: 218/464, loss: 0.18658989667892456 2023-01-24 01:52:44.068142: step: 220/464, loss: 1.1319619417190552 2023-01-24 01:52:44.705047: step: 222/464, loss: 0.21586492657661438 2023-01-24 01:52:45.344107: step: 224/464, loss: 0.27885472774505615 2023-01-24 01:52:45.944114: step: 226/464, loss: 0.3149697780609131 2023-01-24 01:52:46.593126: step: 228/464, loss: 0.21803054213523865 2023-01-24 01:52:47.254822: step: 230/464, loss: 0.19087247550487518 2023-01-24 01:52:47.873523: step: 232/464, loss: 0.13294973969459534 2023-01-24 01:52:48.476524: step: 234/464, loss: 0.0896846204996109 2023-01-24 01:52:49.184190: step: 236/464, loss: 0.7859571576118469 2023-01-24 01:52:49.821543: step: 238/464, loss: 0.27156299352645874 2023-01-24 01:52:50.407314: step: 240/464, loss: 1.2523858547210693 2023-01-24 01:52:51.025820: step: 242/464, loss: 0.07696082442998886 2023-01-24 01:52:51.701960: step: 244/464, loss: 0.1793721616268158 2023-01-24 01:52:52.280054: step: 246/464, loss: 0.21266333758831024 2023-01-24 01:52:52.878056: step: 248/464, loss: 0.5519155859947205 2023-01-24 01:52:53.482690: step: 250/464, loss: 0.16949373483657837 2023-01-24 01:52:54.057834: step: 252/464, loss: 0.21048606932163239 2023-01-24 01:52:54.597993: step: 254/464, loss: 0.07983586937189102 2023-01-24 01:52:55.181858: step: 256/464, loss: 0.7817102074623108 2023-01-24 01:52:55.815084: step: 258/464, loss: 0.4482273459434509 2023-01-24 01:52:56.405152: step: 260/464, loss: 0.730652391910553 2023-01-24 01:52:56.970587: step: 262/464, loss: 0.3141850531101227 2023-01-24 01:52:57.624213: step: 264/464, loss: 0.2017117440700531 2023-01-24 01:52:58.253154: step: 266/464, loss: 0.2721196115016937 2023-01-24 01:52:58.831658: step: 268/464, loss: 0.6391931176185608 2023-01-24 01:52:59.516991: step: 270/464, loss: 0.7095168828964233 2023-01-24 01:53:00.126796: step: 272/464, loss: 0.2975116968154907 2023-01-24 01:53:00.770957: step: 274/464, loss: 0.08494570851325989 2023-01-24 01:53:01.351772: step: 276/464, loss: 0.21788422763347626 2023-01-24 01:53:01.909857: step: 278/464, loss: 0.32937175035476685 2023-01-24 01:53:02.510277: step: 280/464, loss: 0.3043481707572937 2023-01-24 01:53:03.190863: step: 282/464, loss: 0.2767504155635834 2023-01-24 01:53:03.917216: step: 284/464, loss: 0.455371618270874 2023-01-24 01:53:04.557920: step: 286/464, loss: 0.8453356623649597 2023-01-24 01:53:05.151448: step: 288/464, loss: 0.08785227686166763 2023-01-24 01:53:05.735716: step: 290/464, loss: 0.4613943099975586 2023-01-24 01:53:06.325809: step: 292/464, loss: 0.08880679309368134 2023-01-24 01:53:07.010451: step: 294/464, loss: 0.23107773065567017 2023-01-24 01:53:07.641867: step: 296/464, loss: 0.13927944004535675 2023-01-24 01:53:08.257891: step: 298/464, loss: 0.4584111273288727 2023-01-24 01:53:08.897976: step: 300/464, loss: 0.09672047942876816 2023-01-24 01:53:09.621919: step: 302/464, loss: 0.5475522875785828 2023-01-24 01:53:10.238526: step: 304/464, loss: 0.36058226227760315 2023-01-24 01:53:10.851627: step: 306/464, loss: 0.20680579543113708 2023-01-24 01:53:11.470087: step: 308/464, loss: 0.4903566539287567 2023-01-24 01:53:12.271522: step: 310/464, loss: 0.36605599522590637 2023-01-24 01:53:12.884273: step: 312/464, loss: 0.13913631439208984 2023-01-24 01:53:13.498314: step: 314/464, loss: 0.22459517419338226 2023-01-24 01:53:14.142081: step: 316/464, loss: 0.35183778405189514 2023-01-24 01:53:14.764212: step: 318/464, loss: 1.192873239517212 2023-01-24 01:53:15.346989: step: 320/464, loss: 0.1527167409658432 2023-01-24 01:53:15.960442: step: 322/464, loss: 0.42959147691726685 2023-01-24 01:53:16.544600: step: 324/464, loss: 0.3765203356742859 2023-01-24 01:53:17.124589: step: 326/464, loss: 0.21764370799064636 2023-01-24 01:53:17.729691: step: 328/464, loss: 0.23091094195842743 2023-01-24 01:53:18.321774: step: 330/464, loss: 0.22147002816200256 2023-01-24 01:53:18.960709: step: 332/464, loss: 0.3773364722728729 2023-01-24 01:53:19.547284: step: 334/464, loss: 0.06285819411277771 2023-01-24 01:53:20.148497: step: 336/464, loss: 5.5828537940979 2023-01-24 01:53:20.742373: step: 338/464, loss: 0.1912716180086136 2023-01-24 01:53:21.403592: step: 340/464, loss: 0.8449513912200928 2023-01-24 01:53:22.038217: step: 342/464, loss: 0.2755206823348999 2023-01-24 01:53:22.761869: step: 344/464, loss: 0.2016700953245163 2023-01-24 01:53:23.385116: step: 346/464, loss: 0.2179202437400818 2023-01-24 01:53:24.027488: step: 348/464, loss: 0.07725241780281067 2023-01-24 01:53:24.634223: step: 350/464, loss: 0.17912504076957703 2023-01-24 01:53:25.281314: step: 352/464, loss: 0.16181229054927826 2023-01-24 01:53:25.860542: step: 354/464, loss: 0.16142886877059937 2023-01-24 01:53:26.542681: step: 356/464, loss: 0.1609855741262436 2023-01-24 01:53:27.183956: step: 358/464, loss: 0.5756404399871826 2023-01-24 01:53:27.822799: step: 360/464, loss: 0.36991944909095764 2023-01-24 01:53:28.436426: step: 362/464, loss: 0.22064253687858582 2023-01-24 01:53:29.016320: step: 364/464, loss: 0.4196416139602661 2023-01-24 01:53:29.635201: step: 366/464, loss: 0.1866072416305542 2023-01-24 01:53:30.248388: step: 368/464, loss: 0.0899113342165947 2023-01-24 01:53:30.853306: step: 370/464, loss: 0.15155668556690216 2023-01-24 01:53:31.483745: step: 372/464, loss: 0.22216102480888367 2023-01-24 01:53:32.142239: step: 374/464, loss: 0.20138542354106903 2023-01-24 01:53:32.683535: step: 376/464, loss: 0.5346720218658447 2023-01-24 01:53:33.376283: step: 378/464, loss: 0.3250393867492676 2023-01-24 01:53:33.973195: step: 380/464, loss: 0.46061569452285767 2023-01-24 01:53:34.604838: step: 382/464, loss: 0.10293516516685486 2023-01-24 01:53:35.222679: step: 384/464, loss: 0.13653913140296936 2023-01-24 01:53:35.831219: step: 386/464, loss: 0.17543986439704895 2023-01-24 01:53:36.436243: step: 388/464, loss: 0.07186532020568848 2023-01-24 01:53:37.033235: step: 390/464, loss: 0.3736836910247803 2023-01-24 01:53:37.638296: step: 392/464, loss: 1.7982009649276733 2023-01-24 01:53:38.330611: step: 394/464, loss: 0.05094000697135925 2023-01-24 01:53:38.938175: step: 396/464, loss: 0.08247574418783188 2023-01-24 01:53:39.537949: step: 398/464, loss: 0.7131425142288208 2023-01-24 01:53:40.184137: step: 400/464, loss: 0.7239433526992798 2023-01-24 01:53:40.792400: step: 402/464, loss: 0.20473116636276245 2023-01-24 01:53:41.431603: step: 404/464, loss: 0.3672104775905609 2023-01-24 01:53:42.084899: step: 406/464, loss: 0.18884846568107605 2023-01-24 01:53:42.707632: step: 408/464, loss: 0.2608042359352112 2023-01-24 01:53:43.385602: step: 410/464, loss: 0.17296145856380463 2023-01-24 01:53:44.036374: step: 412/464, loss: 0.19148777425289154 2023-01-24 01:53:44.676425: step: 414/464, loss: 0.7486370801925659 2023-01-24 01:53:45.315905: step: 416/464, loss: 0.1606966108083725 2023-01-24 01:53:45.980643: step: 418/464, loss: 0.1943890005350113 2023-01-24 01:53:46.634544: step: 420/464, loss: 0.2487289309501648 2023-01-24 01:53:47.255350: step: 422/464, loss: 0.6002469062805176 2023-01-24 01:53:47.965664: step: 424/464, loss: 0.9607329368591309 2023-01-24 01:53:48.534482: step: 426/464, loss: 0.27625828981399536 2023-01-24 01:53:49.192175: step: 428/464, loss: 1.326363444328308 2023-01-24 01:53:49.830092: step: 430/464, loss: 0.5038409233093262 2023-01-24 01:53:50.392134: step: 432/464, loss: 0.4234456717967987 2023-01-24 01:53:51.030817: step: 434/464, loss: 0.702119767665863 2023-01-24 01:53:51.683759: step: 436/464, loss: 0.17340542376041412 2023-01-24 01:53:52.202987: step: 438/464, loss: 0.6242377161979675 2023-01-24 01:53:52.839175: step: 440/464, loss: 0.16712586581707 2023-01-24 01:53:53.479095: step: 442/464, loss: 0.14836061000823975 2023-01-24 01:53:54.133656: step: 444/464, loss: 0.3254392743110657 2023-01-24 01:53:54.724212: step: 446/464, loss: 0.22568395733833313 2023-01-24 01:53:55.360187: step: 448/464, loss: 0.4411729574203491 2023-01-24 01:53:55.974865: step: 450/464, loss: 0.5406442284584045 2023-01-24 01:53:56.544070: step: 452/464, loss: 0.16906428337097168 2023-01-24 01:53:57.206638: step: 454/464, loss: 0.03588523343205452 2023-01-24 01:53:57.796794: step: 456/464, loss: 0.06504597514867783 2023-01-24 01:53:58.430458: step: 458/464, loss: 0.17932981252670288 2023-01-24 01:53:59.127740: step: 460/464, loss: 0.3948647975921631 2023-01-24 01:53:59.723101: step: 462/464, loss: 0.2129187285900116 2023-01-24 01:54:00.308033: step: 464/464, loss: 0.2704509198665619 2023-01-24 01:54:00.930714: step: 466/464, loss: 0.2650047540664673 2023-01-24 01:54:01.544960: step: 468/464, loss: 0.3367736339569092 2023-01-24 01:54:02.129658: step: 470/464, loss: 0.27748000621795654 2023-01-24 01:54:02.747120: step: 472/464, loss: 0.11794216930866241 2023-01-24 01:54:03.322289: step: 474/464, loss: 0.2931903600692749 2023-01-24 01:54:03.950345: step: 476/464, loss: 0.18659132719039917 2023-01-24 01:54:04.583508: step: 478/464, loss: 0.29165756702423096 2023-01-24 01:54:05.256946: step: 480/464, loss: 0.5868270397186279 2023-01-24 01:54:05.903295: step: 482/464, loss: 0.42066115140914917 2023-01-24 01:54:06.508329: step: 484/464, loss: 0.8886086344718933 2023-01-24 01:54:07.181810: step: 486/464, loss: 0.5854116678237915 2023-01-24 01:54:07.808437: step: 488/464, loss: 0.1770869493484497 2023-01-24 01:54:08.411929: step: 490/464, loss: 0.29514598846435547 2023-01-24 01:54:09.035999: step: 492/464, loss: 0.5091790556907654 2023-01-24 01:54:09.684869: step: 494/464, loss: 0.566424548625946 2023-01-24 01:54:10.298170: step: 496/464, loss: 0.09062954783439636 2023-01-24 01:54:10.957608: step: 498/464, loss: 0.19813255965709686 2023-01-24 01:54:11.617919: step: 500/464, loss: 0.17183098196983337 2023-01-24 01:54:12.235922: step: 502/464, loss: 0.7004691958427429 2023-01-24 01:54:12.904989: step: 504/464, loss: 0.2983857989311218 2023-01-24 01:54:13.565946: step: 506/464, loss: 0.4526219069957733 2023-01-24 01:54:14.155748: step: 508/464, loss: 0.5289148092269897 2023-01-24 01:54:14.769037: step: 510/464, loss: 0.19427700340747833 2023-01-24 01:54:15.381547: step: 512/464, loss: 0.49163126945495605 2023-01-24 01:54:16.020045: step: 514/464, loss: 0.13287903368473053 2023-01-24 01:54:16.677794: step: 516/464, loss: 0.40076348185539246 2023-01-24 01:54:17.277028: step: 518/464, loss: 0.7974677681922913 2023-01-24 01:54:17.895141: step: 520/464, loss: 0.5654568672180176 2023-01-24 01:54:18.520198: step: 522/464, loss: 0.8992674946784973 2023-01-24 01:54:19.173247: step: 524/464, loss: 0.6453098654747009 2023-01-24 01:54:19.801326: step: 526/464, loss: 0.15305867791175842 2023-01-24 01:54:20.428659: step: 528/464, loss: 0.7035385370254517 2023-01-24 01:54:21.087105: step: 530/464, loss: 0.4323200583457947 2023-01-24 01:54:21.706004: step: 532/464, loss: 0.77094566822052 2023-01-24 01:54:22.379029: step: 534/464, loss: 0.1831674575805664 2023-01-24 01:54:23.106529: step: 536/464, loss: 2.2393898963928223 2023-01-24 01:54:23.747429: step: 538/464, loss: 1.6402591466903687 2023-01-24 01:54:24.441799: step: 540/464, loss: 0.15160660445690155 2023-01-24 01:54:25.085516: step: 542/464, loss: 0.16926424205303192 2023-01-24 01:54:25.749771: step: 544/464, loss: 0.15502096712589264 2023-01-24 01:54:26.442928: step: 546/464, loss: 0.5304486751556396 2023-01-24 01:54:27.111351: step: 548/464, loss: 0.821464478969574 2023-01-24 01:54:27.740223: step: 550/464, loss: 0.132523313164711 2023-01-24 01:54:28.368788: step: 552/464, loss: 0.11264272779226303 2023-01-24 01:54:28.994310: step: 554/464, loss: 0.13352425396442413 2023-01-24 01:54:29.632889: step: 556/464, loss: 0.7259401679039001 2023-01-24 01:54:30.233696: step: 558/464, loss: 0.24717287719249725 2023-01-24 01:54:30.831472: step: 560/464, loss: 0.2604392468929291 2023-01-24 01:54:31.427534: step: 562/464, loss: 0.10367932915687561 2023-01-24 01:54:32.105847: step: 564/464, loss: 0.4997091591358185 2023-01-24 01:54:32.733598: step: 566/464, loss: 0.34742504358291626 2023-01-24 01:54:33.340187: step: 568/464, loss: 0.1672915518283844 2023-01-24 01:54:33.963369: step: 570/464, loss: 0.2621702253818512 2023-01-24 01:54:34.639278: step: 572/464, loss: 0.13340547680854797 2023-01-24 01:54:35.258693: step: 574/464, loss: 1.081810474395752 2023-01-24 01:54:35.959330: step: 576/464, loss: 0.17170488834381104 2023-01-24 01:54:36.611498: step: 578/464, loss: 0.28923726081848145 2023-01-24 01:54:37.202357: step: 580/464, loss: 0.20817041397094727 2023-01-24 01:54:37.842494: step: 582/464, loss: 0.10076570510864258 2023-01-24 01:54:38.499162: step: 584/464, loss: 0.8717387914657593 2023-01-24 01:54:39.208658: step: 586/464, loss: 0.09899066388607025 2023-01-24 01:54:39.829518: step: 588/464, loss: 0.5495445132255554 2023-01-24 01:54:40.487873: step: 590/464, loss: 0.3652946949005127 2023-01-24 01:54:41.106023: step: 592/464, loss: 0.2133476883172989 2023-01-24 01:54:41.725726: step: 594/464, loss: 0.16517765820026398 2023-01-24 01:54:42.349190: step: 596/464, loss: 0.49846720695495605 2023-01-24 01:54:42.964005: step: 598/464, loss: 0.8974509835243225 2023-01-24 01:54:43.632280: step: 600/464, loss: 0.818812370300293 2023-01-24 01:54:44.272812: step: 602/464, loss: 1.0239721536636353 2023-01-24 01:54:44.883399: step: 604/464, loss: 0.5344446301460266 2023-01-24 01:54:45.485057: step: 606/464, loss: 0.43328937888145447 2023-01-24 01:54:46.127866: step: 608/464, loss: 0.2958345413208008 2023-01-24 01:54:46.721870: step: 610/464, loss: 0.18794173002243042 2023-01-24 01:54:47.371511: step: 612/464, loss: 0.3947238028049469 2023-01-24 01:54:48.047867: step: 614/464, loss: 1.7605561017990112 2023-01-24 01:54:48.642721: step: 616/464, loss: 0.5139614343643188 2023-01-24 01:54:49.248881: step: 618/464, loss: 0.16109994053840637 2023-01-24 01:54:49.893419: step: 620/464, loss: 0.5573995113372803 2023-01-24 01:54:50.512690: step: 622/464, loss: 0.18353329598903656 2023-01-24 01:54:51.151053: step: 624/464, loss: 0.4768543541431427 2023-01-24 01:54:51.751573: step: 626/464, loss: 0.3241148293018341 2023-01-24 01:54:52.379127: step: 628/464, loss: 0.19876053929328918 2023-01-24 01:54:52.940867: step: 630/464, loss: 0.3442919850349426 2023-01-24 01:54:53.539934: step: 632/464, loss: 0.6632809638977051 2023-01-24 01:54:54.218124: step: 634/464, loss: 0.852631688117981 2023-01-24 01:54:54.833803: step: 636/464, loss: 0.45948293805122375 2023-01-24 01:54:55.542903: step: 638/464, loss: 0.3417765200138092 2023-01-24 01:54:56.267821: step: 640/464, loss: 0.156797856092453 2023-01-24 01:54:56.883300: step: 642/464, loss: 0.19794075191020966 2023-01-24 01:54:57.509634: step: 644/464, loss: 0.31666621565818787 2023-01-24 01:54:58.147338: step: 646/464, loss: 0.24088281393051147 2023-01-24 01:54:58.768532: step: 648/464, loss: 1.0673682689666748 2023-01-24 01:54:59.438424: step: 650/464, loss: 0.1969476342201233 2023-01-24 01:55:00.044589: step: 652/464, loss: 0.6506825685501099 2023-01-24 01:55:00.673448: step: 654/464, loss: 0.4994967579841614 2023-01-24 01:55:01.299252: step: 656/464, loss: 0.3196776211261749 2023-01-24 01:55:01.916069: step: 658/464, loss: 0.19478558003902435 2023-01-24 01:55:02.544078: step: 660/464, loss: 0.3909696638584137 2023-01-24 01:55:03.138014: step: 662/464, loss: 1.1307225227355957 2023-01-24 01:55:03.808775: step: 664/464, loss: 0.3461707532405853 2023-01-24 01:55:04.459820: step: 666/464, loss: 0.4930865168571472 2023-01-24 01:55:05.167930: step: 668/464, loss: 0.10782203823328018 2023-01-24 01:55:05.820133: step: 670/464, loss: 0.2896060645580292 2023-01-24 01:55:06.453833: step: 672/464, loss: 0.2806074619293213 2023-01-24 01:55:07.129249: step: 674/464, loss: 0.38923388719558716 2023-01-24 01:55:07.863353: step: 676/464, loss: 0.40678125619888306 2023-01-24 01:55:08.530112: step: 678/464, loss: 0.3723219037055969 2023-01-24 01:55:09.170018: step: 680/464, loss: 0.21378019452095032 2023-01-24 01:55:09.778085: step: 682/464, loss: 0.09969107806682587 2023-01-24 01:55:10.379472: step: 684/464, loss: 0.5234864354133606 2023-01-24 01:55:11.014004: step: 686/464, loss: 0.48588019609451294 2023-01-24 01:55:11.635420: step: 688/464, loss: 0.16970089077949524 2023-01-24 01:55:12.269904: step: 690/464, loss: 0.3834255337715149 2023-01-24 01:55:12.900050: step: 692/464, loss: 0.7420527935028076 2023-01-24 01:55:13.491527: step: 694/464, loss: 3.329572916030884 2023-01-24 01:55:14.136010: step: 696/464, loss: 0.696709930896759 2023-01-24 01:55:14.723294: step: 698/464, loss: 0.4788230061531067 2023-01-24 01:55:15.337290: step: 700/464, loss: 0.5059593319892883 2023-01-24 01:55:15.964377: step: 702/464, loss: 0.40401121973991394 2023-01-24 01:55:16.610351: step: 704/464, loss: 0.14822030067443848 2023-01-24 01:55:17.235951: step: 706/464, loss: 0.2039600908756256 2023-01-24 01:55:17.881447: step: 708/464, loss: 0.5814012885093689 2023-01-24 01:55:18.498614: step: 710/464, loss: 0.14190104603767395 2023-01-24 01:55:19.090038: step: 712/464, loss: 0.5267803072929382 2023-01-24 01:55:19.746613: step: 714/464, loss: 0.28079739212989807 2023-01-24 01:55:20.311919: step: 716/464, loss: 0.2747531533241272 2023-01-24 01:55:21.042343: step: 718/464, loss: 0.4952031075954437 2023-01-24 01:55:21.708524: step: 720/464, loss: 0.4949815571308136 2023-01-24 01:55:22.294124: step: 722/464, loss: 0.39597049355506897 2023-01-24 01:55:22.994158: step: 724/464, loss: 0.6801155805587769 2023-01-24 01:55:23.598169: step: 726/464, loss: 0.5496456027030945 2023-01-24 01:55:24.276221: step: 728/464, loss: 0.2377110719680786 2023-01-24 01:55:24.933258: step: 730/464, loss: 1.3523091077804565 2023-01-24 01:55:25.510607: step: 732/464, loss: 0.1813611090183258 2023-01-24 01:55:26.123118: step: 734/464, loss: 0.05107222497463226 2023-01-24 01:55:26.832874: step: 736/464, loss: 0.13555821776390076 2023-01-24 01:55:27.429275: step: 738/464, loss: 1.779261827468872 2023-01-24 01:55:28.081179: step: 740/464, loss: 0.20782986283302307 2023-01-24 01:55:28.718388: step: 742/464, loss: 1.2898242473602295 2023-01-24 01:55:29.362709: step: 744/464, loss: 0.32072851061820984 2023-01-24 01:55:29.990570: step: 746/464, loss: 0.2118932455778122 2023-01-24 01:55:30.620847: step: 748/464, loss: 0.34228479862213135 2023-01-24 01:55:31.258043: step: 750/464, loss: 0.20209060609340668 2023-01-24 01:55:31.884310: step: 752/464, loss: 0.16558676958084106 2023-01-24 01:55:32.517458: step: 754/464, loss: 0.5465676188468933 2023-01-24 01:55:33.149817: step: 756/464, loss: 0.2919003963470459 2023-01-24 01:55:33.827787: step: 758/464, loss: 0.7971205711364746 2023-01-24 01:55:34.464048: step: 760/464, loss: 0.1666930615901947 2023-01-24 01:55:35.066148: step: 762/464, loss: 0.5473403930664062 2023-01-24 01:55:35.713722: step: 764/464, loss: 0.1011856272816658 2023-01-24 01:55:36.360599: step: 766/464, loss: 0.5450992584228516 2023-01-24 01:55:37.061882: step: 768/464, loss: 0.4951463043689728 2023-01-24 01:55:37.686692: step: 770/464, loss: 0.15516912937164307 2023-01-24 01:55:38.290111: step: 772/464, loss: 0.37668851017951965 2023-01-24 01:55:38.919291: step: 774/464, loss: 0.5910404324531555 2023-01-24 01:55:39.544677: step: 776/464, loss: 0.37189438939094543 2023-01-24 01:55:40.208433: step: 778/464, loss: 0.910901665687561 2023-01-24 01:55:40.829197: step: 780/464, loss: 0.17977401614189148 2023-01-24 01:55:41.457322: step: 782/464, loss: 0.6486947536468506 2023-01-24 01:55:42.021975: step: 784/464, loss: 0.20105552673339844 2023-01-24 01:55:42.662813: step: 786/464, loss: 0.20137327909469604 2023-01-24 01:55:43.347260: step: 788/464, loss: 0.17438651621341705 2023-01-24 01:55:43.972092: step: 790/464, loss: 0.19812040030956268 2023-01-24 01:55:44.604542: step: 792/464, loss: 0.36395344138145447 2023-01-24 01:55:45.225635: step: 794/464, loss: 0.13106000423431396 2023-01-24 01:55:45.810725: step: 796/464, loss: 0.300517737865448 2023-01-24 01:55:46.447153: step: 798/464, loss: 0.606153666973114 2023-01-24 01:55:47.077687: step: 800/464, loss: 0.12423531711101532 2023-01-24 01:55:47.661170: step: 802/464, loss: 0.6810371279716492 2023-01-24 01:55:48.297203: step: 804/464, loss: 0.264427125453949 2023-01-24 01:55:48.896812: step: 806/464, loss: 0.11184506118297577 2023-01-24 01:55:49.527534: step: 808/464, loss: 0.1165018230676651 2023-01-24 01:55:50.194851: step: 810/464, loss: 0.22094683349132538 2023-01-24 01:55:50.844938: step: 812/464, loss: 0.2403636872768402 2023-01-24 01:55:51.466769: step: 814/464, loss: 0.25624537467956543 2023-01-24 01:55:52.122595: step: 816/464, loss: 0.18656274676322937 2023-01-24 01:55:52.794269: step: 818/464, loss: 0.19701141119003296 2023-01-24 01:55:53.491820: step: 820/464, loss: 0.17605522274971008 2023-01-24 01:55:54.105374: step: 822/464, loss: 0.17397205531597137 2023-01-24 01:55:54.711931: step: 824/464, loss: 0.1863817274570465 2023-01-24 01:55:55.432213: step: 826/464, loss: 0.16790562868118286 2023-01-24 01:55:56.035255: step: 828/464, loss: 0.17844824492931366 2023-01-24 01:55:56.654919: step: 830/464, loss: 0.18206892907619476 2023-01-24 01:55:57.296498: step: 832/464, loss: 0.2522604465484619 2023-01-24 01:55:57.937812: step: 834/464, loss: 0.16825295984745026 2023-01-24 01:55:58.566261: step: 836/464, loss: 0.2886344790458679 2023-01-24 01:55:59.195298: step: 838/464, loss: 0.2774253785610199 2023-01-24 01:55:59.836687: step: 840/464, loss: 0.6883927583694458 2023-01-24 01:56:00.423467: step: 842/464, loss: 0.4036794900894165 2023-01-24 01:56:01.052226: step: 844/464, loss: 1.1966632604599 2023-01-24 01:56:01.667846: step: 846/464, loss: 0.34209907054901123 2023-01-24 01:56:02.299405: step: 848/464, loss: 0.7691254019737244 2023-01-24 01:56:02.968019: step: 850/464, loss: 0.41199547052383423 2023-01-24 01:56:03.583201: step: 852/464, loss: 0.17533652484416962 2023-01-24 01:56:04.244726: step: 854/464, loss: 0.17096589505672455 2023-01-24 01:56:04.892785: step: 856/464, loss: 0.2661553919315338 2023-01-24 01:56:05.518309: step: 858/464, loss: 0.17946060001850128 2023-01-24 01:56:06.088947: step: 860/464, loss: 0.946364164352417 2023-01-24 01:56:06.705044: step: 862/464, loss: 0.7228373885154724 2023-01-24 01:56:07.345346: step: 864/464, loss: 0.3066113293170929 2023-01-24 01:56:07.987574: step: 866/464, loss: 0.5188855528831482 2023-01-24 01:56:08.615019: step: 868/464, loss: 0.34665554761886597 2023-01-24 01:56:09.204144: step: 870/464, loss: 0.16440880298614502 2023-01-24 01:56:09.837325: step: 872/464, loss: 0.36245080828666687 2023-01-24 01:56:10.516615: step: 874/464, loss: 12.406877517700195 2023-01-24 01:56:11.146002: step: 876/464, loss: 0.190341517329216 2023-01-24 01:56:11.842208: step: 878/464, loss: 0.2363278865814209 2023-01-24 01:56:12.469845: step: 880/464, loss: 0.29699596762657166 2023-01-24 01:56:13.134989: step: 882/464, loss: 6.164422988891602 2023-01-24 01:56:13.763365: step: 884/464, loss: 0.2677531838417053 2023-01-24 01:56:14.490737: step: 886/464, loss: 0.7761355638504028 2023-01-24 01:56:15.086864: step: 888/464, loss: 0.44219255447387695 2023-01-24 01:56:15.680017: step: 890/464, loss: 0.6220502257347107 2023-01-24 01:56:16.283666: step: 892/464, loss: 0.3263578712940216 2023-01-24 01:56:16.828431: step: 894/464, loss: 1.4174706935882568 2023-01-24 01:56:17.475063: step: 896/464, loss: 0.4406396448612213 2023-01-24 01:56:18.106905: step: 898/464, loss: 0.28490161895751953 2023-01-24 01:56:18.718965: step: 900/464, loss: 0.25278356671333313 2023-01-24 01:56:19.449348: step: 902/464, loss: 0.22905734181404114 2023-01-24 01:56:20.024284: step: 904/464, loss: 0.11674833297729492 2023-01-24 01:56:20.653216: step: 906/464, loss: 0.2179863154888153 2023-01-24 01:56:21.317965: step: 908/464, loss: 0.31769436597824097 2023-01-24 01:56:21.938805: step: 910/464, loss: 0.5596010684967041 2023-01-24 01:56:22.536162: step: 912/464, loss: 0.07528844475746155 2023-01-24 01:56:23.150549: step: 914/464, loss: 0.30073490738868713 2023-01-24 01:56:23.743023: step: 916/464, loss: 0.4311095178127289 2023-01-24 01:56:24.391009: step: 918/464, loss: 0.10860362648963928 2023-01-24 01:56:25.016495: step: 920/464, loss: 0.23732000589370728 2023-01-24 01:56:25.648122: step: 922/464, loss: 0.5516647100448608 2023-01-24 01:56:26.419885: step: 924/464, loss: 0.28733107447624207 2023-01-24 01:56:27.015713: step: 926/464, loss: 0.10183804482221603 2023-01-24 01:56:27.702714: step: 928/464, loss: 0.15522173047065735 2023-01-24 01:56:28.150994: step: 930/464, loss: 0.421254426240921 ================================================== Loss: 0.442 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3142933006535948, 'r': 0.304154807084124, 'f1': 0.3091409514625522}, 'combined': 0.22778806949872268, 'epoch': 9} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30278332473689085, 'r': 0.2947202242618038, 'f1': 0.2986973701452809}, 'combined': 0.19500450071660827, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3138254029054591, 'r': 0.31799386176758093, 'f1': 0.3158958815297176}, 'combined': 0.2327653863903182, 'epoch': 9} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3161297063367123, 'r': 0.2998583243929109, 'f1': 0.30777911032027083}, 'combined': 0.200933512437068, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3286018465502433, 'r': 0.31114292491190026, 'f1': 0.31963415483152324}, 'combined': 0.23551990356006974, 'epoch': 9} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3135392987502794, 'r': 0.2890440410354138, 'f1': 0.30079379880108104}, 'combined': 0.19637315362143115, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27192982456140347, 'r': 0.2952380952380952, 'f1': 0.28310502283105016}, 'combined': 0.18873668188736675, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2647058823529412, 'r': 0.391304347826087, 'f1': 0.3157894736842105}, 'combined': 0.15789473684210525, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5454545454545454, 'r': 0.20689655172413793, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31993440233236153, 'r': 0.2969088203463203, 'f1': 0.3079918607914679}, 'combined': 0.22694137110950266, 'epoch': 5} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30404472815418065, 'r': 0.2555091476965408, 'f1': 0.27767195512385795}, 'combined': 0.181278063966871, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5192307692307693, 'r': 0.23275862068965517, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 5} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:59:04.208665: step: 2/464, loss: 0.28201234340667725 2023-01-24 01:59:04.880783: step: 4/464, loss: 0.11395406723022461 2023-01-24 01:59:05.456057: step: 6/464, loss: 0.09517635405063629 2023-01-24 01:59:06.143858: step: 8/464, loss: 0.08937979489564896 2023-01-24 01:59:06.753079: step: 10/464, loss: 0.5818873047828674 2023-01-24 01:59:07.384304: step: 12/464, loss: 0.2057216912508011 2023-01-24 01:59:08.188977: step: 14/464, loss: 1.59352707862854 2023-01-24 01:59:08.823910: step: 16/464, loss: 0.37595269083976746 2023-01-24 01:59:09.390189: step: 18/464, loss: 0.08871912211179733 2023-01-24 01:59:10.064076: step: 20/464, loss: 0.29423627257347107 2023-01-24 01:59:10.707007: step: 22/464, loss: 0.27823805809020996 2023-01-24 01:59:11.366432: step: 24/464, loss: 0.149687722325325 2023-01-24 01:59:11.959753: step: 26/464, loss: 0.198866605758667 2023-01-24 01:59:12.578215: step: 28/464, loss: 0.49141961336135864 2023-01-24 01:59:13.204016: step: 30/464, loss: 0.11584275215864182 2023-01-24 01:59:13.832176: step: 32/464, loss: 0.1265438348054886 2023-01-24 01:59:14.420308: step: 34/464, loss: 0.2609385848045349 2023-01-24 01:59:15.113503: step: 36/464, loss: 0.8702123761177063 2023-01-24 01:59:15.794593: step: 38/464, loss: 0.11494985222816467 2023-01-24 01:59:16.378077: step: 40/464, loss: 0.17757941782474518 2023-01-24 01:59:16.983997: step: 42/464, loss: 0.06117549166083336 2023-01-24 01:59:17.576406: step: 44/464, loss: 0.5850616693496704 2023-01-24 01:59:18.150257: step: 46/464, loss: 0.1942889392375946 2023-01-24 01:59:18.941049: step: 48/464, loss: 0.7532861232757568 2023-01-24 01:59:19.582477: step: 50/464, loss: 0.46198928356170654 2023-01-24 01:59:20.234632: step: 52/464, loss: 0.08040419965982437 2023-01-24 01:59:20.933861: step: 54/464, loss: 0.1533208042383194 2023-01-24 01:59:21.543900: step: 56/464, loss: 0.11138932406902313 2023-01-24 01:59:22.169663: step: 58/464, loss: 0.19376027584075928 2023-01-24 01:59:22.901525: step: 60/464, loss: 0.6468014717102051 2023-01-24 01:59:23.476824: step: 62/464, loss: 0.3951880931854248 2023-01-24 01:59:24.085183: step: 64/464, loss: 0.0764017328619957 2023-01-24 01:59:24.746879: step: 66/464, loss: 0.1439065784215927 2023-01-24 01:59:25.357656: step: 68/464, loss: 0.17877231538295746 2023-01-24 01:59:25.995966: step: 70/464, loss: 0.20332950353622437 2023-01-24 01:59:26.611522: step: 72/464, loss: 0.12935759127140045 2023-01-24 01:59:27.299967: step: 74/464, loss: 0.40820690989494324 2023-01-24 01:59:27.964707: step: 76/464, loss: 0.15177741646766663 2023-01-24 01:59:28.628621: step: 78/464, loss: 0.3033875823020935 2023-01-24 01:59:29.264568: step: 80/464, loss: 0.24807725846767426 2023-01-24 01:59:29.972291: step: 82/464, loss: 0.25280049443244934 2023-01-24 01:59:30.569231: step: 84/464, loss: 0.22164084017276764 2023-01-24 01:59:31.105633: step: 86/464, loss: 0.0625792145729065 2023-01-24 01:59:31.743223: step: 88/464, loss: 0.3868742287158966 2023-01-24 01:59:32.348747: step: 90/464, loss: 0.10269523411989212 2023-01-24 01:59:32.940222: step: 92/464, loss: 0.19511628150939941 2023-01-24 01:59:33.533719: step: 94/464, loss: 0.22719942033290863 2023-01-24 01:59:34.117450: step: 96/464, loss: 0.16706952452659607 2023-01-24 01:59:34.734324: step: 98/464, loss: 0.3117726147174835 2023-01-24 01:59:35.386661: step: 100/464, loss: 0.12993161380290985 2023-01-24 01:59:36.013859: step: 102/464, loss: 0.28551098704338074 2023-01-24 01:59:36.612341: step: 104/464, loss: 0.4209898114204407 2023-01-24 01:59:37.220143: step: 106/464, loss: 0.114765964448452 2023-01-24 01:59:37.914257: step: 108/464, loss: 0.2144257128238678 2023-01-24 01:59:38.541851: step: 110/464, loss: 0.09205218404531479 2023-01-24 01:59:39.138588: step: 112/464, loss: 0.18294711410999298 2023-01-24 01:59:39.710014: step: 114/464, loss: 0.23293060064315796 2023-01-24 01:59:40.327170: step: 116/464, loss: 1.5510797500610352 2023-01-24 01:59:40.916715: step: 118/464, loss: 0.08900728076696396 2023-01-24 01:59:41.545670: step: 120/464, loss: 0.3488987982273102 2023-01-24 01:59:42.180445: step: 122/464, loss: 0.16980993747711182 2023-01-24 01:59:42.864782: step: 124/464, loss: 0.2912577986717224 2023-01-24 01:59:43.511765: step: 126/464, loss: 0.17310898005962372 2023-01-24 01:59:44.082816: step: 128/464, loss: 0.3511984944343567 2023-01-24 01:59:44.798411: step: 130/464, loss: 0.4029984474182129 2023-01-24 01:59:45.372542: step: 132/464, loss: 0.13000822067260742 2023-01-24 01:59:46.097150: step: 134/464, loss: 0.0925740897655487 2023-01-24 01:59:46.725585: step: 136/464, loss: 0.10538043081760406 2023-01-24 01:59:47.358708: step: 138/464, loss: 0.09516558796167374 2023-01-24 01:59:47.949285: step: 140/464, loss: 1.171372652053833 2023-01-24 01:59:48.554062: step: 142/464, loss: 0.13884663581848145 2023-01-24 01:59:49.182390: step: 144/464, loss: 0.12954020500183105 2023-01-24 01:59:49.793539: step: 146/464, loss: 0.41415587067604065 2023-01-24 01:59:50.422167: step: 148/464, loss: 0.17221221327781677 2023-01-24 01:59:51.022100: step: 150/464, loss: 0.4241466224193573 2023-01-24 01:59:51.728573: step: 152/464, loss: 0.14755919575691223 2023-01-24 01:59:52.390090: step: 154/464, loss: 0.1533004343509674 2023-01-24 01:59:53.019974: step: 156/464, loss: 0.2734523415565491 2023-01-24 01:59:53.682470: step: 158/464, loss: 0.09559568762779236 2023-01-24 01:59:54.308749: step: 160/464, loss: 0.15946142375469208 2023-01-24 01:59:54.968022: step: 162/464, loss: 0.18343742191791534 2023-01-24 01:59:55.574144: step: 164/464, loss: 0.12799851596355438 2023-01-24 01:59:56.221804: step: 166/464, loss: 0.0605124905705452 2023-01-24 01:59:56.833152: step: 168/464, loss: 0.12303553521633148 2023-01-24 01:59:57.412071: step: 170/464, loss: 0.05820485204458237 2023-01-24 01:59:57.979993: step: 172/464, loss: 0.6615782976150513 2023-01-24 01:59:58.573081: step: 174/464, loss: 0.21617081761360168 2023-01-24 01:59:59.153842: step: 176/464, loss: 0.28681764006614685 2023-01-24 01:59:59.913146: step: 178/464, loss: 0.10227718204259872 2023-01-24 02:00:00.569615: step: 180/464, loss: 0.26523256301879883 2023-01-24 02:00:01.123616: step: 182/464, loss: 0.16301316022872925 2023-01-24 02:00:01.766648: step: 184/464, loss: 0.2153080403804779 2023-01-24 02:00:02.459438: step: 186/464, loss: 0.2818724513053894 2023-01-24 02:00:03.130757: step: 188/464, loss: 0.0786951556801796 2023-01-24 02:00:03.747480: step: 190/464, loss: 0.6118411421775818 2023-01-24 02:00:04.351930: step: 192/464, loss: 0.10918118059635162 2023-01-24 02:00:04.919757: step: 194/464, loss: 0.2923741638660431 2023-01-24 02:00:05.513885: step: 196/464, loss: 0.15965557098388672 2023-01-24 02:00:06.179422: step: 198/464, loss: 0.24203696846961975 2023-01-24 02:00:06.753008: step: 200/464, loss: 0.4994286000728607 2023-01-24 02:00:07.360800: step: 202/464, loss: 0.3190830647945404 2023-01-24 02:00:08.016197: step: 204/464, loss: 0.23423121869564056 2023-01-24 02:00:08.654394: step: 206/464, loss: 0.19695164263248444 2023-01-24 02:00:09.268199: step: 208/464, loss: 0.16323286294937134 2023-01-24 02:00:09.857309: step: 210/464, loss: 0.5013033151626587 2023-01-24 02:00:10.484535: step: 212/464, loss: 0.2052978128194809 2023-01-24 02:00:11.101564: step: 214/464, loss: 0.11153028160333633 2023-01-24 02:00:11.718316: step: 216/464, loss: 0.4214012622833252 2023-01-24 02:00:12.346079: step: 218/464, loss: 0.45658013224601746 2023-01-24 02:00:13.017987: step: 220/464, loss: 0.36566397547721863 2023-01-24 02:00:13.644057: step: 222/464, loss: 0.42506176233291626 2023-01-24 02:00:14.234433: step: 224/464, loss: 0.05392969027161598 2023-01-24 02:00:14.812316: step: 226/464, loss: 0.2593022882938385 2023-01-24 02:00:15.458090: step: 228/464, loss: 0.3885348439216614 2023-01-24 02:00:16.034157: step: 230/464, loss: 0.5690717697143555 2023-01-24 02:00:16.645196: step: 232/464, loss: 0.4385407567024231 2023-01-24 02:00:17.252402: step: 234/464, loss: 0.15703076124191284 2023-01-24 02:00:17.812679: step: 236/464, loss: 0.6849433779716492 2023-01-24 02:00:18.461800: step: 238/464, loss: 2.7772321701049805 2023-01-24 02:00:19.124593: step: 240/464, loss: 0.2341809868812561 2023-01-24 02:00:19.808795: step: 242/464, loss: 0.16340501606464386 2023-01-24 02:00:20.426267: step: 244/464, loss: 0.5387740731239319 2023-01-24 02:00:21.058954: step: 246/464, loss: 0.11686232686042786 2023-01-24 02:00:21.660403: step: 248/464, loss: 0.2346784472465515 2023-01-24 02:00:22.337161: step: 250/464, loss: 0.5751308798789978 2023-01-24 02:00:22.935524: step: 252/464, loss: 0.187391459941864 2023-01-24 02:00:23.539858: step: 254/464, loss: 0.3938005268573761 2023-01-24 02:00:24.834167: step: 256/464, loss: 0.16256101429462433 2023-01-24 02:00:25.434423: step: 258/464, loss: 0.16977809369564056 2023-01-24 02:00:26.051367: step: 260/464, loss: 0.13777224719524384 2023-01-24 02:00:26.658798: step: 262/464, loss: 0.6039695143699646 2023-01-24 02:00:27.220049: step: 264/464, loss: 0.04727890342473984 2023-01-24 02:00:27.799975: step: 266/464, loss: 0.3389414846897125 2023-01-24 02:00:28.371669: step: 268/464, loss: 0.505021333694458 2023-01-24 02:00:28.993338: step: 270/464, loss: 0.09833864122629166 2023-01-24 02:00:29.667402: step: 272/464, loss: 0.40759316086769104 2023-01-24 02:00:30.267041: step: 274/464, loss: 0.3481176495552063 2023-01-24 02:00:30.857757: step: 276/464, loss: 0.17788687348365784 2023-01-24 02:00:31.440095: step: 278/464, loss: 0.22429144382476807 2023-01-24 02:00:32.040122: step: 280/464, loss: 0.2772428095340729 2023-01-24 02:00:32.626432: step: 282/464, loss: 0.04167911782860756 2023-01-24 02:00:33.273542: step: 284/464, loss: 1.6752103567123413 2023-01-24 02:00:33.905867: step: 286/464, loss: 0.13085174560546875 2023-01-24 02:00:34.542094: step: 288/464, loss: 0.3987504541873932 2023-01-24 02:00:35.231306: step: 290/464, loss: 0.4655897915363312 2023-01-24 02:00:35.856562: step: 292/464, loss: 0.4385165274143219 2023-01-24 02:00:36.470108: step: 294/464, loss: 0.11675146967172623 2023-01-24 02:00:37.087428: step: 296/464, loss: 0.4354645311832428 2023-01-24 02:00:37.661711: step: 298/464, loss: 0.6538931727409363 2023-01-24 02:00:38.294508: step: 300/464, loss: 0.18928231298923492 2023-01-24 02:00:38.936297: step: 302/464, loss: 0.2807188034057617 2023-01-24 02:00:39.517513: step: 304/464, loss: 0.24691380560398102 2023-01-24 02:00:40.127699: step: 306/464, loss: 0.15931986272335052 2023-01-24 02:00:40.738775: step: 308/464, loss: 0.17005343735218048 2023-01-24 02:00:41.416220: step: 310/464, loss: 0.38724708557128906 2023-01-24 02:00:42.005273: step: 312/464, loss: 0.07521989941596985 2023-01-24 02:00:42.668422: step: 314/464, loss: 0.4269282817840576 2023-01-24 02:00:43.274412: step: 316/464, loss: 0.5009620785713196 2023-01-24 02:00:43.870707: step: 318/464, loss: 0.15193048119544983 2023-01-24 02:00:44.444601: step: 320/464, loss: 0.14470677077770233 2023-01-24 02:00:45.095692: step: 322/464, loss: 0.17145533859729767 2023-01-24 02:00:45.703511: step: 324/464, loss: 0.18966177105903625 2023-01-24 02:00:46.369404: step: 326/464, loss: 0.2075006663799286 2023-01-24 02:00:46.994950: step: 328/464, loss: 0.16791434586048126 2023-01-24 02:00:47.608509: step: 330/464, loss: 0.19567112624645233 2023-01-24 02:00:48.279017: step: 332/464, loss: 0.6014448404312134 2023-01-24 02:00:48.909857: step: 334/464, loss: 0.30597642064094543 2023-01-24 02:00:49.516806: step: 336/464, loss: 0.12940239906311035 2023-01-24 02:00:50.170433: step: 338/464, loss: 0.25960540771484375 2023-01-24 02:00:50.775328: step: 340/464, loss: 0.09363491088151932 2023-01-24 02:00:51.410509: step: 342/464, loss: 0.0951174795627594 2023-01-24 02:00:51.997276: step: 344/464, loss: 0.6307440400123596 2023-01-24 02:00:52.597766: step: 346/464, loss: 0.23237261176109314 2023-01-24 02:00:53.189899: step: 348/464, loss: 0.20420436561107635 2023-01-24 02:00:53.772966: step: 350/464, loss: 0.29692232608795166 2023-01-24 02:00:54.456768: step: 352/464, loss: 0.22677010297775269 2023-01-24 02:00:55.176429: step: 354/464, loss: 0.24775464832782745 2023-01-24 02:00:55.782008: step: 356/464, loss: 0.16239655017852783 2023-01-24 02:00:56.417919: step: 358/464, loss: 0.5020847320556641 2023-01-24 02:00:57.047465: step: 360/464, loss: 0.3233250081539154 2023-01-24 02:00:57.689572: step: 362/464, loss: 0.05828201398253441 2023-01-24 02:00:58.347121: step: 364/464, loss: 0.22994956374168396 2023-01-24 02:00:59.007077: step: 366/464, loss: 0.18350572884082794 2023-01-24 02:00:59.601539: step: 368/464, loss: 0.330773264169693 2023-01-24 02:01:00.234416: step: 370/464, loss: 1.7163708209991455 2023-01-24 02:01:00.854662: step: 372/464, loss: 0.15031535923480988 2023-01-24 02:01:01.460646: step: 374/464, loss: 0.13070261478424072 2023-01-24 02:01:02.087656: step: 376/464, loss: 4.782593250274658 2023-01-24 02:01:02.717259: step: 378/464, loss: 0.19723346829414368 2023-01-24 02:01:03.419112: step: 380/464, loss: 0.21597571671009064 2023-01-24 02:01:04.106235: step: 382/464, loss: 0.1993137151002884 2023-01-24 02:01:04.695340: step: 384/464, loss: 0.4163748025894165 2023-01-24 02:01:05.297807: step: 386/464, loss: 0.5400003790855408 2023-01-24 02:01:05.929449: step: 388/464, loss: 0.0977616235613823 2023-01-24 02:01:06.547628: step: 390/464, loss: 0.2727479934692383 2023-01-24 02:01:07.188672: step: 392/464, loss: 0.4199751317501068 2023-01-24 02:01:07.815851: step: 394/464, loss: 0.34497177600860596 2023-01-24 02:01:08.427116: step: 396/464, loss: 0.47766903042793274 2023-01-24 02:01:09.073093: step: 398/464, loss: 0.37423473596572876 2023-01-24 02:01:09.686274: step: 400/464, loss: 0.1728307157754898 2023-01-24 02:01:10.318046: step: 402/464, loss: 0.5170645117759705 2023-01-24 02:01:10.904001: step: 404/464, loss: 0.3612293004989624 2023-01-24 02:01:11.597221: step: 406/464, loss: 0.31947973370552063 2023-01-24 02:01:12.250787: step: 408/464, loss: 0.11924263834953308 2023-01-24 02:01:12.872759: step: 410/464, loss: 0.52806156873703 2023-01-24 02:01:13.468585: step: 412/464, loss: 0.45087093114852905 2023-01-24 02:01:14.108992: step: 414/464, loss: 0.07716657966375351 2023-01-24 02:01:14.651141: step: 416/464, loss: 0.22068598866462708 2023-01-24 02:01:15.259849: step: 418/464, loss: 0.2416449636220932 2023-01-24 02:01:15.892213: step: 420/464, loss: 0.23852917551994324 2023-01-24 02:01:16.492197: step: 422/464, loss: 0.4021141231060028 2023-01-24 02:01:17.113127: step: 424/464, loss: 0.2847093343734741 2023-01-24 02:01:17.742879: step: 426/464, loss: 0.1429942548274994 2023-01-24 02:01:18.393067: step: 428/464, loss: 0.47880011796951294 2023-01-24 02:01:19.009499: step: 430/464, loss: 0.9428687691688538 2023-01-24 02:01:19.646746: step: 432/464, loss: 0.621206521987915 2023-01-24 02:01:20.275970: step: 434/464, loss: 0.1105286180973053 2023-01-24 02:01:20.954797: step: 436/464, loss: 0.13531525433063507 2023-01-24 02:01:21.631784: step: 438/464, loss: 0.3364936113357544 2023-01-24 02:01:22.228829: step: 440/464, loss: 0.12852786481380463 2023-01-24 02:01:22.836468: step: 442/464, loss: 0.25219330191612244 2023-01-24 02:01:23.417842: step: 444/464, loss: 1.138298749923706 2023-01-24 02:01:24.063423: step: 446/464, loss: 0.24194155633449554 2023-01-24 02:01:24.676366: step: 448/464, loss: 1.1671202182769775 2023-01-24 02:01:25.342777: step: 450/464, loss: 0.10700088739395142 2023-01-24 02:01:25.970618: step: 452/464, loss: 0.28190112113952637 2023-01-24 02:01:26.551431: step: 454/464, loss: 0.31971225142478943 2023-01-24 02:01:27.197284: step: 456/464, loss: 0.9074978828430176 2023-01-24 02:01:27.833530: step: 458/464, loss: 0.13902981579303741 2023-01-24 02:01:28.447588: step: 460/464, loss: 0.20467960834503174 2023-01-24 02:01:29.011368: step: 462/464, loss: 0.04869261011481285 2023-01-24 02:01:29.642993: step: 464/464, loss: 0.17675481736660004 2023-01-24 02:01:30.265758: step: 466/464, loss: 0.9886869788169861 2023-01-24 02:01:30.943622: step: 468/464, loss: 0.11232612282037735 2023-01-24 02:01:31.531301: step: 470/464, loss: 0.06668158620595932 2023-01-24 02:01:32.111128: step: 472/464, loss: 0.11958461254835129 2023-01-24 02:01:32.695467: step: 474/464, loss: 0.6422286033630371 2023-01-24 02:01:33.330004: step: 476/464, loss: 0.0932425931096077 2023-01-24 02:01:33.945305: step: 478/464, loss: 0.23732468485832214 2023-01-24 02:01:34.575652: step: 480/464, loss: 0.21387946605682373 2023-01-24 02:01:35.235631: step: 482/464, loss: 0.25116702914237976 2023-01-24 02:01:35.875641: step: 484/464, loss: 0.30064210295677185 2023-01-24 02:01:36.485230: step: 486/464, loss: 0.35024961829185486 2023-01-24 02:01:37.119153: step: 488/464, loss: 0.24333778023719788 2023-01-24 02:01:37.792469: step: 490/464, loss: 0.14430415630340576 2023-01-24 02:01:38.433005: step: 492/464, loss: 0.2119111269712448 2023-01-24 02:01:39.075218: step: 494/464, loss: 0.07665427029132843 2023-01-24 02:01:39.731993: step: 496/464, loss: 0.20361050963401794 2023-01-24 02:01:40.366099: step: 498/464, loss: 0.2889696955680847 2023-01-24 02:01:40.988968: step: 500/464, loss: 0.14266535639762878 2023-01-24 02:01:41.574528: step: 502/464, loss: 0.8530055284500122 2023-01-24 02:01:42.196380: step: 504/464, loss: 0.9472627639770508 2023-01-24 02:01:42.809310: step: 506/464, loss: 0.20206870138645172 2023-01-24 02:01:43.574634: step: 508/464, loss: 0.10462598502635956 2023-01-24 02:01:44.196167: step: 510/464, loss: 0.147408589720726 2023-01-24 02:01:44.777493: step: 512/464, loss: 0.24323834478855133 2023-01-24 02:01:45.417036: step: 514/464, loss: 0.16047310829162598 2023-01-24 02:01:46.071776: step: 516/464, loss: 0.15635882318019867 2023-01-24 02:01:46.727182: step: 518/464, loss: 0.4227506220340729 2023-01-24 02:01:47.376928: step: 520/464, loss: 0.2195674329996109 2023-01-24 02:01:47.987535: step: 522/464, loss: 0.11909165978431702 2023-01-24 02:01:48.572966: step: 524/464, loss: 0.18910080194473267 2023-01-24 02:01:49.177350: step: 526/464, loss: 0.3546684682369232 2023-01-24 02:01:49.819463: step: 528/464, loss: 0.12872424721717834 2023-01-24 02:01:50.426659: step: 530/464, loss: 0.07886382937431335 2023-01-24 02:01:51.041819: step: 532/464, loss: 0.16459733247756958 2023-01-24 02:01:51.666284: step: 534/464, loss: 0.1703285276889801 2023-01-24 02:01:52.294587: step: 536/464, loss: 0.6366952061653137 2023-01-24 02:01:52.955101: step: 538/464, loss: 0.09655551612377167 2023-01-24 02:01:53.541629: step: 540/464, loss: 0.21718533337116241 2023-01-24 02:01:54.166350: step: 542/464, loss: 0.17979641258716583 2023-01-24 02:01:54.762079: step: 544/464, loss: 0.37652286887168884 2023-01-24 02:01:55.336646: step: 546/464, loss: 0.08691912144422531 2023-01-24 02:01:55.958970: step: 548/464, loss: 0.3053312599658966 2023-01-24 02:01:56.657723: step: 550/464, loss: 0.28237849473953247 2023-01-24 02:01:57.340737: step: 552/464, loss: 0.594807505607605 2023-01-24 02:01:57.952517: step: 554/464, loss: 0.6040641665458679 2023-01-24 02:01:58.577266: step: 556/464, loss: 0.17427340149879456 2023-01-24 02:01:59.219641: step: 558/464, loss: 0.34472134709358215 2023-01-24 02:01:59.811174: step: 560/464, loss: 0.2316531389951706 2023-01-24 02:02:00.463149: step: 562/464, loss: 0.6255875825881958 2023-01-24 02:02:01.031408: step: 564/464, loss: 0.24147167801856995 2023-01-24 02:02:01.659066: step: 566/464, loss: 0.6458610892295837 2023-01-24 02:02:02.292680: step: 568/464, loss: 0.47449079155921936 2023-01-24 02:02:02.855080: step: 570/464, loss: 0.42779847979545593 2023-01-24 02:02:03.506843: step: 572/464, loss: 0.18389077484607697 2023-01-24 02:02:04.099178: step: 574/464, loss: 0.1575031727552414 2023-01-24 02:02:04.737885: step: 576/464, loss: 0.25704607367515564 2023-01-24 02:02:05.380367: step: 578/464, loss: 0.1292218267917633 2023-01-24 02:02:06.060321: step: 580/464, loss: 0.5134697556495667 2023-01-24 02:02:06.677758: step: 582/464, loss: 0.3436373472213745 2023-01-24 02:02:07.277376: step: 584/464, loss: 0.25400784611701965 2023-01-24 02:02:07.865908: step: 586/464, loss: 0.35351771116256714 2023-01-24 02:02:08.477175: step: 588/464, loss: 0.15195152163505554 2023-01-24 02:02:09.088009: step: 590/464, loss: 1.1360224485397339 2023-01-24 02:02:09.660160: step: 592/464, loss: 0.13217885792255402 2023-01-24 02:02:10.267461: step: 594/464, loss: 0.2512986361980438 2023-01-24 02:02:10.899882: step: 596/464, loss: 0.2633204162120819 2023-01-24 02:02:11.530634: step: 598/464, loss: 0.07968783378601074 2023-01-24 02:02:12.142695: step: 600/464, loss: 0.16802997887134552 2023-01-24 02:02:12.825411: step: 602/464, loss: 0.11943110078573227 2023-01-24 02:02:13.500164: step: 604/464, loss: 0.2673475444316864 2023-01-24 02:02:14.142747: step: 606/464, loss: 0.11374931782484055 2023-01-24 02:02:14.789446: step: 608/464, loss: 1.5972604751586914 2023-01-24 02:02:15.401746: step: 610/464, loss: 0.14802365005016327 2023-01-24 02:02:16.050579: step: 612/464, loss: 0.09203001856803894 2023-01-24 02:02:16.652953: step: 614/464, loss: 0.1717901974916458 2023-01-24 02:02:17.252951: step: 616/464, loss: 0.34820762276649475 2023-01-24 02:02:17.852386: step: 618/464, loss: 0.14696389436721802 2023-01-24 02:02:18.449490: step: 620/464, loss: 0.19122423231601715 2023-01-24 02:02:19.096480: step: 622/464, loss: 0.1879727691411972 2023-01-24 02:02:19.682373: step: 624/464, loss: 0.18596497178077698 2023-01-24 02:02:20.321899: step: 626/464, loss: 0.4350574314594269 2023-01-24 02:02:20.992120: step: 628/464, loss: 0.1632673740386963 2023-01-24 02:02:21.584519: step: 630/464, loss: 0.1334041804075241 2023-01-24 02:02:22.183492: step: 632/464, loss: 0.1441114842891693 2023-01-24 02:02:22.790996: step: 634/464, loss: 0.3799474835395813 2023-01-24 02:02:23.420355: step: 636/464, loss: 0.3114156424999237 2023-01-24 02:02:24.023662: step: 638/464, loss: 0.23049066960811615 2023-01-24 02:02:24.632085: step: 640/464, loss: 0.3577210307121277 2023-01-24 02:02:25.237085: step: 642/464, loss: 0.15999066829681396 2023-01-24 02:02:25.853696: step: 644/464, loss: 0.20487497746944427 2023-01-24 02:02:26.441088: step: 646/464, loss: 0.2615987956523895 2023-01-24 02:02:27.125453: step: 648/464, loss: 0.49903854727745056 2023-01-24 02:02:27.769912: step: 650/464, loss: 0.2588825821876526 2023-01-24 02:02:28.343516: step: 652/464, loss: 0.8313612937927246 2023-01-24 02:02:28.958722: step: 654/464, loss: 0.0567249171435833 2023-01-24 02:02:29.617395: step: 656/464, loss: 0.11456841230392456 2023-01-24 02:02:30.210765: step: 658/464, loss: 0.33578962087631226 2023-01-24 02:02:30.847354: step: 660/464, loss: 0.887792706489563 2023-01-24 02:02:31.457165: step: 662/464, loss: 0.1429959535598755 2023-01-24 02:02:32.033067: step: 664/464, loss: 0.9350042343139648 2023-01-24 02:02:32.665667: step: 666/464, loss: 0.16369758546352386 2023-01-24 02:02:33.282087: step: 668/464, loss: 0.24192282557487488 2023-01-24 02:02:33.913144: step: 670/464, loss: 0.22956036031246185 2023-01-24 02:02:34.562880: step: 672/464, loss: 0.5422670245170593 2023-01-24 02:02:35.147865: step: 674/464, loss: 0.3296627998352051 2023-01-24 02:02:35.733803: step: 676/464, loss: 0.15746179223060608 2023-01-24 02:02:36.396804: step: 678/464, loss: 0.37195703387260437 2023-01-24 02:02:37.067703: step: 680/464, loss: 0.6448568105697632 2023-01-24 02:02:37.643812: step: 682/464, loss: 0.4757457971572876 2023-01-24 02:02:38.254942: step: 684/464, loss: 0.16026271879673004 2023-01-24 02:02:38.879131: step: 686/464, loss: 0.34827935695648193 2023-01-24 02:02:39.541811: step: 688/464, loss: 0.4063800871372223 2023-01-24 02:02:40.188012: step: 690/464, loss: 0.1765885055065155 2023-01-24 02:02:40.813314: step: 692/464, loss: 0.0919233188033104 2023-01-24 02:02:41.500241: step: 694/464, loss: 0.3090978264808655 2023-01-24 02:02:42.119025: step: 696/464, loss: 0.06764783710241318 2023-01-24 02:02:42.870852: step: 698/464, loss: 0.2639875113964081 2023-01-24 02:02:43.468009: step: 700/464, loss: 0.13689519464969635 2023-01-24 02:02:44.104218: step: 702/464, loss: 0.38917234539985657 2023-01-24 02:02:44.741517: step: 704/464, loss: 0.12273726612329483 2023-01-24 02:02:45.331178: step: 706/464, loss: 0.21885795891284943 2023-01-24 02:02:45.946511: step: 708/464, loss: 0.2251049429178238 2023-01-24 02:02:46.529497: step: 710/464, loss: 0.1878732144832611 2023-01-24 02:02:47.124991: step: 712/464, loss: 0.8885036110877991 2023-01-24 02:02:47.758642: step: 714/464, loss: 0.0979180634021759 2023-01-24 02:02:48.332979: step: 716/464, loss: 0.32866814732551575 2023-01-24 02:02:48.918188: step: 718/464, loss: 0.09184763580560684 2023-01-24 02:02:49.519627: step: 720/464, loss: 0.26306676864624023 2023-01-24 02:02:50.114961: step: 722/464, loss: 0.08239500224590302 2023-01-24 02:02:50.720612: step: 724/464, loss: 0.06577450037002563 2023-01-24 02:02:51.438975: step: 726/464, loss: 0.38353481888771057 2023-01-24 02:02:52.112236: step: 728/464, loss: 0.23454375565052032 2023-01-24 02:02:52.750215: step: 730/464, loss: 0.13665996491909027 2023-01-24 02:02:53.336439: step: 732/464, loss: 0.227525994181633 2023-01-24 02:02:53.982790: step: 734/464, loss: 0.9282320737838745 2023-01-24 02:02:54.655271: step: 736/464, loss: 0.2728773057460785 2023-01-24 02:02:55.245869: step: 738/464, loss: 0.2511763870716095 2023-01-24 02:02:55.975587: step: 740/464, loss: 2.0587427616119385 2023-01-24 02:02:56.614052: step: 742/464, loss: 0.24321237206459045 2023-01-24 02:02:57.223463: step: 744/464, loss: 0.15974242985248566 2023-01-24 02:02:57.879600: step: 746/464, loss: 0.6454801559448242 2023-01-24 02:02:58.461399: step: 748/464, loss: 0.4255438446998596 2023-01-24 02:02:59.098078: step: 750/464, loss: 0.17316550016403198 2023-01-24 02:02:59.717074: step: 752/464, loss: 0.9697102308273315 2023-01-24 02:03:00.334467: step: 754/464, loss: 0.19061174988746643 2023-01-24 02:03:00.938552: step: 756/464, loss: 0.749290406703949 2023-01-24 02:03:01.585198: step: 758/464, loss: 0.23312531411647797 2023-01-24 02:03:02.162702: step: 760/464, loss: 0.5433177947998047 2023-01-24 02:03:02.790486: step: 762/464, loss: 0.3394450545310974 2023-01-24 02:03:03.457009: step: 764/464, loss: 0.24980996549129486 2023-01-24 02:03:04.034307: step: 766/464, loss: 0.1455468237400055 2023-01-24 02:03:04.686287: step: 768/464, loss: 0.24350574612617493 2023-01-24 02:03:05.345880: step: 770/464, loss: 0.8219181299209595 2023-01-24 02:03:05.955677: step: 772/464, loss: 0.16108842194080353 2023-01-24 02:03:06.561906: step: 774/464, loss: 0.09629150480031967 2023-01-24 02:03:07.206247: step: 776/464, loss: 0.2186581939458847 2023-01-24 02:03:07.822801: step: 778/464, loss: 0.11710034310817719 2023-01-24 02:03:08.499680: step: 780/464, loss: 0.13322243094444275 2023-01-24 02:03:09.092527: step: 782/464, loss: 0.3277522027492523 2023-01-24 02:03:09.733066: step: 784/464, loss: 0.2634742856025696 2023-01-24 02:03:10.477070: step: 786/464, loss: 0.21504506468772888 2023-01-24 02:03:11.147807: step: 788/464, loss: 0.21060091257095337 2023-01-24 02:03:11.834826: step: 790/464, loss: 0.13833431899547577 2023-01-24 02:03:12.493824: step: 792/464, loss: 0.26143184304237366 2023-01-24 02:03:13.156655: step: 794/464, loss: 0.11014603078365326 2023-01-24 02:03:13.826327: step: 796/464, loss: 0.4107128381729126 2023-01-24 02:03:14.381091: step: 798/464, loss: 0.14051704108715057 2023-01-24 02:03:14.920668: step: 800/464, loss: 0.5600093603134155 2023-01-24 02:03:15.477329: step: 802/464, loss: 0.20583002269268036 2023-01-24 02:03:16.070575: step: 804/464, loss: 0.07505656778812408 2023-01-24 02:03:16.695293: step: 806/464, loss: 0.10342224687337875 2023-01-24 02:03:17.266612: step: 808/464, loss: 0.07260114699602127 2023-01-24 02:03:17.878465: step: 810/464, loss: 0.3308141529560089 2023-01-24 02:03:18.532532: step: 812/464, loss: 0.4845127761363983 2023-01-24 02:03:19.243167: step: 814/464, loss: 0.14502455294132233 2023-01-24 02:03:19.899144: step: 816/464, loss: 0.41885289549827576 2023-01-24 02:03:20.553251: step: 818/464, loss: 0.1776328831911087 2023-01-24 02:03:21.201720: step: 820/464, loss: 1.3490067720413208 2023-01-24 02:03:21.830368: step: 822/464, loss: 0.7090396881103516 2023-01-24 02:03:22.453686: step: 824/464, loss: 0.05094294250011444 2023-01-24 02:03:23.135967: step: 826/464, loss: 0.1734030693769455 2023-01-24 02:03:23.770383: step: 828/464, loss: 0.9050090909004211 2023-01-24 02:03:24.407117: step: 830/464, loss: 0.10672441869974136 2023-01-24 02:03:25.030047: step: 832/464, loss: 0.14033271372318268 2023-01-24 02:03:25.681009: step: 834/464, loss: 0.4053362309932709 2023-01-24 02:03:26.260901: step: 836/464, loss: 0.14464549720287323 2023-01-24 02:03:26.932800: step: 838/464, loss: 0.39528706669807434 2023-01-24 02:03:27.546235: step: 840/464, loss: 0.4757036566734314 2023-01-24 02:03:28.131209: step: 842/464, loss: 0.13344378769397736 2023-01-24 02:03:28.778482: step: 844/464, loss: 0.20037075877189636 2023-01-24 02:03:29.422709: step: 846/464, loss: 1.9156296253204346 2023-01-24 02:03:30.064681: step: 848/464, loss: 0.2232384830713272 2023-01-24 02:03:30.718124: step: 850/464, loss: 0.41045475006103516 2023-01-24 02:03:31.340716: step: 852/464, loss: 1.0136959552764893 2023-01-24 02:03:31.930906: step: 854/464, loss: 0.2570632994174957 2023-01-24 02:03:32.535041: step: 856/464, loss: 5.904565811157227 2023-01-24 02:03:33.102910: step: 858/464, loss: 0.7196681499481201 2023-01-24 02:03:33.763989: step: 860/464, loss: 0.39808255434036255 2023-01-24 02:03:34.356478: step: 862/464, loss: 0.5597915649414062 2023-01-24 02:03:34.934623: step: 864/464, loss: 0.07103001326322556 2023-01-24 02:03:35.641338: step: 866/464, loss: 0.2170611023902893 2023-01-24 02:03:36.299373: step: 868/464, loss: 0.3302440643310547 2023-01-24 02:03:36.988652: step: 870/464, loss: 0.16965359449386597 2023-01-24 02:03:37.599752: step: 872/464, loss: 0.45563748478889465 2023-01-24 02:03:38.254821: step: 874/464, loss: 0.1041213721036911 2023-01-24 02:03:38.881932: step: 876/464, loss: 0.19837379455566406 2023-01-24 02:03:39.500022: step: 878/464, loss: 0.26904308795928955 2023-01-24 02:03:40.132120: step: 880/464, loss: 0.4328576326370239 2023-01-24 02:03:40.781374: step: 882/464, loss: 0.26968351006507874 2023-01-24 02:03:41.455014: step: 884/464, loss: 0.8352260589599609 2023-01-24 02:03:42.123838: step: 886/464, loss: 0.0773007869720459 2023-01-24 02:03:42.783107: step: 888/464, loss: 0.12449493259191513 2023-01-24 02:03:43.479365: step: 890/464, loss: 1.1425083875656128 2023-01-24 02:03:44.111382: step: 892/464, loss: 0.5259579420089722 2023-01-24 02:03:44.663328: step: 894/464, loss: 0.09961270540952682 2023-01-24 02:03:45.282788: step: 896/464, loss: 0.3245554566383362 2023-01-24 02:03:45.901556: step: 898/464, loss: 0.5239310264587402 2023-01-24 02:03:46.535223: step: 900/464, loss: 0.2951996922492981 2023-01-24 02:03:47.107114: step: 902/464, loss: 0.17997859418392181 2023-01-24 02:03:47.716560: step: 904/464, loss: 0.2505132257938385 2023-01-24 02:03:48.271644: step: 906/464, loss: 0.14798392355442047 2023-01-24 02:03:48.929623: step: 908/464, loss: 1.538906455039978 2023-01-24 02:03:49.562679: step: 910/464, loss: 0.38162997364997864 2023-01-24 02:03:50.171787: step: 912/464, loss: 0.15071247518062592 2023-01-24 02:03:50.885199: step: 914/464, loss: 0.231694296002388 2023-01-24 02:03:51.503284: step: 916/464, loss: 0.20453636348247528 2023-01-24 02:03:52.069514: step: 918/464, loss: 0.34366247057914734 2023-01-24 02:03:52.786788: step: 920/464, loss: 0.12969070672988892 2023-01-24 02:03:53.375702: step: 922/464, loss: 0.21827757358551025 2023-01-24 02:03:54.002634: step: 924/464, loss: 0.20832978188991547 2023-01-24 02:03:54.598656: step: 926/464, loss: 0.1685469001531601 2023-01-24 02:03:55.260260: step: 928/464, loss: 0.20983704924583435 2023-01-24 02:03:55.873560: step: 930/464, loss: 0.025569764897227287 ================================================== Loss: 0.343 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30757610701107013, 'r': 0.31633064516129034, 'f1': 0.31189195509822265}, 'combined': 0.22981512480921668, 'epoch': 10} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30321984562011245, 'r': 0.2837112158467596, 'f1': 0.2931413132395769}, 'combined': 0.19137723040511237, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3088085122242099, 'r': 0.3275596932321316, 'f1': 0.3179078422344997}, 'combined': 0.2342478837517366, 'epoch': 10} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30684155489834625, 'r': 0.2859718167894514, 'f1': 0.2960393307963112}, 'combined': 0.1932692004162446, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3219197450033085, 'r': 0.33169339950056265, 'f1': 0.3267334981996197}, 'combined': 0.240750998673404, 'epoch': 10} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31394373537261344, 'r': 0.28451151018143095, 'f1': 0.29850387953461605}, 'combined': 0.1948781804215628, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3020833333333333, 'r': 0.3452380952380952, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5454545454545454, 'r': 0.20689655172413793, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31993440233236153, 'r': 0.2969088203463203, 'f1': 0.3079918607914679}, 'combined': 0.22694137110950266, 'epoch': 5} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30404472815418065, 'r': 0.2555091476965408, 'f1': 0.27767195512385795}, 'combined': 0.181278063966871, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5192307692307693, 'r': 0.23275862068965517, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 5} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:06:32.849924: step: 2/464, loss: 0.12749086320400238 2023-01-24 02:06:33.506314: step: 4/464, loss: 0.31675276160240173 2023-01-24 02:06:34.084136: step: 6/464, loss: 0.0454997643828392 2023-01-24 02:06:34.706683: step: 8/464, loss: 0.16314424574375153 2023-01-24 02:06:35.303969: step: 10/464, loss: 0.16916592419147491 2023-01-24 02:06:35.906834: step: 12/464, loss: 0.3637135326862335 2023-01-24 02:06:36.407498: step: 14/464, loss: 0.25179359316825867 2023-01-24 02:06:37.003872: step: 16/464, loss: 0.054944101721048355 2023-01-24 02:06:37.579612: step: 18/464, loss: 0.17395174503326416 2023-01-24 02:06:38.214998: step: 20/464, loss: 0.18405458331108093 2023-01-24 02:06:38.896090: step: 22/464, loss: 0.09289663285017014 2023-01-24 02:06:39.484194: step: 24/464, loss: 0.30236729979515076 2023-01-24 02:06:40.118689: step: 26/464, loss: 0.3274882137775421 2023-01-24 02:06:40.765454: step: 28/464, loss: 0.13694429397583008 2023-01-24 02:06:41.392212: step: 30/464, loss: 0.10084377229213715 2023-01-24 02:06:41.971705: step: 32/464, loss: 0.10855576395988464 2023-01-24 02:06:42.623227: step: 34/464, loss: 0.11531636863946915 2023-01-24 02:06:43.256611: step: 36/464, loss: 0.3022859990596771 2023-01-24 02:06:43.836018: step: 38/464, loss: 0.1443987339735031 2023-01-24 02:06:44.446589: step: 40/464, loss: 0.7737499475479126 2023-01-24 02:06:45.034194: step: 42/464, loss: 0.24089673161506653 2023-01-24 02:06:45.673238: step: 44/464, loss: 0.16677124798297882 2023-01-24 02:06:46.252466: step: 46/464, loss: 0.16070647537708282 2023-01-24 02:06:46.818473: step: 48/464, loss: 0.09703407436609268 2023-01-24 02:06:47.566280: step: 50/464, loss: 0.1399368792772293 2023-01-24 02:06:48.187187: step: 52/464, loss: 0.2855876386165619 2023-01-24 02:06:48.787217: step: 54/464, loss: 0.083491250872612 2023-01-24 02:06:49.550873: step: 56/464, loss: 0.27693670988082886 2023-01-24 02:06:50.295074: step: 58/464, loss: 0.9555200338363647 2023-01-24 02:06:50.862168: step: 60/464, loss: 0.14794579148292542 2023-01-24 02:06:51.410334: step: 62/464, loss: 0.14458629488945007 2023-01-24 02:06:52.061595: step: 64/464, loss: 0.09685725718736649 2023-01-24 02:06:52.739956: step: 66/464, loss: 0.17956098914146423 2023-01-24 02:06:53.304762: step: 68/464, loss: 0.4257870018482208 2023-01-24 02:06:53.955248: step: 70/464, loss: 0.18782329559326172 2023-01-24 02:06:54.593603: step: 72/464, loss: 0.22681228816509247 2023-01-24 02:06:55.155035: step: 74/464, loss: 0.16160351037979126 2023-01-24 02:06:55.823404: step: 76/464, loss: 0.12337357550859451 2023-01-24 02:06:56.423566: step: 78/464, loss: 0.4088905155658722 2023-01-24 02:06:57.035368: step: 80/464, loss: 0.13035966455936432 2023-01-24 02:06:57.654683: step: 82/464, loss: 0.03869122639298439 2023-01-24 02:06:58.246624: step: 84/464, loss: 0.14494037628173828 2023-01-24 02:06:58.834712: step: 86/464, loss: 0.35110121965408325 2023-01-24 02:06:59.438854: step: 88/464, loss: 0.03901585564017296 2023-01-24 02:07:00.078677: step: 90/464, loss: 0.11286500841379166 2023-01-24 02:07:00.718137: step: 92/464, loss: 0.14447273313999176 2023-01-24 02:07:01.360627: step: 94/464, loss: 0.2669197916984558 2023-01-24 02:07:01.943872: step: 96/464, loss: 0.038198988884687424 2023-01-24 02:07:02.610835: step: 98/464, loss: 0.2722930610179901 2023-01-24 02:07:03.166258: step: 100/464, loss: 0.08510913699865341 2023-01-24 02:07:03.783796: step: 102/464, loss: 0.1735830307006836 2023-01-24 02:07:04.411688: step: 104/464, loss: 0.24530547857284546 2023-01-24 02:07:05.041707: step: 106/464, loss: 0.15999534726142883 2023-01-24 02:07:05.636558: step: 108/464, loss: 1.8271517753601074 2023-01-24 02:07:06.251993: step: 110/464, loss: 0.5154167413711548 2023-01-24 02:07:06.905659: step: 112/464, loss: 0.19308911263942719 2023-01-24 02:07:07.554399: step: 114/464, loss: 0.23946955800056458 2023-01-24 02:07:08.194587: step: 116/464, loss: 0.08525333553552628 2023-01-24 02:07:08.846264: step: 118/464, loss: 0.45863062143325806 2023-01-24 02:07:09.480765: step: 120/464, loss: 0.5794384479522705 2023-01-24 02:07:10.122246: step: 122/464, loss: 0.43312111496925354 2023-01-24 02:07:10.722803: step: 124/464, loss: 0.23851554095745087 2023-01-24 02:07:11.343795: step: 126/464, loss: 0.039798635989427567 2023-01-24 02:07:11.981117: step: 128/464, loss: 0.4510522484779358 2023-01-24 02:07:12.622777: step: 130/464, loss: 0.15508149564266205 2023-01-24 02:07:13.232568: step: 132/464, loss: 0.11749690026044846 2023-01-24 02:07:13.849788: step: 134/464, loss: 0.10931003838777542 2023-01-24 02:07:14.488563: step: 136/464, loss: 0.22598673403263092 2023-01-24 02:07:15.159233: step: 138/464, loss: 0.3516949415206909 2023-01-24 02:07:15.824278: step: 140/464, loss: 0.17344337701797485 2023-01-24 02:07:16.424788: step: 142/464, loss: 0.15874630212783813 2023-01-24 02:07:17.017800: step: 144/464, loss: 1.534786343574524 2023-01-24 02:07:17.708592: step: 146/464, loss: 0.7723768949508667 2023-01-24 02:07:18.271252: step: 148/464, loss: 0.3668593466281891 2023-01-24 02:07:18.900062: step: 150/464, loss: 0.14745070040225983 2023-01-24 02:07:19.525977: step: 152/464, loss: 0.1011207103729248 2023-01-24 02:07:20.197682: step: 154/464, loss: 0.25891536474227905 2023-01-24 02:07:20.849856: step: 156/464, loss: 0.051904287189245224 2023-01-24 02:07:21.485326: step: 158/464, loss: 0.3749949336051941 2023-01-24 02:07:22.057529: step: 160/464, loss: 0.10414428263902664 2023-01-24 02:07:22.695689: step: 162/464, loss: 0.0400250181555748 2023-01-24 02:07:23.276418: step: 164/464, loss: 0.19313499331474304 2023-01-24 02:07:23.936633: step: 166/464, loss: 1.6297383308410645 2023-01-24 02:07:24.512576: step: 168/464, loss: 0.21383130550384521 2023-01-24 02:07:25.158865: step: 170/464, loss: 0.3675953149795532 2023-01-24 02:07:25.803525: step: 172/464, loss: 0.0551636628806591 2023-01-24 02:07:26.409476: step: 174/464, loss: 0.2502910792827606 2023-01-24 02:07:27.036014: step: 176/464, loss: 0.1080092266201973 2023-01-24 02:07:27.614606: step: 178/464, loss: 0.2360805720090866 2023-01-24 02:07:28.233482: step: 180/464, loss: 0.16856279969215393 2023-01-24 02:07:28.802891: step: 182/464, loss: 0.08450154960155487 2023-01-24 02:07:29.594514: step: 184/464, loss: 0.09318307787179947 2023-01-24 02:07:30.291994: step: 186/464, loss: 0.22691823542118073 2023-01-24 02:07:30.972266: step: 188/464, loss: 0.10292818397283554 2023-01-24 02:07:31.635778: step: 190/464, loss: 0.201989084482193 2023-01-24 02:07:32.258484: step: 192/464, loss: 1.099289894104004 2023-01-24 02:07:32.840302: step: 194/464, loss: 0.2058786004781723 2023-01-24 02:07:33.355742: step: 196/464, loss: 0.15418343245983124 2023-01-24 02:07:33.941773: step: 198/464, loss: 0.23155158758163452 2023-01-24 02:07:34.595275: step: 200/464, loss: 0.4182059168815613 2023-01-24 02:07:35.215373: step: 202/464, loss: 0.14667606353759766 2023-01-24 02:07:35.813180: step: 204/464, loss: 3.515292167663574 2023-01-24 02:07:36.428818: step: 206/464, loss: 0.16485071182250977 2023-01-24 02:07:37.082837: step: 208/464, loss: 0.04763878136873245 2023-01-24 02:07:37.724741: step: 210/464, loss: 0.1819470077753067 2023-01-24 02:07:38.365275: step: 212/464, loss: 0.2851487100124359 2023-01-24 02:07:38.949050: step: 214/464, loss: 0.18764753639698029 2023-01-24 02:07:39.550197: step: 216/464, loss: 0.2979755997657776 2023-01-24 02:07:40.174801: step: 218/464, loss: 0.17477896809577942 2023-01-24 02:07:40.770995: step: 220/464, loss: 0.12481864541769028 2023-01-24 02:07:41.414288: step: 222/464, loss: 0.5897207260131836 2023-01-24 02:07:42.015602: step: 224/464, loss: 0.6215092539787292 2023-01-24 02:07:42.619905: step: 226/464, loss: 0.1567331850528717 2023-01-24 02:07:43.211993: step: 228/464, loss: 0.19493567943572998 2023-01-24 02:07:43.786653: step: 230/464, loss: 0.2208445817232132 2023-01-24 02:07:44.395673: step: 232/464, loss: 0.2621890902519226 2023-01-24 02:07:44.988019: step: 234/464, loss: 0.21368613839149475 2023-01-24 02:07:45.633214: step: 236/464, loss: 0.1271626353263855 2023-01-24 02:07:46.225181: step: 238/464, loss: 0.7963235378265381 2023-01-24 02:07:46.837933: step: 240/464, loss: 0.19868683815002441 2023-01-24 02:07:47.392027: step: 242/464, loss: 0.2507416009902954 2023-01-24 02:07:48.027292: step: 244/464, loss: 0.07598677277565002 2023-01-24 02:07:48.652366: step: 246/464, loss: 0.8013763427734375 2023-01-24 02:07:49.295634: step: 248/464, loss: 0.3838903605937958 2023-01-24 02:07:49.978563: step: 250/464, loss: 0.13968589901924133 2023-01-24 02:07:50.588328: step: 252/464, loss: 1.1072745323181152 2023-01-24 02:07:51.179590: step: 254/464, loss: 0.11604411900043488 2023-01-24 02:07:51.817937: step: 256/464, loss: 0.7088308930397034 2023-01-24 02:07:52.445379: step: 258/464, loss: 0.23516815900802612 2023-01-24 02:07:53.134953: step: 260/464, loss: 0.14172761142253876 2023-01-24 02:07:53.731579: step: 262/464, loss: 0.3576776385307312 2023-01-24 02:07:54.367570: step: 264/464, loss: 0.17829737067222595 2023-01-24 02:07:54.980023: step: 266/464, loss: 0.12431307137012482 2023-01-24 02:07:55.554839: step: 268/464, loss: 0.5392585396766663 2023-01-24 02:07:56.122619: step: 270/464, loss: 0.09986944496631622 2023-01-24 02:07:56.723274: step: 272/464, loss: 0.16034409403800964 2023-01-24 02:07:57.347932: step: 274/464, loss: 0.3127243220806122 2023-01-24 02:07:57.969007: step: 276/464, loss: 0.5250251889228821 2023-01-24 02:07:58.599912: step: 278/464, loss: 0.12543682754039764 2023-01-24 02:07:59.220598: step: 280/464, loss: 0.40157195925712585 2023-01-24 02:07:59.833704: step: 282/464, loss: 0.09980019181966782 2023-01-24 02:08:00.470413: step: 284/464, loss: 4.3395771980285645 2023-01-24 02:08:01.057395: step: 286/464, loss: 0.057638343423604965 2023-01-24 02:08:01.717064: step: 288/464, loss: 0.1725718080997467 2023-01-24 02:08:02.355730: step: 290/464, loss: 0.2406628280878067 2023-01-24 02:08:02.971668: step: 292/464, loss: 0.2707064151763916 2023-01-24 02:08:03.604026: step: 294/464, loss: 0.10121627897024155 2023-01-24 02:08:04.230269: step: 296/464, loss: 0.0865129828453064 2023-01-24 02:08:04.811756: step: 298/464, loss: 0.08518489450216293 2023-01-24 02:08:05.441044: step: 300/464, loss: 0.2202875018119812 2023-01-24 02:08:05.998772: step: 302/464, loss: 0.043326690793037415 2023-01-24 02:08:06.673874: step: 304/464, loss: 0.21041861176490784 2023-01-24 02:08:07.333628: step: 306/464, loss: 0.12960094213485718 2023-01-24 02:08:07.969270: step: 308/464, loss: 0.20338009297847748 2023-01-24 02:08:08.725805: step: 310/464, loss: 0.11522030830383301 2023-01-24 02:08:09.416487: step: 312/464, loss: 0.11141736805438995 2023-01-24 02:08:10.103923: step: 314/464, loss: 0.06860148161649704 2023-01-24 02:08:10.741632: step: 316/464, loss: 0.3496860861778259 2023-01-24 02:08:11.357961: step: 318/464, loss: 0.1790461540222168 2023-01-24 02:08:12.031340: step: 320/464, loss: 0.6583364009857178 2023-01-24 02:08:12.626443: step: 322/464, loss: 0.2994745969772339 2023-01-24 02:08:13.349209: step: 324/464, loss: 0.22190432250499725 2023-01-24 02:08:13.954935: step: 326/464, loss: 0.08175873756408691 2023-01-24 02:08:14.569268: step: 328/464, loss: 0.16329525411128998 2023-01-24 02:08:15.112486: step: 330/464, loss: 0.038639314472675323 2023-01-24 02:08:15.697405: step: 332/464, loss: 0.20517034828662872 2023-01-24 02:08:16.364885: step: 334/464, loss: 0.2505493760108948 2023-01-24 02:08:16.924281: step: 336/464, loss: 0.1626298874616623 2023-01-24 02:08:17.534119: step: 338/464, loss: 0.24210214614868164 2023-01-24 02:08:18.108935: step: 340/464, loss: 0.46474650502204895 2023-01-24 02:08:18.770515: step: 342/464, loss: 0.05986591428518295 2023-01-24 02:08:19.349186: step: 344/464, loss: 0.38242480158805847 2023-01-24 02:08:19.976412: step: 346/464, loss: 1.0713443756103516 2023-01-24 02:08:20.623877: step: 348/464, loss: 0.27796074748039246 2023-01-24 02:08:21.280064: step: 350/464, loss: 0.2751466929912567 2023-01-24 02:08:21.944715: step: 352/464, loss: 0.3686355650424957 2023-01-24 02:08:22.579524: step: 354/464, loss: 0.062489546835422516 2023-01-24 02:08:23.227907: step: 356/464, loss: 0.1203119084239006 2023-01-24 02:08:23.845745: step: 358/464, loss: 0.0970081016421318 2023-01-24 02:08:24.462524: step: 360/464, loss: 0.09083372354507446 2023-01-24 02:08:25.117671: step: 362/464, loss: 0.11161081492900848 2023-01-24 02:08:25.760826: step: 364/464, loss: 0.43868952989578247 2023-01-24 02:08:26.456461: step: 366/464, loss: 0.14594672620296478 2023-01-24 02:08:27.074162: step: 368/464, loss: 0.23021414875984192 2023-01-24 02:08:27.676113: step: 370/464, loss: 0.26811346411705017 2023-01-24 02:08:28.250838: step: 372/464, loss: 0.1601974219083786 2023-01-24 02:08:28.931466: step: 374/464, loss: 0.1352207511663437 2023-01-24 02:08:29.564081: step: 376/464, loss: 0.11734617501497269 2023-01-24 02:08:30.178523: step: 378/464, loss: 0.8292566537857056 2023-01-24 02:08:30.768245: step: 380/464, loss: 0.13411815464496613 2023-01-24 02:08:31.437482: step: 382/464, loss: 0.0297938734292984 2023-01-24 02:08:32.050383: step: 384/464, loss: 0.47775954008102417 2023-01-24 02:08:32.682796: step: 386/464, loss: 0.25933510065078735 2023-01-24 02:08:33.337873: step: 388/464, loss: 0.43880245089530945 2023-01-24 02:08:33.950274: step: 390/464, loss: 0.15018054842948914 2023-01-24 02:08:34.551656: step: 392/464, loss: 1.495927333831787 2023-01-24 02:08:35.128554: step: 394/464, loss: 0.14321881532669067 2023-01-24 02:08:35.758533: step: 396/464, loss: 0.4615509510040283 2023-01-24 02:08:36.392660: step: 398/464, loss: 0.2685154676437378 2023-01-24 02:08:37.021682: step: 400/464, loss: 0.35869333148002625 2023-01-24 02:08:37.697774: step: 402/464, loss: 0.49212753772735596 2023-01-24 02:08:38.276586: step: 404/464, loss: 0.21866217255592346 2023-01-24 02:08:38.911121: step: 406/464, loss: 0.216976597905159 2023-01-24 02:08:39.517747: step: 408/464, loss: 0.6453323364257812 2023-01-24 02:08:40.095679: step: 410/464, loss: 0.0943211019039154 2023-01-24 02:08:40.710642: step: 412/464, loss: 0.0908517837524414 2023-01-24 02:08:41.303897: step: 414/464, loss: 0.17199210822582245 2023-01-24 02:08:41.967197: step: 416/464, loss: 0.08901306241750717 2023-01-24 02:08:42.550139: step: 418/464, loss: 0.05542987212538719 2023-01-24 02:08:43.179264: step: 420/464, loss: 0.17803852260112762 2023-01-24 02:08:43.767360: step: 422/464, loss: 0.09037181735038757 2023-01-24 02:08:44.429332: step: 424/464, loss: 0.2514219284057617 2023-01-24 02:08:45.089110: step: 426/464, loss: 0.18447107076644897 2023-01-24 02:08:45.765306: step: 428/464, loss: 0.09749721735715866 2023-01-24 02:08:46.338519: step: 430/464, loss: 0.8195016980171204 2023-01-24 02:08:46.934690: step: 432/464, loss: 0.07239938527345657 2023-01-24 02:08:47.477635: step: 434/464, loss: 0.1674031764268875 2023-01-24 02:08:48.143591: step: 436/464, loss: 0.19262349605560303 2023-01-24 02:08:48.781648: step: 438/464, loss: 0.42374101281166077 2023-01-24 02:08:49.407146: step: 440/464, loss: 0.12468132376670837 2023-01-24 02:08:50.058262: step: 442/464, loss: 0.18052774667739868 2023-01-24 02:08:50.656937: step: 444/464, loss: 0.15312236547470093 2023-01-24 02:08:51.271854: step: 446/464, loss: 0.19045044481754303 2023-01-24 02:08:51.862799: step: 448/464, loss: 0.15186341106891632 2023-01-24 02:08:52.498071: step: 450/464, loss: 0.12853415310382843 2023-01-24 02:08:53.164787: step: 452/464, loss: 0.26743295788764954 2023-01-24 02:08:53.779184: step: 454/464, loss: 0.41534423828125 2023-01-24 02:08:54.404034: step: 456/464, loss: 0.10010375827550888 2023-01-24 02:08:55.047254: step: 458/464, loss: 0.055969975888729095 2023-01-24 02:08:55.690242: step: 460/464, loss: 0.4095710515975952 2023-01-24 02:08:56.247900: step: 462/464, loss: 0.1918717473745346 2023-01-24 02:08:56.895061: step: 464/464, loss: 0.09351630508899689 2023-01-24 02:08:57.467367: step: 466/464, loss: 0.02616412192583084 2023-01-24 02:08:58.126230: step: 468/464, loss: 0.7575204968452454 2023-01-24 02:08:58.768116: step: 470/464, loss: 0.2488774061203003 2023-01-24 02:08:59.407126: step: 472/464, loss: 0.5412296652793884 2023-01-24 02:09:00.098478: step: 474/464, loss: 0.13925917446613312 2023-01-24 02:09:00.764965: step: 476/464, loss: 0.3951810598373413 2023-01-24 02:09:01.378195: step: 478/464, loss: 0.18069101870059967 2023-01-24 02:09:01.982053: step: 480/464, loss: 0.09641866385936737 2023-01-24 02:09:02.600591: step: 482/464, loss: 0.2705845534801483 2023-01-24 02:09:03.265953: step: 484/464, loss: 0.20070071518421173 2023-01-24 02:09:03.942534: step: 486/464, loss: 0.49247515201568604 2023-01-24 02:09:04.561140: step: 488/464, loss: 0.5439991354942322 2023-01-24 02:09:05.155249: step: 490/464, loss: 0.16222120821475983 2023-01-24 02:09:05.684598: step: 492/464, loss: 0.21172896027565002 2023-01-24 02:09:06.308173: step: 494/464, loss: 0.286189466714859 2023-01-24 02:09:07.007690: step: 496/464, loss: 0.08380153030157089 2023-01-24 02:09:07.727885: step: 498/464, loss: 0.3246591091156006 2023-01-24 02:09:08.337044: step: 500/464, loss: 0.09528906643390656 2023-01-24 02:09:08.996936: step: 502/464, loss: 0.22733502089977264 2023-01-24 02:09:09.630120: step: 504/464, loss: 0.052736204117536545 2023-01-24 02:09:10.247196: step: 506/464, loss: 0.17266158759593964 2023-01-24 02:09:10.955690: step: 508/464, loss: 0.6875770688056946 2023-01-24 02:09:11.573589: step: 510/464, loss: 0.09864681214094162 2023-01-24 02:09:12.226142: step: 512/464, loss: 0.14266711473464966 2023-01-24 02:09:12.892940: step: 514/464, loss: 0.25594624876976013 2023-01-24 02:09:13.587356: step: 516/464, loss: 0.10775838047266006 2023-01-24 02:09:14.244037: step: 518/464, loss: 0.484315425157547 2023-01-24 02:09:14.886692: step: 520/464, loss: 0.10059746354818344 2023-01-24 02:09:15.431997: step: 522/464, loss: 0.2775045335292816 2023-01-24 02:09:16.062416: step: 524/464, loss: 0.2562503516674042 2023-01-24 02:09:16.755482: step: 526/464, loss: 0.29064902663230896 2023-01-24 02:09:17.341033: step: 528/464, loss: 0.19548296928405762 2023-01-24 02:09:17.923751: step: 530/464, loss: 0.4946255683898926 2023-01-24 02:09:18.476779: step: 532/464, loss: 0.11917847394943237 2023-01-24 02:09:19.028817: step: 534/464, loss: 0.0935017466545105 2023-01-24 02:09:19.678147: step: 536/464, loss: 0.10296916961669922 2023-01-24 02:09:20.325872: step: 538/464, loss: 0.5450205206871033 2023-01-24 02:09:21.056646: step: 540/464, loss: 0.04718095809221268 2023-01-24 02:09:21.751288: step: 542/464, loss: 0.5736770629882812 2023-01-24 02:09:22.423722: step: 544/464, loss: 0.1732597053050995 2023-01-24 02:09:23.016691: step: 546/464, loss: 0.1497419774532318 2023-01-24 02:09:23.642534: step: 548/464, loss: 0.41093266010284424 2023-01-24 02:09:24.404171: step: 550/464, loss: 0.2896675765514374 2023-01-24 02:09:25.075544: step: 552/464, loss: 0.10698677599430084 2023-01-24 02:09:25.732612: step: 554/464, loss: 0.26328244805336 2023-01-24 02:09:26.398814: step: 556/464, loss: 0.18400733172893524 2023-01-24 02:09:27.043817: step: 558/464, loss: 0.119113028049469 2023-01-24 02:09:27.619615: step: 560/464, loss: 0.1332729309797287 2023-01-24 02:09:28.295334: step: 562/464, loss: 0.16722966730594635 2023-01-24 02:09:28.967383: step: 564/464, loss: 4.722121715545654 2023-01-24 02:09:29.623717: step: 566/464, loss: 0.2186322808265686 2023-01-24 02:09:30.211737: step: 568/464, loss: 0.09318897873163223 2023-01-24 02:09:30.828615: step: 570/464, loss: 0.2538876533508301 2023-01-24 02:09:31.393614: step: 572/464, loss: 0.13304342329502106 2023-01-24 02:09:32.046376: step: 574/464, loss: 0.15800759196281433 2023-01-24 02:09:32.636955: step: 576/464, loss: 0.25743475556373596 2023-01-24 02:09:33.243262: step: 578/464, loss: 0.09190484136343002 2023-01-24 02:09:33.949783: step: 580/464, loss: 0.09873532503843307 2023-01-24 02:09:34.617502: step: 582/464, loss: 0.4712373912334442 2023-01-24 02:09:35.218687: step: 584/464, loss: 0.12569165229797363 2023-01-24 02:09:35.847256: step: 586/464, loss: 0.19897960126399994 2023-01-24 02:09:36.513974: step: 588/464, loss: 0.3795648217201233 2023-01-24 02:09:37.156096: step: 590/464, loss: 0.08776821941137314 2023-01-24 02:09:37.791998: step: 592/464, loss: 0.12162689119577408 2023-01-24 02:09:38.383935: step: 594/464, loss: 0.24996206164360046 2023-01-24 02:09:39.019660: step: 596/464, loss: 0.40573614835739136 2023-01-24 02:09:39.545092: step: 598/464, loss: 0.15663489699363708 2023-01-24 02:09:40.149466: step: 600/464, loss: 0.2295760214328766 2023-01-24 02:09:40.853657: step: 602/464, loss: 0.6561931371688843 2023-01-24 02:09:41.498103: step: 604/464, loss: 0.13190947473049164 2023-01-24 02:09:42.080642: step: 606/464, loss: 0.13859710097312927 2023-01-24 02:09:42.670859: step: 608/464, loss: 0.24577781558036804 2023-01-24 02:09:43.334476: step: 610/464, loss: 0.05066034570336342 2023-01-24 02:09:43.899631: step: 612/464, loss: 0.11498422175645828 2023-01-24 02:09:44.582653: step: 614/464, loss: 2.149258613586426 2023-01-24 02:09:45.217703: step: 616/464, loss: 0.140337735414505 2023-01-24 02:09:45.857208: step: 618/464, loss: 0.18237602710723877 2023-01-24 02:09:46.535056: step: 620/464, loss: 0.1998087614774704 2023-01-24 02:09:47.253620: step: 622/464, loss: 0.19751664996147156 2023-01-24 02:09:47.904064: step: 624/464, loss: 0.10175623744726181 2023-01-24 02:09:48.569473: step: 626/464, loss: 0.49671512842178345 2023-01-24 02:09:49.162965: step: 628/464, loss: 0.031716883182525635 2023-01-24 02:09:49.791642: step: 630/464, loss: 0.11761437356472015 2023-01-24 02:09:50.391350: step: 632/464, loss: 0.34634527564048767 2023-01-24 02:09:51.057169: step: 634/464, loss: 0.29505980014801025 2023-01-24 02:09:51.660171: step: 636/464, loss: 0.15596410632133484 2023-01-24 02:09:52.266653: step: 638/464, loss: 0.10715252161026001 2023-01-24 02:09:52.902187: step: 640/464, loss: 0.9346346259117126 2023-01-24 02:09:53.521623: step: 642/464, loss: 0.11918459087610245 2023-01-24 02:09:54.135288: step: 644/464, loss: 0.6793946027755737 2023-01-24 02:09:54.786399: step: 646/464, loss: 0.11271430552005768 2023-01-24 02:09:55.378001: step: 648/464, loss: 0.08095047622919083 2023-01-24 02:09:56.009857: step: 650/464, loss: 0.21067102253437042 2023-01-24 02:09:56.771373: step: 652/464, loss: 0.5532771944999695 2023-01-24 02:09:57.365706: step: 654/464, loss: 0.12611809372901917 2023-01-24 02:09:58.075260: step: 656/464, loss: 0.13094867765903473 2023-01-24 02:09:58.760190: step: 658/464, loss: 0.7898499965667725 2023-01-24 02:09:59.391648: step: 660/464, loss: 0.12929470837116241 2023-01-24 02:09:59.964795: step: 662/464, loss: 0.06109068915247917 2023-01-24 02:10:00.618489: step: 664/464, loss: 0.0926525816321373 2023-01-24 02:10:01.215420: step: 666/464, loss: 0.5199193358421326 2023-01-24 02:10:01.827070: step: 668/464, loss: 0.4801885783672333 2023-01-24 02:10:02.452836: step: 670/464, loss: 0.19196748733520508 2023-01-24 02:10:03.028502: step: 672/464, loss: 0.37548545002937317 2023-01-24 02:10:03.600004: step: 674/464, loss: 0.1620676964521408 2023-01-24 02:10:04.284798: step: 676/464, loss: 0.3277484178543091 2023-01-24 02:10:04.918057: step: 678/464, loss: 0.08762902021408081 2023-01-24 02:10:05.520455: step: 680/464, loss: 0.13059160113334656 2023-01-24 02:10:06.157184: step: 682/464, loss: 0.31305909156799316 2023-01-24 02:10:06.828149: step: 684/464, loss: 0.23233528435230255 2023-01-24 02:10:07.483272: step: 686/464, loss: 0.19373933970928192 2023-01-24 02:10:08.102636: step: 688/464, loss: 0.09218301624059677 2023-01-24 02:10:08.790087: step: 690/464, loss: 0.1672954112291336 2023-01-24 02:10:09.467323: step: 692/464, loss: 0.6126482486724854 2023-01-24 02:10:10.105365: step: 694/464, loss: 0.15671324729919434 2023-01-24 02:10:10.715577: step: 696/464, loss: 0.3100976049900055 2023-01-24 02:10:11.383340: step: 698/464, loss: 0.5160097479820251 2023-01-24 02:10:11.969473: step: 700/464, loss: 0.5026370286941528 2023-01-24 02:10:12.601206: step: 702/464, loss: 0.4017609655857086 2023-01-24 02:10:13.202371: step: 704/464, loss: 0.11298071593046188 2023-01-24 02:10:13.895693: step: 706/464, loss: 0.2641523778438568 2023-01-24 02:10:14.575633: step: 708/464, loss: 0.1176559180021286 2023-01-24 02:10:15.239302: step: 710/464, loss: 0.15013401210308075 2023-01-24 02:10:15.987357: step: 712/464, loss: 0.29811954498291016 2023-01-24 02:10:16.525858: step: 714/464, loss: 0.13270290195941925 2023-01-24 02:10:17.123494: step: 716/464, loss: 0.22287288308143616 2023-01-24 02:10:17.766319: step: 718/464, loss: 0.3418169319629669 2023-01-24 02:10:18.471735: step: 720/464, loss: 0.08123631030321121 2023-01-24 02:10:19.157565: step: 722/464, loss: 0.14334438741207123 2023-01-24 02:10:19.800989: step: 724/464, loss: 0.24343787133693695 2023-01-24 02:10:20.414230: step: 726/464, loss: 0.18862269818782806 2023-01-24 02:10:21.034608: step: 728/464, loss: 0.24572555720806122 2023-01-24 02:10:21.658811: step: 730/464, loss: 0.20189018547534943 2023-01-24 02:10:22.252993: step: 732/464, loss: 0.3579574525356293 2023-01-24 02:10:22.893100: step: 734/464, loss: 0.39582088589668274 2023-01-24 02:10:23.503978: step: 736/464, loss: 0.09848456084728241 2023-01-24 02:10:24.178426: step: 738/464, loss: 0.387652724981308 2023-01-24 02:10:24.800484: step: 740/464, loss: 0.12421456724405289 2023-01-24 02:10:25.379574: step: 742/464, loss: 0.13614031672477722 2023-01-24 02:10:26.068988: step: 744/464, loss: 0.5611193776130676 2023-01-24 02:10:26.668032: step: 746/464, loss: 0.18564994633197784 2023-01-24 02:10:27.338043: step: 748/464, loss: 0.17745666205883026 2023-01-24 02:10:28.066939: step: 750/464, loss: 0.34961196780204773 2023-01-24 02:10:28.714527: step: 752/464, loss: 1.3152897357940674 2023-01-24 02:10:29.314374: step: 754/464, loss: 0.1217946857213974 2023-01-24 02:10:29.942959: step: 756/464, loss: 0.36618006229400635 2023-01-24 02:10:30.597336: step: 758/464, loss: 0.2611903250217438 2023-01-24 02:10:31.183322: step: 760/464, loss: 0.12786732614040375 2023-01-24 02:10:31.819361: step: 762/464, loss: 0.46200627088546753 2023-01-24 02:10:32.490981: step: 764/464, loss: 0.4778285622596741 2023-01-24 02:10:33.090764: step: 766/464, loss: 0.7928224205970764 2023-01-24 02:10:33.828198: step: 768/464, loss: 0.2523520886898041 2023-01-24 02:10:34.492969: step: 770/464, loss: 0.1595786213874817 2023-01-24 02:10:35.116179: step: 772/464, loss: 0.1493327021598816 2023-01-24 02:10:35.722211: step: 774/464, loss: 0.23997730016708374 2023-01-24 02:10:36.344999: step: 776/464, loss: 0.06494729220867157 2023-01-24 02:10:37.022431: step: 778/464, loss: 0.07723943889141083 2023-01-24 02:10:37.632984: step: 780/464, loss: 0.08414718508720398 2023-01-24 02:10:38.254352: step: 782/464, loss: 0.4716000556945801 2023-01-24 02:10:38.852919: step: 784/464, loss: 0.16538885235786438 2023-01-24 02:10:39.466285: step: 786/464, loss: 0.29278749227523804 2023-01-24 02:10:40.112610: step: 788/464, loss: 0.37954071164131165 2023-01-24 02:10:40.776203: step: 790/464, loss: 0.21551840007305145 2023-01-24 02:10:41.390119: step: 792/464, loss: 0.07903808355331421 2023-01-24 02:10:42.036606: step: 794/464, loss: 0.593177855014801 2023-01-24 02:10:42.596639: step: 796/464, loss: 0.5082368850708008 2023-01-24 02:10:43.190982: step: 798/464, loss: 0.36979803442955017 2023-01-24 02:10:43.799347: step: 800/464, loss: 0.35664430260658264 2023-01-24 02:10:44.515725: step: 802/464, loss: 0.09309973567724228 2023-01-24 02:10:45.108216: step: 804/464, loss: 0.11440564692020416 2023-01-24 02:10:45.667866: step: 806/464, loss: 0.8553968667984009 2023-01-24 02:10:46.295150: step: 808/464, loss: 0.1172894537448883 2023-01-24 02:10:46.866484: step: 810/464, loss: 0.2246769815683365 2023-01-24 02:10:47.424381: step: 812/464, loss: 0.12205179780721664 2023-01-24 02:10:48.077209: step: 814/464, loss: 0.7095149159431458 2023-01-24 02:10:48.737025: step: 816/464, loss: 0.7112863659858704 2023-01-24 02:10:49.353543: step: 818/464, loss: 0.2133084535598755 2023-01-24 02:10:49.910085: step: 820/464, loss: 0.3527364730834961 2023-01-24 02:10:50.472486: step: 822/464, loss: 0.18876755237579346 2023-01-24 02:10:51.100011: step: 824/464, loss: 0.21099744737148285 2023-01-24 02:10:51.778273: step: 826/464, loss: 0.0990365520119667 2023-01-24 02:10:52.485613: step: 828/464, loss: 0.18760396540164948 2023-01-24 02:10:53.183566: step: 830/464, loss: 0.3649967610836029 2023-01-24 02:10:53.775936: step: 832/464, loss: 0.04117352142930031 2023-01-24 02:10:54.379231: step: 834/464, loss: 0.21748271584510803 2023-01-24 02:10:54.982742: step: 836/464, loss: 0.5736399292945862 2023-01-24 02:10:55.520799: step: 838/464, loss: 0.9643673896789551 2023-01-24 02:10:56.152037: step: 840/464, loss: 0.09535722434520721 2023-01-24 02:10:56.754293: step: 842/464, loss: 0.12945391237735748 2023-01-24 02:10:57.434499: step: 844/464, loss: 0.2260231077671051 2023-01-24 02:10:58.092690: step: 846/464, loss: 0.1597353219985962 2023-01-24 02:10:58.751460: step: 848/464, loss: 0.19716760516166687 2023-01-24 02:10:59.419322: step: 850/464, loss: 0.21844589710235596 2023-01-24 02:11:00.055403: step: 852/464, loss: 0.5882822871208191 2023-01-24 02:11:00.611380: step: 854/464, loss: 0.10058721154928207 2023-01-24 02:11:01.208354: step: 856/464, loss: 0.33563870191574097 2023-01-24 02:11:01.827449: step: 858/464, loss: 0.14041025936603546 2023-01-24 02:11:02.412396: step: 860/464, loss: 0.13803352415561676 2023-01-24 02:11:03.038551: step: 862/464, loss: 0.12181901931762695 2023-01-24 02:11:03.585132: step: 864/464, loss: 0.06367615610361099 2023-01-24 02:11:04.276677: step: 866/464, loss: 0.42367619276046753 2023-01-24 02:11:04.909167: step: 868/464, loss: 0.2047378569841385 2023-01-24 02:11:05.486399: step: 870/464, loss: 0.07182841747999191 2023-01-24 02:11:06.117075: step: 872/464, loss: 0.12919709086418152 2023-01-24 02:11:06.732052: step: 874/464, loss: 0.26941314339637756 2023-01-24 02:11:07.360687: step: 876/464, loss: 0.5692474246025085 2023-01-24 02:11:08.001121: step: 878/464, loss: 0.17177428305149078 2023-01-24 02:11:08.620828: step: 880/464, loss: 0.6481063961982727 2023-01-24 02:11:09.252968: step: 882/464, loss: 0.3271678388118744 2023-01-24 02:11:09.923922: step: 884/464, loss: 0.2979361116886139 2023-01-24 02:11:10.550862: step: 886/464, loss: 0.0645914226770401 2023-01-24 02:11:11.183139: step: 888/464, loss: 2.7499001026153564 2023-01-24 02:11:11.767692: step: 890/464, loss: 0.13479718565940857 2023-01-24 02:11:12.391096: step: 892/464, loss: 0.11270148307085037 2023-01-24 02:11:13.027377: step: 894/464, loss: 0.32310348749160767 2023-01-24 02:11:13.666319: step: 896/464, loss: 0.388225257396698 2023-01-24 02:11:14.286596: step: 898/464, loss: 0.042076922953128815 2023-01-24 02:11:14.945272: step: 900/464, loss: 0.03660179674625397 2023-01-24 02:11:15.574519: step: 902/464, loss: 0.1979013830423355 2023-01-24 02:11:16.202613: step: 904/464, loss: 0.20062708854675293 2023-01-24 02:11:16.769199: step: 906/464, loss: 0.1717149317264557 2023-01-24 02:11:17.399019: step: 908/464, loss: 0.20906086266040802 2023-01-24 02:11:18.053029: step: 910/464, loss: 0.45263901352882385 2023-01-24 02:11:18.700713: step: 912/464, loss: 0.22234554588794708 2023-01-24 02:11:19.308531: step: 914/464, loss: 0.13453200459480286 2023-01-24 02:11:19.949259: step: 916/464, loss: 0.24855343997478485 2023-01-24 02:11:20.544750: step: 918/464, loss: 0.47845444083213806 2023-01-24 02:11:21.202913: step: 920/464, loss: 0.11308290809392929 2023-01-24 02:11:21.852201: step: 922/464, loss: 0.26682430505752563 2023-01-24 02:11:22.525175: step: 924/464, loss: 0.30981704592704773 2023-01-24 02:11:23.167543: step: 926/464, loss: 0.1841297149658203 2023-01-24 02:11:23.818874: step: 928/464, loss: 0.1580587923526764 2023-01-24 02:11:24.370446: step: 930/464, loss: 0.3135313391685486 ================================================== Loss: 0.298 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33320316084044405, 'r': 0.3281450483419174, 'f1': 0.33065476190476184}, 'combined': 0.24364035087719293, 'epoch': 11} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.308353779460635, 'r': 0.28539729404123115, 'f1': 0.29643174789198995}, 'combined': 0.19352538981549602, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3385108497690121, 'r': 0.34429186997379596, 'f1': 0.34137688706715047}, 'combined': 0.2515408641547424, 'epoch': 11} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3269903185795963, 'r': 0.29242792277384855, 'f1': 0.3087448616962128}, 'combined': 0.20156400297265706, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34332139462248773, 'r': 0.342669930875576, 'f1': 0.34299535341202003}, 'combined': 0.2527334183035937, 'epoch': 11} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32152524302041047, 'r': 0.2819619899765767, 'f1': 0.30044677796917024}, 'combined': 0.19614660116121996, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32175925925925924, 'r': 0.33095238095238094, 'f1': 0.32629107981220656}, 'combined': 0.21752738654147102, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2903225806451613, 'r': 0.391304347826087, 'f1': 0.33333333333333337}, 'combined': 0.16666666666666669, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31993440233236153, 'r': 0.2969088203463203, 'f1': 0.3079918607914679}, 'combined': 0.22694137110950266, 'epoch': 5} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30404472815418065, 'r': 0.2555091476965408, 'f1': 0.27767195512385795}, 'combined': 0.181278063966871, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5192307692307693, 'r': 0.23275862068965517, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 5} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:14:00.169955: step: 2/464, loss: 0.09609854221343994 2023-01-24 02:14:00.791329: step: 4/464, loss: 1.1962323188781738 2023-01-24 02:14:01.415136: step: 6/464, loss: 0.8360907435417175 2023-01-24 02:14:02.034038: step: 8/464, loss: 0.14039205014705658 2023-01-24 02:14:02.671191: step: 10/464, loss: 0.14549694955348969 2023-01-24 02:14:03.272669: step: 12/464, loss: 0.7983697056770325 2023-01-24 02:14:03.949640: step: 14/464, loss: 0.12540067732334137 2023-01-24 02:14:04.528361: step: 16/464, loss: 0.08263260126113892 2023-01-24 02:14:05.135478: step: 18/464, loss: 0.33770740032196045 2023-01-24 02:14:05.744958: step: 20/464, loss: 0.11727096885442734 2023-01-24 02:14:06.399635: step: 22/464, loss: 0.09696599841117859 2023-01-24 02:14:07.026104: step: 24/464, loss: 0.131135493516922 2023-01-24 02:14:07.656052: step: 26/464, loss: 0.12408984452486038 2023-01-24 02:14:08.314342: step: 28/464, loss: 0.0949990451335907 2023-01-24 02:14:09.010513: step: 30/464, loss: 0.1661735624074936 2023-01-24 02:14:09.575681: step: 32/464, loss: 0.14546817541122437 2023-01-24 02:14:10.197369: step: 34/464, loss: 0.049527350813150406 2023-01-24 02:14:10.818179: step: 36/464, loss: 0.07935893535614014 2023-01-24 02:14:11.486365: step: 38/464, loss: 0.07667119055986404 2023-01-24 02:14:12.141714: step: 40/464, loss: 0.1613290011882782 2023-01-24 02:14:12.770472: step: 42/464, loss: 0.07237593084573746 2023-01-24 02:14:13.401853: step: 44/464, loss: 0.07318137586116791 2023-01-24 02:14:14.078094: step: 46/464, loss: 0.041637614369392395 2023-01-24 02:14:14.682593: step: 48/464, loss: 0.03524008020758629 2023-01-24 02:14:15.332007: step: 50/464, loss: 0.08365655690431595 2023-01-24 02:14:15.939527: step: 52/464, loss: 0.07698964327573776 2023-01-24 02:14:16.591712: step: 54/464, loss: 0.11151023954153061 2023-01-24 02:14:17.230467: step: 56/464, loss: 0.3668684661388397 2023-01-24 02:14:17.789423: step: 58/464, loss: 0.23254403471946716 2023-01-24 02:14:18.420364: step: 60/464, loss: 0.34759053587913513 2023-01-24 02:14:19.045215: step: 62/464, loss: 0.10199126601219177 2023-01-24 02:14:19.630037: step: 64/464, loss: 0.46143120527267456 2023-01-24 02:14:20.241802: step: 66/464, loss: 0.07514477521181107 2023-01-24 02:14:20.918388: step: 68/464, loss: 0.24961446225643158 2023-01-24 02:14:21.628514: step: 70/464, loss: 0.19061315059661865 2023-01-24 02:14:22.251018: step: 72/464, loss: 0.1399098038673401 2023-01-24 02:14:22.928739: step: 74/464, loss: 0.11505573987960815 2023-01-24 02:14:23.487160: step: 76/464, loss: 0.14768767356872559 2023-01-24 02:14:24.144407: step: 78/464, loss: 0.27581924200057983 2023-01-24 02:14:24.810394: step: 80/464, loss: 0.13880829513072968 2023-01-24 02:14:25.430273: step: 82/464, loss: 0.0872029960155487 2023-01-24 02:14:26.102208: step: 84/464, loss: 0.4102860391139984 2023-01-24 02:14:26.756800: step: 86/464, loss: 0.4079272150993347 2023-01-24 02:14:27.368138: step: 88/464, loss: 0.18566866219043732 2023-01-24 02:14:27.976422: step: 90/464, loss: 0.3931496739387512 2023-01-24 02:14:28.592520: step: 92/464, loss: 0.0867336243391037 2023-01-24 02:14:29.205576: step: 94/464, loss: 0.29617542028427124 2023-01-24 02:14:29.772547: step: 96/464, loss: 0.12288712710142136 2023-01-24 02:14:30.448702: step: 98/464, loss: 0.08559727668762207 2023-01-24 02:14:30.990897: step: 100/464, loss: 0.23719806969165802 2023-01-24 02:14:31.596623: step: 102/464, loss: 0.24966849386692047 2023-01-24 02:14:32.209244: step: 104/464, loss: 0.22053930163383484 2023-01-24 02:14:32.864121: step: 106/464, loss: 2.525239944458008 2023-01-24 02:14:33.449416: step: 108/464, loss: 0.05087382718920708 2023-01-24 02:14:34.148461: step: 110/464, loss: 0.20529168844223022 2023-01-24 02:14:34.787891: step: 112/464, loss: 0.14163416624069214 2023-01-24 02:14:35.456974: step: 114/464, loss: 0.44657936692237854 2023-01-24 02:14:36.087857: step: 116/464, loss: 0.6462829113006592 2023-01-24 02:14:36.688271: step: 118/464, loss: 0.15209028124809265 2023-01-24 02:14:37.342351: step: 120/464, loss: 0.12344679981470108 2023-01-24 02:14:37.934306: step: 122/464, loss: 0.5198908448219299 2023-01-24 02:14:38.571144: step: 124/464, loss: 0.11611258238554001 2023-01-24 02:14:39.179308: step: 126/464, loss: 0.71803218126297 2023-01-24 02:14:39.866290: step: 128/464, loss: 0.12478084117174149 2023-01-24 02:14:40.570664: step: 130/464, loss: 0.09767883270978928 2023-01-24 02:14:41.215949: step: 132/464, loss: 0.17500635981559753 2023-01-24 02:14:41.775214: step: 134/464, loss: 0.05641498789191246 2023-01-24 02:14:42.481072: step: 136/464, loss: 0.23416012525558472 2023-01-24 02:14:43.120300: step: 138/464, loss: 0.10653456300497055 2023-01-24 02:14:43.692671: step: 140/464, loss: 0.17489150166511536 2023-01-24 02:14:44.296115: step: 142/464, loss: 0.10827875882387161 2023-01-24 02:14:44.917587: step: 144/464, loss: 0.22888274490833282 2023-01-24 02:14:45.589244: step: 146/464, loss: 0.15548251569271088 2023-01-24 02:14:46.204071: step: 148/464, loss: 0.09006007760763168 2023-01-24 02:14:46.793701: step: 150/464, loss: 0.05137726292014122 2023-01-24 02:14:47.389908: step: 152/464, loss: 0.14549240469932556 2023-01-24 02:14:48.037599: step: 154/464, loss: 0.21249419450759888 2023-01-24 02:14:48.629269: step: 156/464, loss: 0.04181840643286705 2023-01-24 02:14:49.279982: step: 158/464, loss: 0.09198150783777237 2023-01-24 02:14:49.905594: step: 160/464, loss: 0.17108485102653503 2023-01-24 02:14:50.529387: step: 162/464, loss: 0.20262686908245087 2023-01-24 02:14:51.150957: step: 164/464, loss: 0.15493832528591156 2023-01-24 02:14:51.777132: step: 166/464, loss: 0.08424071967601776 2023-01-24 02:14:52.401316: step: 168/464, loss: 0.12358138710260391 2023-01-24 02:14:52.991996: step: 170/464, loss: 0.22446371614933014 2023-01-24 02:14:53.579289: step: 172/464, loss: 0.2094225436449051 2023-01-24 02:14:54.228499: step: 174/464, loss: 0.6193203926086426 2023-01-24 02:14:54.778865: step: 176/464, loss: 0.05181468650698662 2023-01-24 02:14:55.446132: step: 178/464, loss: 0.13235141336917877 2023-01-24 02:14:56.129910: step: 180/464, loss: 0.10980284214019775 2023-01-24 02:14:56.767233: step: 182/464, loss: 0.17049528658390045 2023-01-24 02:14:57.339664: step: 184/464, loss: 0.27909931540489197 2023-01-24 02:14:57.890624: step: 186/464, loss: 0.10104744136333466 2023-01-24 02:14:58.479398: step: 188/464, loss: 0.1292169988155365 2023-01-24 02:14:59.047711: step: 190/464, loss: 0.6000388860702515 2023-01-24 02:14:59.725482: step: 192/464, loss: 0.18760737776756287 2023-01-24 02:15:00.324718: step: 194/464, loss: 0.4577399492263794 2023-01-24 02:15:00.915363: step: 196/464, loss: 0.14205476641654968 2023-01-24 02:15:01.490860: step: 198/464, loss: 0.1507430076599121 2023-01-24 02:15:02.108768: step: 200/464, loss: 0.0702497735619545 2023-01-24 02:15:02.688178: step: 202/464, loss: 0.12581519782543182 2023-01-24 02:15:03.362149: step: 204/464, loss: 0.17592482268810272 2023-01-24 02:15:03.991419: step: 206/464, loss: 0.11105167120695114 2023-01-24 02:15:04.527903: step: 208/464, loss: 0.36974987387657166 2023-01-24 02:15:05.135551: step: 210/464, loss: 0.12456315755844116 2023-01-24 02:15:05.811154: step: 212/464, loss: 0.15190669894218445 2023-01-24 02:15:06.450385: step: 214/464, loss: 0.16267246007919312 2023-01-24 02:15:07.152069: step: 216/464, loss: 0.2663927972316742 2023-01-24 02:15:07.670652: step: 218/464, loss: 0.0649973526597023 2023-01-24 02:15:08.326344: step: 220/464, loss: 0.05866961181163788 2023-01-24 02:15:09.034705: step: 222/464, loss: 0.20051930844783783 2023-01-24 02:15:09.700403: step: 224/464, loss: 0.1325078159570694 2023-01-24 02:15:10.378554: step: 226/464, loss: 0.06449079513549805 2023-01-24 02:15:11.034569: step: 228/464, loss: 0.19155147671699524 2023-01-24 02:15:11.671394: step: 230/464, loss: 0.18524989485740662 2023-01-24 02:15:12.274620: step: 232/464, loss: 0.18985101580619812 2023-01-24 02:15:12.908823: step: 234/464, loss: 0.32292234897613525 2023-01-24 02:15:13.607239: step: 236/464, loss: 0.10213154554367065 2023-01-24 02:15:14.266904: step: 238/464, loss: 0.08985261619091034 2023-01-24 02:15:14.963219: step: 240/464, loss: 0.28371939063072205 2023-01-24 02:15:15.562837: step: 242/464, loss: 0.03562863916158676 2023-01-24 02:15:16.221076: step: 244/464, loss: 0.14082902669906616 2023-01-24 02:15:16.842488: step: 246/464, loss: 0.08222658187150955 2023-01-24 02:15:17.383004: step: 248/464, loss: 0.06636505573987961 2023-01-24 02:15:18.071289: step: 250/464, loss: 0.7063173055648804 2023-01-24 02:15:18.695007: step: 252/464, loss: 0.1520986109972 2023-01-24 02:15:19.359294: step: 254/464, loss: 0.06759010255336761 2023-01-24 02:15:19.942413: step: 256/464, loss: 0.11777284741401672 2023-01-24 02:15:20.670390: step: 258/464, loss: 0.15299879014492035 2023-01-24 02:15:21.294566: step: 260/464, loss: 0.5783911943435669 2023-01-24 02:15:21.856717: step: 262/464, loss: 0.09978681057691574 2023-01-24 02:15:22.482009: step: 264/464, loss: 0.18470978736877441 2023-01-24 02:15:23.098928: step: 266/464, loss: 0.8560441136360168 2023-01-24 02:15:23.687078: step: 268/464, loss: 0.10765422135591507 2023-01-24 02:15:24.340627: step: 270/464, loss: 0.624764621257782 2023-01-24 02:15:24.969306: step: 272/464, loss: 0.16080394387245178 2023-01-24 02:15:25.513826: step: 274/464, loss: 0.22208231687545776 2023-01-24 02:15:26.161913: step: 276/464, loss: 0.14986518025398254 2023-01-24 02:15:26.799958: step: 278/464, loss: 0.10917126387357712 2023-01-24 02:15:27.422934: step: 280/464, loss: 0.059265293180942535 2023-01-24 02:15:27.992152: step: 282/464, loss: 0.05906907096505165 2023-01-24 02:15:28.586512: step: 284/464, loss: 0.08650758862495422 2023-01-24 02:15:29.234952: step: 286/464, loss: 0.8987934589385986 2023-01-24 02:15:29.951452: step: 288/464, loss: 0.05748949572443962 2023-01-24 02:15:30.543327: step: 290/464, loss: 0.1825551688671112 2023-01-24 02:15:31.208216: step: 292/464, loss: 0.10999234020709991 2023-01-24 02:15:31.829355: step: 294/464, loss: 0.12873074412345886 2023-01-24 02:15:32.490877: step: 296/464, loss: 0.1095266342163086 2023-01-24 02:15:33.087139: step: 298/464, loss: 0.3119054138660431 2023-01-24 02:15:33.701095: step: 300/464, loss: 0.16263194382190704 2023-01-24 02:15:34.339382: step: 302/464, loss: 0.14298595488071442 2023-01-24 02:15:34.955322: step: 304/464, loss: 0.2573631703853607 2023-01-24 02:15:35.505217: step: 306/464, loss: 0.12914666533470154 2023-01-24 02:15:36.155813: step: 308/464, loss: 0.19286595284938812 2023-01-24 02:15:36.741006: step: 310/464, loss: 0.4885352551937103 2023-01-24 02:15:37.324057: step: 312/464, loss: 0.09529785811901093 2023-01-24 02:15:37.917857: step: 314/464, loss: 0.04528629034757614 2023-01-24 02:15:38.655202: step: 316/464, loss: 0.4325774610042572 2023-01-24 02:15:39.273630: step: 318/464, loss: 0.3529732823371887 2023-01-24 02:15:39.878487: step: 320/464, loss: 0.40400734543800354 2023-01-24 02:15:40.478236: step: 322/464, loss: 0.0664587914943695 2023-01-24 02:15:41.142277: step: 324/464, loss: 0.13620255887508392 2023-01-24 02:15:41.748255: step: 326/464, loss: 0.07878082245588303 2023-01-24 02:15:42.421417: step: 328/464, loss: 0.5997202396392822 2023-01-24 02:15:43.141112: step: 330/464, loss: 0.18104591965675354 2023-01-24 02:15:43.788638: step: 332/464, loss: 0.049419764429330826 2023-01-24 02:15:44.522013: step: 334/464, loss: 0.08512122929096222 2023-01-24 02:15:45.153274: step: 336/464, loss: 0.09855330735445023 2023-01-24 02:15:45.820740: step: 338/464, loss: 0.08050018548965454 2023-01-24 02:15:46.485830: step: 340/464, loss: 0.1313687264919281 2023-01-24 02:15:47.092447: step: 342/464, loss: 0.3599167466163635 2023-01-24 02:15:47.751946: step: 344/464, loss: 0.0932551920413971 2023-01-24 02:15:48.363150: step: 346/464, loss: 0.05233725160360336 2023-01-24 02:15:49.009967: step: 348/464, loss: 0.1757393330335617 2023-01-24 02:15:49.625260: step: 350/464, loss: 0.14035636186599731 2023-01-24 02:15:50.341981: step: 352/464, loss: 0.4218212068080902 2023-01-24 02:15:51.000867: step: 354/464, loss: 0.524379551410675 2023-01-24 02:15:51.674893: step: 356/464, loss: 0.07185309380292892 2023-01-24 02:15:52.305623: step: 358/464, loss: 0.21396894752979279 2023-01-24 02:15:52.845002: step: 360/464, loss: 0.0998459979891777 2023-01-24 02:15:53.444243: step: 362/464, loss: 0.13245929777622223 2023-01-24 02:15:54.099175: step: 364/464, loss: 0.2340196967124939 2023-01-24 02:15:54.738058: step: 366/464, loss: 0.14111104607582092 2023-01-24 02:15:55.436079: step: 368/464, loss: 0.2503955364227295 2023-01-24 02:15:56.067128: step: 370/464, loss: 0.04617612808942795 2023-01-24 02:15:56.664009: step: 372/464, loss: 0.09113742411136627 2023-01-24 02:15:57.324918: step: 374/464, loss: 0.08529345691204071 2023-01-24 02:15:57.931901: step: 376/464, loss: 0.1966417133808136 2023-01-24 02:15:58.574156: step: 378/464, loss: 0.6417378187179565 2023-01-24 02:15:59.214032: step: 380/464, loss: 0.10361170023679733 2023-01-24 02:15:59.896645: step: 382/464, loss: 0.746138870716095 2023-01-24 02:16:00.428635: step: 384/464, loss: 0.4580155909061432 2023-01-24 02:16:01.034555: step: 386/464, loss: 0.15263885259628296 2023-01-24 02:16:01.688620: step: 388/464, loss: 0.09458209574222565 2023-01-24 02:16:02.299505: step: 390/464, loss: 0.8690895438194275 2023-01-24 02:16:02.919574: step: 392/464, loss: 0.49207213521003723 2023-01-24 02:16:03.556592: step: 394/464, loss: 0.08876999467611313 2023-01-24 02:16:04.181447: step: 396/464, loss: 0.031726229935884476 2023-01-24 02:16:04.688633: step: 398/464, loss: 0.0860455110669136 2023-01-24 02:16:05.305959: step: 400/464, loss: 0.17339417338371277 2023-01-24 02:16:05.889582: step: 402/464, loss: 0.07491616904735565 2023-01-24 02:16:06.538362: step: 404/464, loss: 0.1457730084657669 2023-01-24 02:16:07.208535: step: 406/464, loss: 0.14544224739074707 2023-01-24 02:16:07.821982: step: 408/464, loss: 0.0813344195485115 2023-01-24 02:16:08.450086: step: 410/464, loss: 0.09824948012828827 2023-01-24 02:16:09.118911: step: 412/464, loss: 0.10653730481863022 2023-01-24 02:16:09.739170: step: 414/464, loss: 0.1890765279531479 2023-01-24 02:16:10.379717: step: 416/464, loss: 1.9404715299606323 2023-01-24 02:16:11.016058: step: 418/464, loss: 5.300214767456055 2023-01-24 02:16:11.631599: step: 420/464, loss: 0.11093682050704956 2023-01-24 02:16:12.237779: step: 422/464, loss: 0.12824968993663788 2023-01-24 02:16:12.927587: step: 424/464, loss: 0.11972280591726303 2023-01-24 02:16:13.585447: step: 426/464, loss: 0.049842387437820435 2023-01-24 02:16:14.196008: step: 428/464, loss: 0.24512696266174316 2023-01-24 02:16:14.809225: step: 430/464, loss: 0.1847914457321167 2023-01-24 02:16:15.429256: step: 432/464, loss: 0.24950304627418518 2023-01-24 02:16:16.042051: step: 434/464, loss: 0.41385746002197266 2023-01-24 02:16:16.656583: step: 436/464, loss: 0.3146797716617584 2023-01-24 02:16:17.293922: step: 438/464, loss: 0.10622680932283401 2023-01-24 02:16:17.854242: step: 440/464, loss: 0.15090520679950714 2023-01-24 02:16:18.406524: step: 442/464, loss: 0.08917783200740814 2023-01-24 02:16:19.061045: step: 444/464, loss: 0.407958447933197 2023-01-24 02:16:19.732286: step: 446/464, loss: 0.10987649857997894 2023-01-24 02:16:20.381707: step: 448/464, loss: 0.1464168280363083 2023-01-24 02:16:20.973584: step: 450/464, loss: 0.094017393887043 2023-01-24 02:16:21.700568: step: 452/464, loss: 0.1568855494260788 2023-01-24 02:16:22.330859: step: 454/464, loss: 0.4010990262031555 2023-01-24 02:16:22.966024: step: 456/464, loss: 0.48869457840919495 2023-01-24 02:16:23.574091: step: 458/464, loss: 0.09566915780305862 2023-01-24 02:16:24.184665: step: 460/464, loss: 0.13694754242897034 2023-01-24 02:16:24.851472: step: 462/464, loss: 0.4534783959388733 2023-01-24 02:16:25.459941: step: 464/464, loss: 0.10260273516178131 2023-01-24 02:16:26.042110: step: 466/464, loss: 0.2750387191772461 2023-01-24 02:16:26.677310: step: 468/464, loss: 0.34514862298965454 2023-01-24 02:16:27.307151: step: 470/464, loss: 0.20128001272678375 2023-01-24 02:16:27.893375: step: 472/464, loss: 0.19402840733528137 2023-01-24 02:16:28.477207: step: 474/464, loss: 0.04928231239318848 2023-01-24 02:16:29.046354: step: 476/464, loss: 0.7598804831504822 2023-01-24 02:16:29.678562: step: 478/464, loss: 0.3005625307559967 2023-01-24 02:16:30.288303: step: 480/464, loss: 0.09437231719493866 2023-01-24 02:16:30.920488: step: 482/464, loss: 0.2333991974592209 2023-01-24 02:16:31.536311: step: 484/464, loss: 0.13361912965774536 2023-01-24 02:16:32.158025: step: 486/464, loss: 0.052449826151132584 2023-01-24 02:16:32.839974: step: 488/464, loss: 0.04909124970436096 2023-01-24 02:16:33.421574: step: 490/464, loss: 0.141932412981987 2023-01-24 02:16:34.024662: step: 492/464, loss: 1.3987499475479126 2023-01-24 02:16:34.657003: step: 494/464, loss: 0.025841237977147102 2023-01-24 02:16:35.317269: step: 496/464, loss: 0.13612967729568481 2023-01-24 02:16:35.979863: step: 498/464, loss: 0.10867279767990112 2023-01-24 02:16:36.594542: step: 500/464, loss: 0.4297686517238617 2023-01-24 02:16:37.254655: step: 502/464, loss: 0.2767006456851959 2023-01-24 02:16:37.881938: step: 504/464, loss: 0.14466437697410583 2023-01-24 02:16:38.502040: step: 506/464, loss: 0.10168630629777908 2023-01-24 02:16:39.167511: step: 508/464, loss: 0.27599096298217773 2023-01-24 02:16:39.803017: step: 510/464, loss: 0.411590576171875 2023-01-24 02:16:40.481070: step: 512/464, loss: 0.3165765404701233 2023-01-24 02:16:41.120560: step: 514/464, loss: 0.08729775249958038 2023-01-24 02:16:41.740337: step: 516/464, loss: 0.10420161485671997 2023-01-24 02:16:42.367267: step: 518/464, loss: 0.5963342785835266 2023-01-24 02:16:43.029424: step: 520/464, loss: 0.0534040592610836 2023-01-24 02:16:43.628477: step: 522/464, loss: 0.4430117905139923 2023-01-24 02:16:44.267851: step: 524/464, loss: 0.6797560453414917 2023-01-24 02:16:44.924268: step: 526/464, loss: 0.223937526345253 2023-01-24 02:16:45.549551: step: 528/464, loss: 0.1542477160692215 2023-01-24 02:16:46.218244: step: 530/464, loss: 0.1835298091173172 2023-01-24 02:16:46.819525: step: 532/464, loss: 0.28976961970329285 2023-01-24 02:16:47.403767: step: 534/464, loss: 0.11457552760839462 2023-01-24 02:16:48.037691: step: 536/464, loss: 0.39779454469680786 2023-01-24 02:16:48.719425: step: 538/464, loss: 0.09472489356994629 2023-01-24 02:16:49.382867: step: 540/464, loss: 0.07289186120033264 2023-01-24 02:16:50.022581: step: 542/464, loss: 0.1270940750837326 2023-01-24 02:16:50.591090: step: 544/464, loss: 0.12064804881811142 2023-01-24 02:16:51.132877: step: 546/464, loss: 0.1936420202255249 2023-01-24 02:16:51.797680: step: 548/464, loss: 0.10025503486394882 2023-01-24 02:16:52.494459: step: 550/464, loss: 1.1451830863952637 2023-01-24 02:16:53.138453: step: 552/464, loss: 0.2011127769947052 2023-01-24 02:16:53.733248: step: 554/464, loss: 0.17952358722686768 2023-01-24 02:16:54.315303: step: 556/464, loss: 0.054836973547935486 2023-01-24 02:16:54.898865: step: 558/464, loss: 0.03980335220694542 2023-01-24 02:16:55.516740: step: 560/464, loss: 0.20447109639644623 2023-01-24 02:16:56.162443: step: 562/464, loss: 0.29087498784065247 2023-01-24 02:16:56.749084: step: 564/464, loss: 0.16058194637298584 2023-01-24 02:16:57.347314: step: 566/464, loss: 0.05219912528991699 2023-01-24 02:16:57.933426: step: 568/464, loss: 0.21024443209171295 2023-01-24 02:16:58.550366: step: 570/464, loss: 0.3421464264392853 2023-01-24 02:16:59.209568: step: 572/464, loss: 0.20216087996959686 2023-01-24 02:16:59.845882: step: 574/464, loss: 0.041584938764572144 2023-01-24 02:17:00.469712: step: 576/464, loss: 0.08998734503984451 2023-01-24 02:17:01.049531: step: 578/464, loss: 0.6275544762611389 2023-01-24 02:17:01.606850: step: 580/464, loss: 0.1089167669415474 2023-01-24 02:17:02.250022: step: 582/464, loss: 0.2104983776807785 2023-01-24 02:17:02.840633: step: 584/464, loss: 0.19659323990345 2023-01-24 02:17:03.443053: step: 586/464, loss: 0.1263107806444168 2023-01-24 02:17:04.023420: step: 588/464, loss: 0.7749478220939636 2023-01-24 02:17:04.747293: step: 590/464, loss: 0.1915738433599472 2023-01-24 02:17:05.336759: step: 592/464, loss: 0.2550014853477478 2023-01-24 02:17:05.936115: step: 594/464, loss: 0.05610324442386627 2023-01-24 02:17:06.572016: step: 596/464, loss: 0.37603238224983215 2023-01-24 02:17:07.227974: step: 598/464, loss: 0.29247811436653137 2023-01-24 02:17:07.790791: step: 600/464, loss: 0.13564375042915344 2023-01-24 02:17:08.403274: step: 602/464, loss: 0.4842967689037323 2023-01-24 02:17:09.048239: step: 604/464, loss: 0.06512904167175293 2023-01-24 02:17:09.671195: step: 606/464, loss: 0.21476440131664276 2023-01-24 02:17:10.295339: step: 608/464, loss: 0.25330716371536255 2023-01-24 02:17:11.040109: step: 610/464, loss: 0.10401315242052078 2023-01-24 02:17:11.667940: step: 612/464, loss: 0.24575677514076233 2023-01-24 02:17:12.411434: step: 614/464, loss: 0.07836044579744339 2023-01-24 02:17:13.000237: step: 616/464, loss: 0.03666771203279495 2023-01-24 02:17:13.649473: step: 618/464, loss: 0.06859150528907776 2023-01-24 02:17:14.295414: step: 620/464, loss: 0.17963087558746338 2023-01-24 02:17:14.923583: step: 622/464, loss: 0.5812908411026001 2023-01-24 02:17:15.525585: step: 624/464, loss: 0.15075625479221344 2023-01-24 02:17:16.146789: step: 626/464, loss: 0.1244310662150383 2023-01-24 02:17:16.850298: step: 628/464, loss: 0.3986271023750305 2023-01-24 02:17:17.623628: step: 630/464, loss: 0.21589024364948273 2023-01-24 02:17:18.300139: step: 632/464, loss: 0.24251723289489746 2023-01-24 02:17:18.961190: step: 634/464, loss: 0.6837549209594727 2023-01-24 02:17:19.549440: step: 636/464, loss: 0.077407605946064 2023-01-24 02:17:20.167405: step: 638/464, loss: 0.17971009016036987 2023-01-24 02:17:20.891608: step: 640/464, loss: 0.13150545954704285 2023-01-24 02:17:21.480871: step: 642/464, loss: 0.05822813883423805 2023-01-24 02:17:22.067211: step: 644/464, loss: 0.21754500269889832 2023-01-24 02:17:22.720391: step: 646/464, loss: 0.1172441616654396 2023-01-24 02:17:23.355548: step: 648/464, loss: 0.16576620936393738 2023-01-24 02:17:23.923634: step: 650/464, loss: 0.5241413116455078 2023-01-24 02:17:24.555107: step: 652/464, loss: 0.15110956132411957 2023-01-24 02:17:25.134084: step: 654/464, loss: 0.051666341722011566 2023-01-24 02:17:25.715025: step: 656/464, loss: 0.23533783853054047 2023-01-24 02:17:26.273559: step: 658/464, loss: 0.44304656982421875 2023-01-24 02:17:26.971021: step: 660/464, loss: 0.21447089314460754 2023-01-24 02:17:27.580739: step: 662/464, loss: 2.8307924270629883 2023-01-24 02:17:28.283473: step: 664/464, loss: 0.10394848883152008 2023-01-24 02:17:28.938836: step: 666/464, loss: 0.0736704096198082 2023-01-24 02:17:29.610752: step: 668/464, loss: 0.14852645993232727 2023-01-24 02:17:30.258559: step: 670/464, loss: 0.5525936484336853 2023-01-24 02:17:30.850958: step: 672/464, loss: 0.26049676537513733 2023-01-24 02:17:31.481425: step: 674/464, loss: 0.3359794020652771 2023-01-24 02:17:32.082179: step: 676/464, loss: 0.23344263434410095 2023-01-24 02:17:32.686786: step: 678/464, loss: 0.38956648111343384 2023-01-24 02:17:33.291337: step: 680/464, loss: 0.28944116830825806 2023-01-24 02:17:34.117090: step: 682/464, loss: 0.16485880315303802 2023-01-24 02:17:34.760629: step: 684/464, loss: 0.10292749106884003 2023-01-24 02:17:35.443734: step: 686/464, loss: 0.24333380162715912 2023-01-24 02:17:36.027446: step: 688/464, loss: 0.15236453711986542 2023-01-24 02:17:36.630145: step: 690/464, loss: 0.4543951749801636 2023-01-24 02:17:37.238244: step: 692/464, loss: 0.23155415058135986 2023-01-24 02:17:37.859416: step: 694/464, loss: 0.1280626654624939 2023-01-24 02:17:38.512496: step: 696/464, loss: 0.16600367426872253 2023-01-24 02:17:39.208657: step: 698/464, loss: 0.17227059602737427 2023-01-24 02:17:39.854281: step: 700/464, loss: 0.15010888874530792 2023-01-24 02:17:40.504305: step: 702/464, loss: 0.2915771007537842 2023-01-24 02:17:41.115081: step: 704/464, loss: 0.14665408432483673 2023-01-24 02:17:41.743839: step: 706/464, loss: 0.09335776418447495 2023-01-24 02:17:42.318852: step: 708/464, loss: 0.12245311588048935 2023-01-24 02:17:42.974374: step: 710/464, loss: 0.3094889521598816 2023-01-24 02:17:43.614252: step: 712/464, loss: 0.28910723328590393 2023-01-24 02:17:44.293188: step: 714/464, loss: 0.16503068804740906 2023-01-24 02:17:44.843603: step: 716/464, loss: 0.16619819402694702 2023-01-24 02:17:45.409682: step: 718/464, loss: 0.24636615812778473 2023-01-24 02:17:46.107536: step: 720/464, loss: 0.12881028652191162 2023-01-24 02:17:46.773982: step: 722/464, loss: 0.2925278842449188 2023-01-24 02:17:47.401961: step: 724/464, loss: 0.16039560735225677 2023-01-24 02:17:48.023423: step: 726/464, loss: 0.20397178828716278 2023-01-24 02:17:48.718910: step: 728/464, loss: 0.11749129742383957 2023-01-24 02:17:49.346718: step: 730/464, loss: 0.6463688611984253 2023-01-24 02:17:49.978605: step: 732/464, loss: 0.5270076394081116 2023-01-24 02:17:50.555851: step: 734/464, loss: 0.0638769268989563 2023-01-24 02:17:51.186473: step: 736/464, loss: 0.1781592071056366 2023-01-24 02:17:51.846776: step: 738/464, loss: 0.9981155395507812 2023-01-24 02:17:52.493152: step: 740/464, loss: 0.1648676097393036 2023-01-24 02:17:53.112121: step: 742/464, loss: 0.1325865089893341 2023-01-24 02:17:53.702992: step: 744/464, loss: 0.046779971569776535 2023-01-24 02:17:54.364177: step: 746/464, loss: 0.2365027666091919 2023-01-24 02:17:54.951956: step: 748/464, loss: 0.11213462054729462 2023-01-24 02:17:55.501001: step: 750/464, loss: 0.1705876886844635 2023-01-24 02:17:56.201832: step: 752/464, loss: 0.2322748452425003 2023-01-24 02:17:56.765375: step: 754/464, loss: 0.1390574723482132 2023-01-24 02:17:57.395461: step: 756/464, loss: 0.2047601044178009 2023-01-24 02:17:58.028143: step: 758/464, loss: 0.9834222197532654 2023-01-24 02:17:58.655841: step: 760/464, loss: 0.49498993158340454 2023-01-24 02:17:59.371329: step: 762/464, loss: 0.11589276790618896 2023-01-24 02:18:00.062173: step: 764/464, loss: 0.07173576205968857 2023-01-24 02:18:00.709768: step: 766/464, loss: 0.21668976545333862 2023-01-24 02:18:01.392154: step: 768/464, loss: 0.08592022955417633 2023-01-24 02:18:02.024963: step: 770/464, loss: 0.16447365283966064 2023-01-24 02:18:02.627578: step: 772/464, loss: 0.21605470776557922 2023-01-24 02:18:03.248401: step: 774/464, loss: 1.5658270120620728 2023-01-24 02:18:03.848318: step: 776/464, loss: 0.0884871706366539 2023-01-24 02:18:04.504541: step: 778/464, loss: 0.1517365276813507 2023-01-24 02:18:05.130153: step: 780/464, loss: 0.09208640456199646 2023-01-24 02:18:05.674183: step: 782/464, loss: 0.11096998304128647 2023-01-24 02:18:06.342184: step: 784/464, loss: 0.2630148231983185 2023-01-24 02:18:06.899508: step: 786/464, loss: 0.1333618462085724 2023-01-24 02:18:07.538294: step: 788/464, loss: 0.32209762930870056 2023-01-24 02:18:08.118444: step: 790/464, loss: 0.12842051684856415 2023-01-24 02:18:08.732112: step: 792/464, loss: 0.15147587656974792 2023-01-24 02:18:09.328753: step: 794/464, loss: 0.4042651653289795 2023-01-24 02:18:09.978009: step: 796/464, loss: 0.08628109842538834 2023-01-24 02:18:10.600393: step: 798/464, loss: 0.16129985451698303 2023-01-24 02:18:11.245089: step: 800/464, loss: 0.3072906732559204 2023-01-24 02:18:11.864393: step: 802/464, loss: 0.12120974808931351 2023-01-24 02:18:12.485861: step: 804/464, loss: 0.07602791488170624 2023-01-24 02:18:13.181723: step: 806/464, loss: 0.10465062409639359 2023-01-24 02:18:13.845293: step: 808/464, loss: 0.432160884141922 2023-01-24 02:18:14.439367: step: 810/464, loss: 0.19127866625785828 2023-01-24 02:18:15.042405: step: 812/464, loss: 0.17180559039115906 2023-01-24 02:18:15.643447: step: 814/464, loss: 0.4046097695827484 2023-01-24 02:18:16.282554: step: 816/464, loss: 0.14955194294452667 2023-01-24 02:18:16.897253: step: 818/464, loss: 0.3648272156715393 2023-01-24 02:18:17.543498: step: 820/464, loss: 0.07315445691347122 2023-01-24 02:18:18.142683: step: 822/464, loss: 0.08252275735139847 2023-01-24 02:18:18.683979: step: 824/464, loss: 0.07464836537837982 2023-01-24 02:18:19.354559: step: 826/464, loss: 0.8104329109191895 2023-01-24 02:18:20.031257: step: 828/464, loss: 0.3973939120769501 2023-01-24 02:18:20.641467: step: 830/464, loss: 0.22226989269256592 2023-01-24 02:18:21.241521: step: 832/464, loss: 0.19058486819267273 2023-01-24 02:18:21.798270: step: 834/464, loss: 0.13172155618667603 2023-01-24 02:18:22.395133: step: 836/464, loss: 0.10252564400434494 2023-01-24 02:18:23.071292: step: 838/464, loss: 0.156595379114151 2023-01-24 02:18:23.688901: step: 840/464, loss: 0.1269509494304657 2023-01-24 02:18:24.316952: step: 842/464, loss: 0.5591135621070862 2023-01-24 02:18:24.962877: step: 844/464, loss: 0.2109595239162445 2023-01-24 02:18:25.583706: step: 846/464, loss: 0.7321330308914185 2023-01-24 02:18:26.231366: step: 848/464, loss: 0.27749067544937134 2023-01-24 02:18:26.832374: step: 850/464, loss: 0.34926146268844604 2023-01-24 02:18:27.457532: step: 852/464, loss: 0.26148805022239685 2023-01-24 02:18:28.017587: step: 854/464, loss: 0.13771076500415802 2023-01-24 02:18:28.724438: step: 856/464, loss: 0.14018401503562927 2023-01-24 02:18:29.311252: step: 858/464, loss: 0.3332751989364624 2023-01-24 02:18:29.986226: step: 860/464, loss: 0.20438069105148315 2023-01-24 02:18:30.626352: step: 862/464, loss: 0.19731405377388 2023-01-24 02:18:31.252999: step: 864/464, loss: 0.4525524377822876 2023-01-24 02:18:31.880548: step: 866/464, loss: 0.7084553241729736 2023-01-24 02:18:32.442854: step: 868/464, loss: 0.29406675696372986 2023-01-24 02:18:33.112016: step: 870/464, loss: 0.23075011372566223 2023-01-24 02:18:33.702907: step: 872/464, loss: 0.1038394421339035 2023-01-24 02:18:34.338137: step: 874/464, loss: 0.25779569149017334 2023-01-24 02:18:35.016848: step: 876/464, loss: 0.3240090310573578 2023-01-24 02:18:35.617964: step: 878/464, loss: 2.591330051422119 2023-01-24 02:18:36.285317: step: 880/464, loss: 0.1222638189792633 2023-01-24 02:18:36.959947: step: 882/464, loss: 0.26508739590644836 2023-01-24 02:18:37.575135: step: 884/464, loss: 0.45191052556037903 2023-01-24 02:18:38.247731: step: 886/464, loss: 0.08805494010448456 2023-01-24 02:18:38.901797: step: 888/464, loss: 0.2745131254196167 2023-01-24 02:18:39.450659: step: 890/464, loss: 0.13159851729869843 2023-01-24 02:18:40.111657: step: 892/464, loss: 0.09864508360624313 2023-01-24 02:18:40.721596: step: 894/464, loss: 0.1688559204339981 2023-01-24 02:18:41.262148: step: 896/464, loss: 0.06535923480987549 2023-01-24 02:18:41.877396: step: 898/464, loss: 0.08384721726179123 2023-01-24 02:18:42.504134: step: 900/464, loss: 0.08997979760169983 2023-01-24 02:18:43.151258: step: 902/464, loss: 0.17216522991657257 2023-01-24 02:18:43.747295: step: 904/464, loss: 0.1290782392024994 2023-01-24 02:18:44.370637: step: 906/464, loss: 0.1777675300836563 2023-01-24 02:18:45.014504: step: 908/464, loss: 0.2289048284292221 2023-01-24 02:18:45.592151: step: 910/464, loss: 0.09597111493349075 2023-01-24 02:18:46.173997: step: 912/464, loss: 0.1423436552286148 2023-01-24 02:18:46.808140: step: 914/464, loss: 0.25019633769989014 2023-01-24 02:18:47.417319: step: 916/464, loss: 0.19405746459960938 2023-01-24 02:18:48.026404: step: 918/464, loss: 0.06312482059001923 2023-01-24 02:18:48.634691: step: 920/464, loss: 0.09683822840452194 2023-01-24 02:18:49.281805: step: 922/464, loss: 0.05893225595355034 2023-01-24 02:18:49.955619: step: 924/464, loss: 0.6508408784866333 2023-01-24 02:18:50.577315: step: 926/464, loss: 0.04898369312286377 2023-01-24 02:18:51.196202: step: 928/464, loss: 0.07690513879060745 2023-01-24 02:18:51.690916: step: 930/464, loss: 0.07759331911802292 ================================================== Loss: 0.255 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207174414555584, 'r': 0.3328888813589952, 'f1': 0.32668983328899526}, 'combined': 0.24071882452873333, 'epoch': 12} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3232181946404794, 'r': 0.267665067436647, 'f1': 0.29283015924693007}, 'combined': 0.19117409360162274, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3176007727652464, 'r': 0.34351506731724946, 'f1': 0.33005002821547935}, 'combined': 0.24319475763245846, 'epoch': 12} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3403563380231948, 'r': 0.2793549722929347, 'f1': 0.3068533163601342}, 'combined': 0.2003291080900358, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3253633342022941, 'r': 0.33832847655191106, 'f1': 0.33171926910299004}, 'combined': 0.24442472460220319, 'epoch': 12} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3343950595657901, 'r': 0.2653685564444114, 'f1': 0.29590969465412625}, 'combined': 0.19318456749440366, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2569444444444444, 'r': 0.35238095238095235, 'f1': 0.2971887550200803}, 'combined': 0.1981258366800535, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3106060606060606, 'r': 0.44565217391304346, 'f1': 0.3660714285714286}, 'combined': 0.1830357142857143, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5384615384615384, 'r': 0.2413793103448276, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 12} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3253633342022941, 'r': 0.33832847655191106, 'f1': 0.33171926910299004}, 'combined': 0.24442472460220319, 'epoch': 12} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3343950595657901, 'r': 0.2653685564444114, 'f1': 0.29590969465412625}, 'combined': 0.19318456749440366, 'epoch': 12} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5384615384615384, 'r': 0.2413793103448276, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 12} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:21:35.357332: step: 2/464, loss: 0.20185832679271698 2023-01-24 02:21:35.926694: step: 4/464, loss: 0.15542711317539215 2023-01-24 02:21:36.668509: step: 6/464, loss: 0.05096767470240593 2023-01-24 02:21:37.201504: step: 8/464, loss: 0.03697284683585167 2023-01-24 02:21:37.807417: step: 10/464, loss: 0.13277460634708405 2023-01-24 02:21:38.408177: step: 12/464, loss: 0.06230716407299042 2023-01-24 02:21:39.009145: step: 14/464, loss: 0.17492106556892395 2023-01-24 02:21:39.591988: step: 16/464, loss: 0.07750896364450455 2023-01-24 02:21:40.153225: step: 18/464, loss: 0.3672374188899994 2023-01-24 02:21:40.837200: step: 20/464, loss: 0.022687960416078568 2023-01-24 02:21:41.464635: step: 22/464, loss: 0.04192107543349266 2023-01-24 02:21:42.115388: step: 24/464, loss: 0.037765905261039734 2023-01-24 02:21:42.710555: step: 26/464, loss: 0.15746667981147766 2023-01-24 02:21:43.383198: step: 28/464, loss: 0.20232006907463074 2023-01-24 02:21:43.994045: step: 30/464, loss: 0.1225430890917778 2023-01-24 02:21:44.562959: step: 32/464, loss: 0.10123708099126816 2023-01-24 02:21:45.165714: step: 34/464, loss: 0.12023011595010757 2023-01-24 02:21:45.786625: step: 36/464, loss: 0.03455466777086258 2023-01-24 02:21:46.366610: step: 38/464, loss: 0.24662727117538452 2023-01-24 02:21:46.972930: step: 40/464, loss: 0.161251500248909 2023-01-24 02:21:47.577712: step: 42/464, loss: 0.12451664358377457 2023-01-24 02:21:48.288575: step: 44/464, loss: 0.21603776514530182 2023-01-24 02:21:48.929044: step: 46/464, loss: 0.14945615828037262 2023-01-24 02:21:49.588507: step: 48/464, loss: 0.14207737147808075 2023-01-24 02:21:50.166589: step: 50/464, loss: 0.017215615138411522 2023-01-24 02:21:50.782658: step: 52/464, loss: 0.7450473308563232 2023-01-24 02:21:51.373409: step: 54/464, loss: 0.1268204152584076 2023-01-24 02:21:51.984811: step: 56/464, loss: 0.07096073031425476 2023-01-24 02:21:52.608830: step: 58/464, loss: 0.5722800493240356 2023-01-24 02:21:53.225053: step: 60/464, loss: 0.13189375400543213 2023-01-24 02:21:53.873720: step: 62/464, loss: 0.5198311805725098 2023-01-24 02:21:54.596429: step: 64/464, loss: 0.17269432544708252 2023-01-24 02:21:55.261604: step: 66/464, loss: 0.3504256308078766 2023-01-24 02:21:55.978188: step: 68/464, loss: 2.9256834983825684 2023-01-24 02:21:56.597687: step: 70/464, loss: 0.1361749768257141 2023-01-24 02:21:57.216453: step: 72/464, loss: 0.07835980504751205 2023-01-24 02:21:57.893492: step: 74/464, loss: 0.1003730520606041 2023-01-24 02:21:58.559191: step: 76/464, loss: 0.10211724787950516 2023-01-24 02:21:59.184772: step: 78/464, loss: 0.05625668913125992 2023-01-24 02:21:59.734349: step: 80/464, loss: 0.1728784143924713 2023-01-24 02:22:00.354208: step: 82/464, loss: 0.18018820881843567 2023-01-24 02:22:00.930355: step: 84/464, loss: 0.10599424690008163 2023-01-24 02:22:01.595049: step: 86/464, loss: 0.06094743683934212 2023-01-24 02:22:02.262844: step: 88/464, loss: 0.1300642192363739 2023-01-24 02:22:02.927255: step: 90/464, loss: 0.08097365498542786 2023-01-24 02:22:03.539049: step: 92/464, loss: 0.2460734248161316 2023-01-24 02:22:04.153294: step: 94/464, loss: 0.19132286310195923 2023-01-24 02:22:04.839548: step: 96/464, loss: 0.19940787553787231 2023-01-24 02:22:05.421653: step: 98/464, loss: 0.10040685534477234 2023-01-24 02:22:06.048945: step: 100/464, loss: 0.08837602287530899 2023-01-24 02:22:06.662070: step: 102/464, loss: 0.2815611660480499 2023-01-24 02:22:07.242238: step: 104/464, loss: 0.08271598815917969 2023-01-24 02:22:07.907745: step: 106/464, loss: 0.187362939119339 2023-01-24 02:22:08.536073: step: 108/464, loss: 0.07661551237106323 2023-01-24 02:22:09.165029: step: 110/464, loss: 0.17065709829330444 2023-01-24 02:22:09.748806: step: 112/464, loss: 0.1066417247056961 2023-01-24 02:22:10.337500: step: 114/464, loss: 0.03715033456683159 2023-01-24 02:22:10.904484: step: 116/464, loss: 0.04503128305077553 2023-01-24 02:22:11.531833: step: 118/464, loss: 0.13615788519382477 2023-01-24 02:22:12.221318: step: 120/464, loss: 0.13754060864448547 2023-01-24 02:22:12.885660: step: 122/464, loss: 0.1050601676106453 2023-01-24 02:22:13.569060: step: 124/464, loss: 0.17007730901241302 2023-01-24 02:22:14.174817: step: 126/464, loss: 0.05719568207859993 2023-01-24 02:22:14.747252: step: 128/464, loss: 0.17565593123435974 2023-01-24 02:22:15.401963: step: 130/464, loss: 0.08824195712804794 2023-01-24 02:22:16.011277: step: 132/464, loss: 0.14226341247558594 2023-01-24 02:22:16.669806: step: 134/464, loss: 0.2548733949661255 2023-01-24 02:22:17.257517: step: 136/464, loss: 0.06347456574440002 2023-01-24 02:22:17.972697: step: 138/464, loss: 0.1744856983423233 2023-01-24 02:22:18.594248: step: 140/464, loss: 0.14623475074768066 2023-01-24 02:22:19.246891: step: 142/464, loss: 0.10009687393903732 2023-01-24 02:22:19.893700: step: 144/464, loss: 0.09313633292913437 2023-01-24 02:22:20.511959: step: 146/464, loss: 0.35434556007385254 2023-01-24 02:22:21.332746: step: 148/464, loss: 0.13106636703014374 2023-01-24 02:22:21.986027: step: 150/464, loss: 0.06860726326704025 2023-01-24 02:22:22.605721: step: 152/464, loss: 0.09948920458555222 2023-01-24 02:22:23.231870: step: 154/464, loss: 0.007669544313102961 2023-01-24 02:22:23.903307: step: 156/464, loss: 0.4408150911331177 2023-01-24 02:22:24.590725: step: 158/464, loss: 0.22104589641094208 2023-01-24 02:22:25.157061: step: 160/464, loss: 0.1109457015991211 2023-01-24 02:22:25.708027: step: 162/464, loss: 0.14397618174552917 2023-01-24 02:22:26.326652: step: 164/464, loss: 0.06954605132341385 2023-01-24 02:22:26.943395: step: 166/464, loss: 0.10357537120580673 2023-01-24 02:22:27.578049: step: 168/464, loss: 0.06206103786826134 2023-01-24 02:22:28.208203: step: 170/464, loss: 0.08428634703159332 2023-01-24 02:22:28.884093: step: 172/464, loss: 0.16943979263305664 2023-01-24 02:22:29.412597: step: 174/464, loss: 0.36546847224235535 2023-01-24 02:22:30.048279: step: 176/464, loss: 0.1322539895772934 2023-01-24 02:22:30.699125: step: 178/464, loss: 0.0765388011932373 2023-01-24 02:22:31.259221: step: 180/464, loss: 0.4844176471233368 2023-01-24 02:22:31.848212: step: 182/464, loss: 0.21347054839134216 2023-01-24 02:22:32.440486: step: 184/464, loss: 0.1827172338962555 2023-01-24 02:22:33.013488: step: 186/464, loss: 0.04949979484081268 2023-01-24 02:22:33.635230: step: 188/464, loss: 0.12278314679861069 2023-01-24 02:22:34.277724: step: 190/464, loss: 0.07154679298400879 2023-01-24 02:22:34.922084: step: 192/464, loss: 0.05961580574512482 2023-01-24 02:22:35.569115: step: 194/464, loss: 0.3202018737792969 2023-01-24 02:22:36.150626: step: 196/464, loss: 0.532412052154541 2023-01-24 02:22:36.734192: step: 198/464, loss: 0.06335130333900452 2023-01-24 02:22:37.372312: step: 200/464, loss: 0.6394345164299011 2023-01-24 02:22:38.055568: step: 202/464, loss: 0.09846265614032745 2023-01-24 02:22:38.680098: step: 204/464, loss: 0.06447742134332657 2023-01-24 02:22:39.277837: step: 206/464, loss: 0.08010944724082947 2023-01-24 02:22:39.961140: step: 208/464, loss: 0.12857092916965485 2023-01-24 02:22:40.599872: step: 210/464, loss: 0.2337302416563034 2023-01-24 02:22:41.225813: step: 212/464, loss: 0.25796255469322205 2023-01-24 02:22:41.883755: step: 214/464, loss: 0.15600861608982086 2023-01-24 02:22:42.473932: step: 216/464, loss: 0.10373272001743317 2023-01-24 02:22:43.133669: step: 218/464, loss: 0.07564297318458557 2023-01-24 02:22:43.731688: step: 220/464, loss: 0.21570098400115967 2023-01-24 02:22:44.363140: step: 222/464, loss: 0.06716049462556839 2023-01-24 02:22:45.003922: step: 224/464, loss: 0.1479952186346054 2023-01-24 02:22:45.599074: step: 226/464, loss: 0.12563945353031158 2023-01-24 02:22:46.225838: step: 228/464, loss: 0.10678159445524216 2023-01-24 02:22:46.898156: step: 230/464, loss: 0.17874892055988312 2023-01-24 02:22:47.462676: step: 232/464, loss: 0.08733192831277847 2023-01-24 02:22:48.102131: step: 234/464, loss: 0.1556948870420456 2023-01-24 02:22:48.763789: step: 236/464, loss: 0.4824150502681732 2023-01-24 02:22:49.384953: step: 238/464, loss: 0.05019241198897362 2023-01-24 02:22:49.970295: step: 240/464, loss: 0.07417203485965729 2023-01-24 02:22:50.595593: step: 242/464, loss: 0.21365107595920563 2023-01-24 02:22:51.178467: step: 244/464, loss: 0.34110331535339355 2023-01-24 02:22:51.759675: step: 246/464, loss: 0.45811355113983154 2023-01-24 02:22:52.325036: step: 248/464, loss: 0.10789628326892853 2023-01-24 02:22:52.907502: step: 250/464, loss: 0.2191714644432068 2023-01-24 02:22:53.552947: step: 252/464, loss: 0.06005513295531273 2023-01-24 02:22:54.201702: step: 254/464, loss: 0.23280870914459229 2023-01-24 02:22:54.835190: step: 256/464, loss: 0.09422003477811813 2023-01-24 02:22:55.465747: step: 258/464, loss: 0.2404172122478485 2023-01-24 02:22:56.093167: step: 260/464, loss: 0.17267613112926483 2023-01-24 02:22:56.710146: step: 262/464, loss: 0.32540443539619446 2023-01-24 02:22:57.372347: step: 264/464, loss: 0.0512615405023098 2023-01-24 02:22:58.013006: step: 266/464, loss: 0.2330555021762848 2023-01-24 02:22:58.699696: step: 268/464, loss: 0.307839572429657 2023-01-24 02:22:59.324900: step: 270/464, loss: 0.16887786984443665 2023-01-24 02:22:59.943701: step: 272/464, loss: 0.27475062012672424 2023-01-24 02:23:00.572246: step: 274/464, loss: 0.2929746210575104 2023-01-24 02:23:01.181883: step: 276/464, loss: 0.09339070320129395 2023-01-24 02:23:01.767392: step: 278/464, loss: 0.0926584005355835 2023-01-24 02:23:02.385578: step: 280/464, loss: 0.04409787058830261 2023-01-24 02:23:03.093067: step: 282/464, loss: 0.0822482779622078 2023-01-24 02:23:03.753270: step: 284/464, loss: 0.806532621383667 2023-01-24 02:23:04.395522: step: 286/464, loss: 0.07540340721607208 2023-01-24 02:23:05.007645: step: 288/464, loss: 0.07068516314029694 2023-01-24 02:23:05.634554: step: 290/464, loss: 0.19356432557106018 2023-01-24 02:23:06.303962: step: 292/464, loss: 0.3262822926044464 2023-01-24 02:23:06.872427: step: 294/464, loss: 0.06418322026729584 2023-01-24 02:23:07.441474: step: 296/464, loss: 0.19877147674560547 2023-01-24 02:23:07.994733: step: 298/464, loss: 0.6032619476318359 2023-01-24 02:23:08.580973: step: 300/464, loss: 0.22746288776397705 2023-01-24 02:23:09.188700: step: 302/464, loss: 0.010866068303585052 2023-01-24 02:23:09.775808: step: 304/464, loss: 0.11572247743606567 2023-01-24 02:23:10.311626: step: 306/464, loss: 0.1199236810207367 2023-01-24 02:23:10.896530: step: 308/464, loss: 0.49689608812332153 2023-01-24 02:23:11.449931: step: 310/464, loss: 0.13504759967327118 2023-01-24 02:23:12.118988: step: 312/464, loss: 0.1400964856147766 2023-01-24 02:23:12.857362: step: 314/464, loss: 1.2025758028030396 2023-01-24 02:23:13.465222: step: 316/464, loss: 0.10083557665348053 2023-01-24 02:23:14.117156: step: 318/464, loss: 0.23876634240150452 2023-01-24 02:23:14.712893: step: 320/464, loss: 0.2314985990524292 2023-01-24 02:23:15.342894: step: 322/464, loss: 0.665070652961731 2023-01-24 02:23:15.985956: step: 324/464, loss: 0.11292921751737595 2023-01-24 02:23:16.648851: step: 326/464, loss: 0.9820161461830139 2023-01-24 02:23:17.336304: step: 328/464, loss: 0.22751489281654358 2023-01-24 02:23:17.934290: step: 330/464, loss: 0.14982061088085175 2023-01-24 02:23:18.578988: step: 332/464, loss: 0.05553364381194115 2023-01-24 02:23:19.228635: step: 334/464, loss: 0.12155529856681824 2023-01-24 02:23:19.789111: step: 336/464, loss: 0.07996393740177155 2023-01-24 02:23:20.370631: step: 338/464, loss: 0.14088571071624756 2023-01-24 02:23:20.973884: step: 340/464, loss: 0.06349222362041473 2023-01-24 02:23:21.627140: step: 342/464, loss: 0.1997671276330948 2023-01-24 02:23:22.285113: step: 344/464, loss: 0.09512768685817719 2023-01-24 02:23:22.892488: step: 346/464, loss: 0.14336644113063812 2023-01-24 02:23:23.632797: step: 348/464, loss: 0.36467671394348145 2023-01-24 02:23:24.181843: step: 350/464, loss: 0.13217979669570923 2023-01-24 02:23:24.791301: step: 352/464, loss: 0.5009357929229736 2023-01-24 02:23:25.381007: step: 354/464, loss: 0.18532253801822662 2023-01-24 02:23:25.954413: step: 356/464, loss: 0.1418599635362625 2023-01-24 02:23:26.561747: step: 358/464, loss: 0.14950698614120483 2023-01-24 02:23:27.284953: step: 360/464, loss: 0.07432292401790619 2023-01-24 02:23:27.913012: step: 362/464, loss: 0.6410752534866333 2023-01-24 02:23:28.592549: step: 364/464, loss: 0.11664801836013794 2023-01-24 02:23:29.247160: step: 366/464, loss: 0.2435479313135147 2023-01-24 02:23:29.803243: step: 368/464, loss: 0.40186649560928345 2023-01-24 02:23:30.464415: step: 370/464, loss: 0.3850458860397339 2023-01-24 02:23:31.081558: step: 372/464, loss: 0.4946538507938385 2023-01-24 02:23:31.709056: step: 374/464, loss: 0.6173791885375977 2023-01-24 02:23:32.309001: step: 376/464, loss: 0.17963023483753204 2023-01-24 02:23:32.956920: step: 378/464, loss: 0.45287269353866577 2023-01-24 02:23:33.679672: step: 380/464, loss: 0.5002993941307068 2023-01-24 02:23:34.433303: step: 382/464, loss: 0.12893010675907135 2023-01-24 02:23:35.045071: step: 384/464, loss: 0.07852751761674881 2023-01-24 02:23:35.656662: step: 386/464, loss: 0.09611756354570389 2023-01-24 02:23:36.317112: step: 388/464, loss: 0.09752000123262405 2023-01-24 02:23:36.923541: step: 390/464, loss: 0.14214427769184113 2023-01-24 02:23:37.588019: step: 392/464, loss: 0.12182476371526718 2023-01-24 02:23:38.183008: step: 394/464, loss: 0.14517463743686676 2023-01-24 02:23:38.821963: step: 396/464, loss: 0.10885108262300491 2023-01-24 02:23:39.411742: step: 398/464, loss: 0.14360931515693665 2023-01-24 02:23:40.138225: step: 400/464, loss: 0.30467498302459717 2023-01-24 02:23:40.795271: step: 402/464, loss: 0.1928461492061615 2023-01-24 02:23:41.338250: step: 404/464, loss: 0.03510986641049385 2023-01-24 02:23:41.920308: step: 406/464, loss: 0.3122141659259796 2023-01-24 02:23:42.559984: step: 408/464, loss: 0.39408615231513977 2023-01-24 02:23:43.196101: step: 410/464, loss: 0.17328330874443054 2023-01-24 02:23:43.870233: step: 412/464, loss: 0.09749648720026016 2023-01-24 02:23:44.440947: step: 414/464, loss: 0.2930392324924469 2023-01-24 02:23:45.042006: step: 416/464, loss: 0.4744638502597809 2023-01-24 02:23:45.628148: step: 418/464, loss: 1.5964727401733398 2023-01-24 02:23:46.265611: step: 420/464, loss: 0.1724444031715393 2023-01-24 02:23:46.882202: step: 422/464, loss: 0.29615873098373413 2023-01-24 02:23:47.500814: step: 424/464, loss: 0.13187594711780548 2023-01-24 02:23:48.176458: step: 426/464, loss: 0.25384002923965454 2023-01-24 02:23:48.830297: step: 428/464, loss: 0.11173109710216522 2023-01-24 02:23:49.413455: step: 430/464, loss: 0.08897748589515686 2023-01-24 02:23:50.081001: step: 432/464, loss: 0.2709905505180359 2023-01-24 02:23:50.715067: step: 434/464, loss: 0.3819783627986908 2023-01-24 02:23:51.357972: step: 436/464, loss: 0.08471409231424332 2023-01-24 02:23:51.998943: step: 438/464, loss: 0.17317360639572144 2023-01-24 02:23:52.597644: step: 440/464, loss: 0.14402073621749878 2023-01-24 02:23:53.240757: step: 442/464, loss: 0.10715532302856445 2023-01-24 02:23:53.872549: step: 444/464, loss: 0.06745106726884842 2023-01-24 02:23:54.455254: step: 446/464, loss: 0.23669381439685822 2023-01-24 02:23:55.113637: step: 448/464, loss: 0.1531083732843399 2023-01-24 02:23:55.752922: step: 450/464, loss: 0.09652193635702133 2023-01-24 02:23:56.319723: step: 452/464, loss: 0.4267866015434265 2023-01-24 02:23:57.012465: step: 454/464, loss: 0.11406000703573227 2023-01-24 02:23:57.675400: step: 456/464, loss: 0.09449287503957748 2023-01-24 02:23:58.361609: step: 458/464, loss: 0.06641307473182678 2023-01-24 02:23:59.047608: step: 460/464, loss: 0.16144509613513947 2023-01-24 02:23:59.701146: step: 462/464, loss: 0.20724427700042725 2023-01-24 02:24:00.358064: step: 464/464, loss: 0.36079099774360657 2023-01-24 02:24:00.984279: step: 466/464, loss: 0.49858927726745605 2023-01-24 02:24:01.583353: step: 468/464, loss: 0.194271981716156 2023-01-24 02:24:02.246589: step: 470/464, loss: 0.067782923579216 2023-01-24 02:24:02.849452: step: 472/464, loss: 0.0739690512418747 2023-01-24 02:24:03.423467: step: 474/464, loss: 0.004954623989760876 2023-01-24 02:24:04.025119: step: 476/464, loss: 0.05232124403119087 2023-01-24 02:24:04.613616: step: 478/464, loss: 0.14982157945632935 2023-01-24 02:24:05.257064: step: 480/464, loss: 0.054943837225437164 2023-01-24 02:24:05.913566: step: 482/464, loss: 0.5149544477462769 2023-01-24 02:24:06.522702: step: 484/464, loss: 0.28584542870521545 2023-01-24 02:24:07.108774: step: 486/464, loss: 0.11520925164222717 2023-01-24 02:24:07.788192: step: 488/464, loss: 0.3075667917728424 2023-01-24 02:24:08.404537: step: 490/464, loss: 0.34954291582107544 2023-01-24 02:24:09.009577: step: 492/464, loss: 0.16785860061645508 2023-01-24 02:24:09.687139: step: 494/464, loss: 0.13346517086029053 2023-01-24 02:24:10.329482: step: 496/464, loss: 0.21838581562042236 2023-01-24 02:24:10.946654: step: 498/464, loss: 0.17647969722747803 2023-01-24 02:24:11.568933: step: 500/464, loss: 0.3172367513179779 2023-01-24 02:24:12.227119: step: 502/464, loss: 0.10279625654220581 2023-01-24 02:24:12.875823: step: 504/464, loss: 0.05722634121775627 2023-01-24 02:24:13.489062: step: 506/464, loss: 0.08139292895793915 2023-01-24 02:24:14.126951: step: 508/464, loss: 0.28629621863365173 2023-01-24 02:24:14.720494: step: 510/464, loss: 0.07338398694992065 2023-01-24 02:24:15.321399: step: 512/464, loss: 0.11667048931121826 2023-01-24 02:24:15.924412: step: 514/464, loss: 0.10032473504543304 2023-01-24 02:24:16.505917: step: 516/464, loss: 0.10229482501745224 2023-01-24 02:24:17.137284: step: 518/464, loss: 0.3341778516769409 2023-01-24 02:24:17.743613: step: 520/464, loss: 0.348959743976593 2023-01-24 02:24:18.458610: step: 522/464, loss: 0.16965113580226898 2023-01-24 02:24:19.047205: step: 524/464, loss: 0.056599151343107224 2023-01-24 02:24:19.691989: step: 526/464, loss: 0.0732683464884758 2023-01-24 02:24:20.369657: step: 528/464, loss: 0.0883973240852356 2023-01-24 02:24:20.952934: step: 530/464, loss: 0.08199550956487656 2023-01-24 02:24:21.574953: step: 532/464, loss: 0.08900762349367142 2023-01-24 02:24:22.220724: step: 534/464, loss: 0.05986578017473221 2023-01-24 02:24:22.853780: step: 536/464, loss: 0.11681246757507324 2023-01-24 02:24:23.517278: step: 538/464, loss: 0.4057319462299347 2023-01-24 02:24:24.198877: step: 540/464, loss: 0.0658615306019783 2023-01-24 02:24:24.839504: step: 542/464, loss: 0.25594618916511536 2023-01-24 02:24:25.463106: step: 544/464, loss: 0.15549439191818237 2023-01-24 02:24:26.077420: step: 546/464, loss: 0.11209609359502792 2023-01-24 02:24:26.732939: step: 548/464, loss: 0.21639572083950043 2023-01-24 02:24:27.398707: step: 550/464, loss: 0.11636323481798172 2023-01-24 02:24:28.107754: step: 552/464, loss: 0.2554556727409363 2023-01-24 02:24:28.731203: step: 554/464, loss: 0.47724249958992004 2023-01-24 02:24:29.379291: step: 556/464, loss: 0.05490335449576378 2023-01-24 02:24:30.043990: step: 558/464, loss: 0.05813895910978317 2023-01-24 02:24:30.677920: step: 560/464, loss: 0.07191114872694016 2023-01-24 02:24:31.357028: step: 562/464, loss: 0.0820101946592331 2023-01-24 02:24:31.957824: step: 564/464, loss: 0.14678408205509186 2023-01-24 02:24:32.513540: step: 566/464, loss: 0.14023204147815704 2023-01-24 02:24:33.151980: step: 568/464, loss: 0.4147571623325348 2023-01-24 02:24:33.785749: step: 570/464, loss: 0.12394271045923233 2023-01-24 02:24:34.379880: step: 572/464, loss: 0.4368882477283478 2023-01-24 02:24:35.021622: step: 574/464, loss: 0.6768796443939209 2023-01-24 02:24:35.603964: step: 576/464, loss: 0.05807606875896454 2023-01-24 02:24:36.233703: step: 578/464, loss: 0.10776543617248535 2023-01-24 02:24:36.854909: step: 580/464, loss: 0.09010083973407745 2023-01-24 02:24:37.481866: step: 582/464, loss: 0.09341294318437576 2023-01-24 02:24:38.131112: step: 584/464, loss: 0.1841656118631363 2023-01-24 02:24:38.712906: step: 586/464, loss: 0.1848258078098297 2023-01-24 02:24:39.335048: step: 588/464, loss: 0.2034948617219925 2023-01-24 02:24:40.030123: step: 590/464, loss: 0.09111060202121735 2023-01-24 02:24:40.648305: step: 592/464, loss: 0.0409679114818573 2023-01-24 02:24:41.279370: step: 594/464, loss: 0.26828378438949585 2023-01-24 02:24:41.895004: step: 596/464, loss: 0.07353124767541885 2023-01-24 02:24:42.517275: step: 598/464, loss: 0.14041757583618164 2023-01-24 02:24:43.219231: step: 600/464, loss: 0.34325841069221497 2023-01-24 02:24:43.888421: step: 602/464, loss: 0.6918802261352539 2023-01-24 02:24:44.475036: step: 604/464, loss: 0.12383803725242615 2023-01-24 02:24:45.089607: step: 606/464, loss: 0.06072302907705307 2023-01-24 02:24:45.681628: step: 608/464, loss: 0.061823777854442596 2023-01-24 02:24:46.269326: step: 610/464, loss: 0.06603369861841202 2023-01-24 02:24:46.861129: step: 612/464, loss: 0.14131344854831696 2023-01-24 02:24:47.512386: step: 614/464, loss: 0.12874765694141388 2023-01-24 02:24:48.125522: step: 616/464, loss: 0.34312903881073 2023-01-24 02:24:48.756019: step: 618/464, loss: 0.4952242970466614 2023-01-24 02:24:49.415141: step: 620/464, loss: 0.07113741338253021 2023-01-24 02:24:50.032422: step: 622/464, loss: 0.04245679825544357 2023-01-24 02:24:50.669488: step: 624/464, loss: 0.1600327342748642 2023-01-24 02:24:51.321583: step: 626/464, loss: 0.1523052304983139 2023-01-24 02:24:51.971077: step: 628/464, loss: 0.1359877735376358 2023-01-24 02:24:52.631307: step: 630/464, loss: 0.11738570034503937 2023-01-24 02:24:53.298978: step: 632/464, loss: 0.2088128924369812 2023-01-24 02:24:53.908329: step: 634/464, loss: 0.09273140132427216 2023-01-24 02:24:54.477563: step: 636/464, loss: 0.09316191077232361 2023-01-24 02:24:55.123543: step: 638/464, loss: 0.2756721079349518 2023-01-24 02:24:55.869853: step: 640/464, loss: 0.21960747241973877 2023-01-24 02:24:56.527259: step: 642/464, loss: 0.13363170623779297 2023-01-24 02:24:57.191210: step: 644/464, loss: 0.2028121054172516 2023-01-24 02:24:57.784019: step: 646/464, loss: 0.20220768451690674 2023-01-24 02:24:58.414209: step: 648/464, loss: 0.12750768661499023 2023-01-24 02:24:59.039070: step: 650/464, loss: 0.22797568142414093 2023-01-24 02:24:59.665244: step: 652/464, loss: 0.09133946895599365 2023-01-24 02:25:00.340904: step: 654/464, loss: 0.12407989799976349 2023-01-24 02:25:00.932296: step: 656/464, loss: 0.1795833855867386 2023-01-24 02:25:01.502935: step: 658/464, loss: 0.11011943221092224 2023-01-24 02:25:02.067404: step: 660/464, loss: 0.29204973578453064 2023-01-24 02:25:02.774162: step: 662/464, loss: 0.5304825305938721 2023-01-24 02:25:03.392828: step: 664/464, loss: 0.10545740276575089 2023-01-24 02:25:04.064588: step: 666/464, loss: 0.5920228958129883 2023-01-24 02:25:04.689318: step: 668/464, loss: 0.3886897563934326 2023-01-24 02:25:05.312872: step: 670/464, loss: 0.055316805839538574 2023-01-24 02:25:06.004027: step: 672/464, loss: 0.15686535835266113 2023-01-24 02:25:06.592639: step: 674/464, loss: 0.22624193131923676 2023-01-24 02:25:07.204108: step: 676/464, loss: 0.11832347512245178 2023-01-24 02:25:07.858296: step: 678/464, loss: 0.08923111855983734 2023-01-24 02:25:08.461954: step: 680/464, loss: 0.14050790667533875 2023-01-24 02:25:09.216493: step: 682/464, loss: 0.11861838400363922 2023-01-24 02:25:09.855484: step: 684/464, loss: 0.13761171698570251 2023-01-24 02:25:10.495123: step: 686/464, loss: 0.35186994075775146 2023-01-24 02:25:11.099835: step: 688/464, loss: 0.4274250864982605 2023-01-24 02:25:11.755973: step: 690/464, loss: 0.08268038183450699 2023-01-24 02:25:12.440073: step: 692/464, loss: 0.10088789463043213 2023-01-24 02:25:13.107479: step: 694/464, loss: 0.40460291504859924 2023-01-24 02:25:13.782574: step: 696/464, loss: 0.10182829201221466 2023-01-24 02:25:14.365576: step: 698/464, loss: 0.05861677974462509 2023-01-24 02:25:14.987483: step: 700/464, loss: 0.15200048685073853 2023-01-24 02:25:15.566264: step: 702/464, loss: 0.07056646049022675 2023-01-24 02:25:16.249513: step: 704/464, loss: 0.19686491787433624 2023-01-24 02:25:16.892976: step: 706/464, loss: 0.07907170802354813 2023-01-24 02:25:17.472010: step: 708/464, loss: 0.3801847994327545 2023-01-24 02:25:18.061385: step: 710/464, loss: 0.11562389135360718 2023-01-24 02:25:18.674912: step: 712/464, loss: 0.3307975232601166 2023-01-24 02:25:19.243275: step: 714/464, loss: 0.22483167052268982 2023-01-24 02:25:19.848505: step: 716/464, loss: 0.21573488414287567 2023-01-24 02:25:20.543740: step: 718/464, loss: 0.1486646980047226 2023-01-24 02:25:21.171549: step: 720/464, loss: 0.17777541279792786 2023-01-24 02:25:21.754195: step: 722/464, loss: 0.08999773114919662 2023-01-24 02:25:22.328679: step: 724/464, loss: 0.8580670952796936 2023-01-24 02:25:22.980235: step: 726/464, loss: 0.3896852731704712 2023-01-24 02:25:23.616878: step: 728/464, loss: 0.39495813846588135 2023-01-24 02:25:24.250411: step: 730/464, loss: 0.07761916518211365 2023-01-24 02:25:24.834112: step: 732/464, loss: 0.04868488386273384 2023-01-24 02:25:25.514403: step: 734/464, loss: 0.1510753333568573 2023-01-24 02:25:26.131662: step: 736/464, loss: 0.7264018654823303 2023-01-24 02:25:26.733233: step: 738/464, loss: 0.09703339636325836 2023-01-24 02:25:27.389683: step: 740/464, loss: 0.10365378856658936 2023-01-24 02:25:28.023870: step: 742/464, loss: 0.14898864924907684 2023-01-24 02:25:28.680578: step: 744/464, loss: 0.1205286905169487 2023-01-24 02:25:29.302099: step: 746/464, loss: 0.32317137718200684 2023-01-24 02:25:29.936258: step: 748/464, loss: 0.5914244651794434 2023-01-24 02:25:30.479776: step: 750/464, loss: 0.8164223432540894 2023-01-24 02:25:31.147921: step: 752/464, loss: 0.42005714774131775 2023-01-24 02:25:31.756036: step: 754/464, loss: 0.2197103053331375 2023-01-24 02:25:32.412192: step: 756/464, loss: 0.07207664847373962 2023-01-24 02:25:33.018629: step: 758/464, loss: 0.18790841102600098 2023-01-24 02:25:33.658220: step: 760/464, loss: 0.4112013578414917 2023-01-24 02:25:34.323338: step: 762/464, loss: 0.10273218899965286 2023-01-24 02:25:34.964062: step: 764/464, loss: 0.14175225794315338 2023-01-24 02:25:35.637928: step: 766/464, loss: 0.07376343011856079 2023-01-24 02:25:36.253918: step: 768/464, loss: 0.1520259529352188 2023-01-24 02:25:36.875511: step: 770/464, loss: 0.07247330248355865 2023-01-24 02:25:37.417772: step: 772/464, loss: 0.04153461754322052 2023-01-24 02:25:38.164174: step: 774/464, loss: 0.1823529452085495 2023-01-24 02:25:38.828426: step: 776/464, loss: 0.5420295596122742 2023-01-24 02:25:39.473573: step: 778/464, loss: 0.4019032120704651 2023-01-24 02:25:40.070775: step: 780/464, loss: 0.1072503924369812 2023-01-24 02:25:40.780845: step: 782/464, loss: 0.20655189454555511 2023-01-24 02:25:41.372639: step: 784/464, loss: 0.07755839824676514 2023-01-24 02:25:41.997304: step: 786/464, loss: 0.17498230934143066 2023-01-24 02:25:42.627832: step: 788/464, loss: 1.4299875497817993 2023-01-24 02:25:43.175019: step: 790/464, loss: 0.1002681702375412 2023-01-24 02:25:43.734365: step: 792/464, loss: 0.1689300835132599 2023-01-24 02:25:44.342777: step: 794/464, loss: 0.07717189937829971 2023-01-24 02:25:44.999649: step: 796/464, loss: 0.13709495961666107 2023-01-24 02:25:45.672554: step: 798/464, loss: 0.044095080345869064 2023-01-24 02:25:46.369196: step: 800/464, loss: 0.10243158042430878 2023-01-24 02:25:47.043494: step: 802/464, loss: 0.07887127250432968 2023-01-24 02:25:47.690159: step: 804/464, loss: 0.2742998003959656 2023-01-24 02:25:48.276943: step: 806/464, loss: 0.09627963602542877 2023-01-24 02:25:48.858521: step: 808/464, loss: 0.10505854338407516 2023-01-24 02:25:49.490958: step: 810/464, loss: 0.5737142562866211 2023-01-24 02:25:50.147168: step: 812/464, loss: 0.1623065322637558 2023-01-24 02:25:50.806245: step: 814/464, loss: 0.10733121633529663 2023-01-24 02:25:51.434996: step: 816/464, loss: 0.060589712113142014 2023-01-24 02:25:52.081718: step: 818/464, loss: 0.2681101858615875 2023-01-24 02:25:52.678341: step: 820/464, loss: 0.08098702877759933 2023-01-24 02:25:53.325736: step: 822/464, loss: 0.058230411261320114 2023-01-24 02:25:53.912825: step: 824/464, loss: 0.1930847018957138 2023-01-24 02:25:54.496896: step: 826/464, loss: 0.47333502769470215 2023-01-24 02:25:55.196938: step: 828/464, loss: 0.18079730868339539 2023-01-24 02:25:55.906627: step: 830/464, loss: 0.12895090878009796 2023-01-24 02:25:56.590667: step: 832/464, loss: 0.1963747888803482 2023-01-24 02:25:57.154145: step: 834/464, loss: 0.26649853587150574 2023-01-24 02:25:57.798834: step: 836/464, loss: 0.6310838460922241 2023-01-24 02:25:58.402930: step: 838/464, loss: 0.09940309077501297 2023-01-24 02:25:59.051402: step: 840/464, loss: 0.07393283396959305 2023-01-24 02:25:59.640646: step: 842/464, loss: 0.6933224201202393 2023-01-24 02:26:00.265626: step: 844/464, loss: 0.10924071073532104 2023-01-24 02:26:00.854015: step: 846/464, loss: 0.08506400883197784 2023-01-24 02:26:01.491393: step: 848/464, loss: 0.7190383672714233 2023-01-24 02:26:02.101124: step: 850/464, loss: 0.2215147167444229 2023-01-24 02:26:02.761089: step: 852/464, loss: 0.2889682650566101 2023-01-24 02:26:03.382391: step: 854/464, loss: 0.18120506405830383 2023-01-24 02:26:03.932983: step: 856/464, loss: 0.17137137055397034 2023-01-24 02:26:04.518626: step: 858/464, loss: 0.7795878648757935 2023-01-24 02:26:05.169420: step: 860/464, loss: 0.13829930126667023 2023-01-24 02:26:05.804254: step: 862/464, loss: 0.06743170320987701 2023-01-24 02:26:06.485657: step: 864/464, loss: 0.16243469715118408 2023-01-24 02:26:07.076547: step: 866/464, loss: 0.21655528247356415 2023-01-24 02:26:07.682763: step: 868/464, loss: 0.12445773184299469 2023-01-24 02:26:08.255545: step: 870/464, loss: 0.34826043248176575 2023-01-24 02:26:08.935393: step: 872/464, loss: 0.27048900723457336 2023-01-24 02:26:09.538434: step: 874/464, loss: 0.11076904088258743 2023-01-24 02:26:10.201565: step: 876/464, loss: 0.203446164727211 2023-01-24 02:26:10.875683: step: 878/464, loss: 0.3083826005458832 2023-01-24 02:26:11.488302: step: 880/464, loss: 0.13766227662563324 2023-01-24 02:26:12.167627: step: 882/464, loss: 0.25832998752593994 2023-01-24 02:26:12.783641: step: 884/464, loss: 0.08957251161336899 2023-01-24 02:26:13.427452: step: 886/464, loss: 0.12176202982664108 2023-01-24 02:26:14.025102: step: 888/464, loss: 0.10469197481870651 2023-01-24 02:26:14.622154: step: 890/464, loss: 0.06898195296525955 2023-01-24 02:26:15.222345: step: 892/464, loss: 0.15037567913532257 2023-01-24 02:26:15.834725: step: 894/464, loss: 0.10617604851722717 2023-01-24 02:26:16.455966: step: 896/464, loss: 0.11643420159816742 2023-01-24 02:26:17.129314: step: 898/464, loss: 0.13417676091194153 2023-01-24 02:26:17.778355: step: 900/464, loss: 0.14892005920410156 2023-01-24 02:26:18.413826: step: 902/464, loss: 0.5806592702865601 2023-01-24 02:26:19.027038: step: 904/464, loss: 0.09055360406637192 2023-01-24 02:26:19.649079: step: 906/464, loss: 0.12215010076761246 2023-01-24 02:26:20.309082: step: 908/464, loss: 0.11172604560852051 2023-01-24 02:26:20.905219: step: 910/464, loss: 0.2146151065826416 2023-01-24 02:26:21.544014: step: 912/464, loss: 0.18249428272247314 2023-01-24 02:26:22.176082: step: 914/464, loss: 0.07410207390785217 2023-01-24 02:26:22.837946: step: 916/464, loss: 0.13241495192050934 2023-01-24 02:26:23.512024: step: 918/464, loss: 0.08030088245868683 2023-01-24 02:26:24.100652: step: 920/464, loss: 0.19699254631996155 2023-01-24 02:26:24.707559: step: 922/464, loss: 0.10928104817867279 2023-01-24 02:26:25.360216: step: 924/464, loss: 0.0733671635389328 2023-01-24 02:26:26.021956: step: 926/464, loss: 0.14551511406898499 2023-01-24 02:26:26.685536: step: 928/464, loss: 0.38674890995025635 2023-01-24 02:26:27.161890: step: 930/464, loss: 0.08716857433319092 ================================================== Loss: 0.208 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30657638888888894, 'r': 0.34904332700822266, 'f1': 0.32643448683821363}, 'combined': 0.24053067451236793, 'epoch': 13} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.298247902807955, 'r': 0.31825902128679756, 'f1': 0.30792869289465163}, 'combined': 0.20103116738200055, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.301967750816435, 'r': 0.35926713427306406, 'f1': 0.3281348002805975}, 'combined': 0.2417835370488613, 'epoch': 13} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31532419976666665, 'r': 0.3283541253768595, 'f1': 0.3217072807336033}, 'combined': 0.21002651488307778, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30815201080432175, 'r': 0.3479135605855246, 'f1': 0.3268278902470079}, 'combined': 0.24082055070832162, 'epoch': 13} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.311653871882755, 'r': 0.3188216242259646, 'f1': 0.31519700366859804}, 'combined': 0.2057762821877894, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31134259259259256, 'r': 0.3202380952380952, 'f1': 0.3157276995305164}, 'combined': 0.21048513302034424, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25675675675675674, 'r': 0.41304347826086957, 'f1': 0.31666666666666665}, 'combined': 0.15833333333333333, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3253633342022941, 'r': 0.33832847655191106, 'f1': 0.33171926910299004}, 'combined': 0.24442472460220319, 'epoch': 12} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3343950595657901, 'r': 0.2653685564444114, 'f1': 0.29590969465412625}, 'combined': 0.19318456749440366, 'epoch': 12} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5384615384615384, 'r': 0.2413793103448276, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 12} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:29:04.223056: step: 2/464, loss: 0.12613213062286377 2023-01-24 02:29:04.833842: step: 4/464, loss: 0.12780576944351196 2023-01-24 02:29:05.436935: step: 6/464, loss: 0.06970081478357315 2023-01-24 02:29:06.080308: step: 8/464, loss: 0.07534730434417725 2023-01-24 02:29:06.749800: step: 10/464, loss: 0.1213054358959198 2023-01-24 02:29:07.356838: step: 12/464, loss: 0.1861896812915802 2023-01-24 02:29:07.987158: step: 14/464, loss: 0.21565768122673035 2023-01-24 02:29:08.625935: step: 16/464, loss: 0.03582453355193138 2023-01-24 02:29:09.254363: step: 18/464, loss: 0.11763232201337814 2023-01-24 02:29:09.859700: step: 20/464, loss: 0.07920301705598831 2023-01-24 02:29:10.517802: step: 22/464, loss: 0.21957677602767944 2023-01-24 02:29:11.199111: step: 24/464, loss: 0.08056668192148209 2023-01-24 02:29:11.876789: step: 26/464, loss: 0.43024322390556335 2023-01-24 02:29:12.503533: step: 28/464, loss: 0.21023477613925934 2023-01-24 02:29:13.178923: step: 30/464, loss: 0.058270473033189774 2023-01-24 02:29:13.794111: step: 32/464, loss: 0.06991127133369446 2023-01-24 02:29:14.345354: step: 34/464, loss: 0.11741513013839722 2023-01-24 02:29:14.965969: step: 36/464, loss: 0.09309908002614975 2023-01-24 02:29:15.564481: step: 38/464, loss: 0.09769189357757568 2023-01-24 02:29:16.189062: step: 40/464, loss: 0.039672598242759705 2023-01-24 02:29:16.825156: step: 42/464, loss: 0.08527854084968567 2023-01-24 02:29:17.419204: step: 44/464, loss: 0.05091758444905281 2023-01-24 02:29:17.949716: step: 46/464, loss: 0.11744002252817154 2023-01-24 02:29:18.609780: step: 48/464, loss: 0.03684813156723976 2023-01-24 02:29:19.197748: step: 50/464, loss: 0.23239551484584808 2023-01-24 02:29:19.782713: step: 52/464, loss: 0.009587530046701431 2023-01-24 02:29:20.375384: step: 54/464, loss: 0.4318223297595978 2023-01-24 02:29:20.916523: step: 56/464, loss: 0.0942460224032402 2023-01-24 02:29:21.537693: step: 58/464, loss: 0.12825199961662292 2023-01-24 02:29:22.259098: step: 60/464, loss: 0.1324436217546463 2023-01-24 02:29:22.865315: step: 62/464, loss: 0.04708656668663025 2023-01-24 02:29:23.485453: step: 64/464, loss: 0.04969207942485809 2023-01-24 02:29:24.108373: step: 66/464, loss: 0.41940873861312866 2023-01-24 02:29:24.718055: step: 68/464, loss: 0.0641220286488533 2023-01-24 02:29:25.304256: step: 70/464, loss: 0.060402270406484604 2023-01-24 02:29:25.964567: step: 72/464, loss: 0.0645303726196289 2023-01-24 02:29:26.641228: step: 74/464, loss: 0.07540280371904373 2023-01-24 02:29:27.265029: step: 76/464, loss: 0.0162261463701725 2023-01-24 02:29:27.927686: step: 78/464, loss: 0.292266845703125 2023-01-24 02:29:28.551235: step: 80/464, loss: 0.6582212448120117 2023-01-24 02:29:29.139813: step: 82/464, loss: 0.494401216506958 2023-01-24 02:29:29.764299: step: 84/464, loss: 0.307862251996994 2023-01-24 02:29:30.356738: step: 86/464, loss: 0.04717501625418663 2023-01-24 02:29:30.987370: step: 88/464, loss: 0.21211999654769897 2023-01-24 02:29:31.603816: step: 90/464, loss: 0.05120338127017021 2023-01-24 02:29:32.249213: step: 92/464, loss: 0.22324422001838684 2023-01-24 02:29:32.957843: step: 94/464, loss: 0.08178785443305969 2023-01-24 02:29:33.624435: step: 96/464, loss: 0.16229651868343353 2023-01-24 02:29:34.255718: step: 98/464, loss: 0.2983476519584656 2023-01-24 02:29:34.932974: step: 100/464, loss: 0.2334270179271698 2023-01-24 02:29:35.593764: step: 102/464, loss: 0.047153085470199585 2023-01-24 02:29:36.213723: step: 104/464, loss: 0.15230758488178253 2023-01-24 02:29:36.808664: step: 106/464, loss: 0.22207175195217133 2023-01-24 02:29:37.430045: step: 108/464, loss: 0.1279035210609436 2023-01-24 02:29:38.060212: step: 110/464, loss: 0.07191181927919388 2023-01-24 02:29:38.691586: step: 112/464, loss: 0.10683086514472961 2023-01-24 02:29:39.344515: step: 114/464, loss: 0.11870521306991577 2023-01-24 02:29:40.040157: step: 116/464, loss: 0.05834219232201576 2023-01-24 02:29:40.655906: step: 118/464, loss: 0.2276674211025238 2023-01-24 02:29:41.315676: step: 120/464, loss: 0.032239992171525955 2023-01-24 02:29:41.956486: step: 122/464, loss: 0.4272522032260895 2023-01-24 02:29:42.637486: step: 124/464, loss: 0.04740273952484131 2023-01-24 02:29:43.242812: step: 126/464, loss: 0.09091894328594208 2023-01-24 02:29:43.878905: step: 128/464, loss: 0.0550725944340229 2023-01-24 02:29:44.442022: step: 130/464, loss: 0.27808040380477905 2023-01-24 02:29:45.091545: step: 132/464, loss: 0.030161166563630104 2023-01-24 02:29:45.716342: step: 134/464, loss: 0.012089907191693783 2023-01-24 02:29:46.314337: step: 136/464, loss: 0.23043721914291382 2023-01-24 02:29:46.971104: step: 138/464, loss: 0.0220566987991333 2023-01-24 02:29:47.550345: step: 140/464, loss: 0.1365833580493927 2023-01-24 02:29:48.215800: step: 142/464, loss: 0.06428226828575134 2023-01-24 02:29:48.900060: step: 144/464, loss: 0.19025635719299316 2023-01-24 02:29:49.553844: step: 146/464, loss: 0.18001894652843475 2023-01-24 02:29:50.152165: step: 148/464, loss: 0.047540344297885895 2023-01-24 02:29:50.781958: step: 150/464, loss: 0.2429361641407013 2023-01-24 02:29:51.490099: step: 152/464, loss: 0.15517257153987885 2023-01-24 02:29:52.138472: step: 154/464, loss: 0.27889472246170044 2023-01-24 02:29:52.802324: step: 156/464, loss: 0.10768377035856247 2023-01-24 02:29:53.377811: step: 158/464, loss: 0.1395530253648758 2023-01-24 02:29:53.996960: step: 160/464, loss: 0.15417103469371796 2023-01-24 02:29:54.634183: step: 162/464, loss: 0.06897418200969696 2023-01-24 02:29:55.282599: step: 164/464, loss: 0.09392699599266052 2023-01-24 02:29:55.862604: step: 166/464, loss: 0.1080712378025055 2023-01-24 02:29:56.498517: step: 168/464, loss: 0.06684279441833496 2023-01-24 02:29:57.057076: step: 170/464, loss: 0.4811127781867981 2023-01-24 02:29:57.720656: step: 172/464, loss: 0.26632991433143616 2023-01-24 02:29:58.433352: step: 174/464, loss: 0.09028254449367523 2023-01-24 02:29:58.987568: step: 176/464, loss: 1.6176563501358032 2023-01-24 02:29:59.673975: step: 178/464, loss: 0.10107997804880142 2023-01-24 02:30:00.206035: step: 180/464, loss: 0.1728367805480957 2023-01-24 02:30:00.840385: step: 182/464, loss: 0.2647758722305298 2023-01-24 02:30:01.439102: step: 184/464, loss: 0.057612065225839615 2023-01-24 02:30:02.011703: step: 186/464, loss: 0.16593562066555023 2023-01-24 02:30:02.649264: step: 188/464, loss: 0.39534786343574524 2023-01-24 02:30:03.299761: step: 190/464, loss: 0.11273720115423203 2023-01-24 02:30:03.896438: step: 192/464, loss: 0.24984633922576904 2023-01-24 02:30:04.479748: step: 194/464, loss: 0.14231686294078827 2023-01-24 02:30:05.111251: step: 196/464, loss: 0.22011631727218628 2023-01-24 02:30:05.765993: step: 198/464, loss: 0.2356623113155365 2023-01-24 02:30:06.433550: step: 200/464, loss: 0.4909124970436096 2023-01-24 02:30:07.028967: step: 202/464, loss: 0.06964804232120514 2023-01-24 02:30:07.664991: step: 204/464, loss: 0.13431423902511597 2023-01-24 02:30:08.291819: step: 206/464, loss: 0.11794348061084747 2023-01-24 02:30:08.840859: step: 208/464, loss: 0.06027591973543167 2023-01-24 02:30:09.597537: step: 210/464, loss: 0.16354875266551971 2023-01-24 02:30:10.228319: step: 212/464, loss: 0.02065500244498253 2023-01-24 02:30:10.918553: step: 214/464, loss: 0.1693529486656189 2023-01-24 02:30:11.567276: step: 216/464, loss: 0.09896227717399597 2023-01-24 02:30:12.213791: step: 218/464, loss: 0.17069461941719055 2023-01-24 02:30:12.906337: step: 220/464, loss: 13.397747993469238 2023-01-24 02:30:13.523810: step: 222/464, loss: 0.1541566550731659 2023-01-24 02:30:14.122679: step: 224/464, loss: 0.15598753094673157 2023-01-24 02:30:14.800935: step: 226/464, loss: 0.11893276870250702 2023-01-24 02:30:15.481097: step: 228/464, loss: 0.15374010801315308 2023-01-24 02:30:16.025322: step: 230/464, loss: 2.745584487915039 2023-01-24 02:30:16.585565: step: 232/464, loss: 0.035824257880449295 2023-01-24 02:30:17.201034: step: 234/464, loss: 0.06368924677371979 2023-01-24 02:30:17.764830: step: 236/464, loss: 0.0707118809223175 2023-01-24 02:30:18.361913: step: 238/464, loss: 0.24368637800216675 2023-01-24 02:30:18.967815: step: 240/464, loss: 0.16533474624156952 2023-01-24 02:30:19.615318: step: 242/464, loss: 0.07772989571094513 2023-01-24 02:30:20.165433: step: 244/464, loss: 0.23945599794387817 2023-01-24 02:30:20.789906: step: 246/464, loss: 0.07287517935037613 2023-01-24 02:30:21.465162: step: 248/464, loss: 0.17496463656425476 2023-01-24 02:30:22.074786: step: 250/464, loss: 0.0961579903960228 2023-01-24 02:30:22.686525: step: 252/464, loss: 0.6809481978416443 2023-01-24 02:30:23.284990: step: 254/464, loss: 0.10889847576618195 2023-01-24 02:30:23.894691: step: 256/464, loss: 0.08200860768556595 2023-01-24 02:30:24.495169: step: 258/464, loss: 0.15323743224143982 2023-01-24 02:30:25.086025: step: 260/464, loss: 0.09156341105699539 2023-01-24 02:30:25.651607: step: 262/464, loss: 0.18293572962284088 2023-01-24 02:30:26.290025: step: 264/464, loss: 0.30259743332862854 2023-01-24 02:30:26.903909: step: 266/464, loss: 0.16626927256584167 2023-01-24 02:30:27.541594: step: 268/464, loss: 0.1518091857433319 2023-01-24 02:30:28.115401: step: 270/464, loss: 0.08012847602367401 2023-01-24 02:30:28.700215: step: 272/464, loss: 0.18999381363391876 2023-01-24 02:30:29.321700: step: 274/464, loss: 0.10878537595272064 2023-01-24 02:30:29.924400: step: 276/464, loss: 0.14942845702171326 2023-01-24 02:30:30.683789: step: 278/464, loss: 0.14834989607334137 2023-01-24 02:30:31.249959: step: 280/464, loss: 0.032504718750715256 2023-01-24 02:30:31.838396: step: 282/464, loss: 0.08153710514307022 2023-01-24 02:30:32.436967: step: 284/464, loss: 0.11140517145395279 2023-01-24 02:30:33.062323: step: 286/464, loss: 0.3821107745170593 2023-01-24 02:30:33.729945: step: 288/464, loss: 0.05341333895921707 2023-01-24 02:30:34.372793: step: 290/464, loss: 0.27715128660202026 2023-01-24 02:30:35.002480: step: 292/464, loss: 0.14849257469177246 2023-01-24 02:30:35.619557: step: 294/464, loss: 0.05485979840159416 2023-01-24 02:30:36.163211: step: 296/464, loss: 0.037415195256471634 2023-01-24 02:30:36.775193: step: 298/464, loss: 0.05425393208861351 2023-01-24 02:30:37.402348: step: 300/464, loss: 0.12795880436897278 2023-01-24 02:30:38.015087: step: 302/464, loss: 0.18602930009365082 2023-01-24 02:30:38.620180: step: 304/464, loss: 0.14597341418266296 2023-01-24 02:30:39.236585: step: 306/464, loss: 0.09839761257171631 2023-01-24 02:30:39.813617: step: 308/464, loss: 0.12964694201946259 2023-01-24 02:30:40.470998: step: 310/464, loss: 0.04418937861919403 2023-01-24 02:30:41.131859: step: 312/464, loss: 0.046734243631362915 2023-01-24 02:30:41.815891: step: 314/464, loss: 0.337647944688797 2023-01-24 02:30:42.453217: step: 316/464, loss: 0.1818884015083313 2023-01-24 02:30:43.033081: step: 318/464, loss: 0.13660390675067902 2023-01-24 02:30:43.672881: step: 320/464, loss: 0.05392802134156227 2023-01-24 02:30:44.298567: step: 322/464, loss: 0.15964898467063904 2023-01-24 02:30:44.967357: step: 324/464, loss: 0.23734918236732483 2023-01-24 02:30:45.639197: step: 326/464, loss: 0.11211232841014862 2023-01-24 02:30:46.336463: step: 328/464, loss: 0.08280149102210999 2023-01-24 02:30:47.027984: step: 330/464, loss: 0.2425793558359146 2023-01-24 02:30:47.688966: step: 332/464, loss: 1.0096615552902222 2023-01-24 02:30:48.348827: step: 334/464, loss: 0.2613997459411621 2023-01-24 02:30:49.009694: step: 336/464, loss: 0.06869491934776306 2023-01-24 02:30:49.667106: step: 338/464, loss: 0.1866166889667511 2023-01-24 02:30:50.291887: step: 340/464, loss: 0.05125715583562851 2023-01-24 02:30:50.928875: step: 342/464, loss: 0.02904468961060047 2023-01-24 02:30:51.532242: step: 344/464, loss: 0.12807013094425201 2023-01-24 02:30:52.099399: step: 346/464, loss: 0.09498754888772964 2023-01-24 02:30:52.780139: step: 348/464, loss: 0.22028857469558716 2023-01-24 02:30:53.411982: step: 350/464, loss: 0.10805842280387878 2023-01-24 02:30:54.119348: step: 352/464, loss: 0.14931246638298035 2023-01-24 02:30:54.805352: step: 354/464, loss: 0.09262239933013916 2023-01-24 02:30:55.394643: step: 356/464, loss: 0.10256149619817734 2023-01-24 02:30:55.964642: step: 358/464, loss: 0.27007755637168884 2023-01-24 02:30:56.603522: step: 360/464, loss: 0.33299100399017334 2023-01-24 02:30:57.227970: step: 362/464, loss: 0.10185973346233368 2023-01-24 02:30:57.823700: step: 364/464, loss: 0.3008521795272827 2023-01-24 02:30:58.429325: step: 366/464, loss: 0.13727213442325592 2023-01-24 02:30:59.063937: step: 368/464, loss: 0.1457100361585617 2023-01-24 02:30:59.691808: step: 370/464, loss: 0.2102964222431183 2023-01-24 02:31:00.369078: step: 372/464, loss: 0.11828085780143738 2023-01-24 02:31:00.963856: step: 374/464, loss: 0.024987978860735893 2023-01-24 02:31:01.774638: step: 376/464, loss: 0.2500342130661011 2023-01-24 02:31:02.461930: step: 378/464, loss: 0.23952820897102356 2023-01-24 02:31:03.095867: step: 380/464, loss: 0.15812312066555023 2023-01-24 02:31:03.846007: step: 382/464, loss: 0.09742007404565811 2023-01-24 02:31:04.451948: step: 384/464, loss: 0.18498073518276215 2023-01-24 02:31:05.133113: step: 386/464, loss: 0.10564729571342468 2023-01-24 02:31:05.748783: step: 388/464, loss: 0.1336047202348709 2023-01-24 02:31:06.327800: step: 390/464, loss: 0.057287439703941345 2023-01-24 02:31:06.961101: step: 392/464, loss: 0.0842222347855568 2023-01-24 02:31:07.525017: step: 394/464, loss: 0.1357746720314026 2023-01-24 02:31:08.114686: step: 396/464, loss: 0.07283379137516022 2023-01-24 02:31:08.857373: step: 398/464, loss: 0.505624532699585 2023-01-24 02:31:09.530673: step: 400/464, loss: 0.24738441407680511 2023-01-24 02:31:10.159904: step: 402/464, loss: 0.12691976130008698 2023-01-24 02:31:10.761255: step: 404/464, loss: 0.09851517528295517 2023-01-24 02:31:11.382730: step: 406/464, loss: 0.06937228888273239 2023-01-24 02:31:12.099688: step: 408/464, loss: 0.192485049366951 2023-01-24 02:31:12.770904: step: 410/464, loss: 0.12418614327907562 2023-01-24 02:31:13.415395: step: 412/464, loss: 0.03279638662934303 2023-01-24 02:31:14.076091: step: 414/464, loss: 0.20969468355178833 2023-01-24 02:31:14.723877: step: 416/464, loss: 0.11003034561872482 2023-01-24 02:31:15.345190: step: 418/464, loss: 0.20709995925426483 2023-01-24 02:31:15.911846: step: 420/464, loss: 0.14961837232112885 2023-01-24 02:31:16.510990: step: 422/464, loss: 0.12297564744949341 2023-01-24 02:31:17.121852: step: 424/464, loss: 0.45564550161361694 2023-01-24 02:31:17.789691: step: 426/464, loss: 0.16366340219974518 2023-01-24 02:31:18.442065: step: 428/464, loss: 0.09504736959934235 2023-01-24 02:31:19.055576: step: 430/464, loss: 0.12506511807441711 2023-01-24 02:31:19.727157: step: 432/464, loss: 0.16169333457946777 2023-01-24 02:31:20.352742: step: 434/464, loss: 0.5574436187744141 2023-01-24 02:31:20.935642: step: 436/464, loss: 0.037777990102767944 2023-01-24 02:31:21.548078: step: 438/464, loss: 0.06685791909694672 2023-01-24 02:31:22.071887: step: 440/464, loss: 0.04349729046225548 2023-01-24 02:31:22.708916: step: 442/464, loss: 0.14673559367656708 2023-01-24 02:31:23.317517: step: 444/464, loss: 0.11699734628200531 2023-01-24 02:31:23.952993: step: 446/464, loss: 0.17990678548812866 2023-01-24 02:31:24.547140: step: 448/464, loss: 0.03047196939587593 2023-01-24 02:31:25.082786: step: 450/464, loss: 0.08635605871677399 2023-01-24 02:31:25.690525: step: 452/464, loss: 0.133879154920578 2023-01-24 02:31:26.303590: step: 454/464, loss: 0.03776126354932785 2023-01-24 02:31:26.927443: step: 456/464, loss: 0.2038840502500534 2023-01-24 02:31:27.606202: step: 458/464, loss: 0.6528012156486511 2023-01-24 02:31:28.281570: step: 460/464, loss: 0.033662084490060806 2023-01-24 02:31:28.875526: step: 462/464, loss: 0.354397714138031 2023-01-24 02:31:29.557382: step: 464/464, loss: 0.07987023144960403 2023-01-24 02:31:30.130245: step: 466/464, loss: 0.20296648144721985 2023-01-24 02:31:30.736150: step: 468/464, loss: 0.13156884908676147 2023-01-24 02:31:31.384581: step: 470/464, loss: 0.11640924960374832 2023-01-24 02:31:32.017888: step: 472/464, loss: 0.09786844253540039 2023-01-24 02:31:32.599399: step: 474/464, loss: 0.17049400508403778 2023-01-24 02:31:33.206156: step: 476/464, loss: 0.1515236645936966 2023-01-24 02:31:33.877844: step: 478/464, loss: 0.4211825430393219 2023-01-24 02:31:34.541688: step: 480/464, loss: 0.09188634157180786 2023-01-24 02:31:35.132934: step: 482/464, loss: 0.14980553090572357 2023-01-24 02:31:35.782146: step: 484/464, loss: 0.21239091455936432 2023-01-24 02:31:36.421457: step: 486/464, loss: 0.05061900615692139 2023-01-24 02:31:37.103127: step: 488/464, loss: 0.3776748776435852 2023-01-24 02:31:37.721746: step: 490/464, loss: 0.09961634129285812 2023-01-24 02:31:38.416488: step: 492/464, loss: 0.052889157086610794 2023-01-24 02:31:39.092047: step: 494/464, loss: 0.09435848146677017 2023-01-24 02:31:39.793504: step: 496/464, loss: 0.47626474499702454 2023-01-24 02:31:40.483531: step: 498/464, loss: 0.08534081280231476 2023-01-24 02:31:41.063847: step: 500/464, loss: 0.10756982862949371 2023-01-24 02:31:41.691929: step: 502/464, loss: 0.09308572858572006 2023-01-24 02:31:42.331546: step: 504/464, loss: 0.258376806974411 2023-01-24 02:31:42.975891: step: 506/464, loss: 1.1513007879257202 2023-01-24 02:31:43.622494: step: 508/464, loss: 0.5706973671913147 2023-01-24 02:31:44.246897: step: 510/464, loss: 0.062000103294849396 2023-01-24 02:31:44.901394: step: 512/464, loss: 0.06004420295357704 2023-01-24 02:31:45.561421: step: 514/464, loss: 0.04904761537909508 2023-01-24 02:31:46.209299: step: 516/464, loss: 0.09929678589105606 2023-01-24 02:31:46.810677: step: 518/464, loss: 0.1910267472267151 2023-01-24 02:31:47.482082: step: 520/464, loss: 0.06864751130342484 2023-01-24 02:31:48.043957: step: 522/464, loss: 0.052617765963077545 2023-01-24 02:31:48.653866: step: 524/464, loss: 0.22045567631721497 2023-01-24 02:31:49.306647: step: 526/464, loss: 0.3091566264629364 2023-01-24 02:31:49.973224: step: 528/464, loss: 0.16436395049095154 2023-01-24 02:31:50.612655: step: 530/464, loss: 0.08261793851852417 2023-01-24 02:31:51.228553: step: 532/464, loss: 0.7199795842170715 2023-01-24 02:31:51.903063: step: 534/464, loss: 0.201430082321167 2023-01-24 02:31:52.508737: step: 536/464, loss: 0.7143619060516357 2023-01-24 02:31:53.117621: step: 538/464, loss: 0.0859590396285057 2023-01-24 02:31:53.723070: step: 540/464, loss: 0.12776008248329163 2023-01-24 02:31:54.355337: step: 542/464, loss: 0.0532626137137413 2023-01-24 02:31:54.972443: step: 544/464, loss: 0.16390809416770935 2023-01-24 02:31:55.609386: step: 546/464, loss: 0.6926539540290833 2023-01-24 02:31:56.294763: step: 548/464, loss: 0.028972607105970383 2023-01-24 02:31:56.899414: step: 550/464, loss: 0.060550302267074585 2023-01-24 02:31:57.502890: step: 552/464, loss: 0.1250130832195282 2023-01-24 02:31:58.085684: step: 554/464, loss: 0.22803033888339996 2023-01-24 02:31:58.735517: step: 556/464, loss: 0.24090181291103363 2023-01-24 02:31:59.411650: step: 558/464, loss: 0.12897726893424988 2023-01-24 02:32:00.075139: step: 560/464, loss: 0.12019973248243332 2023-01-24 02:32:00.717629: step: 562/464, loss: 0.2126852422952652 2023-01-24 02:32:01.328063: step: 564/464, loss: 0.07297311723232269 2023-01-24 02:32:02.093690: step: 566/464, loss: 0.12867839634418488 2023-01-24 02:32:02.683636: step: 568/464, loss: 0.04996206983923912 2023-01-24 02:32:03.262916: step: 570/464, loss: 0.1706579178571701 2023-01-24 02:32:03.897670: step: 572/464, loss: 0.16440588235855103 2023-01-24 02:32:04.531601: step: 574/464, loss: 0.20301684737205505 2023-01-24 02:32:05.200094: step: 576/464, loss: 0.04953206703066826 2023-01-24 02:32:05.843741: step: 578/464, loss: 0.041960328817367554 2023-01-24 02:32:06.482065: step: 580/464, loss: 0.02059108205139637 2023-01-24 02:32:07.095077: step: 582/464, loss: 0.17526574432849884 2023-01-24 02:32:07.725541: step: 584/464, loss: 0.18093959987163544 2023-01-24 02:32:08.345179: step: 586/464, loss: 0.2419964224100113 2023-01-24 02:32:08.927444: step: 588/464, loss: 0.11984448879957199 2023-01-24 02:32:09.525616: step: 590/464, loss: 0.1029224768280983 2023-01-24 02:32:10.174921: step: 592/464, loss: 0.034720923751592636 2023-01-24 02:32:10.772474: step: 594/464, loss: 0.05716710537672043 2023-01-24 02:32:11.419070: step: 596/464, loss: 0.51500403881073 2023-01-24 02:32:12.073719: step: 598/464, loss: 0.2622247040271759 2023-01-24 02:32:12.693642: step: 600/464, loss: 0.5692510008811951 2023-01-24 02:32:13.312763: step: 602/464, loss: 0.017966220155358315 2023-01-24 02:32:13.943609: step: 604/464, loss: 0.6535645723342896 2023-01-24 02:32:14.523297: step: 606/464, loss: 0.12797455489635468 2023-01-24 02:32:15.261962: step: 608/464, loss: 0.13018736243247986 2023-01-24 02:32:15.843617: step: 610/464, loss: 0.13536271452903748 2023-01-24 02:32:16.485350: step: 612/464, loss: 0.065898597240448 2023-01-24 02:32:17.066458: step: 614/464, loss: 0.049187514930963516 2023-01-24 02:32:17.708971: step: 616/464, loss: 0.12127663195133209 2023-01-24 02:32:18.343807: step: 618/464, loss: 0.3113647699356079 2023-01-24 02:32:18.977578: step: 620/464, loss: 0.0806652083992958 2023-01-24 02:32:19.587188: step: 622/464, loss: 0.037088535726070404 2023-01-24 02:32:20.218447: step: 624/464, loss: 0.1297650933265686 2023-01-24 02:32:20.770999: step: 626/464, loss: 0.2142733335494995 2023-01-24 02:32:21.324572: step: 628/464, loss: 0.1200641393661499 2023-01-24 02:32:22.009347: step: 630/464, loss: 0.20464667677879333 2023-01-24 02:32:22.658966: step: 632/464, loss: 0.04464574530720711 2023-01-24 02:32:23.263066: step: 634/464, loss: 0.03455955907702446 2023-01-24 02:32:23.860170: step: 636/464, loss: 2.4867355823516846 2023-01-24 02:32:24.518987: step: 638/464, loss: 0.09488419443368912 2023-01-24 02:32:25.163917: step: 640/464, loss: 0.09570202976465225 2023-01-24 02:32:25.832251: step: 642/464, loss: 0.6297911405563354 2023-01-24 02:32:26.470809: step: 644/464, loss: 0.17018218338489532 2023-01-24 02:32:27.129927: step: 646/464, loss: 0.037947215139865875 2023-01-24 02:32:27.728121: step: 648/464, loss: 0.0291412565857172 2023-01-24 02:32:28.289367: step: 650/464, loss: 0.3197057247161865 2023-01-24 02:32:28.890775: step: 652/464, loss: 0.3060678243637085 2023-01-24 02:32:29.492661: step: 654/464, loss: 0.3409462869167328 2023-01-24 02:32:30.094833: step: 656/464, loss: 0.11322534084320068 2023-01-24 02:32:30.775906: step: 658/464, loss: 0.06705331057310104 2023-01-24 02:32:31.389106: step: 660/464, loss: 0.5259943604469299 2023-01-24 02:32:32.059529: step: 662/464, loss: 0.08499801158905029 2023-01-24 02:32:32.777716: step: 664/464, loss: 0.3869590759277344 2023-01-24 02:32:33.418336: step: 666/464, loss: 0.47266441583633423 2023-01-24 02:32:34.014543: step: 668/464, loss: 0.15361620485782623 2023-01-24 02:32:34.635008: step: 670/464, loss: 0.039505161345005035 2023-01-24 02:32:35.252036: step: 672/464, loss: 0.16262929141521454 2023-01-24 02:32:35.837190: step: 674/464, loss: 0.07864541560411453 2023-01-24 02:32:36.425735: step: 676/464, loss: 0.11444167792797089 2023-01-24 02:32:37.080734: step: 678/464, loss: 0.15262271463871002 2023-01-24 02:32:37.806412: step: 680/464, loss: 0.1119389459490776 2023-01-24 02:32:38.416686: step: 682/464, loss: 0.21531479060649872 2023-01-24 02:32:39.080936: step: 684/464, loss: 0.3819708824157715 2023-01-24 02:32:39.638878: step: 686/464, loss: 0.06918393820524216 2023-01-24 02:32:40.221183: step: 688/464, loss: 0.03321441262960434 2023-01-24 02:32:40.867537: step: 690/464, loss: 0.2979850172996521 2023-01-24 02:32:41.530276: step: 692/464, loss: 0.04416452348232269 2023-01-24 02:32:42.171685: step: 694/464, loss: 0.1963769644498825 2023-01-24 02:32:42.727060: step: 696/464, loss: 0.04243450611829758 2023-01-24 02:32:43.365535: step: 698/464, loss: 0.10437124967575073 2023-01-24 02:32:43.957958: step: 700/464, loss: 0.15086588263511658 2023-01-24 02:32:44.641722: step: 702/464, loss: 0.08452273905277252 2023-01-24 02:32:45.259178: step: 704/464, loss: 0.1438084840774536 2023-01-24 02:32:45.934409: step: 706/464, loss: 0.20095717906951904 2023-01-24 02:32:46.537865: step: 708/464, loss: 0.12397371977567673 2023-01-24 02:32:47.233127: step: 710/464, loss: 0.12223966419696808 2023-01-24 02:32:47.873871: step: 712/464, loss: 0.16863887012004852 2023-01-24 02:32:48.464672: step: 714/464, loss: 0.1033097431063652 2023-01-24 02:32:49.156135: step: 716/464, loss: 0.12905749678611755 2023-01-24 02:32:49.778542: step: 718/464, loss: 0.12600231170654297 2023-01-24 02:32:50.441337: step: 720/464, loss: 0.08024387806653976 2023-01-24 02:32:51.047509: step: 722/464, loss: 0.08329156041145325 2023-01-24 02:32:51.581248: step: 724/464, loss: 0.12820667028427124 2023-01-24 02:32:52.139363: step: 726/464, loss: 0.06663686782121658 2023-01-24 02:32:52.714195: step: 728/464, loss: 0.12871386110782623 2023-01-24 02:32:53.337147: step: 730/464, loss: 0.15328256785869598 2023-01-24 02:32:53.928921: step: 732/464, loss: 0.08829231560230255 2023-01-24 02:32:54.534491: step: 734/464, loss: 0.11055053025484085 2023-01-24 02:32:55.189785: step: 736/464, loss: 0.14831359684467316 2023-01-24 02:32:55.772026: step: 738/464, loss: 0.12051140516996384 2023-01-24 02:32:56.355184: step: 740/464, loss: 0.15838512778282166 2023-01-24 02:32:56.993982: step: 742/464, loss: 0.18228304386138916 2023-01-24 02:32:57.619191: step: 744/464, loss: 0.07749143987894058 2023-01-24 02:32:58.222258: step: 746/464, loss: 0.11602754145860672 2023-01-24 02:32:58.938941: step: 748/464, loss: 1.0120458602905273 2023-01-24 02:32:59.641146: step: 750/464, loss: 0.14229518175125122 2023-01-24 02:33:00.317659: step: 752/464, loss: 0.4490815997123718 2023-01-24 02:33:00.955651: step: 754/464, loss: 0.0855349525809288 2023-01-24 02:33:01.565243: step: 756/464, loss: 0.1280864179134369 2023-01-24 02:33:02.229517: step: 758/464, loss: 0.07882758975028992 2023-01-24 02:33:02.942895: step: 760/464, loss: 0.15018177032470703 2023-01-24 02:33:03.630249: step: 762/464, loss: 1.9219484329223633 2023-01-24 02:33:04.280777: step: 764/464, loss: 0.279685914516449 2023-01-24 02:33:04.884816: step: 766/464, loss: 0.1714082658290863 2023-01-24 02:33:05.499175: step: 768/464, loss: 1.316659688949585 2023-01-24 02:33:06.134136: step: 770/464, loss: 0.2550918459892273 2023-01-24 02:33:06.750418: step: 772/464, loss: 0.10818459093570709 2023-01-24 02:33:07.386561: step: 774/464, loss: 0.10291647166013718 2023-01-24 02:33:07.992018: step: 776/464, loss: 0.08849417418241501 2023-01-24 02:33:08.564815: step: 778/464, loss: 0.08703681081533432 2023-01-24 02:33:09.189949: step: 780/464, loss: 0.09158416092395782 2023-01-24 02:33:09.900036: step: 782/464, loss: 0.08443973958492279 2023-01-24 02:33:10.542644: step: 784/464, loss: 0.10147544741630554 2023-01-24 02:33:11.127606: step: 786/464, loss: 0.06591346114873886 2023-01-24 02:33:11.815722: step: 788/464, loss: 0.05924259498715401 2023-01-24 02:33:12.479074: step: 790/464, loss: 0.12014824897050858 2023-01-24 02:33:13.213978: step: 792/464, loss: 0.06217009946703911 2023-01-24 02:33:13.915764: step: 794/464, loss: 0.33827370405197144 2023-01-24 02:33:14.503298: step: 796/464, loss: 0.39951568841934204 2023-01-24 02:33:15.148616: step: 798/464, loss: 0.17067255079746246 2023-01-24 02:33:15.753028: step: 800/464, loss: 0.03321341052651405 2023-01-24 02:33:16.353429: step: 802/464, loss: 0.071148581802845 2023-01-24 02:33:16.957870: step: 804/464, loss: 0.09109427779912949 2023-01-24 02:33:17.587504: step: 806/464, loss: 0.10565490275621414 2023-01-24 02:33:18.252555: step: 808/464, loss: 0.21609793603420258 2023-01-24 02:33:18.841357: step: 810/464, loss: 0.1835024505853653 2023-01-24 02:33:19.436080: step: 812/464, loss: 1.816299319267273 2023-01-24 02:33:20.011130: step: 814/464, loss: 0.03708549588918686 2023-01-24 02:33:20.665582: step: 816/464, loss: 0.1517760008573532 2023-01-24 02:33:21.358925: step: 818/464, loss: 0.9188517928123474 2023-01-24 02:33:21.982405: step: 820/464, loss: 0.2169983685016632 2023-01-24 02:33:22.599316: step: 822/464, loss: 0.19979752600193024 2023-01-24 02:33:23.232172: step: 824/464, loss: 0.14540570974349976 2023-01-24 02:33:23.793742: step: 826/464, loss: 0.39509427547454834 2023-01-24 02:33:24.465635: step: 828/464, loss: 0.07251793146133423 2023-01-24 02:33:25.164570: step: 830/464, loss: 0.27849599719047546 2023-01-24 02:33:25.777690: step: 832/464, loss: 0.11490640789270401 2023-01-24 02:33:26.439953: step: 834/464, loss: 0.04840140417218208 2023-01-24 02:33:27.042193: step: 836/464, loss: 0.04847031831741333 2023-01-24 02:33:27.620854: step: 838/464, loss: 0.16373252868652344 2023-01-24 02:33:28.221252: step: 840/464, loss: 0.36391955614089966 2023-01-24 02:33:28.824946: step: 842/464, loss: 0.11276250332593918 2023-01-24 02:33:29.464923: step: 844/464, loss: 0.054898716509342194 2023-01-24 02:33:30.087333: step: 846/464, loss: 1.3503093719482422 2023-01-24 02:33:30.729527: step: 848/464, loss: 0.17195822298526764 2023-01-24 02:33:31.345590: step: 850/464, loss: 0.12788565456867218 2023-01-24 02:33:31.969892: step: 852/464, loss: 0.07781606167554855 2023-01-24 02:33:32.614685: step: 854/464, loss: 0.09696003794670105 2023-01-24 02:33:33.224351: step: 856/464, loss: 0.08800847083330154 2023-01-24 02:33:33.848317: step: 858/464, loss: 0.1215500682592392 2023-01-24 02:33:34.489531: step: 860/464, loss: 0.19684864580631256 2023-01-24 02:33:35.121583: step: 862/464, loss: 0.12104436010122299 2023-01-24 02:33:35.799271: step: 864/464, loss: 0.09140199422836304 2023-01-24 02:33:36.470310: step: 866/464, loss: 0.166214719414711 2023-01-24 02:33:37.164137: step: 868/464, loss: 0.18114838004112244 2023-01-24 02:33:37.833177: step: 870/464, loss: 0.10851343721151352 2023-01-24 02:33:38.507564: step: 872/464, loss: 0.12601056694984436 2023-01-24 02:33:39.085796: step: 874/464, loss: 0.12040676921606064 2023-01-24 02:33:39.752922: step: 876/464, loss: 0.6361841559410095 2023-01-24 02:33:40.326202: step: 878/464, loss: 0.14714059233665466 2023-01-24 02:33:40.952164: step: 880/464, loss: 0.09067393839359283 2023-01-24 02:33:41.529054: step: 882/464, loss: 0.022877417504787445 2023-01-24 02:33:42.181053: step: 884/464, loss: 0.05267036333680153 2023-01-24 02:33:42.901919: step: 886/464, loss: 0.11166580766439438 2023-01-24 02:33:43.616985: step: 888/464, loss: 0.12089953571557999 2023-01-24 02:33:44.179085: step: 890/464, loss: 0.12612242996692657 2023-01-24 02:33:44.846146: step: 892/464, loss: 0.06378553807735443 2023-01-24 02:33:45.412412: step: 894/464, loss: 0.07771226018667221 2023-01-24 02:33:46.077392: step: 896/464, loss: 0.18652714788913727 2023-01-24 02:33:46.733331: step: 898/464, loss: 0.1391475349664688 2023-01-24 02:33:47.361734: step: 900/464, loss: 0.0657849982380867 2023-01-24 02:33:47.956923: step: 902/464, loss: 0.05659789219498634 2023-01-24 02:33:48.569656: step: 904/464, loss: 0.1160811260342598 2023-01-24 02:33:49.190106: step: 906/464, loss: 0.17878614366054535 2023-01-24 02:33:49.819311: step: 908/464, loss: 0.050274789333343506 2023-01-24 02:33:50.406565: step: 910/464, loss: 0.08825455605983734 2023-01-24 02:33:51.030239: step: 912/464, loss: 1.2888163328170776 2023-01-24 02:33:51.667879: step: 914/464, loss: 0.2394239753484726 2023-01-24 02:33:52.219497: step: 916/464, loss: 0.1337050199508667 2023-01-24 02:33:52.863410: step: 918/464, loss: 0.11263155937194824 2023-01-24 02:33:53.549676: step: 920/464, loss: 0.2214917689561844 2023-01-24 02:33:54.191013: step: 922/464, loss: 0.12152550369501114 2023-01-24 02:33:54.797324: step: 924/464, loss: 0.27968689799308777 2023-01-24 02:33:55.426915: step: 926/464, loss: 1.3776767253875732 2023-01-24 02:33:56.144799: step: 928/464, loss: 0.6834884881973267 2023-01-24 02:33:56.684922: step: 930/464, loss: 0.16125810146331787 ================================================== Loss: 0.226 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3433379120879121, 'r': 0.3303080103008946, 'f1': 0.3366969466703509}, 'combined': 0.24809248702025855, 'epoch': 14} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3234457610091689, 'r': 0.29996026917118696, 'f1': 0.3112606321967109}, 'combined': 0.20320642309215323, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3327511195877217, 'r': 0.3340139321857776, 'f1': 0.3333813300414863}, 'combined': 0.2456494010832004, 'epoch': 14} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.34126298940920263, 'r': 0.3051919932859873, 'f1': 0.3222211438089802}, 'combined': 0.21036199025871247, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34244791666666663, 'r': 0.31309523809523804, 'f1': 0.3271144278606965}, 'combined': 0.21807628524046432, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3106060606060606, 'r': 0.44565217391304346, 'f1': 0.3660714285714286}, 'combined': 0.1830357142857143, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:36:38.882018: step: 2/464, loss: 0.23376545310020447 2023-01-24 02:36:39.530116: step: 4/464, loss: 0.14027710258960724 2023-01-24 02:36:40.192470: step: 6/464, loss: 0.1160091981291771 2023-01-24 02:36:40.800315: step: 8/464, loss: 0.05574905499815941 2023-01-24 02:36:41.387948: step: 10/464, loss: 0.20945367217063904 2023-01-24 02:36:41.993485: step: 12/464, loss: 0.04651932418346405 2023-01-24 02:36:42.657863: step: 14/464, loss: 0.07678551226854324 2023-01-24 02:36:43.410242: step: 16/464, loss: 0.03442936763167381 2023-01-24 02:36:44.177007: step: 18/464, loss: 0.12439695000648499 2023-01-24 02:36:44.808494: step: 20/464, loss: 0.1926797479391098 2023-01-24 02:36:45.455059: step: 22/464, loss: 0.05314495787024498 2023-01-24 02:36:46.073714: step: 24/464, loss: 0.13110500574111938 2023-01-24 02:36:46.680872: step: 26/464, loss: 0.024646729230880737 2023-01-24 02:36:47.291984: step: 28/464, loss: 0.08958971500396729 2023-01-24 02:36:47.861566: step: 30/464, loss: 0.2605423629283905 2023-01-24 02:36:48.503997: step: 32/464, loss: 0.23618650436401367 2023-01-24 02:36:49.085812: step: 34/464, loss: 0.018489956855773926 2023-01-24 02:36:49.686657: step: 36/464, loss: 0.09872162342071533 2023-01-24 02:36:50.337172: step: 38/464, loss: 0.1872928887605667 2023-01-24 02:36:50.989823: step: 40/464, loss: 0.29993316531181335 2023-01-24 02:36:51.586415: step: 42/464, loss: 0.11506807804107666 2023-01-24 02:36:52.176224: step: 44/464, loss: 0.07895046472549438 2023-01-24 02:36:52.836168: step: 46/464, loss: 0.13309316337108612 2023-01-24 02:36:53.428139: step: 48/464, loss: 0.23003296554088593 2023-01-24 02:36:54.134134: step: 50/464, loss: 0.05665964633226395 2023-01-24 02:36:54.689534: step: 52/464, loss: 0.04237835109233856 2023-01-24 02:36:55.271217: step: 54/464, loss: 0.10793208330869675 2023-01-24 02:36:55.985589: step: 56/464, loss: 0.08486316353082657 2023-01-24 02:36:56.670291: step: 58/464, loss: 0.07201363146305084 2023-01-24 02:36:57.322289: step: 60/464, loss: 0.017657579854130745 2023-01-24 02:36:57.937356: step: 62/464, loss: 0.07392582297325134 2023-01-24 02:36:58.553584: step: 64/464, loss: 1.4638862609863281 2023-01-24 02:36:59.210136: step: 66/464, loss: 0.18318338692188263 2023-01-24 02:36:59.794701: step: 68/464, loss: 0.24032913148403168 2023-01-24 02:37:00.412508: step: 70/464, loss: 0.0510554239153862 2023-01-24 02:37:01.034524: step: 72/464, loss: 0.07235153019428253 2023-01-24 02:37:01.730350: step: 74/464, loss: 0.2249719351530075 2023-01-24 02:37:02.369885: step: 76/464, loss: 0.12286219000816345 2023-01-24 02:37:03.008320: step: 78/464, loss: 0.40949761867523193 2023-01-24 02:37:03.664189: step: 80/464, loss: 0.17456267774105072 2023-01-24 02:37:04.303247: step: 82/464, loss: 0.03343282639980316 2023-01-24 02:37:04.870674: step: 84/464, loss: 0.10452353209257126 2023-01-24 02:37:05.473976: step: 86/464, loss: 1.240683674812317 2023-01-24 02:37:06.133217: step: 88/464, loss: 0.14128847420215607 2023-01-24 02:37:06.708292: step: 90/464, loss: 0.13347876071929932 2023-01-24 02:37:07.273953: step: 92/464, loss: 0.1920931339263916 2023-01-24 02:37:07.860279: step: 94/464, loss: 0.02162131667137146 2023-01-24 02:37:08.471805: step: 96/464, loss: 0.019855773076415062 2023-01-24 02:37:09.011302: step: 98/464, loss: 0.017085513100028038 2023-01-24 02:37:09.622475: step: 100/464, loss: 0.12481603771448135 2023-01-24 02:37:10.214074: step: 102/464, loss: 0.007431838195770979 2023-01-24 02:37:10.896871: step: 104/464, loss: 0.08557116985321045 2023-01-24 02:37:11.504954: step: 106/464, loss: 0.044302843511104584 2023-01-24 02:37:12.166205: step: 108/464, loss: 0.2067716419696808 2023-01-24 02:37:12.879882: step: 110/464, loss: 0.13873068988323212 2023-01-24 02:37:13.554522: step: 112/464, loss: 0.1558220535516739 2023-01-24 02:37:14.202771: step: 114/464, loss: 0.052157383412122726 2023-01-24 02:37:14.798455: step: 116/464, loss: 0.0652746856212616 2023-01-24 02:37:15.475933: step: 118/464, loss: 0.02936823107302189 2023-01-24 02:37:16.050693: step: 120/464, loss: 0.055115893483161926 2023-01-24 02:37:16.676954: step: 122/464, loss: 0.19929994642734528 2023-01-24 02:37:17.235543: step: 124/464, loss: 0.06124817207455635 2023-01-24 02:37:17.950706: step: 126/464, loss: 0.6890258193016052 2023-01-24 02:37:18.554734: step: 128/464, loss: 0.08555949479341507 2023-01-24 02:37:19.184195: step: 130/464, loss: 0.046023499220609665 2023-01-24 02:37:19.881134: step: 132/464, loss: 0.1614185869693756 2023-01-24 02:37:20.498501: step: 134/464, loss: 0.4741933047771454 2023-01-24 02:37:21.143212: step: 136/464, loss: 0.09928226470947266 2023-01-24 02:37:21.794811: step: 138/464, loss: 0.06665418297052383 2023-01-24 02:37:22.329021: step: 140/464, loss: 0.022180533036589622 2023-01-24 02:37:23.002324: step: 142/464, loss: 0.04915543273091316 2023-01-24 02:37:23.677162: step: 144/464, loss: 0.29495981335639954 2023-01-24 02:37:24.274488: step: 146/464, loss: 0.05937875807285309 2023-01-24 02:37:24.889122: step: 148/464, loss: 0.0950523167848587 2023-01-24 02:37:25.476555: step: 150/464, loss: 0.15129932761192322 2023-01-24 02:37:26.085522: step: 152/464, loss: 0.17876113951206207 2023-01-24 02:37:26.680425: step: 154/464, loss: 0.419171005487442 2023-01-24 02:37:27.319881: step: 156/464, loss: 0.0766398161649704 2023-01-24 02:37:27.935725: step: 158/464, loss: 0.3040328919887543 2023-01-24 02:37:28.508503: step: 160/464, loss: 0.13785137236118317 2023-01-24 02:37:29.121441: step: 162/464, loss: 0.05590350180864334 2023-01-24 02:37:29.698752: step: 164/464, loss: 0.22143127024173737 2023-01-24 02:37:30.342416: step: 166/464, loss: 0.24892783164978027 2023-01-24 02:37:30.939521: step: 168/464, loss: 0.024986477568745613 2023-01-24 02:37:31.598473: step: 170/464, loss: 0.04494039714336395 2023-01-24 02:37:32.173430: step: 172/464, loss: 0.12915894389152527 2023-01-24 02:37:32.850663: step: 174/464, loss: 0.061370573937892914 2023-01-24 02:37:33.501435: step: 176/464, loss: 0.09692001342773438 2023-01-24 02:37:34.183192: step: 178/464, loss: 0.058332499116659164 2023-01-24 02:37:34.828931: step: 180/464, loss: 0.043565262109041214 2023-01-24 02:37:35.416765: step: 182/464, loss: 0.06048927828669548 2023-01-24 02:37:36.006723: step: 184/464, loss: 0.04548937454819679 2023-01-24 02:37:36.600863: step: 186/464, loss: 0.13006599247455597 2023-01-24 02:37:37.181823: step: 188/464, loss: 0.18524733185768127 2023-01-24 02:37:37.806365: step: 190/464, loss: 0.5778462290763855 2023-01-24 02:37:38.487504: step: 192/464, loss: 0.1461637318134308 2023-01-24 02:37:39.106868: step: 194/464, loss: 0.09309983998537064 2023-01-24 02:37:39.740229: step: 196/464, loss: 0.07581238448619843 2023-01-24 02:37:40.334378: step: 198/464, loss: 0.14314910769462585 2023-01-24 02:37:40.906265: step: 200/464, loss: 0.37534722685813904 2023-01-24 02:37:41.550760: step: 202/464, loss: 0.08187779039144516 2023-01-24 02:37:42.083045: step: 204/464, loss: 0.11266358941793442 2023-01-24 02:37:42.701236: step: 206/464, loss: 1.0839660167694092 2023-01-24 02:37:43.326797: step: 208/464, loss: 0.059994887560606 2023-01-24 02:37:43.919318: step: 210/464, loss: 0.01351864542812109 2023-01-24 02:37:44.642947: step: 212/464, loss: 0.07712341845035553 2023-01-24 02:37:45.249290: step: 214/464, loss: 0.20243489742279053 2023-01-24 02:37:45.873196: step: 216/464, loss: 0.1516103744506836 2023-01-24 02:37:46.563442: step: 218/464, loss: 0.06254269182682037 2023-01-24 02:37:47.207217: step: 220/464, loss: 0.05681382119655609 2023-01-24 02:37:47.860252: step: 222/464, loss: 0.04472225531935692 2023-01-24 02:37:48.477473: step: 224/464, loss: 0.0798860639333725 2023-01-24 02:37:49.140697: step: 226/464, loss: 0.09117922186851501 2023-01-24 02:37:49.754545: step: 228/464, loss: 0.06990282237529755 2023-01-24 02:37:50.405441: step: 230/464, loss: 0.38029158115386963 2023-01-24 02:37:51.056435: step: 232/464, loss: 0.3118628263473511 2023-01-24 02:37:51.699152: step: 234/464, loss: 0.19242215156555176 2023-01-24 02:37:52.399375: step: 236/464, loss: 0.06552839279174805 2023-01-24 02:37:53.066188: step: 238/464, loss: 0.15583792328834534 2023-01-24 02:37:53.711332: step: 240/464, loss: 0.060535915195941925 2023-01-24 02:37:54.276437: step: 242/464, loss: 0.17972353100776672 2023-01-24 02:37:54.890401: step: 244/464, loss: 0.02065899781882763 2023-01-24 02:37:55.499421: step: 246/464, loss: 0.02574816718697548 2023-01-24 02:37:56.086327: step: 248/464, loss: 0.08912333101034164 2023-01-24 02:37:56.729683: step: 250/464, loss: 0.10333779454231262 2023-01-24 02:37:57.371122: step: 252/464, loss: 0.07574377954006195 2023-01-24 02:37:57.969497: step: 254/464, loss: 0.06383416801691055 2023-01-24 02:37:58.626200: step: 256/464, loss: 0.048079632222652435 2023-01-24 02:37:59.224427: step: 258/464, loss: 0.4219626784324646 2023-01-24 02:37:59.805261: step: 260/464, loss: 0.07055892050266266 2023-01-24 02:38:00.399411: step: 262/464, loss: 0.3615981936454773 2023-01-24 02:38:01.003969: step: 264/464, loss: 0.14180190861225128 2023-01-24 02:38:01.581466: step: 266/464, loss: 0.1585061401128769 2023-01-24 02:38:02.201845: step: 268/464, loss: 0.12235391139984131 2023-01-24 02:38:02.882053: step: 270/464, loss: 0.2497805804014206 2023-01-24 02:38:03.588789: step: 272/464, loss: 0.0508241206407547 2023-01-24 02:38:04.236215: step: 274/464, loss: 0.1656421422958374 2023-01-24 02:38:04.907675: step: 276/464, loss: 0.03251105174422264 2023-01-24 02:38:05.571844: step: 278/464, loss: 0.052973296493291855 2023-01-24 02:38:06.160517: step: 280/464, loss: 0.38845095038414 2023-01-24 02:38:06.817664: step: 282/464, loss: 0.5072574615478516 2023-01-24 02:38:07.548094: step: 284/464, loss: 0.05709134787321091 2023-01-24 02:38:08.191296: step: 286/464, loss: 0.055934157222509384 2023-01-24 02:38:08.814292: step: 288/464, loss: 0.15055230259895325 2023-01-24 02:38:09.391170: step: 290/464, loss: 0.07166709005832672 2023-01-24 02:38:10.025277: step: 292/464, loss: 0.048629552125930786 2023-01-24 02:38:10.609569: step: 294/464, loss: 0.14174535870552063 2023-01-24 02:38:11.239889: step: 296/464, loss: 0.14061374962329865 2023-01-24 02:38:11.843554: step: 298/464, loss: 0.1977543979883194 2023-01-24 02:38:12.471051: step: 300/464, loss: 0.2333637773990631 2023-01-24 02:38:13.100369: step: 302/464, loss: 0.15602213144302368 2023-01-24 02:38:13.705563: step: 304/464, loss: 0.11040691286325455 2023-01-24 02:38:14.340338: step: 306/464, loss: 0.06026787310838699 2023-01-24 02:38:14.972241: step: 308/464, loss: 0.04550894722342491 2023-01-24 02:38:15.573409: step: 310/464, loss: 0.15067565441131592 2023-01-24 02:38:16.192226: step: 312/464, loss: 0.18284235894680023 2023-01-24 02:38:16.859227: step: 314/464, loss: 0.09937175363302231 2023-01-24 02:38:17.540813: step: 316/464, loss: 0.07563548535108566 2023-01-24 02:38:18.147010: step: 318/464, loss: 0.066224105656147 2023-01-24 02:38:18.735274: step: 320/464, loss: 0.14571310579776764 2023-01-24 02:38:19.327692: step: 322/464, loss: 0.0441647469997406 2023-01-24 02:38:19.916903: step: 324/464, loss: 0.061574917286634445 2023-01-24 02:38:20.541901: step: 326/464, loss: 2.2150657176971436 2023-01-24 02:38:21.227044: step: 328/464, loss: 0.07661069184541702 2023-01-24 02:38:21.899518: step: 330/464, loss: 0.1501404345035553 2023-01-24 02:38:22.489001: step: 332/464, loss: 0.10598700493574142 2023-01-24 02:38:23.130862: step: 334/464, loss: 0.17445522546768188 2023-01-24 02:38:23.735829: step: 336/464, loss: 0.018463660031557083 2023-01-24 02:38:24.378936: step: 338/464, loss: 0.22861771285533905 2023-01-24 02:38:25.051004: step: 340/464, loss: 0.43219172954559326 2023-01-24 02:38:25.661952: step: 342/464, loss: 0.05386766791343689 2023-01-24 02:38:26.249835: step: 344/464, loss: 0.08726462721824646 2023-01-24 02:38:26.943567: step: 346/464, loss: 0.18939970433712006 2023-01-24 02:38:27.581231: step: 348/464, loss: 0.062296636402606964 2023-01-24 02:38:28.163648: step: 350/464, loss: 0.11539874225854874 2023-01-24 02:38:28.786815: step: 352/464, loss: 0.23490335047245026 2023-01-24 02:38:29.453370: step: 354/464, loss: 0.09849587082862854 2023-01-24 02:38:30.137151: step: 356/464, loss: 0.14270582795143127 2023-01-24 02:38:30.724373: step: 358/464, loss: 0.11370982229709625 2023-01-24 02:38:31.571084: step: 360/464, loss: 0.17099055647850037 2023-01-24 02:38:32.137908: step: 362/464, loss: 0.04839561507105827 2023-01-24 02:38:32.757882: step: 364/464, loss: 0.22834891080856323 2023-01-24 02:38:33.336695: step: 366/464, loss: 0.1289820820093155 2023-01-24 02:38:33.956060: step: 368/464, loss: 0.07608374953269958 2023-01-24 02:38:34.576034: step: 370/464, loss: 0.10896164923906326 2023-01-24 02:38:35.176049: step: 372/464, loss: 0.08024145662784576 2023-01-24 02:38:35.832473: step: 374/464, loss: 0.23710879683494568 2023-01-24 02:38:36.376852: step: 376/464, loss: 0.17971497774124146 2023-01-24 02:38:36.937925: step: 378/464, loss: 0.020017314702272415 2023-01-24 02:38:37.484496: step: 380/464, loss: 0.06661124527454376 2023-01-24 02:38:38.100172: step: 382/464, loss: 0.10413400828838348 2023-01-24 02:38:38.724578: step: 384/464, loss: 0.495614618062973 2023-01-24 02:38:39.303486: step: 386/464, loss: 0.06679220497608185 2023-01-24 02:38:39.902509: step: 388/464, loss: 0.5594455599784851 2023-01-24 02:38:40.498146: step: 390/464, loss: 0.0512617863714695 2023-01-24 02:38:41.122371: step: 392/464, loss: 0.04496656358242035 2023-01-24 02:38:41.734970: step: 394/464, loss: 0.04869386553764343 2023-01-24 02:38:42.387791: step: 396/464, loss: 0.16523469984531403 2023-01-24 02:38:43.008804: step: 398/464, loss: 0.36372125148773193 2023-01-24 02:38:43.660913: step: 400/464, loss: 0.0999399796128273 2023-01-24 02:38:44.284376: step: 402/464, loss: 0.07038531452417374 2023-01-24 02:38:44.864032: step: 404/464, loss: 0.06395240873098373 2023-01-24 02:38:45.515233: step: 406/464, loss: 0.15672455728054047 2023-01-24 02:38:46.136281: step: 408/464, loss: 0.564751923084259 2023-01-24 02:38:46.807107: step: 410/464, loss: 0.11285138130187988 2023-01-24 02:38:47.485564: step: 412/464, loss: 0.0786018893122673 2023-01-24 02:38:48.114576: step: 414/464, loss: 0.05017438158392906 2023-01-24 02:38:48.735468: step: 416/464, loss: 0.2782638669013977 2023-01-24 02:38:49.314337: step: 418/464, loss: 0.5186176300048828 2023-01-24 02:38:49.807027: step: 420/464, loss: 0.10362860560417175 2023-01-24 02:38:50.409356: step: 422/464, loss: 0.07758516073226929 2023-01-24 02:38:51.062175: step: 424/464, loss: 0.12924860417842865 2023-01-24 02:38:51.790020: step: 426/464, loss: 0.33915987610816956 2023-01-24 02:38:52.424680: step: 428/464, loss: 0.05942815542221069 2023-01-24 02:38:53.078583: step: 430/464, loss: 0.35485219955444336 2023-01-24 02:38:53.742344: step: 432/464, loss: 0.11021588742733002 2023-01-24 02:38:54.280189: step: 434/464, loss: 0.4730512499809265 2023-01-24 02:38:54.872436: step: 436/464, loss: 0.12729057669639587 2023-01-24 02:38:55.428555: step: 438/464, loss: 0.09083368629217148 2023-01-24 02:38:56.062754: step: 440/464, loss: 0.08375909924507141 2023-01-24 02:38:56.634104: step: 442/464, loss: 0.14418822526931763 2023-01-24 02:38:57.250506: step: 444/464, loss: 1.2189446687698364 2023-01-24 02:38:57.878650: step: 446/464, loss: 0.046907439827919006 2023-01-24 02:38:58.523870: step: 448/464, loss: 0.1263635754585266 2023-01-24 02:38:59.122954: step: 450/464, loss: 0.07947744429111481 2023-01-24 02:38:59.825385: step: 452/464, loss: 0.4060099124908447 2023-01-24 02:39:00.457304: step: 454/464, loss: 0.05249390751123428 2023-01-24 02:39:01.064348: step: 456/464, loss: 0.06890036165714264 2023-01-24 02:39:01.684736: step: 458/464, loss: 0.1694265455007553 2023-01-24 02:39:02.308332: step: 460/464, loss: 0.1672012060880661 2023-01-24 02:39:02.935941: step: 462/464, loss: 0.12467416375875473 2023-01-24 02:39:03.526401: step: 464/464, loss: 0.2216804176568985 2023-01-24 02:39:04.220863: step: 466/464, loss: 0.02688206359744072 2023-01-24 02:39:04.816034: step: 468/464, loss: 0.15295109152793884 2023-01-24 02:39:05.451960: step: 470/464, loss: 0.17021578550338745 2023-01-24 02:39:06.031480: step: 472/464, loss: 0.08870581537485123 2023-01-24 02:39:06.646201: step: 474/464, loss: 0.057112276554107666 2023-01-24 02:39:07.263705: step: 476/464, loss: 0.11644468456506729 2023-01-24 02:39:07.887615: step: 478/464, loss: 0.10197106748819351 2023-01-24 02:39:08.523916: step: 480/464, loss: 0.345088928937912 2023-01-24 02:39:09.124015: step: 482/464, loss: 0.15798737108707428 2023-01-24 02:39:09.715083: step: 484/464, loss: 0.45416873693466187 2023-01-24 02:39:10.392814: step: 486/464, loss: 0.2126789689064026 2023-01-24 02:39:11.012954: step: 488/464, loss: 0.09349019825458527 2023-01-24 02:39:11.740112: step: 490/464, loss: 0.1536969393491745 2023-01-24 02:39:12.306447: step: 492/464, loss: 0.1000894233584404 2023-01-24 02:39:12.904110: step: 494/464, loss: 0.08661019057035446 2023-01-24 02:39:13.527226: step: 496/464, loss: 0.034136559814214706 2023-01-24 02:39:14.200379: step: 498/464, loss: 0.9338065385818481 2023-01-24 02:39:14.796447: step: 500/464, loss: 0.047420501708984375 2023-01-24 02:39:15.400244: step: 502/464, loss: 0.09700897336006165 2023-01-24 02:39:16.033852: step: 504/464, loss: 0.3427742123603821 2023-01-24 02:39:16.706832: step: 506/464, loss: 0.2460949569940567 2023-01-24 02:39:17.330069: step: 508/464, loss: 0.057013846933841705 2023-01-24 02:39:17.920844: step: 510/464, loss: 0.14210188388824463 2023-01-24 02:39:18.528931: step: 512/464, loss: 0.036092836409807205 2023-01-24 02:39:19.145631: step: 514/464, loss: 0.23894937336444855 2023-01-24 02:39:19.794519: step: 516/464, loss: 0.017120173200964928 2023-01-24 02:39:20.438222: step: 518/464, loss: 0.08431785553693771 2023-01-24 02:39:21.013433: step: 520/464, loss: 0.5087003111839294 2023-01-24 02:39:21.639879: step: 522/464, loss: 0.060391779989004135 2023-01-24 02:39:22.271926: step: 524/464, loss: 0.11891240626573563 2023-01-24 02:39:22.951910: step: 526/464, loss: 0.3534661829471588 2023-01-24 02:39:23.559365: step: 528/464, loss: 0.21036121249198914 2023-01-24 02:39:24.137963: step: 530/464, loss: 0.16232508420944214 2023-01-24 02:39:24.745823: step: 532/464, loss: 0.1413215547800064 2023-01-24 02:39:25.385343: step: 534/464, loss: 0.04500097036361694 2023-01-24 02:39:26.017841: step: 536/464, loss: 0.0697244256734848 2023-01-24 02:39:26.709170: step: 538/464, loss: 0.12187471985816956 2023-01-24 02:39:27.448036: step: 540/464, loss: 0.3152616620063782 2023-01-24 02:39:28.070904: step: 542/464, loss: 0.18337717652320862 2023-01-24 02:39:28.760582: step: 544/464, loss: 0.22301732003688812 2023-01-24 02:39:29.355911: step: 546/464, loss: 0.07435160130262375 2023-01-24 02:39:30.016886: step: 548/464, loss: 0.04245968163013458 2023-01-24 02:39:30.731901: step: 550/464, loss: 0.08916405588388443 2023-01-24 02:39:31.351470: step: 552/464, loss: 0.01657041907310486 2023-01-24 02:39:31.973066: step: 554/464, loss: 0.471982479095459 2023-01-24 02:39:32.691497: step: 556/464, loss: 0.1751091629266739 2023-01-24 02:39:33.324952: step: 558/464, loss: 0.10122612118721008 2023-01-24 02:39:33.924318: step: 560/464, loss: 0.11630212515592575 2023-01-24 02:39:34.556790: step: 562/464, loss: 0.014227952808141708 2023-01-24 02:39:35.146080: step: 564/464, loss: 0.20856709778308868 2023-01-24 02:39:35.766758: step: 566/464, loss: 0.11056730896234512 2023-01-24 02:39:36.377232: step: 568/464, loss: 0.0749744102358818 2023-01-24 02:39:37.024043: step: 570/464, loss: 0.04471934586763382 2023-01-24 02:39:37.635942: step: 572/464, loss: 0.15633288025856018 2023-01-24 02:39:38.268046: step: 574/464, loss: 0.11412084847688675 2023-01-24 02:39:38.891606: step: 576/464, loss: 0.028770413249731064 2023-01-24 02:39:39.526953: step: 578/464, loss: 0.17178964614868164 2023-01-24 02:39:40.108254: step: 580/464, loss: 0.07429475337266922 2023-01-24 02:39:40.706500: step: 582/464, loss: 0.14217743277549744 2023-01-24 02:39:41.288213: step: 584/464, loss: 0.10671277344226837 2023-01-24 02:39:41.924829: step: 586/464, loss: 0.31617486476898193 2023-01-24 02:39:42.497892: step: 588/464, loss: 0.2289099395275116 2023-01-24 02:39:43.199824: step: 590/464, loss: 0.2649511396884918 2023-01-24 02:39:43.799856: step: 592/464, loss: 0.2369571179151535 2023-01-24 02:39:44.425425: step: 594/464, loss: 0.26038309931755066 2023-01-24 02:39:45.154275: step: 596/464, loss: 0.41762369871139526 2023-01-24 02:39:45.804488: step: 598/464, loss: 0.18469442427158356 2023-01-24 02:39:46.524606: step: 600/464, loss: 0.032377939671278 2023-01-24 02:39:47.207175: step: 602/464, loss: 1.2633156776428223 2023-01-24 02:39:47.820666: step: 604/464, loss: 0.0722263976931572 2023-01-24 02:39:48.428260: step: 606/464, loss: 0.18451766669750214 2023-01-24 02:39:48.986282: step: 608/464, loss: 0.246455579996109 2023-01-24 02:39:49.591474: step: 610/464, loss: 0.15055091679096222 2023-01-24 02:39:50.228170: step: 612/464, loss: 0.082424096763134 2023-01-24 02:39:50.793200: step: 614/464, loss: 0.029938578605651855 2023-01-24 02:39:51.460091: step: 616/464, loss: 1.3979496955871582 2023-01-24 02:39:52.092798: step: 618/464, loss: 0.07138173282146454 2023-01-24 02:39:52.693017: step: 620/464, loss: 0.2069871574640274 2023-01-24 02:39:53.357464: step: 622/464, loss: 0.42995592951774597 2023-01-24 02:39:53.925404: step: 624/464, loss: 0.1266365945339203 2023-01-24 02:39:54.499078: step: 626/464, loss: 0.1062287762761116 2023-01-24 02:39:55.140669: step: 628/464, loss: 0.16082951426506042 2023-01-24 02:39:55.796079: step: 630/464, loss: 0.08530285209417343 2023-01-24 02:39:56.369621: step: 632/464, loss: 0.15649454295635223 2023-01-24 02:39:56.999160: step: 634/464, loss: 0.16694426536560059 2023-01-24 02:39:57.622430: step: 636/464, loss: 0.1181652843952179 2023-01-24 02:39:58.374873: step: 638/464, loss: 0.16633333265781403 2023-01-24 02:39:59.101759: step: 640/464, loss: 0.03431246429681778 2023-01-24 02:39:59.752514: step: 642/464, loss: 0.05832459032535553 2023-01-24 02:40:00.396027: step: 644/464, loss: 0.04486502707004547 2023-01-24 02:40:01.030723: step: 646/464, loss: 0.25312352180480957 2023-01-24 02:40:01.670881: step: 648/464, loss: 0.1561816781759262 2023-01-24 02:40:02.382971: step: 650/464, loss: 0.04630190134048462 2023-01-24 02:40:03.015644: step: 652/464, loss: 0.2901785969734192 2023-01-24 02:40:03.639797: step: 654/464, loss: 0.058462291955947876 2023-01-24 02:40:04.233205: step: 656/464, loss: 0.21496911346912384 2023-01-24 02:40:04.858555: step: 658/464, loss: 0.09897728264331818 2023-01-24 02:40:05.457095: step: 660/464, loss: 0.1999654620885849 2023-01-24 02:40:06.153195: step: 662/464, loss: 0.060551948845386505 2023-01-24 02:40:06.774839: step: 664/464, loss: 0.06704603880643845 2023-01-24 02:40:07.421645: step: 666/464, loss: 0.16565890610218048 2023-01-24 02:40:08.047723: step: 668/464, loss: 0.11661874502897263 2023-01-24 02:40:08.754623: step: 670/464, loss: 0.4740934371948242 2023-01-24 02:40:09.366569: step: 672/464, loss: 0.25017955899238586 2023-01-24 02:40:10.084603: step: 674/464, loss: 0.25479915738105774 2023-01-24 02:40:10.695981: step: 676/464, loss: 0.052261002361774445 2023-01-24 02:40:11.324532: step: 678/464, loss: 0.2774779796600342 2023-01-24 02:40:11.930381: step: 680/464, loss: 0.03868470713496208 2023-01-24 02:40:12.503485: step: 682/464, loss: 0.20814983546733856 2023-01-24 02:40:13.122283: step: 684/464, loss: 0.07299919426441193 2023-01-24 02:40:13.779764: step: 686/464, loss: 0.12103313952684402 2023-01-24 02:40:14.400384: step: 688/464, loss: 0.1205439493060112 2023-01-24 02:40:15.048163: step: 690/464, loss: 0.06843750178813934 2023-01-24 02:40:15.741380: step: 692/464, loss: 0.0651388168334961 2023-01-24 02:40:16.381207: step: 694/464, loss: 0.06578768789768219 2023-01-24 02:40:17.013376: step: 696/464, loss: 0.09666206687688828 2023-01-24 02:40:17.548303: step: 698/464, loss: 0.06669321656227112 2023-01-24 02:40:18.132942: step: 700/464, loss: 0.05347609147429466 2023-01-24 02:40:18.729585: step: 702/464, loss: 0.18711988627910614 2023-01-24 02:40:19.304198: step: 704/464, loss: 0.09071377664804459 2023-01-24 02:40:19.895851: step: 706/464, loss: 0.37969571352005005 2023-01-24 02:40:20.534230: step: 708/464, loss: 0.05282412841916084 2023-01-24 02:40:21.141739: step: 710/464, loss: 0.1803780049085617 2023-01-24 02:40:21.761242: step: 712/464, loss: 0.21614904701709747 2023-01-24 02:40:22.381014: step: 714/464, loss: 0.12261205166578293 2023-01-24 02:40:23.118110: step: 716/464, loss: 0.09753377735614777 2023-01-24 02:40:23.748672: step: 718/464, loss: 0.1277727484703064 2023-01-24 02:40:24.363140: step: 720/464, loss: 0.19646309316158295 2023-01-24 02:40:25.002639: step: 722/464, loss: 0.13632582128047943 2023-01-24 02:40:25.581610: step: 724/464, loss: 0.05022649094462395 2023-01-24 02:40:26.252845: step: 726/464, loss: 0.4702479839324951 2023-01-24 02:40:26.860805: step: 728/464, loss: 0.12363642454147339 2023-01-24 02:40:27.524759: step: 730/464, loss: 0.6109817028045654 2023-01-24 02:40:28.108824: step: 732/464, loss: 0.32869744300842285 2023-01-24 02:40:28.747638: step: 734/464, loss: 0.07312636077404022 2023-01-24 02:40:29.309570: step: 736/464, loss: 0.08416087180376053 2023-01-24 02:40:29.955239: step: 738/464, loss: 0.27036938071250916 2023-01-24 02:40:30.534877: step: 740/464, loss: 0.09116753190755844 2023-01-24 02:40:31.186409: step: 742/464, loss: 0.13980446755886078 2023-01-24 02:40:31.768444: step: 744/464, loss: 0.07982996106147766 2023-01-24 02:40:32.373564: step: 746/464, loss: 0.01018874254077673 2023-01-24 02:40:32.972145: step: 748/464, loss: 0.4726656675338745 2023-01-24 02:40:33.573382: step: 750/464, loss: 0.45306769013404846 2023-01-24 02:40:34.220429: step: 752/464, loss: 0.14255671203136444 2023-01-24 02:40:34.932674: step: 754/464, loss: 0.17154064774513245 2023-01-24 02:40:35.550165: step: 756/464, loss: 0.1206292062997818 2023-01-24 02:40:36.287484: step: 758/464, loss: 0.4320429861545563 2023-01-24 02:40:36.918761: step: 760/464, loss: 0.09046813100576401 2023-01-24 02:40:37.545777: step: 762/464, loss: 0.11342841386795044 2023-01-24 02:40:38.149668: step: 764/464, loss: 0.12874212861061096 2023-01-24 02:40:38.774904: step: 766/464, loss: 0.17379102110862732 2023-01-24 02:40:39.380072: step: 768/464, loss: 0.08714067935943604 2023-01-24 02:40:40.021285: step: 770/464, loss: 0.2055840790271759 2023-01-24 02:40:40.629358: step: 772/464, loss: 0.24172013998031616 2023-01-24 02:40:41.221259: step: 774/464, loss: 0.07567917555570602 2023-01-24 02:40:41.841726: step: 776/464, loss: 0.4996664226055145 2023-01-24 02:40:42.447828: step: 778/464, loss: 0.1334775984287262 2023-01-24 02:40:43.024000: step: 780/464, loss: 0.05684034898877144 2023-01-24 02:40:43.676642: step: 782/464, loss: 0.2430305927991867 2023-01-24 02:40:44.401813: step: 784/464, loss: 0.15896043181419373 2023-01-24 02:40:45.071481: step: 786/464, loss: 0.7327299118041992 2023-01-24 02:40:45.753088: step: 788/464, loss: 0.4628361463546753 2023-01-24 02:40:46.415976: step: 790/464, loss: 1.0212870836257935 2023-01-24 02:40:47.067775: step: 792/464, loss: 0.07481864094734192 2023-01-24 02:40:47.685130: step: 794/464, loss: 0.2146650105714798 2023-01-24 02:40:48.386870: step: 796/464, loss: 3.0680768489837646 2023-01-24 02:40:48.984587: step: 798/464, loss: 0.14101924002170563 2023-01-24 02:40:49.572840: step: 800/464, loss: 0.10189759731292725 2023-01-24 02:40:50.203323: step: 802/464, loss: 0.4617618918418884 2023-01-24 02:40:50.881180: step: 804/464, loss: 0.32869577407836914 2023-01-24 02:40:51.478099: step: 806/464, loss: 0.11678659170866013 2023-01-24 02:40:52.111434: step: 808/464, loss: 0.4081166088581085 2023-01-24 02:40:52.730503: step: 810/464, loss: 0.07252360880374908 2023-01-24 02:40:53.339099: step: 812/464, loss: 0.08252737671136856 2023-01-24 02:40:54.000879: step: 814/464, loss: 0.16961263120174408 2023-01-24 02:40:54.597163: step: 816/464, loss: 0.015630565583705902 2023-01-24 02:40:55.230454: step: 818/464, loss: 0.097167007625103 2023-01-24 02:40:55.826513: step: 820/464, loss: 0.06768547743558884 2023-01-24 02:40:56.481865: step: 822/464, loss: 0.15056432783603668 2023-01-24 02:40:57.136437: step: 824/464, loss: 0.09026777744293213 2023-01-24 02:40:57.734830: step: 826/464, loss: 0.13992241024971008 2023-01-24 02:40:58.322390: step: 828/464, loss: 0.31633102893829346 2023-01-24 02:40:58.890014: step: 830/464, loss: 0.25410526990890503 2023-01-24 02:40:59.460281: step: 832/464, loss: 0.008967715315520763 2023-01-24 02:41:00.134257: step: 834/464, loss: 0.10733868181705475 2023-01-24 02:41:00.759826: step: 836/464, loss: 0.10406633466482162 2023-01-24 02:41:01.368129: step: 838/464, loss: 0.09029827266931534 2023-01-24 02:41:01.970889: step: 840/464, loss: 0.24169792234897614 2023-01-24 02:41:02.541328: step: 842/464, loss: 0.13256599009037018 2023-01-24 02:41:03.165198: step: 844/464, loss: 0.11500195413827896 2023-01-24 02:41:03.728922: step: 846/464, loss: 0.03509023040533066 2023-01-24 02:41:04.358085: step: 848/464, loss: 0.11999034881591797 2023-01-24 02:41:05.005835: step: 850/464, loss: 0.04450235888361931 2023-01-24 02:41:05.618521: step: 852/464, loss: 0.10161370784044266 2023-01-24 02:41:06.255015: step: 854/464, loss: 0.12068932503461838 2023-01-24 02:41:06.836594: step: 856/464, loss: 0.07015305757522583 2023-01-24 02:41:07.440844: step: 858/464, loss: 0.10911504924297333 2023-01-24 02:41:08.037238: step: 860/464, loss: 0.05616682395339012 2023-01-24 02:41:08.661526: step: 862/464, loss: 1.5696333646774292 2023-01-24 02:41:09.305133: step: 864/464, loss: 0.16139520704746246 2023-01-24 02:41:09.919243: step: 866/464, loss: 0.4261089563369751 2023-01-24 02:41:10.572095: step: 868/464, loss: 0.07795168459415436 2023-01-24 02:41:11.150279: step: 870/464, loss: 0.0567050464451313 2023-01-24 02:41:11.753638: step: 872/464, loss: 0.04633583500981331 2023-01-24 02:41:12.447767: step: 874/464, loss: 0.12251248210668564 2023-01-24 02:41:13.060708: step: 876/464, loss: 0.06280695647001266 2023-01-24 02:41:13.663050: step: 878/464, loss: 0.15962255001068115 2023-01-24 02:41:14.289093: step: 880/464, loss: 0.0784224271774292 2023-01-24 02:41:14.986125: step: 882/464, loss: 0.2128182202577591 2023-01-24 02:41:15.552842: step: 884/464, loss: 0.022529419511556625 2023-01-24 02:41:16.217455: step: 886/464, loss: 0.3284769058227539 2023-01-24 02:41:16.851878: step: 888/464, loss: 0.09700017422437668 2023-01-24 02:41:17.533408: step: 890/464, loss: 0.08831659704446793 2023-01-24 02:41:18.174757: step: 892/464, loss: 0.08953232318162918 2023-01-24 02:41:18.776062: step: 894/464, loss: 0.5294684767723083 2023-01-24 02:41:19.398789: step: 896/464, loss: 0.13867852091789246 2023-01-24 02:41:20.014296: step: 898/464, loss: 3.1478331089019775 2023-01-24 02:41:20.630251: step: 900/464, loss: 0.1969163417816162 2023-01-24 02:41:21.229821: step: 902/464, loss: 0.07681287080049515 2023-01-24 02:41:21.914020: step: 904/464, loss: 0.040397848933935165 2023-01-24 02:41:22.566795: step: 906/464, loss: 0.7580767273902893 2023-01-24 02:41:23.214821: step: 908/464, loss: 0.049065783619880676 2023-01-24 02:41:23.889560: step: 910/464, loss: 0.09030801057815552 2023-01-24 02:41:24.535817: step: 912/464, loss: 0.12545651197433472 2023-01-24 02:41:25.178301: step: 914/464, loss: 0.04713783785700798 2023-01-24 02:41:25.837371: step: 916/464, loss: 0.09222618490457535 2023-01-24 02:41:26.463449: step: 918/464, loss: 0.02281993441283703 2023-01-24 02:41:27.098692: step: 920/464, loss: 0.11207670718431473 2023-01-24 02:41:27.715036: step: 922/464, loss: 0.07248653471469879 2023-01-24 02:41:28.419684: step: 924/464, loss: 0.10601303726434708 2023-01-24 02:41:29.056923: step: 926/464, loss: 0.05023236200213432 2023-01-24 02:41:29.798809: step: 928/464, loss: 0.1637064516544342 2023-01-24 02:41:30.294813: step: 930/464, loss: 0.008865215815603733 ================================================== Loss: 0.189 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33935491493383746, 'r': 0.34064278937381404, 'f1': 0.3399976325757576}, 'combined': 0.2505245713716108, 'epoch': 15} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3238358482990686, 'r': 0.29231463625159265, 'f1': 0.3072689564459309}, 'combined': 0.2006004586123694, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33749811973525873, 'r': 0.35478929475015814, 'f1': 0.34592776749922916}, 'combined': 0.2548941444731162, 'epoch': 15} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33150023593899164, 'r': 0.2870566781363353, 'f1': 0.3076818134748712}, 'combined': 0.2008699922167553, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34429227199354323, 'r': 0.3469054960693955, 'f1': 0.34559394409937894}, 'combined': 0.25464816933638446, 'epoch': 15} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3363087543255041, 'r': 0.2853528824580035, 'f1': 0.30874246298734803}, 'combined': 0.2015624369761961, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24905303030303028, 'r': 0.31309523809523804, 'f1': 0.2774261603375527}, 'combined': 0.18495077355836845, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.358695652173913, 'f1': 0.2946428571428571}, 'combined': 0.14732142857142855, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5227272727272727, 'r': 0.19827586206896552, 'f1': 0.28750000000000003}, 'combined': 0.19166666666666668, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:44:06.388093: step: 2/464, loss: 0.06967566907405853 2023-01-24 02:44:06.994705: step: 4/464, loss: 0.0738082230091095 2023-01-24 02:44:07.630481: step: 6/464, loss: 0.03507356718182564 2023-01-24 02:44:08.246748: step: 8/464, loss: 0.3681235909461975 2023-01-24 02:44:08.827798: step: 10/464, loss: 0.01978295110166073 2023-01-24 02:44:09.422871: step: 12/464, loss: 0.0787145122885704 2023-01-24 02:44:10.048969: step: 14/464, loss: 0.07112926989793777 2023-01-24 02:44:10.677578: step: 16/464, loss: 0.11526873707771301 2023-01-24 02:44:11.303125: step: 18/464, loss: 0.2685200870037079 2023-01-24 02:44:11.909483: step: 20/464, loss: 0.054469529539346695 2023-01-24 02:44:12.540688: step: 22/464, loss: 0.10368768870830536 2023-01-24 02:44:13.180358: step: 24/464, loss: 0.06370580196380615 2023-01-24 02:44:13.807442: step: 26/464, loss: 0.4279961884021759 2023-01-24 02:44:14.455572: step: 28/464, loss: 0.1075909212231636 2023-01-24 02:44:15.196938: step: 30/464, loss: 0.11014177650213242 2023-01-24 02:44:15.879635: step: 32/464, loss: 0.17367292940616608 2023-01-24 02:44:16.460990: step: 34/464, loss: 0.0846795067191124 2023-01-24 02:44:17.090860: step: 36/464, loss: 0.14901630580425262 2023-01-24 02:44:17.686756: step: 38/464, loss: 0.08096755295991898 2023-01-24 02:44:18.244631: step: 40/464, loss: 0.06089453399181366 2023-01-24 02:44:18.848212: step: 42/464, loss: 0.013688472099602222 2023-01-24 02:44:19.455036: step: 44/464, loss: 0.12926563620567322 2023-01-24 02:44:20.080036: step: 46/464, loss: 0.08856255561113358 2023-01-24 02:44:20.733602: step: 48/464, loss: 0.06617226451635361 2023-01-24 02:44:21.360394: step: 50/464, loss: 0.03761773556470871 2023-01-24 02:44:21.985837: step: 52/464, loss: 0.03826698660850525 2023-01-24 02:44:22.592288: step: 54/464, loss: 0.023077616468071938 2023-01-24 02:44:23.237019: step: 56/464, loss: 0.20579925179481506 2023-01-24 02:44:23.917287: step: 58/464, loss: 0.07355199754238129 2023-01-24 02:44:24.514766: step: 60/464, loss: 0.5119641423225403 2023-01-24 02:44:25.123347: step: 62/464, loss: 0.11700855940580368 2023-01-24 02:44:25.758891: step: 64/464, loss: 0.24788638949394226 2023-01-24 02:44:26.379574: step: 66/464, loss: 0.028873804956674576 2023-01-24 02:44:27.004351: step: 68/464, loss: 0.22852276265621185 2023-01-24 02:44:27.589894: step: 70/464, loss: 0.0821826308965683 2023-01-24 02:44:28.233562: step: 72/464, loss: 0.09222550690174103 2023-01-24 02:44:28.828564: step: 74/464, loss: 0.1988222748041153 2023-01-24 02:44:29.424323: step: 76/464, loss: 0.052708689123392105 2023-01-24 02:44:30.005807: step: 78/464, loss: 0.16055501997470856 2023-01-24 02:44:30.591641: step: 80/464, loss: 1.3652526140213013 2023-01-24 02:44:31.178255: step: 82/464, loss: 0.06484121829271317 2023-01-24 02:44:31.791019: step: 84/464, loss: 0.10143230855464935 2023-01-24 02:44:32.417262: step: 86/464, loss: 0.07512756437063217 2023-01-24 02:44:33.117849: step: 88/464, loss: 0.09119124710559845 2023-01-24 02:44:33.725218: step: 90/464, loss: 0.2271091341972351 2023-01-24 02:44:34.403035: step: 92/464, loss: 0.08392105996608734 2023-01-24 02:44:35.060197: step: 94/464, loss: 0.03651168569922447 2023-01-24 02:44:35.664185: step: 96/464, loss: 0.04457028955221176 2023-01-24 02:44:36.351811: step: 98/464, loss: 0.07616758346557617 2023-01-24 02:44:36.977589: step: 100/464, loss: 0.18623724579811096 2023-01-24 02:44:37.594181: step: 102/464, loss: 0.08702529966831207 2023-01-24 02:44:38.181235: step: 104/464, loss: 0.0574377216398716 2023-01-24 02:44:38.805793: step: 106/464, loss: 0.08055932074785233 2023-01-24 02:44:39.468621: step: 108/464, loss: 0.09230149537324905 2023-01-24 02:44:40.065840: step: 110/464, loss: 0.11659622937440872 2023-01-24 02:44:40.694990: step: 112/464, loss: 0.1416178196668625 2023-01-24 02:44:41.337002: step: 114/464, loss: 0.06667982786893845 2023-01-24 02:44:41.903645: step: 116/464, loss: 0.10814934968948364 2023-01-24 02:44:42.558280: step: 118/464, loss: 0.2515222728252411 2023-01-24 02:44:43.255093: step: 120/464, loss: 0.12438605725765228 2023-01-24 02:44:43.928770: step: 122/464, loss: 0.056299109011888504 2023-01-24 02:44:44.510605: step: 124/464, loss: 0.22707466781139374 2023-01-24 02:44:45.113406: step: 126/464, loss: 0.06861184537410736 2023-01-24 02:44:45.701435: step: 128/464, loss: 0.08313598483800888 2023-01-24 02:44:46.390612: step: 130/464, loss: 0.6121090054512024 2023-01-24 02:44:47.004098: step: 132/464, loss: 0.05384763330221176 2023-01-24 02:44:47.634576: step: 134/464, loss: 0.42914947867393494 2023-01-24 02:44:48.244934: step: 136/464, loss: 0.051652342081069946 2023-01-24 02:44:48.836809: step: 138/464, loss: 0.1508612036705017 2023-01-24 02:44:49.462438: step: 140/464, loss: 0.07505248486995697 2023-01-24 02:44:50.037335: step: 142/464, loss: 0.03908807784318924 2023-01-24 02:44:50.711997: step: 144/464, loss: 0.0981028825044632 2023-01-24 02:44:51.329117: step: 146/464, loss: 0.1842205822467804 2023-01-24 02:44:51.916103: step: 148/464, loss: 0.05427127704024315 2023-01-24 02:44:52.556105: step: 150/464, loss: 0.1553102284669876 2023-01-24 02:44:53.176584: step: 152/464, loss: 0.12031495571136475 2023-01-24 02:44:53.858436: step: 154/464, loss: 0.27708104252815247 2023-01-24 02:44:54.492368: step: 156/464, loss: 0.12309330701828003 2023-01-24 02:44:55.097541: step: 158/464, loss: 0.1471334844827652 2023-01-24 02:44:55.689484: step: 160/464, loss: 0.22516928613185883 2023-01-24 02:44:56.274546: step: 162/464, loss: 0.006947956047952175 2023-01-24 02:44:56.890397: step: 164/464, loss: 0.0785510316491127 2023-01-24 02:44:57.572821: step: 166/464, loss: 0.12449684739112854 2023-01-24 02:44:58.223825: step: 168/464, loss: 0.28927740454673767 2023-01-24 02:44:58.884212: step: 170/464, loss: 0.04315275698900223 2023-01-24 02:44:59.438141: step: 172/464, loss: 0.09736642986536026 2023-01-24 02:44:59.992392: step: 174/464, loss: 0.10606750100851059 2023-01-24 02:45:00.672732: step: 176/464, loss: 0.048355862498283386 2023-01-24 02:45:01.325797: step: 178/464, loss: 0.04929488152265549 2023-01-24 02:45:01.968246: step: 180/464, loss: 0.05741899088025093 2023-01-24 02:45:02.545744: step: 182/464, loss: 1.8915715217590332 2023-01-24 02:45:03.170472: step: 184/464, loss: 0.04535231366753578 2023-01-24 02:45:03.843682: step: 186/464, loss: 0.11236447840929031 2023-01-24 02:45:04.458071: step: 188/464, loss: 0.3592509925365448 2023-01-24 02:45:05.186665: step: 190/464, loss: 0.14119786024093628 2023-01-24 02:45:05.807851: step: 192/464, loss: 0.13931706547737122 2023-01-24 02:45:06.417838: step: 194/464, loss: 0.05476130172610283 2023-01-24 02:45:07.053233: step: 196/464, loss: 0.5920207500457764 2023-01-24 02:45:07.697483: step: 198/464, loss: 0.09765797853469849 2023-01-24 02:45:08.319101: step: 200/464, loss: 0.08117769658565521 2023-01-24 02:45:08.892524: step: 202/464, loss: 0.05527033284306526 2023-01-24 02:45:09.511365: step: 204/464, loss: 0.2432255893945694 2023-01-24 02:45:10.124024: step: 206/464, loss: 0.4053729772567749 2023-01-24 02:45:10.693664: step: 208/464, loss: 0.057459376752376556 2023-01-24 02:45:11.348324: step: 210/464, loss: 0.6187804341316223 2023-01-24 02:45:11.969938: step: 212/464, loss: 0.44048136472702026 2023-01-24 02:45:12.555273: step: 214/464, loss: 0.013785900548100471 2023-01-24 02:45:13.243758: step: 216/464, loss: 0.05621178448200226 2023-01-24 02:45:13.978257: step: 218/464, loss: 0.04226300120353699 2023-01-24 02:45:14.619062: step: 220/464, loss: 0.08753485232591629 2023-01-24 02:45:15.224809: step: 222/464, loss: 0.10808547586202621 2023-01-24 02:45:15.850219: step: 224/464, loss: 0.22819502651691437 2023-01-24 02:45:16.433168: step: 226/464, loss: 0.07079092413187027 2023-01-24 02:45:17.080514: step: 228/464, loss: 0.023703573271632195 2023-01-24 02:45:17.717566: step: 230/464, loss: 0.08335469663143158 2023-01-24 02:45:18.369654: step: 232/464, loss: 0.08740612864494324 2023-01-24 02:45:18.978482: step: 234/464, loss: 0.07887952029705048 2023-01-24 02:45:19.600200: step: 236/464, loss: 0.10675524175167084 2023-01-24 02:45:20.188314: step: 238/464, loss: 0.016704462468624115 2023-01-24 02:45:20.839240: step: 240/464, loss: 0.11877373605966568 2023-01-24 02:45:21.483401: step: 242/464, loss: 0.03198765590786934 2023-01-24 02:45:22.219032: step: 244/464, loss: 0.08731076121330261 2023-01-24 02:45:22.840079: step: 246/464, loss: 0.19646023213863373 2023-01-24 02:45:23.473115: step: 248/464, loss: 0.03692404553294182 2023-01-24 02:45:24.207476: step: 250/464, loss: 0.41249361634254456 2023-01-24 02:45:24.792459: step: 252/464, loss: 0.12720321118831635 2023-01-24 02:45:25.412334: step: 254/464, loss: 0.06617468595504761 2023-01-24 02:45:26.091732: step: 256/464, loss: 0.041970960795879364 2023-01-24 02:45:26.741277: step: 258/464, loss: 0.12893933057785034 2023-01-24 02:45:27.334173: step: 260/464, loss: 0.046301230788230896 2023-01-24 02:45:27.896762: step: 262/464, loss: 0.06939200311899185 2023-01-24 02:45:28.543557: step: 264/464, loss: 0.14944562315940857 2023-01-24 02:45:29.121402: step: 266/464, loss: 0.04032471030950546 2023-01-24 02:45:29.754255: step: 268/464, loss: 0.1515982449054718 2023-01-24 02:45:30.442714: step: 270/464, loss: 0.0740865021944046 2023-01-24 02:45:31.138502: step: 272/464, loss: 1.8496662378311157 2023-01-24 02:45:31.745950: step: 274/464, loss: 0.16171090304851532 2023-01-24 02:45:32.394070: step: 276/464, loss: 0.04464978352189064 2023-01-24 02:45:33.035449: step: 278/464, loss: 0.3915879726409912 2023-01-24 02:45:33.655674: step: 280/464, loss: 0.06493178009986877 2023-01-24 02:45:34.307225: step: 282/464, loss: 0.034338947385549545 2023-01-24 02:45:34.989532: step: 284/464, loss: 0.07518131285905838 2023-01-24 02:45:35.633175: step: 286/464, loss: 0.32087764143943787 2023-01-24 02:45:36.245637: step: 288/464, loss: 0.06764665246009827 2023-01-24 02:45:36.880826: step: 290/464, loss: 0.23117481172084808 2023-01-24 02:45:37.587094: step: 292/464, loss: 0.05319612845778465 2023-01-24 02:45:38.227711: step: 294/464, loss: 0.04225257411599159 2023-01-24 02:45:38.839046: step: 296/464, loss: 0.2298288196325302 2023-01-24 02:45:39.513966: step: 298/464, loss: 0.21512852609157562 2023-01-24 02:45:40.154022: step: 300/464, loss: 0.1140446588397026 2023-01-24 02:45:40.792231: step: 302/464, loss: 0.059378352016210556 2023-01-24 02:45:41.441151: step: 304/464, loss: 0.1522151678800583 2023-01-24 02:45:42.075169: step: 306/464, loss: 0.06590325385332108 2023-01-24 02:45:42.698680: step: 308/464, loss: 0.1357267200946808 2023-01-24 02:45:43.404867: step: 310/464, loss: 0.07562603801488876 2023-01-24 02:45:43.976949: step: 312/464, loss: 0.07189598679542542 2023-01-24 02:45:44.528297: step: 314/464, loss: 0.4715685546398163 2023-01-24 02:45:45.144362: step: 316/464, loss: 0.06670360267162323 2023-01-24 02:45:45.724369: step: 318/464, loss: 0.0039108796045184135 2023-01-24 02:45:46.321458: step: 320/464, loss: 0.08325283229351044 2023-01-24 02:45:46.924723: step: 322/464, loss: 0.23594802618026733 2023-01-24 02:45:47.569154: step: 324/464, loss: 0.08955429494380951 2023-01-24 02:45:48.230675: step: 326/464, loss: 0.3751218616962433 2023-01-24 02:45:48.879510: step: 328/464, loss: 0.04241259768605232 2023-01-24 02:45:49.520638: step: 330/464, loss: 0.12729467451572418 2023-01-24 02:45:50.163762: step: 332/464, loss: 0.15847192704677582 2023-01-24 02:45:50.773699: step: 334/464, loss: 0.13148055970668793 2023-01-24 02:45:51.397577: step: 336/464, loss: 0.1411283314228058 2023-01-24 02:45:51.994832: step: 338/464, loss: 0.07324307411909103 2023-01-24 02:45:52.644624: step: 340/464, loss: 0.05654343590140343 2023-01-24 02:45:53.240541: step: 342/464, loss: 0.07110802084207535 2023-01-24 02:45:53.861882: step: 344/464, loss: 0.19061794877052307 2023-01-24 02:45:54.412046: step: 346/464, loss: 0.03509838134050369 2023-01-24 02:45:55.018762: step: 348/464, loss: 0.5751135945320129 2023-01-24 02:45:55.613440: step: 350/464, loss: 0.059561777859926224 2023-01-24 02:45:56.208303: step: 352/464, loss: 0.03886803984642029 2023-01-24 02:45:56.823100: step: 354/464, loss: 0.0850643590092659 2023-01-24 02:45:57.402640: step: 356/464, loss: 0.05165430158376694 2023-01-24 02:45:58.035075: step: 358/464, loss: 0.11026255041360855 2023-01-24 02:45:58.645737: step: 360/464, loss: 0.0780283734202385 2023-01-24 02:45:59.261439: step: 362/464, loss: 0.04332159832119942 2023-01-24 02:45:59.956099: step: 364/464, loss: 0.03511100634932518 2023-01-24 02:46:00.699131: step: 366/464, loss: 0.06925657391548157 2023-01-24 02:46:01.248713: step: 368/464, loss: 0.025680840015411377 2023-01-24 02:46:01.852771: step: 370/464, loss: 0.06052357703447342 2023-01-24 02:46:02.455071: step: 372/464, loss: 0.03492802754044533 2023-01-24 02:46:03.144968: step: 374/464, loss: 0.09259124100208282 2023-01-24 02:46:03.748298: step: 376/464, loss: 0.037405405193567276 2023-01-24 02:46:04.389169: step: 378/464, loss: 0.45301663875579834 2023-01-24 02:46:05.000624: step: 380/464, loss: 0.04687231779098511 2023-01-24 02:46:05.683123: step: 382/464, loss: 0.05050540715456009 2023-01-24 02:46:06.314047: step: 384/464, loss: 0.11022012680768967 2023-01-24 02:46:06.977857: step: 386/464, loss: 0.10482141375541687 2023-01-24 02:46:07.614451: step: 388/464, loss: 0.08359164744615555 2023-01-24 02:46:08.242244: step: 390/464, loss: 0.07807466387748718 2023-01-24 02:46:08.863848: step: 392/464, loss: 0.20732919871807098 2023-01-24 02:46:09.491671: step: 394/464, loss: 0.08477477729320526 2023-01-24 02:46:10.084758: step: 396/464, loss: 0.014245497062802315 2023-01-24 02:46:10.725320: step: 398/464, loss: 0.16936513781547546 2023-01-24 02:46:11.337176: step: 400/464, loss: 0.11278378963470459 2023-01-24 02:46:12.009509: step: 402/464, loss: 0.17410509288311005 2023-01-24 02:46:12.820904: step: 404/464, loss: 0.052976060658693314 2023-01-24 02:46:13.408877: step: 406/464, loss: 0.008185646496713161 2023-01-24 02:46:14.026125: step: 408/464, loss: 0.14385266602039337 2023-01-24 02:46:14.629977: step: 410/464, loss: 0.09457972645759583 2023-01-24 02:46:15.242029: step: 412/464, loss: 0.03777166083455086 2023-01-24 02:46:15.896670: step: 414/464, loss: 0.07037343829870224 2023-01-24 02:46:16.523621: step: 416/464, loss: 0.04227151721715927 2023-01-24 02:46:17.138351: step: 418/464, loss: 0.12296376377344131 2023-01-24 02:46:17.802346: step: 420/464, loss: 0.06956274062395096 2023-01-24 02:46:18.417500: step: 422/464, loss: 0.18227338790893555 2023-01-24 02:46:19.078667: step: 424/464, loss: 0.12981176376342773 2023-01-24 02:46:19.676319: step: 426/464, loss: 0.031735748052597046 2023-01-24 02:46:20.294591: step: 428/464, loss: 0.10249984264373779 2023-01-24 02:46:21.006722: step: 430/464, loss: 0.07617301493883133 2023-01-24 02:46:21.593242: step: 432/464, loss: 0.0622231587767601 2023-01-24 02:46:22.205556: step: 434/464, loss: 0.0698821097612381 2023-01-24 02:46:22.849895: step: 436/464, loss: 0.1468801498413086 2023-01-24 02:46:23.473017: step: 438/464, loss: 0.439418226480484 2023-01-24 02:46:24.123922: step: 440/464, loss: 0.05343214049935341 2023-01-24 02:46:24.717134: step: 442/464, loss: 0.03801265358924866 2023-01-24 02:46:25.377160: step: 444/464, loss: 0.0925765261054039 2023-01-24 02:46:26.004613: step: 446/464, loss: 0.13436639308929443 2023-01-24 02:46:26.599209: step: 448/464, loss: 0.20754016935825348 2023-01-24 02:46:27.195461: step: 450/464, loss: 0.20153497159481049 2023-01-24 02:46:27.782537: step: 452/464, loss: 0.4196487069129944 2023-01-24 02:46:28.453283: step: 454/464, loss: 0.09021150320768356 2023-01-24 02:46:29.070178: step: 456/464, loss: 0.1262298971414566 2023-01-24 02:46:29.653523: step: 458/464, loss: 0.03276536613702774 2023-01-24 02:46:30.222303: step: 460/464, loss: 0.05063563585281372 2023-01-24 02:46:30.823617: step: 462/464, loss: 0.12348288297653198 2023-01-24 02:46:31.450622: step: 464/464, loss: 1.272400975227356 2023-01-24 02:46:32.034143: step: 466/464, loss: 0.5960069298744202 2023-01-24 02:46:32.606509: step: 468/464, loss: 0.18682511150836945 2023-01-24 02:46:33.292767: step: 470/464, loss: 0.5299834609031677 2023-01-24 02:46:33.889034: step: 472/464, loss: 0.047101832926273346 2023-01-24 02:46:34.586184: step: 474/464, loss: 0.12800177931785583 2023-01-24 02:46:35.187065: step: 476/464, loss: 0.03759802505373955 2023-01-24 02:46:35.832280: step: 478/464, loss: 0.13510610163211823 2023-01-24 02:46:36.458111: step: 480/464, loss: 0.2975039482116699 2023-01-24 02:46:37.123908: step: 482/464, loss: 0.06511014699935913 2023-01-24 02:46:37.874077: step: 484/464, loss: 0.08919087052345276 2023-01-24 02:46:38.505696: step: 486/464, loss: 0.039460860192775726 2023-01-24 02:46:39.159571: step: 488/464, loss: 0.04157282039523125 2023-01-24 02:46:39.838168: step: 490/464, loss: 0.10913825780153275 2023-01-24 02:46:40.411318: step: 492/464, loss: 0.06604700535535812 2023-01-24 02:46:41.052420: step: 494/464, loss: 0.44415482878685 2023-01-24 02:46:41.646317: step: 496/464, loss: 5.065694808959961 2023-01-24 02:46:42.271860: step: 498/464, loss: 0.07200480252504349 2023-01-24 02:46:42.878651: step: 500/464, loss: 0.09879495203495026 2023-01-24 02:46:43.446192: step: 502/464, loss: 0.18941263854503632 2023-01-24 02:46:44.145562: step: 504/464, loss: 0.42598849534988403 2023-01-24 02:46:44.738223: step: 506/464, loss: 0.08068060129880905 2023-01-24 02:46:45.356166: step: 508/464, loss: 0.0772366151213646 2023-01-24 02:46:46.046918: step: 510/464, loss: 0.2177586853504181 2023-01-24 02:46:46.668790: step: 512/464, loss: 0.08070854097604752 2023-01-24 02:46:47.266731: step: 514/464, loss: 0.027162298560142517 2023-01-24 02:46:47.910737: step: 516/464, loss: 0.09526893496513367 2023-01-24 02:46:48.580356: step: 518/464, loss: 0.1426323652267456 2023-01-24 02:46:49.181809: step: 520/464, loss: 0.10545886307954788 2023-01-24 02:46:49.774925: step: 522/464, loss: 0.09707791358232498 2023-01-24 02:46:50.385442: step: 524/464, loss: 0.08150474727153778 2023-01-24 02:46:51.002277: step: 526/464, loss: 0.1750078946352005 2023-01-24 02:46:51.595584: step: 528/464, loss: 0.08888711780309677 2023-01-24 02:46:52.170027: step: 530/464, loss: 0.09063917398452759 2023-01-24 02:46:52.812718: step: 532/464, loss: 0.068354532122612 2023-01-24 02:46:53.405080: step: 534/464, loss: 2.5237388610839844 2023-01-24 02:46:53.992484: step: 536/464, loss: 0.14953790605068207 2023-01-24 02:46:54.612155: step: 538/464, loss: 0.19570550322532654 2023-01-24 02:46:55.301315: step: 540/464, loss: 0.08763013780117035 2023-01-24 02:46:55.991162: step: 542/464, loss: 0.18320994079113007 2023-01-24 02:46:56.692114: step: 544/464, loss: 0.00924855750054121 2023-01-24 02:46:57.328392: step: 546/464, loss: 0.045794107019901276 2023-01-24 02:46:57.990545: step: 548/464, loss: 0.04685569554567337 2023-01-24 02:46:58.775429: step: 550/464, loss: 0.16176503896713257 2023-01-24 02:46:59.378753: step: 552/464, loss: 0.04111208766698837 2023-01-24 02:46:59.971261: step: 554/464, loss: 0.17326392233371735 2023-01-24 02:47:00.592662: step: 556/464, loss: 0.01820904016494751 2023-01-24 02:47:01.178325: step: 558/464, loss: 0.10440154373645782 2023-01-24 02:47:01.786250: step: 560/464, loss: 0.06176659092307091 2023-01-24 02:47:02.401427: step: 562/464, loss: 0.04807630181312561 2023-01-24 02:47:03.007416: step: 564/464, loss: 0.06456959992647171 2023-01-24 02:47:03.654683: step: 566/464, loss: 0.5453143119812012 2023-01-24 02:47:04.287001: step: 568/464, loss: 0.0833263024687767 2023-01-24 02:47:04.929035: step: 570/464, loss: 0.09743386507034302 2023-01-24 02:47:05.545966: step: 572/464, loss: 0.6703399419784546 2023-01-24 02:47:06.174310: step: 574/464, loss: 0.21478229761123657 2023-01-24 02:47:06.844231: step: 576/464, loss: 0.06619272381067276 2023-01-24 02:47:07.495851: step: 578/464, loss: 0.08586452901363373 2023-01-24 02:47:08.092532: step: 580/464, loss: 0.1202094778418541 2023-01-24 02:47:08.755402: step: 582/464, loss: 0.08488589525222778 2023-01-24 02:47:09.378146: step: 584/464, loss: 0.6994089484214783 2023-01-24 02:47:09.969870: step: 586/464, loss: 0.008067386224865913 2023-01-24 02:47:10.568252: step: 588/464, loss: 0.11339905858039856 2023-01-24 02:47:11.234260: step: 590/464, loss: 0.09993147850036621 2023-01-24 02:47:11.877691: step: 592/464, loss: 0.20108789205551147 2023-01-24 02:47:12.548013: step: 594/464, loss: 0.26813051104545593 2023-01-24 02:47:13.141743: step: 596/464, loss: 0.03282526135444641 2023-01-24 02:47:13.748557: step: 598/464, loss: 0.04220222681760788 2023-01-24 02:47:14.360851: step: 600/464, loss: 0.10550139844417572 2023-01-24 02:47:14.986063: step: 602/464, loss: 0.49047207832336426 2023-01-24 02:47:15.597777: step: 604/464, loss: 0.05437614768743515 2023-01-24 02:47:16.176448: step: 606/464, loss: 0.11706947535276413 2023-01-24 02:47:16.760232: step: 608/464, loss: 0.13433879613876343 2023-01-24 02:47:17.368230: step: 610/464, loss: 0.04218660295009613 2023-01-24 02:47:18.018465: step: 612/464, loss: 0.04034535586833954 2023-01-24 02:47:18.581336: step: 614/464, loss: 1.275100827217102 2023-01-24 02:47:19.208747: step: 616/464, loss: 0.18319328129291534 2023-01-24 02:47:19.872747: step: 618/464, loss: 0.22358042001724243 2023-01-24 02:47:20.478147: step: 620/464, loss: 0.28204214572906494 2023-01-24 02:47:21.126790: step: 622/464, loss: 0.05341412127017975 2023-01-24 02:47:21.699818: step: 624/464, loss: 0.21865655481815338 2023-01-24 02:47:22.338556: step: 626/464, loss: 0.23845311999320984 2023-01-24 02:47:22.932715: step: 628/464, loss: 0.08601119369268417 2023-01-24 02:47:23.598200: step: 630/464, loss: 0.07053272426128387 2023-01-24 02:47:24.224682: step: 632/464, loss: 0.045950137078762054 2023-01-24 02:47:24.856229: step: 634/464, loss: 0.11749845743179321 2023-01-24 02:47:25.588659: step: 636/464, loss: 0.37584465742111206 2023-01-24 02:47:26.211795: step: 638/464, loss: 0.06773810088634491 2023-01-24 02:47:26.795628: step: 640/464, loss: 0.15824325382709503 2023-01-24 02:47:27.408292: step: 642/464, loss: 0.19471389055252075 2023-01-24 02:47:28.163522: step: 644/464, loss: 0.06105535477399826 2023-01-24 02:47:28.787348: step: 646/464, loss: 0.06644576787948608 2023-01-24 02:47:29.496836: step: 648/464, loss: 0.09399471431970596 2023-01-24 02:47:30.132569: step: 650/464, loss: 0.24062883853912354 2023-01-24 02:47:30.737231: step: 652/464, loss: 0.09345666319131851 2023-01-24 02:47:31.419685: step: 654/464, loss: 0.18545842170715332 2023-01-24 02:47:32.031267: step: 656/464, loss: 0.11836571246385574 2023-01-24 02:47:32.623393: step: 658/464, loss: 0.06930890679359436 2023-01-24 02:47:33.216207: step: 660/464, loss: 0.08569063246250153 2023-01-24 02:47:33.833540: step: 662/464, loss: 0.1048484519124031 2023-01-24 02:47:34.497758: step: 664/464, loss: 0.2178206741809845 2023-01-24 02:47:35.142820: step: 666/464, loss: 0.12674731016159058 2023-01-24 02:47:35.733294: step: 668/464, loss: 0.11909352242946625 2023-01-24 02:47:36.452241: step: 670/464, loss: 0.6282179355621338 2023-01-24 02:47:37.143693: step: 672/464, loss: 0.08999377489089966 2023-01-24 02:47:37.744203: step: 674/464, loss: 0.02203369326889515 2023-01-24 02:47:38.282966: step: 676/464, loss: 0.02420470118522644 2023-01-24 02:47:38.843193: step: 678/464, loss: 0.1601714789867401 2023-01-24 02:47:39.539588: step: 680/464, loss: 0.06573443859815598 2023-01-24 02:47:40.167650: step: 682/464, loss: 0.043082889169454575 2023-01-24 02:47:40.766074: step: 684/464, loss: 0.14057160913944244 2023-01-24 02:47:41.457538: step: 686/464, loss: 0.21037933230400085 2023-01-24 02:47:42.079381: step: 688/464, loss: 0.039161499589681625 2023-01-24 02:47:42.698616: step: 690/464, loss: 0.07449066638946533 2023-01-24 02:47:43.397429: step: 692/464, loss: 0.053820233792066574 2023-01-24 02:47:44.135456: step: 694/464, loss: 0.9339656829833984 2023-01-24 02:47:44.737683: step: 696/464, loss: 0.06315405666828156 2023-01-24 02:47:45.335250: step: 698/464, loss: 0.927216112613678 2023-01-24 02:47:45.957990: step: 700/464, loss: 0.06461441516876221 2023-01-24 02:47:46.584045: step: 702/464, loss: 0.10851768404245377 2023-01-24 02:47:47.289134: step: 704/464, loss: 0.5746562480926514 2023-01-24 02:47:47.968177: step: 706/464, loss: 0.19040845334529877 2023-01-24 02:47:48.569185: step: 708/464, loss: 0.12218072265386581 2023-01-24 02:47:49.196147: step: 710/464, loss: 0.09031082689762115 2023-01-24 02:47:49.804296: step: 712/464, loss: 0.0764712393283844 2023-01-24 02:47:50.475521: step: 714/464, loss: 0.1057339459657669 2023-01-24 02:47:51.074592: step: 716/464, loss: 0.1484196037054062 2023-01-24 02:47:51.745787: step: 718/464, loss: 0.15744993090629578 2023-01-24 02:47:52.309878: step: 720/464, loss: 0.055503856390714645 2023-01-24 02:47:52.928243: step: 722/464, loss: 0.34808388352394104 2023-01-24 02:47:53.566774: step: 724/464, loss: 0.7415017485618591 2023-01-24 02:47:54.214747: step: 726/464, loss: 0.11673478782176971 2023-01-24 02:47:54.824036: step: 728/464, loss: 0.10913416743278503 2023-01-24 02:47:55.461587: step: 730/464, loss: 0.05623776093125343 2023-01-24 02:47:56.030682: step: 732/464, loss: 0.18155232071876526 2023-01-24 02:47:56.613078: step: 734/464, loss: 0.111542247235775 2023-01-24 02:47:57.259821: step: 736/464, loss: 0.1297747641801834 2023-01-24 02:47:57.836892: step: 738/464, loss: 0.05977364629507065 2023-01-24 02:47:58.476326: step: 740/464, loss: 0.07568145543336868 2023-01-24 02:47:59.170625: step: 742/464, loss: 0.16066378355026245 2023-01-24 02:47:59.768293: step: 744/464, loss: 0.023682449012994766 2023-01-24 02:48:00.365947: step: 746/464, loss: 0.05309910327196121 2023-01-24 02:48:00.944591: step: 748/464, loss: 0.10708057880401611 2023-01-24 02:48:01.609354: step: 750/464, loss: 0.1418672800064087 2023-01-24 02:48:02.157240: step: 752/464, loss: 0.08676539361476898 2023-01-24 02:48:02.782030: step: 754/464, loss: 0.15166112780570984 2023-01-24 02:48:03.452292: step: 756/464, loss: 0.0914529487490654 2023-01-24 02:48:04.097588: step: 758/464, loss: 0.11644382029771805 2023-01-24 02:48:04.717715: step: 760/464, loss: 0.13015790283679962 2023-01-24 02:48:05.340960: step: 762/464, loss: 0.1178271546959877 2023-01-24 02:48:05.989584: step: 764/464, loss: 0.1096925288438797 2023-01-24 02:48:06.658205: step: 766/464, loss: 0.08539048582315445 2023-01-24 02:48:07.224364: step: 768/464, loss: 0.08482424914836884 2023-01-24 02:48:07.836147: step: 770/464, loss: 0.026232510805130005 2023-01-24 02:48:08.417173: step: 772/464, loss: 0.05931536853313446 2023-01-24 02:48:09.070655: step: 774/464, loss: 0.06830425560474396 2023-01-24 02:48:09.686426: step: 776/464, loss: 0.026791663840413094 2023-01-24 02:48:10.384114: step: 778/464, loss: 0.1483849138021469 2023-01-24 02:48:11.034653: step: 780/464, loss: 0.07420195639133453 2023-01-24 02:48:11.643525: step: 782/464, loss: 0.06437802314758301 2023-01-24 02:48:12.269570: step: 784/464, loss: 0.03879198431968689 2023-01-24 02:48:12.928870: step: 786/464, loss: 0.07572466135025024 2023-01-24 02:48:13.502490: step: 788/464, loss: 0.08594454824924469 2023-01-24 02:48:14.113799: step: 790/464, loss: 0.08379257470369339 2023-01-24 02:48:14.688098: step: 792/464, loss: 0.08512155711650848 2023-01-24 02:48:15.354626: step: 794/464, loss: 0.19086991250514984 2023-01-24 02:48:16.004908: step: 796/464, loss: 0.07286135852336884 2023-01-24 02:48:16.687744: step: 798/464, loss: 0.02229865826666355 2023-01-24 02:48:17.346008: step: 800/464, loss: 0.09870410710573196 2023-01-24 02:48:17.997662: step: 802/464, loss: 0.19168686866760254 2023-01-24 02:48:18.607382: step: 804/464, loss: 0.3734838664531708 2023-01-24 02:48:19.192585: step: 806/464, loss: 0.02032576873898506 2023-01-24 02:48:19.809700: step: 808/464, loss: 0.0900048092007637 2023-01-24 02:48:20.424975: step: 810/464, loss: 0.06668906658887863 2023-01-24 02:48:21.006450: step: 812/464, loss: 0.018267882987856865 2023-01-24 02:48:21.662800: step: 814/464, loss: 0.02541377954185009 2023-01-24 02:48:22.342083: step: 816/464, loss: 0.09765075892210007 2023-01-24 02:48:23.065246: step: 818/464, loss: 0.07601515203714371 2023-01-24 02:48:23.685894: step: 820/464, loss: 0.43445128202438354 2023-01-24 02:48:24.337912: step: 822/464, loss: 0.09697567671537399 2023-01-24 02:48:25.002011: step: 824/464, loss: 0.12798605859279633 2023-01-24 02:48:25.618077: step: 826/464, loss: 0.17840342223644257 2023-01-24 02:48:26.284136: step: 828/464, loss: 0.026910794898867607 2023-01-24 02:48:26.893142: step: 830/464, loss: 0.01830548793077469 2023-01-24 02:48:27.520643: step: 832/464, loss: 0.1646566390991211 2023-01-24 02:48:28.150626: step: 834/464, loss: 0.03405522555112839 2023-01-24 02:48:28.799941: step: 836/464, loss: 0.22875934839248657 2023-01-24 02:48:29.385859: step: 838/464, loss: 0.02723325602710247 2023-01-24 02:48:29.929482: step: 840/464, loss: 0.10927391052246094 2023-01-24 02:48:30.681065: step: 842/464, loss: 0.17469929158687592 2023-01-24 02:48:31.347228: step: 844/464, loss: 0.10927444696426392 2023-01-24 02:48:31.910855: step: 846/464, loss: 0.13192278146743774 2023-01-24 02:48:32.542577: step: 848/464, loss: 0.4885123074054718 2023-01-24 02:48:33.149349: step: 850/464, loss: 0.09656789898872375 2023-01-24 02:48:33.807857: step: 852/464, loss: 0.09319087117910385 2023-01-24 02:48:34.426466: step: 854/464, loss: 0.016286242753267288 2023-01-24 02:48:34.983510: step: 856/464, loss: 0.05448547378182411 2023-01-24 02:48:35.626830: step: 858/464, loss: 0.15139378607273102 2023-01-24 02:48:36.272451: step: 860/464, loss: 0.20680542290210724 2023-01-24 02:48:36.887481: step: 862/464, loss: 0.1301339566707611 2023-01-24 02:48:37.558049: step: 864/464, loss: 0.09975486993789673 2023-01-24 02:48:38.154300: step: 866/464, loss: 0.12980853021144867 2023-01-24 02:48:38.732150: step: 868/464, loss: 0.10604464262723923 2023-01-24 02:48:39.413176: step: 870/464, loss: 0.11554470658302307 2023-01-24 02:48:40.016964: step: 872/464, loss: 0.15384931862354279 2023-01-24 02:48:40.697922: step: 874/464, loss: 0.07003022730350494 2023-01-24 02:48:41.303379: step: 876/464, loss: 0.07454525679349899 2023-01-24 02:48:41.939293: step: 878/464, loss: 0.10567981004714966 2023-01-24 02:48:42.561968: step: 880/464, loss: 5.253168106079102 2023-01-24 02:48:43.185785: step: 882/464, loss: 0.06796644628047943 2023-01-24 02:48:43.834973: step: 884/464, loss: 0.03893854469060898 2023-01-24 02:48:44.445998: step: 886/464, loss: 0.04492393508553505 2023-01-24 02:48:45.075314: step: 888/464, loss: 0.19420526921749115 2023-01-24 02:48:45.774253: step: 890/464, loss: 0.25114211440086365 2023-01-24 02:48:46.370810: step: 892/464, loss: 0.015781380236148834 2023-01-24 02:48:47.021286: step: 894/464, loss: 0.00819613691419363 2023-01-24 02:48:47.691157: step: 896/464, loss: 0.1408635824918747 2023-01-24 02:48:48.358285: step: 898/464, loss: 2.3857741355895996 2023-01-24 02:48:49.021824: step: 900/464, loss: 0.09957841783761978 2023-01-24 02:48:49.636360: step: 902/464, loss: 0.051161251962184906 2023-01-24 02:48:50.232607: step: 904/464, loss: 0.1271309107542038 2023-01-24 02:48:50.927870: step: 906/464, loss: 0.13337206840515137 2023-01-24 02:48:51.631667: step: 908/464, loss: 0.07845316082239151 2023-01-24 02:48:52.273752: step: 910/464, loss: 0.17774929106235504 2023-01-24 02:48:52.913134: step: 912/464, loss: 0.057782527059316635 2023-01-24 02:48:53.553620: step: 914/464, loss: 0.0975969061255455 2023-01-24 02:48:54.184025: step: 916/464, loss: 0.298834890127182 2023-01-24 02:48:54.759675: step: 918/464, loss: 0.08659328520298004 2023-01-24 02:48:55.430835: step: 920/464, loss: 0.48213207721710205 2023-01-24 02:48:56.061692: step: 922/464, loss: 0.051230985671281815 2023-01-24 02:48:56.661225: step: 924/464, loss: 0.1356029510498047 2023-01-24 02:48:57.273791: step: 926/464, loss: 0.3158682584762573 2023-01-24 02:48:57.924292: step: 928/464, loss: 0.08663108944892883 2023-01-24 02:48:58.442325: step: 930/464, loss: 0.0403536893427372 ================================================== Loss: 0.181 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3311254725999442, 'r': 0.33049715102005817, 'f1': 0.33081101346167263}, 'combined': 0.2437554836033377, 'epoch': 16} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3133301276099738, 'r': 0.2997947268768224, 'f1': 0.3064130228670575}, 'combined': 0.20004166259714634, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32623605060297034, 'r': 0.3392359691279464, 'f1': 0.3326090339170749}, 'combined': 0.24508034078100255, 'epoch': 16} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32804063945672174, 'r': 0.3075569264327942, 'f1': 0.3174687136353677}, 'combined': 0.20725936745106907, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33975954418298504, 'r': 0.3320230839738848, 'f1': 0.33584676632291227}, 'combined': 0.2474660383431985, 'epoch': 16} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32994217569549183, 'r': 0.3008028483111614, 'f1': 0.3146994187817444}, 'combined': 0.205451434023315, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24637681159420288, 'r': 0.32380952380952377, 'f1': 0.27983539094650206}, 'combined': 0.18655692729766804, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2890625, 'r': 0.40217391304347827, 'f1': 0.33636363636363636}, 'combined': 0.16818181818181818, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.2413793103448276, 'f1': 0.32558139534883723}, 'combined': 0.21705426356589147, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:51:35.829552: step: 2/464, loss: 2.323784828186035 2023-01-24 02:51:36.475221: step: 4/464, loss: 0.07156746834516525 2023-01-24 02:51:37.106083: step: 6/464, loss: 0.06237180158495903 2023-01-24 02:51:37.723563: step: 8/464, loss: 0.38281306624412537 2023-01-24 02:51:38.320996: step: 10/464, loss: 0.059620022773742676 2023-01-24 02:51:38.922709: step: 12/464, loss: 0.06657879054546356 2023-01-24 02:51:39.565165: step: 14/464, loss: 0.1868683099746704 2023-01-24 02:51:40.190914: step: 16/464, loss: 0.05568621680140495 2023-01-24 02:51:40.809924: step: 18/464, loss: 0.26912921667099 2023-01-24 02:51:41.398748: step: 20/464, loss: 0.029237784445285797 2023-01-24 02:51:42.036891: step: 22/464, loss: 0.05056983605027199 2023-01-24 02:51:42.700190: step: 24/464, loss: 0.07277239114046097 2023-01-24 02:51:43.375550: step: 26/464, loss: 0.07805615663528442 2023-01-24 02:51:43.944119: step: 28/464, loss: 0.06455496698617935 2023-01-24 02:51:44.576416: step: 30/464, loss: 0.037866562604904175 2023-01-24 02:51:45.196382: step: 32/464, loss: 0.04250551760196686 2023-01-24 02:51:45.745766: step: 34/464, loss: 0.665719747543335 2023-01-24 02:51:46.309122: step: 36/464, loss: 0.06608240306377411 2023-01-24 02:51:46.933250: step: 38/464, loss: 0.015798650681972504 2023-01-24 02:51:47.503579: step: 40/464, loss: 0.10514256358146667 2023-01-24 02:51:48.093655: step: 42/464, loss: 0.0462794303894043 2023-01-24 02:51:48.721971: step: 44/464, loss: 0.059097494930028915 2023-01-24 02:51:49.355226: step: 46/464, loss: 0.14740002155303955 2023-01-24 02:51:50.071416: step: 48/464, loss: 0.021973850205540657 2023-01-24 02:51:50.775899: step: 50/464, loss: 0.3088114559650421 2023-01-24 02:51:51.472600: step: 52/464, loss: 0.18334423005580902 2023-01-24 02:51:52.037793: step: 54/464, loss: 0.06878239661455154 2023-01-24 02:51:52.756010: step: 56/464, loss: 0.019568689167499542 2023-01-24 02:51:53.400115: step: 58/464, loss: 0.11608067899942398 2023-01-24 02:51:53.986764: step: 60/464, loss: 0.07847467809915543 2023-01-24 02:51:54.710806: step: 62/464, loss: 0.09866181761026382 2023-01-24 02:51:55.348414: step: 64/464, loss: 0.57988041639328 2023-01-24 02:51:56.008010: step: 66/464, loss: 0.06298445910215378 2023-01-24 02:51:56.619721: step: 68/464, loss: 0.1550336629152298 2023-01-24 02:51:57.223124: step: 70/464, loss: 0.04487539455294609 2023-01-24 02:51:57.801563: step: 72/464, loss: 0.06373658031225204 2023-01-24 02:51:58.375349: step: 74/464, loss: 0.03194844722747803 2023-01-24 02:51:58.998186: step: 76/464, loss: 0.05486992374062538 2023-01-24 02:51:59.613918: step: 78/464, loss: 0.1321585327386856 2023-01-24 02:52:00.226708: step: 80/464, loss: 0.09452857077121735 2023-01-24 02:52:00.834238: step: 82/464, loss: 0.06801652908325195 2023-01-24 02:52:01.479783: step: 84/464, loss: 0.06772405654191971 2023-01-24 02:52:02.136709: step: 86/464, loss: 0.14668281376361847 2023-01-24 02:52:02.780884: step: 88/464, loss: 0.042957138270139694 2023-01-24 02:52:03.409086: step: 90/464, loss: 0.028503797948360443 2023-01-24 02:52:03.983511: step: 92/464, loss: 0.10230975598096848 2023-01-24 02:52:04.617882: step: 94/464, loss: 0.05061762034893036 2023-01-24 02:52:05.285920: step: 96/464, loss: 0.02996540255844593 2023-01-24 02:52:05.939183: step: 98/464, loss: 0.0619596503674984 2023-01-24 02:52:06.618714: step: 100/464, loss: 0.023418670520186424 2023-01-24 02:52:07.265138: step: 102/464, loss: 0.18141427636146545 2023-01-24 02:52:07.925490: step: 104/464, loss: 0.08905918151140213 2023-01-24 02:52:08.521981: step: 106/464, loss: 0.09526338428258896 2023-01-24 02:52:09.034941: step: 108/464, loss: 0.005294324364513159 2023-01-24 02:52:09.672717: step: 110/464, loss: 0.11219874769449234 2023-01-24 02:52:10.327988: step: 112/464, loss: 0.13767443597316742 2023-01-24 02:52:10.966959: step: 114/464, loss: 0.02810472808778286 2023-01-24 02:52:11.577769: step: 116/464, loss: 0.09533141553401947 2023-01-24 02:52:12.230272: step: 118/464, loss: 0.24947750568389893 2023-01-24 02:52:12.875526: step: 120/464, loss: 0.07970800250768661 2023-01-24 02:52:13.487159: step: 122/464, loss: 0.04374972730875015 2023-01-24 02:52:14.108055: step: 124/464, loss: 0.15076035261154175 2023-01-24 02:52:14.755332: step: 126/464, loss: 0.03661858290433884 2023-01-24 02:52:15.306974: step: 128/464, loss: 0.057950735092163086 2023-01-24 02:52:15.910109: step: 130/464, loss: 0.03365790471434593 2023-01-24 02:52:16.589026: step: 132/464, loss: 0.04842458665370941 2023-01-24 02:52:17.239068: step: 134/464, loss: 0.09987257421016693 2023-01-24 02:52:17.899782: step: 136/464, loss: 0.022941607981920242 2023-01-24 02:52:18.508838: step: 138/464, loss: 0.06291542947292328 2023-01-24 02:52:19.102034: step: 140/464, loss: 0.00350829865783453 2023-01-24 02:52:19.676443: step: 142/464, loss: 0.09380892664194107 2023-01-24 02:52:20.339695: step: 144/464, loss: 0.06917431950569153 2023-01-24 02:52:20.942210: step: 146/464, loss: 0.04020599275827408 2023-01-24 02:52:21.554806: step: 148/464, loss: 0.08713655918836594 2023-01-24 02:52:22.100942: step: 150/464, loss: 0.046814925968647 2023-01-24 02:52:22.775204: step: 152/464, loss: 0.13716870546340942 2023-01-24 02:52:23.431055: step: 154/464, loss: 0.09229633212089539 2023-01-24 02:52:24.152214: step: 156/464, loss: 0.05761297792196274 2023-01-24 02:52:24.798050: step: 158/464, loss: 0.09417982399463654 2023-01-24 02:52:25.415595: step: 160/464, loss: 0.013948668725788593 2023-01-24 02:52:26.074289: step: 162/464, loss: 0.04147933050990105 2023-01-24 02:52:26.707969: step: 164/464, loss: 0.14888344705104828 2023-01-24 02:52:27.348938: step: 166/464, loss: 0.23217937350273132 2023-01-24 02:52:27.956981: step: 168/464, loss: 0.8458350896835327 2023-01-24 02:52:28.681224: step: 170/464, loss: 0.017771458253264427 2023-01-24 02:52:29.272556: step: 172/464, loss: 0.0396922342479229 2023-01-24 02:52:29.849570: step: 174/464, loss: 0.10030711442232132 2023-01-24 02:52:30.470962: step: 176/464, loss: 0.0012506459606811404 2023-01-24 02:52:31.078364: step: 178/464, loss: 0.12276698648929596 2023-01-24 02:52:31.666760: step: 180/464, loss: 0.0790601447224617 2023-01-24 02:52:32.358940: step: 182/464, loss: 0.15493811666965485 2023-01-24 02:52:33.030867: step: 184/464, loss: 0.12189780175685883 2023-01-24 02:52:33.786863: step: 186/464, loss: 0.0675785094499588 2023-01-24 02:52:34.402431: step: 188/464, loss: 0.09874018281698227 2023-01-24 02:52:34.952600: step: 190/464, loss: 0.04013747721910477 2023-01-24 02:52:35.512983: step: 192/464, loss: 0.123075470328331 2023-01-24 02:52:36.232359: step: 194/464, loss: 0.021077025681734085 2023-01-24 02:52:36.841396: step: 196/464, loss: 0.16640305519104004 2023-01-24 02:52:37.472601: step: 198/464, loss: 0.087140291929245 2023-01-24 02:52:38.123636: step: 200/464, loss: 0.059325482696294785 2023-01-24 02:52:38.746969: step: 202/464, loss: 0.06079040840268135 2023-01-24 02:52:39.342128: step: 204/464, loss: 0.04386744648218155 2023-01-24 02:52:39.987503: step: 206/464, loss: 0.008767152205109596 2023-01-24 02:52:40.668571: step: 208/464, loss: 0.02713126689195633 2023-01-24 02:52:41.314672: step: 210/464, loss: 0.0859706699848175 2023-01-24 02:52:41.877543: step: 212/464, loss: 0.10441812127828598 2023-01-24 02:52:42.502301: step: 214/464, loss: 0.02353387139737606 2023-01-24 02:52:43.124098: step: 216/464, loss: 0.13387531042099 2023-01-24 02:52:43.765190: step: 218/464, loss: 0.02812942862510681 2023-01-24 02:52:44.491583: step: 220/464, loss: 0.10433623194694519 2023-01-24 02:52:45.111932: step: 222/464, loss: 0.055663034319877625 2023-01-24 02:52:45.753512: step: 224/464, loss: 0.2841798961162567 2023-01-24 02:52:46.330748: step: 226/464, loss: 0.06304500997066498 2023-01-24 02:52:46.932911: step: 228/464, loss: 0.007425735704600811 2023-01-24 02:52:47.602354: step: 230/464, loss: 0.07880717515945435 2023-01-24 02:52:48.184377: step: 232/464, loss: 0.3579563498497009 2023-01-24 02:52:48.806638: step: 234/464, loss: 0.10063056647777557 2023-01-24 02:52:49.377669: step: 236/464, loss: 0.14094175398349762 2023-01-24 02:52:50.000865: step: 238/464, loss: 0.1755877137184143 2023-01-24 02:52:50.514232: step: 240/464, loss: 0.041254084557294846 2023-01-24 02:52:51.149631: step: 242/464, loss: 0.05494088679552078 2023-01-24 02:52:51.736033: step: 244/464, loss: 0.35076582431793213 2023-01-24 02:52:52.362652: step: 246/464, loss: 0.46399596333503723 2023-01-24 02:52:53.012337: step: 248/464, loss: 0.11473860591650009 2023-01-24 02:52:53.606850: step: 250/464, loss: 0.0690879076719284 2023-01-24 02:52:54.198601: step: 252/464, loss: 0.030523525550961494 2023-01-24 02:52:54.833416: step: 254/464, loss: 0.031805213540792465 2023-01-24 02:52:55.505821: step: 256/464, loss: 0.413985013961792 2023-01-24 02:52:56.169657: step: 258/464, loss: 0.35511845350265503 2023-01-24 02:52:56.845832: step: 260/464, loss: 0.11695799231529236 2023-01-24 02:52:57.453862: step: 262/464, loss: 0.11076057702302933 2023-01-24 02:52:58.012696: step: 264/464, loss: 0.1434173882007599 2023-01-24 02:52:58.610150: step: 266/464, loss: 0.08204736560583115 2023-01-24 02:52:59.239323: step: 268/464, loss: 0.06001695618033409 2023-01-24 02:52:59.926079: step: 270/464, loss: 0.0372479073703289 2023-01-24 02:53:00.477074: step: 272/464, loss: 0.03706498444080353 2023-01-24 02:53:01.113716: step: 274/464, loss: 0.07792051136493683 2023-01-24 02:53:01.678540: step: 276/464, loss: 0.15670345723628998 2023-01-24 02:53:02.315042: step: 278/464, loss: 0.057117052376270294 2023-01-24 02:53:03.011849: step: 280/464, loss: 0.044370219111442566 2023-01-24 02:53:03.656041: step: 282/464, loss: 0.04691505432128906 2023-01-24 02:53:04.301127: step: 284/464, loss: 0.12970347702503204 2023-01-24 02:53:04.957850: step: 286/464, loss: 0.07708616554737091 2023-01-24 02:53:05.617805: step: 288/464, loss: 0.07818364351987839 2023-01-24 02:53:06.253392: step: 290/464, loss: 0.10086287558078766 2023-01-24 02:53:06.849704: step: 292/464, loss: 0.012405943125486374 2023-01-24 02:53:07.474753: step: 294/464, loss: 0.12408587336540222 2023-01-24 02:53:08.078130: step: 296/464, loss: 0.0847407802939415 2023-01-24 02:53:08.681958: step: 298/464, loss: 0.2100462168455124 2023-01-24 02:53:09.320333: step: 300/464, loss: 0.01914643868803978 2023-01-24 02:53:09.986889: step: 302/464, loss: 0.16246408224105835 2023-01-24 02:53:10.592448: step: 304/464, loss: 0.0580965057015419 2023-01-24 02:53:11.198277: step: 306/464, loss: 0.022193651646375656 2023-01-24 02:53:11.797347: step: 308/464, loss: 0.6016255021095276 2023-01-24 02:53:12.382138: step: 310/464, loss: 0.05895552039146423 2023-01-24 02:53:13.014166: step: 312/464, loss: 0.0020470358431339264 2023-01-24 02:53:13.636897: step: 314/464, loss: 0.09098812937736511 2023-01-24 02:53:14.230116: step: 316/464, loss: 0.02651539258658886 2023-01-24 02:53:14.871178: step: 318/464, loss: 0.057089027017354965 2023-01-24 02:53:15.541105: step: 320/464, loss: 0.05555043742060661 2023-01-24 02:53:16.170331: step: 322/464, loss: 0.1641826331615448 2023-01-24 02:53:16.818628: step: 324/464, loss: 0.035017579793930054 2023-01-24 02:53:17.401931: step: 326/464, loss: 0.09218455851078033 2023-01-24 02:53:18.063252: step: 328/464, loss: 0.015327589586377144 2023-01-24 02:53:18.641595: step: 330/464, loss: 0.0740237608551979 2023-01-24 02:53:19.226974: step: 332/464, loss: 0.060544099658727646 2023-01-24 02:53:19.923339: step: 334/464, loss: 0.12118522822856903 2023-01-24 02:53:20.651568: step: 336/464, loss: 0.02989678643643856 2023-01-24 02:53:21.296230: step: 338/464, loss: 0.08799106627702713 2023-01-24 02:53:21.893834: step: 340/464, loss: 0.024672655388712883 2023-01-24 02:53:22.436784: step: 342/464, loss: 0.018849296495318413 2023-01-24 02:53:23.003938: step: 344/464, loss: 0.03175678104162216 2023-01-24 02:53:23.672307: step: 346/464, loss: 0.07565797120332718 2023-01-24 02:53:24.346378: step: 348/464, loss: 0.16002212464809418 2023-01-24 02:53:24.949714: step: 350/464, loss: 0.0876501128077507 2023-01-24 02:53:25.619339: step: 352/464, loss: 0.673304557800293 2023-01-24 02:53:26.248518: step: 354/464, loss: 0.046740952879190445 2023-01-24 02:53:26.879703: step: 356/464, loss: 0.16490834951400757 2023-01-24 02:53:27.453308: step: 358/464, loss: 0.13249750435352325 2023-01-24 02:53:28.142551: step: 360/464, loss: 0.14411969482898712 2023-01-24 02:53:28.741921: step: 362/464, loss: 0.050405144691467285 2023-01-24 02:53:29.321047: step: 364/464, loss: 2.248347520828247 2023-01-24 02:53:29.926102: step: 366/464, loss: 0.0665668323636055 2023-01-24 02:53:30.576071: step: 368/464, loss: 0.0881873369216919 2023-01-24 02:53:31.183175: step: 370/464, loss: 0.15915344655513763 2023-01-24 02:53:31.759343: step: 372/464, loss: 0.18714694678783417 2023-01-24 02:53:32.376802: step: 374/464, loss: 0.6852954626083374 2023-01-24 02:53:32.940301: step: 376/464, loss: 0.057403597980737686 2023-01-24 02:53:33.519774: step: 378/464, loss: 0.03670104593038559 2023-01-24 02:53:34.114996: step: 380/464, loss: 0.01751803793013096 2023-01-24 02:53:34.887497: step: 382/464, loss: 0.2213580459356308 2023-01-24 02:53:35.566281: step: 384/464, loss: 0.0936044305562973 2023-01-24 02:53:36.191270: step: 386/464, loss: 0.04487286135554314 2023-01-24 02:53:36.804036: step: 388/464, loss: 1.0210002660751343 2023-01-24 02:53:37.460886: step: 390/464, loss: 0.1955382376909256 2023-01-24 02:53:38.065086: step: 392/464, loss: 0.10517556965351105 2023-01-24 02:53:38.635563: step: 394/464, loss: 0.04596574604511261 2023-01-24 02:53:39.268523: step: 396/464, loss: 0.06611286848783493 2023-01-24 02:53:39.897167: step: 398/464, loss: 0.20487700402736664 2023-01-24 02:53:40.582357: step: 400/464, loss: 0.1009177640080452 2023-01-24 02:53:41.246399: step: 402/464, loss: 0.02000158652663231 2023-01-24 02:53:41.863720: step: 404/464, loss: 0.022291820496320724 2023-01-24 02:53:42.465459: step: 406/464, loss: 0.07473233342170715 2023-01-24 02:53:43.159565: step: 408/464, loss: 0.04916410893201828 2023-01-24 02:53:43.770227: step: 410/464, loss: 0.07321424782276154 2023-01-24 02:53:44.464077: step: 412/464, loss: 0.07492359727621078 2023-01-24 02:53:45.074575: step: 414/464, loss: 0.47756317257881165 2023-01-24 02:53:45.734292: step: 416/464, loss: 0.23084905743598938 2023-01-24 02:53:46.397033: step: 418/464, loss: 0.07805997133255005 2023-01-24 02:53:47.014467: step: 420/464, loss: 0.07706529647111893 2023-01-24 02:53:47.644968: step: 422/464, loss: 0.08947402983903885 2023-01-24 02:53:48.266849: step: 424/464, loss: 0.039802052080631256 2023-01-24 02:53:48.882801: step: 426/464, loss: 2.4658396244049072 2023-01-24 02:53:49.473775: step: 428/464, loss: 0.12952269613742828 2023-01-24 02:53:50.032383: step: 430/464, loss: 0.10222648084163666 2023-01-24 02:53:50.710785: step: 432/464, loss: 0.09944965690374374 2023-01-24 02:53:51.334038: step: 434/464, loss: 0.06927595287561417 2023-01-24 02:53:51.946227: step: 436/464, loss: 0.18523666262626648 2023-01-24 02:53:52.572334: step: 438/464, loss: 0.07039118558168411 2023-01-24 02:53:53.151174: step: 440/464, loss: 0.011012760922312737 2023-01-24 02:53:53.799948: step: 442/464, loss: 0.04270339012145996 2023-01-24 02:53:54.426543: step: 444/464, loss: 0.03231300413608551 2023-01-24 02:53:55.078274: step: 446/464, loss: 0.0860917940735817 2023-01-24 02:53:55.691996: step: 448/464, loss: 0.182876318693161 2023-01-24 02:53:56.311325: step: 450/464, loss: 0.05450039729475975 2023-01-24 02:53:56.911387: step: 452/464, loss: 0.0930841788649559 2023-01-24 02:53:57.564637: step: 454/464, loss: 0.18066838383674622 2023-01-24 02:53:58.224496: step: 456/464, loss: 0.09857988357543945 2023-01-24 02:53:58.851212: step: 458/464, loss: 0.05851111561059952 2023-01-24 02:53:59.510608: step: 460/464, loss: 0.02112884260714054 2023-01-24 02:54:00.113483: step: 462/464, loss: 0.3320540487766266 2023-01-24 02:54:00.682243: step: 464/464, loss: 0.05477989837527275 2023-01-24 02:54:01.248038: step: 466/464, loss: 0.04254557937383652 2023-01-24 02:54:01.865971: step: 468/464, loss: 0.08997486531734467 2023-01-24 02:54:02.510919: step: 470/464, loss: 0.08402448147535324 2023-01-24 02:54:03.114767: step: 472/464, loss: 0.1302717924118042 2023-01-24 02:54:03.680964: step: 474/464, loss: 0.05059501528739929 2023-01-24 02:54:04.342088: step: 476/464, loss: 0.0985957607626915 2023-01-24 02:54:04.966539: step: 478/464, loss: 0.051531895995140076 2023-01-24 02:54:05.619286: step: 480/464, loss: 0.229657843708992 2023-01-24 02:54:06.265392: step: 482/464, loss: 0.09313531965017319 2023-01-24 02:54:06.881373: step: 484/464, loss: 0.02037380449473858 2023-01-24 02:54:07.526170: step: 486/464, loss: 0.4801367223262787 2023-01-24 02:54:08.126256: step: 488/464, loss: 0.191447913646698 2023-01-24 02:54:08.760146: step: 490/464, loss: 0.10895294696092606 2023-01-24 02:54:09.349097: step: 492/464, loss: 0.192514106631279 2023-01-24 02:54:09.968659: step: 494/464, loss: 0.36968791484832764 2023-01-24 02:54:10.592836: step: 496/464, loss: 0.0772470086812973 2023-01-24 02:54:11.239377: step: 498/464, loss: 0.14960245788097382 2023-01-24 02:54:11.885601: step: 500/464, loss: 0.12601497769355774 2023-01-24 02:54:12.471983: step: 502/464, loss: 0.3592793643474579 2023-01-24 02:54:13.140919: step: 504/464, loss: 0.05251913145184517 2023-01-24 02:54:13.752371: step: 506/464, loss: 0.10560287535190582 2023-01-24 02:54:14.381492: step: 508/464, loss: 0.0798950046300888 2023-01-24 02:54:15.003813: step: 510/464, loss: 2.3549673557281494 2023-01-24 02:54:15.590777: step: 512/464, loss: 0.1024058386683464 2023-01-24 02:54:16.197905: step: 514/464, loss: 0.1027681976556778 2023-01-24 02:54:16.849206: step: 516/464, loss: 0.03300444036722183 2023-01-24 02:54:17.479013: step: 518/464, loss: 0.08592050522565842 2023-01-24 02:54:18.132734: step: 520/464, loss: 0.0952911302447319 2023-01-24 02:54:18.746194: step: 522/464, loss: 0.0671185627579689 2023-01-24 02:54:19.339639: step: 524/464, loss: 0.34994953870773315 2023-01-24 02:54:19.971446: step: 526/464, loss: 0.07091367244720459 2023-01-24 02:54:20.516773: step: 528/464, loss: 0.1497611254453659 2023-01-24 02:54:21.106961: step: 530/464, loss: 0.8266758322715759 2023-01-24 02:54:21.775064: step: 532/464, loss: 0.14483654499053955 2023-01-24 02:54:22.408106: step: 534/464, loss: 0.032947149127721786 2023-01-24 02:54:23.007013: step: 536/464, loss: 0.06762748211622238 2023-01-24 02:54:23.664836: step: 538/464, loss: 0.21056100726127625 2023-01-24 02:54:24.288616: step: 540/464, loss: 0.005598200485110283 2023-01-24 02:54:24.915051: step: 542/464, loss: 0.09686747938394547 2023-01-24 02:54:25.517607: step: 544/464, loss: 0.14103296399116516 2023-01-24 02:54:26.133814: step: 546/464, loss: 0.10201167315244675 2023-01-24 02:54:26.776907: step: 548/464, loss: 0.05190252885222435 2023-01-24 02:54:27.460261: step: 550/464, loss: 0.16771650314331055 2023-01-24 02:54:28.168778: step: 552/464, loss: 0.06907981634140015 2023-01-24 02:54:28.770695: step: 554/464, loss: 0.0784645602107048 2023-01-24 02:54:29.390082: step: 556/464, loss: 0.06753809750080109 2023-01-24 02:54:30.016748: step: 558/464, loss: 0.05707962065935135 2023-01-24 02:54:30.602173: step: 560/464, loss: 0.00938950851559639 2023-01-24 02:54:31.252617: step: 562/464, loss: 0.2545424699783325 2023-01-24 02:54:31.908493: step: 564/464, loss: 0.18138128519058228 2023-01-24 02:54:32.643979: step: 566/464, loss: 0.1237185001373291 2023-01-24 02:54:33.319823: step: 568/464, loss: 0.058364395052194595 2023-01-24 02:54:33.966711: step: 570/464, loss: 0.027004418894648552 2023-01-24 02:54:34.587716: step: 572/464, loss: 0.07947518676519394 2023-01-24 02:54:35.261040: step: 574/464, loss: 0.05337420478463173 2023-01-24 02:54:35.881414: step: 576/464, loss: 0.11597231030464172 2023-01-24 02:54:36.614036: step: 578/464, loss: 0.22074364125728607 2023-01-24 02:54:37.266289: step: 580/464, loss: 0.06235186755657196 2023-01-24 02:54:37.903482: step: 582/464, loss: 0.3610675036907196 2023-01-24 02:54:38.461849: step: 584/464, loss: 0.04820900782942772 2023-01-24 02:54:39.110992: step: 586/464, loss: 0.1334627866744995 2023-01-24 02:54:39.739060: step: 588/464, loss: 0.1078186184167862 2023-01-24 02:54:40.407567: step: 590/464, loss: 0.05610848218202591 2023-01-24 02:54:41.042023: step: 592/464, loss: 0.025929966941475868 2023-01-24 02:54:41.669629: step: 594/464, loss: 0.025815889239311218 2023-01-24 02:54:42.258615: step: 596/464, loss: 0.2066698968410492 2023-01-24 02:54:42.890194: step: 598/464, loss: 0.026584582403302193 2023-01-24 02:54:43.473890: step: 600/464, loss: 0.08874484896659851 2023-01-24 02:54:44.137003: step: 602/464, loss: 0.1297636330127716 2023-01-24 02:54:44.731863: step: 604/464, loss: 0.034968677908182144 2023-01-24 02:54:45.359563: step: 606/464, loss: 0.057455047965049744 2023-01-24 02:54:46.012756: step: 608/464, loss: 0.6402359008789062 2023-01-24 02:54:46.587181: step: 610/464, loss: 0.27028095722198486 2023-01-24 02:54:47.162833: step: 612/464, loss: 0.05439862608909607 2023-01-24 02:54:47.954100: step: 614/464, loss: 0.0543132908642292 2023-01-24 02:54:48.621060: step: 616/464, loss: 0.11633554846048355 2023-01-24 02:54:49.296599: step: 618/464, loss: 0.153121218085289 2023-01-24 02:54:49.973191: step: 620/464, loss: 0.03367575258016586 2023-01-24 02:54:50.639297: step: 622/464, loss: 0.024352246895432472 2023-01-24 02:54:51.277900: step: 624/464, loss: 0.0796428844332695 2023-01-24 02:54:51.884205: step: 626/464, loss: 0.2559564709663391 2023-01-24 02:54:52.473354: step: 628/464, loss: 0.08324375003576279 2023-01-24 02:54:53.057411: step: 630/464, loss: 0.027401749044656754 2023-01-24 02:54:53.693991: step: 632/464, loss: 0.09654907882213593 2023-01-24 02:54:54.271543: step: 634/464, loss: 0.48647624254226685 2023-01-24 02:54:54.925428: step: 636/464, loss: 0.2040783166885376 2023-01-24 02:54:55.583839: step: 638/464, loss: 0.1974896490573883 2023-01-24 02:54:56.152832: step: 640/464, loss: 0.31124648451805115 2023-01-24 02:54:56.803706: step: 642/464, loss: 0.1740168184041977 2023-01-24 02:54:57.493819: step: 644/464, loss: 0.11383002996444702 2023-01-24 02:54:58.188762: step: 646/464, loss: 0.08932628482580185 2023-01-24 02:54:58.879358: step: 648/464, loss: 0.15132799744606018 2023-01-24 02:54:59.471843: step: 650/464, loss: 0.060079481452703476 2023-01-24 02:55:00.143249: step: 652/464, loss: 0.1314394623041153 2023-01-24 02:55:00.749609: step: 654/464, loss: 0.09137765318155289 2023-01-24 02:55:01.387256: step: 656/464, loss: 0.45513778924942017 2023-01-24 02:55:02.002583: step: 658/464, loss: 0.09241456538438797 2023-01-24 02:55:02.561651: step: 660/464, loss: 0.020481666550040245 2023-01-24 02:55:03.158888: step: 662/464, loss: 0.129355788230896 2023-01-24 02:55:03.789448: step: 664/464, loss: 0.13476087152957916 2023-01-24 02:55:04.403604: step: 666/464, loss: 0.14710845053195953 2023-01-24 02:55:05.063272: step: 668/464, loss: 0.24165816605091095 2023-01-24 02:55:05.658543: step: 670/464, loss: 0.07523495703935623 2023-01-24 02:55:06.306861: step: 672/464, loss: 0.1060524731874466 2023-01-24 02:55:06.930745: step: 674/464, loss: 0.02545899897813797 2023-01-24 02:55:07.622977: step: 676/464, loss: 0.10189370065927505 2023-01-24 02:55:08.246924: step: 678/464, loss: 0.10449320822954178 2023-01-24 02:55:08.933284: step: 680/464, loss: 0.09408724308013916 2023-01-24 02:55:09.499812: step: 682/464, loss: 0.1366397887468338 2023-01-24 02:55:10.142639: step: 684/464, loss: 0.4168032109737396 2023-01-24 02:55:10.819143: step: 686/464, loss: 0.3207146227359772 2023-01-24 02:55:11.445736: step: 688/464, loss: 0.08765596151351929 2023-01-24 02:55:12.079131: step: 690/464, loss: 0.05220439285039902 2023-01-24 02:55:12.756877: step: 692/464, loss: 0.14175178110599518 2023-01-24 02:55:13.416800: step: 694/464, loss: 0.0828574076294899 2023-01-24 02:55:14.050458: step: 696/464, loss: 0.17829617857933044 2023-01-24 02:55:14.688741: step: 698/464, loss: 0.07231821864843369 2023-01-24 02:55:15.261260: step: 700/464, loss: 0.07823842018842697 2023-01-24 02:55:15.892145: step: 702/464, loss: 0.011867905966937542 2023-01-24 02:55:16.507906: step: 704/464, loss: 0.07791872322559357 2023-01-24 02:55:17.161632: step: 706/464, loss: 0.01650223508477211 2023-01-24 02:55:17.776681: step: 708/464, loss: 0.07700739800930023 2023-01-24 02:55:18.373825: step: 710/464, loss: 0.05118641257286072 2023-01-24 02:55:18.989597: step: 712/464, loss: 0.06447432190179825 2023-01-24 02:55:19.690287: step: 714/464, loss: 0.2940301299095154 2023-01-24 02:55:20.344764: step: 716/464, loss: 0.10145014524459839 2023-01-24 02:55:20.969948: step: 718/464, loss: 0.0858948826789856 2023-01-24 02:55:21.606038: step: 720/464, loss: 0.0860401913523674 2023-01-24 02:55:22.227302: step: 722/464, loss: 0.06797818094491959 2023-01-24 02:55:22.779844: step: 724/464, loss: 0.15240587294101715 2023-01-24 02:55:23.409654: step: 726/464, loss: 0.026602206751704216 2023-01-24 02:55:24.108628: step: 728/464, loss: 0.20981769263744354 2023-01-24 02:55:24.787029: step: 730/464, loss: 0.0704990103840828 2023-01-24 02:55:25.426062: step: 732/464, loss: 0.0580148808658123 2023-01-24 02:55:26.039440: step: 734/464, loss: 0.07398834824562073 2023-01-24 02:55:26.652829: step: 736/464, loss: 0.21365252137184143 2023-01-24 02:55:27.242108: step: 738/464, loss: 0.08065182715654373 2023-01-24 02:55:27.862783: step: 740/464, loss: 0.09811010211706161 2023-01-24 02:55:28.500857: step: 742/464, loss: 0.03974481299519539 2023-01-24 02:55:29.091992: step: 744/464, loss: 0.10361728072166443 2023-01-24 02:55:29.719521: step: 746/464, loss: 0.08695392310619354 2023-01-24 02:55:30.335175: step: 748/464, loss: 0.04826812446117401 2023-01-24 02:55:30.907509: step: 750/464, loss: 0.048566415905952454 2023-01-24 02:55:31.575373: step: 752/464, loss: 0.05902061611413956 2023-01-24 02:55:32.206133: step: 754/464, loss: 0.200590580701828 2023-01-24 02:55:32.840197: step: 756/464, loss: 0.06276501715183258 2023-01-24 02:55:33.471321: step: 758/464, loss: 0.17710302770137787 2023-01-24 02:55:34.135113: step: 760/464, loss: 0.03277694061398506 2023-01-24 02:55:34.708317: step: 762/464, loss: 0.06173248216509819 2023-01-24 02:55:35.333954: step: 764/464, loss: 0.04483325779438019 2023-01-24 02:55:35.886423: step: 766/464, loss: 0.20668122172355652 2023-01-24 02:55:36.499458: step: 768/464, loss: 0.06592900305986404 2023-01-24 02:55:37.211172: step: 770/464, loss: 0.26677677035331726 2023-01-24 02:55:37.862355: step: 772/464, loss: 0.12577171623706818 2023-01-24 02:55:38.473789: step: 774/464, loss: 0.037337757647037506 2023-01-24 02:55:39.088441: step: 776/464, loss: 0.8632736802101135 2023-01-24 02:55:39.665141: step: 778/464, loss: 0.09053826332092285 2023-01-24 02:55:40.274890: step: 780/464, loss: 0.2430754154920578 2023-01-24 02:55:40.960209: step: 782/464, loss: 0.057098012417554855 2023-01-24 02:55:41.628717: step: 784/464, loss: 0.248166024684906 2023-01-24 02:55:42.193357: step: 786/464, loss: 0.05004265159368515 2023-01-24 02:55:42.927553: step: 788/464, loss: 0.11410597711801529 2023-01-24 02:55:43.633880: step: 790/464, loss: 0.0983242616057396 2023-01-24 02:55:44.357887: step: 792/464, loss: 0.05851510167121887 2023-01-24 02:55:44.981764: step: 794/464, loss: 0.05701727420091629 2023-01-24 02:55:45.552458: step: 796/464, loss: 0.07675885409116745 2023-01-24 02:55:46.088442: step: 798/464, loss: 1.612334966659546 2023-01-24 02:55:46.694695: step: 800/464, loss: 0.12681429088115692 2023-01-24 02:55:47.348035: step: 802/464, loss: 0.7585135102272034 2023-01-24 02:55:47.990738: step: 804/464, loss: 0.03295173868536949 2023-01-24 02:55:48.614725: step: 806/464, loss: 0.0535990409553051 2023-01-24 02:55:49.268944: step: 808/464, loss: 0.0772099569439888 2023-01-24 02:55:49.913734: step: 810/464, loss: 0.12030258774757385 2023-01-24 02:55:50.502765: step: 812/464, loss: 0.14565113186836243 2023-01-24 02:55:51.098478: step: 814/464, loss: 0.04593478515744209 2023-01-24 02:55:51.711806: step: 816/464, loss: 0.04296870529651642 2023-01-24 02:55:52.429202: step: 818/464, loss: 0.07276370376348495 2023-01-24 02:55:53.069284: step: 820/464, loss: 0.07223569601774216 2023-01-24 02:55:53.745627: step: 822/464, loss: 0.07028353214263916 2023-01-24 02:55:54.389519: step: 824/464, loss: 0.09849094599485397 2023-01-24 02:55:55.047456: step: 826/464, loss: 0.10646132379770279 2023-01-24 02:55:55.646495: step: 828/464, loss: 0.06366239488124847 2023-01-24 02:55:56.274929: step: 830/464, loss: 0.054628703743219376 2023-01-24 02:55:56.910483: step: 832/464, loss: 0.08870861679315567 2023-01-24 02:55:57.487761: step: 834/464, loss: 0.09653643518686295 2023-01-24 02:55:58.067331: step: 836/464, loss: 0.13117004930973053 2023-01-24 02:55:58.671792: step: 838/464, loss: 0.08204658329486847 2023-01-24 02:55:59.322510: step: 840/464, loss: 0.11241410672664642 2023-01-24 02:55:59.958721: step: 842/464, loss: 0.0784202367067337 2023-01-24 02:56:00.580676: step: 844/464, loss: 0.10148025304079056 2023-01-24 02:56:01.267653: step: 846/464, loss: 0.23058192431926727 2023-01-24 02:56:01.844184: step: 848/464, loss: 0.009667718783020973 2023-01-24 02:56:02.443231: step: 850/464, loss: 0.06157953292131424 2023-01-24 02:56:03.007367: step: 852/464, loss: 0.06967552751302719 2023-01-24 02:56:03.651311: step: 854/464, loss: 0.17770619690418243 2023-01-24 02:56:04.304961: step: 856/464, loss: 0.2894016206264496 2023-01-24 02:56:04.957954: step: 858/464, loss: 0.09124509990215302 2023-01-24 02:56:05.669377: step: 860/464, loss: 0.0562569834291935 2023-01-24 02:56:06.397636: step: 862/464, loss: 0.12094349414110184 2023-01-24 02:56:06.978044: step: 864/464, loss: 0.18446677923202515 2023-01-24 02:56:07.603660: step: 866/464, loss: 0.21191228926181793 2023-01-24 02:56:08.260096: step: 868/464, loss: 0.1075199767947197 2023-01-24 02:56:08.919549: step: 870/464, loss: 0.08860337734222412 2023-01-24 02:56:09.641302: step: 872/464, loss: 0.06619521975517273 2023-01-24 02:56:10.302274: step: 874/464, loss: 0.17170093953609467 2023-01-24 02:56:10.915842: step: 876/464, loss: 0.1552496701478958 2023-01-24 02:56:11.588446: step: 878/464, loss: 0.02386847883462906 2023-01-24 02:56:12.240127: step: 880/464, loss: 0.1008271872997284 2023-01-24 02:56:12.858170: step: 882/464, loss: 0.016682665795087814 2023-01-24 02:56:13.431312: step: 884/464, loss: 0.058642178773880005 2023-01-24 02:56:14.050087: step: 886/464, loss: 0.07499005645513535 2023-01-24 02:56:14.662733: step: 888/464, loss: 0.23680682480335236 2023-01-24 02:56:15.253359: step: 890/464, loss: 0.13080447912216187 2023-01-24 02:56:15.893416: step: 892/464, loss: 0.03092711791396141 2023-01-24 02:56:16.560795: step: 894/464, loss: 0.13128332793712616 2023-01-24 02:56:17.118811: step: 896/464, loss: 0.06329464912414551 2023-01-24 02:56:17.754232: step: 898/464, loss: 0.050127509981393814 2023-01-24 02:56:18.347118: step: 900/464, loss: 0.06793268769979477 2023-01-24 02:56:18.956374: step: 902/464, loss: 0.21452850103378296 2023-01-24 02:56:19.639064: step: 904/464, loss: 0.1359676569700241 2023-01-24 02:56:20.266490: step: 906/464, loss: 0.02065298706293106 2023-01-24 02:56:20.916383: step: 908/464, loss: 0.10467097908258438 2023-01-24 02:56:21.580503: step: 910/464, loss: 0.031337834894657135 2023-01-24 02:56:22.216833: step: 912/464, loss: 0.03133145719766617 2023-01-24 02:56:22.842666: step: 914/464, loss: 0.066623255610466 2023-01-24 02:56:23.427079: step: 916/464, loss: 0.12824635207653046 2023-01-24 02:56:24.039513: step: 918/464, loss: 0.17738160490989685 2023-01-24 02:56:24.677363: step: 920/464, loss: 0.041672658175230026 2023-01-24 02:56:25.370343: step: 922/464, loss: 0.04812244698405266 2023-01-24 02:56:26.043723: step: 924/464, loss: 0.20663529634475708 2023-01-24 02:56:26.719488: step: 926/464, loss: 0.08598199486732483 2023-01-24 02:56:27.399674: step: 928/464, loss: 0.12492011487483978 2023-01-24 02:56:27.876427: step: 930/464, loss: 0.010233801789581776 ================================================== Loss: 0.141 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32474092970521545, 'r': 0.32350851630974975, 'f1': 0.3241235515118595}, 'combined': 0.23882788006137015, 'epoch': 17} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3159871988945335, 'r': 0.2933337048561387, 'f1': 0.3042393430729065}, 'combined': 0.19862257630666436, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3243305542675771, 'r': 0.3366391142018305, 'f1': 0.3303702293936028}, 'combined': 0.24343069534265466, 'epoch': 17} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3274705235353624, 'r': 0.297673113765141, 'f1': 0.3118616733524058}, 'combined': 0.20359881265493848, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33581280055374757, 'r': 0.33071507303111003, 'f1': 0.3332444426145219}, 'combined': 0.24554853666333193, 'epoch': 17} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33257073525252095, 'r': 0.2906937217142899, 'f1': 0.31022537442388176}, 'combined': 0.20253055532336323, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2764227642276422, 'r': 0.32380952380952377, 'f1': 0.29824561403508765}, 'combined': 0.19883040935672508, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29285714285714287, 'r': 0.44565217391304346, 'f1': 0.35344827586206895}, 'combined': 0.17672413793103448, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:59:04.733140: step: 2/464, loss: 0.041110098361968994 2023-01-24 02:59:05.369445: step: 4/464, loss: 0.0779784619808197 2023-01-24 02:59:05.978947: step: 6/464, loss: 0.04192517325282097 2023-01-24 02:59:06.597395: step: 8/464, loss: 0.09400144219398499 2023-01-24 02:59:07.238072: step: 10/464, loss: 0.025001734495162964 2023-01-24 02:59:07.852535: step: 12/464, loss: 0.02416273020207882 2023-01-24 02:59:08.552785: step: 14/464, loss: 0.10990064591169357 2023-01-24 02:59:09.240795: step: 16/464, loss: 0.033001650124788284 2023-01-24 02:59:09.835434: step: 18/464, loss: 0.1382390856742859 2023-01-24 02:59:10.402871: step: 20/464, loss: 0.038926269859075546 2023-01-24 02:59:11.063449: step: 22/464, loss: 0.06506065279245377 2023-01-24 02:59:11.682884: step: 24/464, loss: 0.05326371639966965 2023-01-24 02:59:12.343965: step: 26/464, loss: 0.030164137482643127 2023-01-24 02:59:12.956163: step: 28/464, loss: 0.06463479995727539 2023-01-24 02:59:13.580095: step: 30/464, loss: 0.224723681807518 2023-01-24 02:59:14.201859: step: 32/464, loss: 0.029780397191643715 2023-01-24 02:59:14.867071: step: 34/464, loss: 0.11149509251117706 2023-01-24 02:59:15.528947: step: 36/464, loss: 0.15237043797969818 2023-01-24 02:59:16.149751: step: 38/464, loss: 0.0657920241355896 2023-01-24 02:59:16.800150: step: 40/464, loss: 0.026680337265133858 2023-01-24 02:59:17.432790: step: 42/464, loss: 0.07511678338050842 2023-01-24 02:59:18.029326: step: 44/464, loss: 0.027576958760619164 2023-01-24 02:59:18.653780: step: 46/464, loss: 0.1626603901386261 2023-01-24 02:59:19.274713: step: 48/464, loss: 0.06560619175434113 2023-01-24 02:59:19.774755: step: 50/464, loss: 0.06561832875013351 2023-01-24 02:59:20.503496: step: 52/464, loss: 0.09738533198833466 2023-01-24 02:59:21.107476: step: 54/464, loss: 0.017096685245633125 2023-01-24 02:59:21.782977: step: 56/464, loss: 0.024371450766921043 2023-01-24 02:59:22.389549: step: 58/464, loss: 0.2863137722015381 2023-01-24 02:59:23.001736: step: 60/464, loss: 0.04328886792063713 2023-01-24 02:59:23.598416: step: 62/464, loss: 0.15461523830890656 2023-01-24 02:59:24.215698: step: 64/464, loss: 0.0762556940317154 2023-01-24 02:59:24.816683: step: 66/464, loss: 0.008332204073667526 2023-01-24 02:59:25.387768: step: 68/464, loss: 0.1283010095357895 2023-01-24 02:59:26.086889: step: 70/464, loss: 0.8763940334320068 2023-01-24 02:59:26.686938: step: 72/464, loss: 0.03446583077311516 2023-01-24 02:59:27.264045: step: 74/464, loss: 0.09413344413042068 2023-01-24 02:59:27.854171: step: 76/464, loss: 0.025667276233434677 2023-01-24 02:59:28.449695: step: 78/464, loss: 0.006051539443433285 2023-01-24 02:59:29.083743: step: 80/464, loss: 0.10737523436546326 2023-01-24 02:59:29.671438: step: 82/464, loss: 0.04778726026415825 2023-01-24 02:59:30.433562: step: 84/464, loss: 0.022513197734951973 2023-01-24 02:59:31.010236: step: 86/464, loss: 0.04210365563631058 2023-01-24 02:59:31.630787: step: 88/464, loss: 0.08981982618570328 2023-01-24 02:59:32.278641: step: 90/464, loss: 0.07886163890361786 2023-01-24 02:59:32.896126: step: 92/464, loss: 0.0617126002907753 2023-01-24 02:59:33.536354: step: 94/464, loss: 0.05298151820898056 2023-01-24 02:59:34.202820: step: 96/464, loss: 0.15078003704547882 2023-01-24 02:59:34.957473: step: 98/464, loss: 0.02470622956752777 2023-01-24 02:59:35.648294: step: 100/464, loss: 0.048409465700387955 2023-01-24 02:59:36.250806: step: 102/464, loss: 0.010371328331530094 2023-01-24 02:59:36.917835: step: 104/464, loss: 0.04539891332387924 2023-01-24 02:59:37.543381: step: 106/464, loss: 0.2269112467765808 2023-01-24 02:59:38.119487: step: 108/464, loss: 0.031764477491378784 2023-01-24 02:59:38.746982: step: 110/464, loss: 0.09004306048154831 2023-01-24 02:59:39.379169: step: 112/464, loss: 0.007801396306604147 2023-01-24 02:59:40.021473: step: 114/464, loss: 0.09724310785531998 2023-01-24 02:59:40.570291: step: 116/464, loss: 0.06961280852556229 2023-01-24 02:59:41.300423: step: 118/464, loss: 0.3340175151824951 2023-01-24 02:59:41.906914: step: 120/464, loss: 0.7357770800590515 2023-01-24 02:59:42.462651: step: 122/464, loss: 0.01734624058008194 2023-01-24 02:59:43.109626: step: 124/464, loss: 0.037545252591371536 2023-01-24 02:59:43.721696: step: 126/464, loss: 0.029403043910861015 2023-01-24 02:59:44.367357: step: 128/464, loss: 0.02145428955554962 2023-01-24 02:59:44.996300: step: 130/464, loss: 0.12339643388986588 2023-01-24 02:59:45.602024: step: 132/464, loss: 0.10763213783502579 2023-01-24 02:59:46.206584: step: 134/464, loss: 0.09028073400259018 2023-01-24 02:59:46.823495: step: 136/464, loss: 0.11030615121126175 2023-01-24 02:59:47.443554: step: 138/464, loss: 0.06513524800539017 2023-01-24 02:59:48.085088: step: 140/464, loss: 0.020626483485102654 2023-01-24 02:59:48.748618: step: 142/464, loss: 0.038871075958013535 2023-01-24 02:59:49.394073: step: 144/464, loss: 0.1584267020225525 2023-01-24 02:59:49.964111: step: 146/464, loss: 0.43131452798843384 2023-01-24 02:59:50.549076: step: 148/464, loss: 0.041273582726716995 2023-01-24 02:59:51.171102: step: 150/464, loss: 0.09523283690214157 2023-01-24 02:59:51.758690: step: 152/464, loss: 0.029719259589910507 2023-01-24 02:59:52.430283: step: 154/464, loss: 0.06470183283090591 2023-01-24 02:59:53.028589: step: 156/464, loss: 0.028908902779221535 2023-01-24 02:59:53.701693: step: 158/464, loss: 0.08944498002529144 2023-01-24 02:59:54.308234: step: 160/464, loss: 0.03279788792133331 2023-01-24 02:59:54.904801: step: 162/464, loss: 0.03688148781657219 2023-01-24 02:59:55.537835: step: 164/464, loss: 0.009075549431145191 2023-01-24 02:59:56.168313: step: 166/464, loss: 0.060015976428985596 2023-01-24 02:59:56.807388: step: 168/464, loss: 0.596712589263916 2023-01-24 02:59:57.451218: step: 170/464, loss: 0.06025798246264458 2023-01-24 02:59:58.152710: step: 172/464, loss: 0.052680373191833496 2023-01-24 02:59:58.712403: step: 174/464, loss: 0.07611407339572906 2023-01-24 02:59:59.435677: step: 176/464, loss: 0.044987499713897705 2023-01-24 03:00:00.072725: step: 178/464, loss: 0.07838653028011322 2023-01-24 03:00:00.686480: step: 180/464, loss: 0.03733246773481369 2023-01-24 03:00:01.389033: step: 182/464, loss: 0.34007447957992554 2023-01-24 03:00:02.122517: step: 184/464, loss: 0.39250442385673523 2023-01-24 03:00:02.764494: step: 186/464, loss: 0.0765819102525711 2023-01-24 03:00:03.478815: step: 188/464, loss: 0.32012081146240234 2023-01-24 03:00:04.108336: step: 190/464, loss: 0.12850935757160187 2023-01-24 03:00:04.675718: step: 192/464, loss: 0.1261843889951706 2023-01-24 03:00:05.297337: step: 194/464, loss: 0.07346116006374359 2023-01-24 03:00:05.897490: step: 196/464, loss: 0.040497634559869766 2023-01-24 03:00:06.477296: step: 198/464, loss: 0.06537256389856339 2023-01-24 03:00:07.106251: step: 200/464, loss: 0.12721972167491913 2023-01-24 03:00:07.680927: step: 202/464, loss: 0.1444709450006485 2023-01-24 03:00:08.259822: step: 204/464, loss: 0.03681337833404541 2023-01-24 03:00:08.826212: step: 206/464, loss: 0.11811290681362152 2023-01-24 03:00:09.419107: step: 208/464, loss: 0.4837630093097687 2023-01-24 03:00:10.054808: step: 210/464, loss: 0.040075208991765976 2023-01-24 03:00:10.670997: step: 212/464, loss: 0.04343654587864876 2023-01-24 03:00:11.260781: step: 214/464, loss: 0.07648956775665283 2023-01-24 03:00:11.910807: step: 216/464, loss: 0.09798888117074966 2023-01-24 03:00:12.464454: step: 218/464, loss: 0.006863585207611322 2023-01-24 03:00:13.098241: step: 220/464, loss: 0.14883661270141602 2023-01-24 03:00:13.758953: step: 222/464, loss: 0.04018692672252655 2023-01-24 03:00:14.342519: step: 224/464, loss: 0.15864543616771698 2023-01-24 03:00:14.936637: step: 226/464, loss: 0.340562641620636 2023-01-24 03:00:15.575583: step: 228/464, loss: 4.534863471984863 2023-01-24 03:00:16.211565: step: 230/464, loss: 0.0738111138343811 2023-01-24 03:00:16.840658: step: 232/464, loss: 0.1230124831199646 2023-01-24 03:00:17.538138: step: 234/464, loss: 0.08037590235471725 2023-01-24 03:00:18.103978: step: 236/464, loss: 0.199858620762825 2023-01-24 03:00:18.734074: step: 238/464, loss: 0.6242475509643555 2023-01-24 03:00:19.321979: step: 240/464, loss: 0.044954366981983185 2023-01-24 03:00:19.933120: step: 242/464, loss: 0.08434027433395386 2023-01-24 03:00:20.542404: step: 244/464, loss: 0.05194975063204765 2023-01-24 03:00:21.163681: step: 246/464, loss: 0.07475651800632477 2023-01-24 03:00:21.733808: step: 248/464, loss: 0.053270064294338226 2023-01-24 03:00:22.356465: step: 250/464, loss: 0.011551953852176666 2023-01-24 03:00:22.975402: step: 252/464, loss: 0.007733418606221676 2023-01-24 03:00:23.631786: step: 254/464, loss: 0.15320716798305511 2023-01-24 03:00:24.272742: step: 256/464, loss: 0.09333204478025436 2023-01-24 03:00:24.893536: step: 258/464, loss: 0.055523402988910675 2023-01-24 03:00:25.554584: step: 260/464, loss: 0.10055474936962128 2023-01-24 03:00:26.162373: step: 262/464, loss: 0.02099481225013733 2023-01-24 03:00:26.794344: step: 264/464, loss: 0.12725037336349487 2023-01-24 03:00:27.440894: step: 266/464, loss: 0.07128822058439255 2023-01-24 03:00:28.048313: step: 268/464, loss: 0.07808691263198853 2023-01-24 03:00:28.686822: step: 270/464, loss: 0.04100404679775238 2023-01-24 03:00:29.322862: step: 272/464, loss: 0.034963686019182205 2023-01-24 03:00:29.915936: step: 274/464, loss: 0.05234309285879135 2023-01-24 03:00:30.589542: step: 276/464, loss: 0.12192157655954361 2023-01-24 03:00:31.244127: step: 278/464, loss: 0.029132816940546036 2023-01-24 03:00:31.847855: step: 280/464, loss: 0.03790270909667015 2023-01-24 03:00:32.435657: step: 282/464, loss: 0.03763355314731598 2023-01-24 03:00:33.045619: step: 284/464, loss: 0.04848802089691162 2023-01-24 03:00:33.629929: step: 286/464, loss: 0.04248863086104393 2023-01-24 03:00:34.236633: step: 288/464, loss: 0.06175648421049118 2023-01-24 03:00:34.897991: step: 290/464, loss: 0.19913525879383087 2023-01-24 03:00:35.547103: step: 292/464, loss: 0.07799387723207474 2023-01-24 03:00:36.187684: step: 294/464, loss: 0.05060545355081558 2023-01-24 03:00:36.826657: step: 296/464, loss: 0.09269505739212036 2023-01-24 03:00:37.493292: step: 298/464, loss: 0.08034278452396393 2023-01-24 03:00:38.132264: step: 300/464, loss: 0.13465744256973267 2023-01-24 03:00:38.700382: step: 302/464, loss: 0.04607897996902466 2023-01-24 03:00:39.413217: step: 304/464, loss: 0.04409221559762955 2023-01-24 03:00:40.026083: step: 306/464, loss: 0.09037043154239655 2023-01-24 03:00:40.699926: step: 308/464, loss: 0.0722469910979271 2023-01-24 03:00:41.315996: step: 310/464, loss: 2.683577537536621 2023-01-24 03:00:41.968580: step: 312/464, loss: 0.06599780917167664 2023-01-24 03:00:42.598969: step: 314/464, loss: 0.0621807798743248 2023-01-24 03:00:43.241685: step: 316/464, loss: 0.12119466811418533 2023-01-24 03:00:43.793175: step: 318/464, loss: 0.060603171586990356 2023-01-24 03:00:44.404266: step: 320/464, loss: 0.019483543932437897 2023-01-24 03:00:45.003997: step: 322/464, loss: 0.08985820412635803 2023-01-24 03:00:45.628247: step: 324/464, loss: 0.07970519363880157 2023-01-24 03:00:46.297653: step: 326/464, loss: 0.06589441001415253 2023-01-24 03:00:46.961831: step: 328/464, loss: 0.13149969279766083 2023-01-24 03:00:47.558186: step: 330/464, loss: 0.04497695714235306 2023-01-24 03:00:48.180744: step: 332/464, loss: 0.07083417475223541 2023-01-24 03:00:48.913140: step: 334/464, loss: 0.15319781005382538 2023-01-24 03:00:49.526240: step: 336/464, loss: 0.16858309507369995 2023-01-24 03:00:50.124125: step: 338/464, loss: 0.5628475546836853 2023-01-24 03:00:50.750880: step: 340/464, loss: 0.041089314967393875 2023-01-24 03:00:51.307017: step: 342/464, loss: 0.03335518017411232 2023-01-24 03:00:51.910551: step: 344/464, loss: 0.13614562153816223 2023-01-24 03:00:52.520649: step: 346/464, loss: 0.017061160877346992 2023-01-24 03:00:53.170397: step: 348/464, loss: 0.0899868905544281 2023-01-24 03:00:53.789899: step: 350/464, loss: 0.05003766342997551 2023-01-24 03:00:54.356194: step: 352/464, loss: 0.03434675186872482 2023-01-24 03:00:55.044536: step: 354/464, loss: 0.31193801760673523 2023-01-24 03:00:55.648762: step: 356/464, loss: 0.19960804283618927 2023-01-24 03:00:56.277765: step: 358/464, loss: 0.13193681836128235 2023-01-24 03:00:56.905368: step: 360/464, loss: 0.16517403721809387 2023-01-24 03:00:57.598032: step: 362/464, loss: 4.577445983886719 2023-01-24 03:00:58.183377: step: 364/464, loss: 0.022282257676124573 2023-01-24 03:00:58.798445: step: 366/464, loss: 0.0883570984005928 2023-01-24 03:00:59.387479: step: 368/464, loss: 0.07743432372808456 2023-01-24 03:01:00.064553: step: 370/464, loss: 0.06543838977813721 2023-01-24 03:01:00.671745: step: 372/464, loss: 0.7924804091453552 2023-01-24 03:01:01.273739: step: 374/464, loss: 0.07172054052352905 2023-01-24 03:01:01.901431: step: 376/464, loss: 0.15854498744010925 2023-01-24 03:01:02.548148: step: 378/464, loss: 0.0758543461561203 2023-01-24 03:01:03.267479: step: 380/464, loss: 0.4814733564853668 2023-01-24 03:01:03.988261: step: 382/464, loss: 0.07087898254394531 2023-01-24 03:01:04.561965: step: 384/464, loss: 0.017085997387766838 2023-01-24 03:01:05.189456: step: 386/464, loss: 0.15765145421028137 2023-01-24 03:01:05.782263: step: 388/464, loss: 0.03160521388053894 2023-01-24 03:01:06.334258: step: 390/464, loss: 0.02035319060087204 2023-01-24 03:01:06.981174: step: 392/464, loss: 0.027835458517074585 2023-01-24 03:01:07.602099: step: 394/464, loss: 0.10642287135124207 2023-01-24 03:01:08.189356: step: 396/464, loss: 0.004822934977710247 2023-01-24 03:01:08.793958: step: 398/464, loss: 0.022031346336007118 2023-01-24 03:01:09.403416: step: 400/464, loss: 0.05492803454399109 2023-01-24 03:01:09.967016: step: 402/464, loss: 0.04970407485961914 2023-01-24 03:01:10.535781: step: 404/464, loss: 0.010082113556563854 2023-01-24 03:01:11.237816: step: 406/464, loss: 0.11934235692024231 2023-01-24 03:01:11.793185: step: 408/464, loss: 0.02112478017807007 2023-01-24 03:01:12.410977: step: 410/464, loss: 0.09695271402597427 2023-01-24 03:01:13.002315: step: 412/464, loss: 0.8858206272125244 2023-01-24 03:01:13.671794: step: 414/464, loss: 0.08377958089113235 2023-01-24 03:01:14.282016: step: 416/464, loss: 0.5520583391189575 2023-01-24 03:01:15.042818: step: 418/464, loss: 0.08656132221221924 2023-01-24 03:01:15.694062: step: 420/464, loss: 0.10898464173078537 2023-01-24 03:01:16.345077: step: 422/464, loss: 0.3741585612297058 2023-01-24 03:01:16.951553: step: 424/464, loss: 0.6237977147102356 2023-01-24 03:01:17.558553: step: 426/464, loss: 0.05429627373814583 2023-01-24 03:01:18.137864: step: 428/464, loss: 0.10554182529449463 2023-01-24 03:01:18.761632: step: 430/464, loss: 0.1171455830335617 2023-01-24 03:01:19.352125: step: 432/464, loss: 0.15304937958717346 2023-01-24 03:01:19.988779: step: 434/464, loss: 0.08756015449762344 2023-01-24 03:01:20.596063: step: 436/464, loss: 0.06931018829345703 2023-01-24 03:01:21.237143: step: 438/464, loss: 0.06887312233448029 2023-01-24 03:01:21.844230: step: 440/464, loss: 0.05952431634068489 2023-01-24 03:01:22.481297: step: 442/464, loss: 0.03734879940748215 2023-01-24 03:01:23.144797: step: 444/464, loss: 0.04014189913868904 2023-01-24 03:01:23.731711: step: 446/464, loss: 0.035013578832149506 2023-01-24 03:01:24.298334: step: 448/464, loss: 0.01310723926872015 2023-01-24 03:01:24.908200: step: 450/464, loss: 0.12435585260391235 2023-01-24 03:01:25.564465: step: 452/464, loss: 0.10649074614048004 2023-01-24 03:01:26.137733: step: 454/464, loss: 0.055591508746147156 2023-01-24 03:01:26.755465: step: 456/464, loss: 0.07723914831876755 2023-01-24 03:01:27.435637: step: 458/464, loss: 0.24242645502090454 2023-01-24 03:01:28.052262: step: 460/464, loss: 0.48419389128685 2023-01-24 03:01:28.700940: step: 462/464, loss: 0.11971094459295273 2023-01-24 03:01:29.405438: step: 464/464, loss: 0.02786792442202568 2023-01-24 03:01:30.036987: step: 466/464, loss: 0.04215722158551216 2023-01-24 03:01:30.645037: step: 468/464, loss: 0.29968586564064026 2023-01-24 03:01:31.218035: step: 470/464, loss: 0.15486252307891846 2023-01-24 03:01:31.836706: step: 472/464, loss: 0.02210089936852455 2023-01-24 03:01:32.410154: step: 474/464, loss: 0.018674930557608604 2023-01-24 03:01:32.981681: step: 476/464, loss: 0.0230863057076931 2023-01-24 03:01:33.659476: step: 478/464, loss: 0.08007515966892242 2023-01-24 03:01:34.255966: step: 480/464, loss: 0.17813025414943695 2023-01-24 03:01:34.874307: step: 482/464, loss: 0.022379839792847633 2023-01-24 03:01:35.576057: step: 484/464, loss: 0.3104344308376312 2023-01-24 03:01:36.246840: step: 486/464, loss: 0.0335686057806015 2023-01-24 03:01:36.856501: step: 488/464, loss: 0.03525681793689728 2023-01-24 03:01:37.456938: step: 490/464, loss: 0.0288990531116724 2023-01-24 03:01:38.123832: step: 492/464, loss: 0.0916205570101738 2023-01-24 03:01:38.815221: step: 494/464, loss: 0.04630056768655777 2023-01-24 03:01:39.421803: step: 496/464, loss: 0.03312882408499718 2023-01-24 03:01:40.031699: step: 498/464, loss: 0.012081924825906754 2023-01-24 03:01:40.619969: step: 500/464, loss: 0.009474774822592735 2023-01-24 03:01:41.244168: step: 502/464, loss: 0.08945140242576599 2023-01-24 03:01:41.950528: step: 504/464, loss: 0.018516186624765396 2023-01-24 03:01:42.579273: step: 506/464, loss: 0.03508473560214043 2023-01-24 03:01:43.184448: step: 508/464, loss: 0.11172937601804733 2023-01-24 03:01:43.814616: step: 510/464, loss: 0.02396565116941929 2023-01-24 03:01:44.430839: step: 512/464, loss: 0.08729846775531769 2023-01-24 03:01:45.072662: step: 514/464, loss: 0.03557702153921127 2023-01-24 03:01:45.697134: step: 516/464, loss: 0.39258062839508057 2023-01-24 03:01:46.404980: step: 518/464, loss: 0.020482761785387993 2023-01-24 03:01:47.031343: step: 520/464, loss: 0.01152450405061245 2023-01-24 03:01:47.680063: step: 522/464, loss: 0.02186405472457409 2023-01-24 03:01:48.344374: step: 524/464, loss: 0.059222932904958725 2023-01-24 03:01:48.961855: step: 526/464, loss: 0.04049962759017944 2023-01-24 03:01:49.587395: step: 528/464, loss: 0.055559832602739334 2023-01-24 03:01:50.199979: step: 530/464, loss: 0.03861182928085327 2023-01-24 03:01:50.892714: step: 532/464, loss: 0.006717274431139231 2023-01-24 03:01:51.530803: step: 534/464, loss: 0.05977385863661766 2023-01-24 03:01:52.177251: step: 536/464, loss: 0.2655171751976013 2023-01-24 03:01:52.811461: step: 538/464, loss: 0.3412909209728241 2023-01-24 03:01:53.418833: step: 540/464, loss: 0.06314253807067871 2023-01-24 03:01:54.015071: step: 542/464, loss: 0.09367623925209045 2023-01-24 03:01:54.646343: step: 544/464, loss: 0.11986998468637466 2023-01-24 03:01:55.264132: step: 546/464, loss: 0.05728675052523613 2023-01-24 03:01:55.855141: step: 548/464, loss: 0.09846282005310059 2023-01-24 03:01:56.505042: step: 550/464, loss: 0.2600788176059723 2023-01-24 03:01:57.154410: step: 552/464, loss: 0.025453370064496994 2023-01-24 03:01:57.806539: step: 554/464, loss: 0.18188495934009552 2023-01-24 03:01:58.437046: step: 556/464, loss: 0.10237760096788406 2023-01-24 03:01:59.058110: step: 558/464, loss: 0.034575216472148895 2023-01-24 03:01:59.678718: step: 560/464, loss: 0.04972601681947708 2023-01-24 03:02:00.360201: step: 562/464, loss: 0.17534971237182617 2023-01-24 03:02:00.983954: step: 564/464, loss: 0.077382892370224 2023-01-24 03:02:01.602359: step: 566/464, loss: 0.11572468280792236 2023-01-24 03:02:02.230225: step: 568/464, loss: 0.1357371062040329 2023-01-24 03:02:02.921004: step: 570/464, loss: 0.07837279886007309 2023-01-24 03:02:03.554958: step: 572/464, loss: 0.45323073863983154 2023-01-24 03:02:04.156233: step: 574/464, loss: 0.052668049931526184 2023-01-24 03:02:04.800202: step: 576/464, loss: 0.0244721919298172 2023-01-24 03:02:05.438327: step: 578/464, loss: 0.13950631022453308 2023-01-24 03:02:05.983587: step: 580/464, loss: 0.19411024451255798 2023-01-24 03:02:06.624261: step: 582/464, loss: 0.7193666696548462 2023-01-24 03:02:07.259558: step: 584/464, loss: 0.07989802211523056 2023-01-24 03:02:07.856263: step: 586/464, loss: 0.09557466953992844 2023-01-24 03:02:08.465924: step: 588/464, loss: 0.011171412654221058 2023-01-24 03:02:09.157852: step: 590/464, loss: 0.06220633536577225 2023-01-24 03:02:09.803932: step: 592/464, loss: 0.04791721701622009 2023-01-24 03:02:10.467363: step: 594/464, loss: 0.04407791048288345 2023-01-24 03:02:11.068825: step: 596/464, loss: 0.053509440273046494 2023-01-24 03:02:11.810861: step: 598/464, loss: 0.07637985050678253 2023-01-24 03:02:12.566153: step: 600/464, loss: 0.01582491211593151 2023-01-24 03:02:13.217528: step: 602/464, loss: 0.016740994527935982 2023-01-24 03:02:13.835234: step: 604/464, loss: 0.02371152862906456 2023-01-24 03:02:14.427719: step: 606/464, loss: 0.09300397336483002 2023-01-24 03:02:15.045402: step: 608/464, loss: 0.030090780928730965 2023-01-24 03:02:15.723440: step: 610/464, loss: 0.05113796889781952 2023-01-24 03:02:16.299028: step: 612/464, loss: 0.024184707552194595 2023-01-24 03:02:16.905692: step: 614/464, loss: 0.026209469884634018 2023-01-24 03:02:17.556296: step: 616/464, loss: 0.0543823279440403 2023-01-24 03:02:18.153178: step: 618/464, loss: 0.06335633248090744 2023-01-24 03:02:18.737725: step: 620/464, loss: 0.10870085656642914 2023-01-24 03:02:19.367190: step: 622/464, loss: 0.03715025633573532 2023-01-24 03:02:19.872763: step: 624/464, loss: 0.03070726804435253 2023-01-24 03:02:20.463710: step: 626/464, loss: 0.042747415602207184 2023-01-24 03:02:21.146609: step: 628/464, loss: 0.227604478597641 2023-01-24 03:02:21.730016: step: 630/464, loss: 0.10268836468458176 2023-01-24 03:02:22.325610: step: 632/464, loss: 0.06253713369369507 2023-01-24 03:02:22.932474: step: 634/464, loss: 0.03581200912594795 2023-01-24 03:02:23.604771: step: 636/464, loss: 0.38426434993743896 2023-01-24 03:02:24.203875: step: 638/464, loss: 0.09403909742832184 2023-01-24 03:02:24.898154: step: 640/464, loss: 0.033941175788640976 2023-01-24 03:02:25.436608: step: 642/464, loss: 0.052015673369169235 2023-01-24 03:02:26.078140: step: 644/464, loss: 0.05198928713798523 2023-01-24 03:02:26.732478: step: 646/464, loss: 0.1153169572353363 2023-01-24 03:02:27.338710: step: 648/464, loss: 0.05121876299381256 2023-01-24 03:02:27.957175: step: 650/464, loss: 0.06071249395608902 2023-01-24 03:02:28.643250: step: 652/464, loss: 0.14332036674022675 2023-01-24 03:02:29.235288: step: 654/464, loss: 0.12269081175327301 2023-01-24 03:02:29.859472: step: 656/464, loss: 0.038532841950654984 2023-01-24 03:02:30.466481: step: 658/464, loss: 0.07894743978977203 2023-01-24 03:02:31.129332: step: 660/464, loss: 0.04618338122963905 2023-01-24 03:02:31.795023: step: 662/464, loss: 0.13720129430294037 2023-01-24 03:02:32.502661: step: 664/464, loss: 0.21078215539455414 2023-01-24 03:02:33.134798: step: 666/464, loss: 0.05811762437224388 2023-01-24 03:02:33.736063: step: 668/464, loss: 0.0910278782248497 2023-01-24 03:02:34.304974: step: 670/464, loss: 0.068028025329113 2023-01-24 03:02:34.985207: step: 672/464, loss: 0.20305296778678894 2023-01-24 03:02:35.645928: step: 674/464, loss: 0.1361401528120041 2023-01-24 03:02:36.340865: step: 676/464, loss: 0.0884474515914917 2023-01-24 03:02:37.010164: step: 678/464, loss: 0.055569324642419815 2023-01-24 03:02:37.651948: step: 680/464, loss: 0.11080397665500641 2023-01-24 03:02:38.309238: step: 682/464, loss: 0.024835048243403435 2023-01-24 03:02:38.972996: step: 684/464, loss: 0.11517369002103806 2023-01-24 03:02:39.603107: step: 686/464, loss: 0.03534563630819321 2023-01-24 03:02:40.186607: step: 688/464, loss: 0.045791640877723694 2023-01-24 03:02:40.816650: step: 690/464, loss: 0.06101490184664726 2023-01-24 03:02:41.558663: step: 692/464, loss: 0.06657504290342331 2023-01-24 03:02:42.250806: step: 694/464, loss: 0.02768394909799099 2023-01-24 03:02:42.918359: step: 696/464, loss: 0.12973853945732117 2023-01-24 03:02:43.542150: step: 698/464, loss: 0.16631053388118744 2023-01-24 03:02:44.213013: step: 700/464, loss: 0.049925461411476135 2023-01-24 03:02:44.798752: step: 702/464, loss: 0.07540343701839447 2023-01-24 03:02:45.335533: step: 704/464, loss: 0.06347178667783737 2023-01-24 03:02:45.890689: step: 706/464, loss: 0.014731958508491516 2023-01-24 03:02:46.545508: step: 708/464, loss: 0.02005917578935623 2023-01-24 03:02:47.169857: step: 710/464, loss: 0.04048619046807289 2023-01-24 03:02:47.777341: step: 712/464, loss: 0.00946044921875 2023-01-24 03:02:48.375116: step: 714/464, loss: 0.02131369151175022 2023-01-24 03:02:48.911409: step: 716/464, loss: 0.12847092747688293 2023-01-24 03:02:49.574439: step: 718/464, loss: 0.024426549673080444 2023-01-24 03:02:50.245779: step: 720/464, loss: 0.12111278623342514 2023-01-24 03:02:50.841331: step: 722/464, loss: 0.03929593041539192 2023-01-24 03:02:51.473639: step: 724/464, loss: 0.0471901036798954 2023-01-24 03:02:52.141194: step: 726/464, loss: 0.06377588212490082 2023-01-24 03:02:52.793841: step: 728/464, loss: 0.600334644317627 2023-01-24 03:02:53.408903: step: 730/464, loss: 0.01961999200284481 2023-01-24 03:02:53.939588: step: 732/464, loss: 0.06293871998786926 2023-01-24 03:02:54.551707: step: 734/464, loss: 0.008754570037126541 2023-01-24 03:02:55.136469: step: 736/464, loss: 0.0206870436668396 2023-01-24 03:02:55.765178: step: 738/464, loss: 0.04069453477859497 2023-01-24 03:02:56.332520: step: 740/464, loss: 0.014518450945615768 2023-01-24 03:02:56.959618: step: 742/464, loss: 0.05866739898920059 2023-01-24 03:02:57.586881: step: 744/464, loss: 0.09685290604829788 2023-01-24 03:02:58.234019: step: 746/464, loss: 0.0906219631433487 2023-01-24 03:02:58.893469: step: 748/464, loss: 0.013325815089046955 2023-01-24 03:02:59.413897: step: 750/464, loss: 0.03870666027069092 2023-01-24 03:03:00.037479: step: 752/464, loss: 0.14578811824321747 2023-01-24 03:03:00.660027: step: 754/464, loss: 0.1603614091873169 2023-01-24 03:03:01.378028: step: 756/464, loss: 0.07946944236755371 2023-01-24 03:03:02.007540: step: 758/464, loss: 0.09339815378189087 2023-01-24 03:03:02.643770: step: 760/464, loss: 0.10323522984981537 2023-01-24 03:03:03.301978: step: 762/464, loss: 0.057176802307367325 2023-01-24 03:03:03.980341: step: 764/464, loss: 0.38984283804893494 2023-01-24 03:03:04.604011: step: 766/464, loss: 0.02038021758198738 2023-01-24 03:03:05.323425: step: 768/464, loss: 0.15187768638134003 2023-01-24 03:03:05.963034: step: 770/464, loss: 0.11661543697118759 2023-01-24 03:03:06.577095: step: 772/464, loss: 0.11710263788700104 2023-01-24 03:03:07.232928: step: 774/464, loss: 0.12077804654836655 2023-01-24 03:03:07.943160: step: 776/464, loss: 0.09523236751556396 2023-01-24 03:03:08.623364: step: 778/464, loss: 0.12539014220237732 2023-01-24 03:03:09.264932: step: 780/464, loss: 0.07297204434871674 2023-01-24 03:03:09.954565: step: 782/464, loss: 0.22416375577449799 2023-01-24 03:03:10.552145: step: 784/464, loss: 0.09013670682907104 2023-01-24 03:03:11.147492: step: 786/464, loss: 0.1530447006225586 2023-01-24 03:03:11.762120: step: 788/464, loss: 0.056844066828489304 2023-01-24 03:03:12.358983: step: 790/464, loss: 0.024438276886940002 2023-01-24 03:03:12.982395: step: 792/464, loss: 0.10997240245342255 2023-01-24 03:03:13.557498: step: 794/464, loss: 0.0637752115726471 2023-01-24 03:03:14.175668: step: 796/464, loss: 0.0915752574801445 2023-01-24 03:03:14.750756: step: 798/464, loss: 0.030072197318077087 2023-01-24 03:03:15.334023: step: 800/464, loss: 0.06692662090063095 2023-01-24 03:03:15.958988: step: 802/464, loss: 0.06783221662044525 2023-01-24 03:03:16.608742: step: 804/464, loss: 0.216878280043602 2023-01-24 03:03:17.198509: step: 806/464, loss: 0.11674143373966217 2023-01-24 03:03:17.840757: step: 808/464, loss: 0.09718753397464752 2023-01-24 03:03:18.508794: step: 810/464, loss: 0.1712542623281479 2023-01-24 03:03:19.161671: step: 812/464, loss: 0.3075892925262451 2023-01-24 03:03:19.799424: step: 814/464, loss: 0.24416068196296692 2023-01-24 03:03:20.414056: step: 816/464, loss: 0.046887144446372986 2023-01-24 03:03:21.034890: step: 818/464, loss: 0.20484276115894318 2023-01-24 03:03:21.652186: step: 820/464, loss: 0.08469177037477493 2023-01-24 03:03:22.247248: step: 822/464, loss: 0.05184699594974518 2023-01-24 03:03:22.878977: step: 824/464, loss: 0.02204933390021324 2023-01-24 03:03:23.471633: step: 826/464, loss: 0.04278869926929474 2023-01-24 03:03:24.109499: step: 828/464, loss: 0.09152209758758545 2023-01-24 03:03:24.701307: step: 830/464, loss: 0.8785819411277771 2023-01-24 03:03:25.342928: step: 832/464, loss: 0.46504613757133484 2023-01-24 03:03:25.958650: step: 834/464, loss: 0.16306844353675842 2023-01-24 03:03:26.604962: step: 836/464, loss: 0.027663005515933037 2023-01-24 03:03:27.261653: step: 838/464, loss: 0.11722411960363388 2023-01-24 03:03:27.906776: step: 840/464, loss: 0.11919905245304108 2023-01-24 03:03:28.546165: step: 842/464, loss: 0.05290542542934418 2023-01-24 03:03:29.132627: step: 844/464, loss: 0.034706782549619675 2023-01-24 03:03:29.725750: step: 846/464, loss: 0.06160600855946541 2023-01-24 03:03:30.360431: step: 848/464, loss: 0.029707245528697968 2023-01-24 03:03:31.000099: step: 850/464, loss: 0.3252856135368347 2023-01-24 03:03:31.616400: step: 852/464, loss: 0.006985540967434645 2023-01-24 03:03:32.233819: step: 854/464, loss: 0.07038073986768723 2023-01-24 03:03:32.842071: step: 856/464, loss: 0.14189094305038452 2023-01-24 03:03:33.567789: step: 858/464, loss: 0.21152852475643158 2023-01-24 03:03:34.208466: step: 860/464, loss: 0.03939679637551308 2023-01-24 03:03:34.805087: step: 862/464, loss: 0.16607047617435455 2023-01-24 03:03:35.385581: step: 864/464, loss: 0.05294930934906006 2023-01-24 03:03:35.991163: step: 866/464, loss: 0.04461954906582832 2023-01-24 03:03:36.627237: step: 868/464, loss: 0.024309026077389717 2023-01-24 03:03:37.184677: step: 870/464, loss: 0.04591096192598343 2023-01-24 03:03:37.836459: step: 872/464, loss: 0.07206946611404419 2023-01-24 03:03:38.526181: step: 874/464, loss: 0.045928601175546646 2023-01-24 03:03:39.184908: step: 876/464, loss: 0.05889524519443512 2023-01-24 03:03:39.764853: step: 878/464, loss: 0.11609186232089996 2023-01-24 03:03:40.429888: step: 880/464, loss: 0.13958750665187836 2023-01-24 03:03:41.083937: step: 882/464, loss: 0.0900338888168335 2023-01-24 03:03:41.676407: step: 884/464, loss: 0.058699481189250946 2023-01-24 03:03:42.334330: step: 886/464, loss: 0.07599426060914993 2023-01-24 03:03:42.974949: step: 888/464, loss: 0.12085162103176117 2023-01-24 03:03:43.665050: step: 890/464, loss: 0.5733956098556519 2023-01-24 03:03:44.291465: step: 892/464, loss: 0.03937987610697746 2023-01-24 03:03:44.900496: step: 894/464, loss: 0.15277822315692902 2023-01-24 03:03:45.493134: step: 896/464, loss: 0.10777375102043152 2023-01-24 03:03:46.137542: step: 898/464, loss: 0.35059159994125366 2023-01-24 03:03:46.796313: step: 900/464, loss: 0.07969984412193298 2023-01-24 03:03:47.461040: step: 902/464, loss: 0.06196504086256027 2023-01-24 03:03:48.070652: step: 904/464, loss: 0.11778075248003006 2023-01-24 03:03:48.686942: step: 906/464, loss: 0.08533184230327606 2023-01-24 03:03:49.376481: step: 908/464, loss: 0.08933708816766739 2023-01-24 03:03:49.971711: step: 910/464, loss: 0.04876921698451042 2023-01-24 03:03:50.570537: step: 912/464, loss: 0.01687920093536377 2023-01-24 03:03:51.226989: step: 914/464, loss: 0.1410721093416214 2023-01-24 03:03:51.824524: step: 916/464, loss: 0.10500001907348633 2023-01-24 03:03:52.510393: step: 918/464, loss: 0.863258421421051 2023-01-24 03:03:53.206421: step: 920/464, loss: 0.03630939871072769 2023-01-24 03:03:53.829082: step: 922/464, loss: 0.07011484354734421 2023-01-24 03:03:54.459407: step: 924/464, loss: 0.10621833056211472 2023-01-24 03:03:55.088259: step: 926/464, loss: 0.0867888554930687 2023-01-24 03:03:55.693145: step: 928/464, loss: 0.12446019053459167 2023-01-24 03:03:56.165657: step: 930/464, loss: 0.00032717103022150695 ================================================== Loss: 0.134 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32239063326937, 'r': 0.3358490657777688, 'f1': 0.3289822633176285}, 'combined': 0.2424079834971999, 'epoch': 18} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3269202676947831, 'r': 0.2977738835344945, 'f1': 0.3116671335118134}, 'combined': 0.20347180737040668, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32734466696009396, 'r': 0.35281170936116385, 'f1': 0.3396014079147641}, 'combined': 0.25023261635824723, 'epoch': 18} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3370631970178224, 'r': 0.2998871091114449, 'f1': 0.31739024777553704}, 'combined': 0.20720814103480656, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33755993192096545, 'r': 0.3439652437221223, 'f1': 0.3407324876721023}, 'combined': 0.2510660435478648, 'epoch': 18} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3362870737417875, 'r': 0.2859520939255237, 'f1': 0.30908370251602507}, 'combined': 0.2017852151140889, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28741496598639454, 'r': 0.40238095238095234, 'f1': 0.3353174603174603}, 'combined': 0.22354497354497352, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2426470588235294, 'r': 0.358695652173913, 'f1': 0.2894736842105263}, 'combined': 0.14473684210526316, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4666666666666667, 'r': 0.2413793103448276, 'f1': 0.3181818181818182}, 'combined': 0.2121212121212121, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:06:33.426831: step: 2/464, loss: 0.043649137020111084 2023-01-24 03:06:34.070619: step: 4/464, loss: 0.0485350638628006 2023-01-24 03:06:34.676938: step: 6/464, loss: 0.06481219083070755 2023-01-24 03:06:35.324580: step: 8/464, loss: 0.25353118777275085 2023-01-24 03:06:35.904659: step: 10/464, loss: 0.04911917448043823 2023-01-24 03:06:36.560333: step: 12/464, loss: 0.08624802529811859 2023-01-24 03:06:37.197875: step: 14/464, loss: 0.08918741345405579 2023-01-24 03:06:37.780384: step: 16/464, loss: 0.07811764627695084 2023-01-24 03:06:38.385258: step: 18/464, loss: 0.037579625844955444 2023-01-24 03:06:39.001732: step: 20/464, loss: 0.05042491853237152 2023-01-24 03:06:39.597938: step: 22/464, loss: 0.03239549696445465 2023-01-24 03:06:40.170567: step: 24/464, loss: 0.0234699584543705 2023-01-24 03:06:40.752770: step: 26/464, loss: 0.11668428778648376 2023-01-24 03:06:41.352053: step: 28/464, loss: 0.015777695924043655 2023-01-24 03:06:41.967327: step: 30/464, loss: 0.05128711462020874 2023-01-24 03:06:42.569668: step: 32/464, loss: 0.08597104996442795 2023-01-24 03:06:43.224483: step: 34/464, loss: 0.06288472563028336 2023-01-24 03:06:43.820475: step: 36/464, loss: 0.08793296664953232 2023-01-24 03:06:44.434030: step: 38/464, loss: 0.011416365392506123 2023-01-24 03:06:45.047811: step: 40/464, loss: 0.09050152450799942 2023-01-24 03:06:45.741630: step: 42/464, loss: 0.46950775384902954 2023-01-24 03:06:46.306222: step: 44/464, loss: 0.23902305960655212 2023-01-24 03:06:46.930824: step: 46/464, loss: 0.04410150274634361 2023-01-24 03:06:47.534380: step: 48/464, loss: 0.019048461690545082 2023-01-24 03:06:48.237416: step: 50/464, loss: 0.34819623827934265 2023-01-24 03:06:48.887845: step: 52/464, loss: 0.05642472952604294 2023-01-24 03:06:49.472778: step: 54/464, loss: 0.030337944626808167 2023-01-24 03:06:50.114867: step: 56/464, loss: 0.08515579253435135 2023-01-24 03:06:50.721697: step: 58/464, loss: 0.013418847694993019 2023-01-24 03:06:51.303482: step: 60/464, loss: 0.021707965061068535 2023-01-24 03:06:51.939997: step: 62/464, loss: 0.009400303475558758 2023-01-24 03:06:52.543628: step: 64/464, loss: 0.20763559639453888 2023-01-24 03:06:53.118207: step: 66/464, loss: 0.054090019315481186 2023-01-24 03:06:53.801806: step: 68/464, loss: 0.12675267457962036 2023-01-24 03:06:54.377299: step: 70/464, loss: 0.01828434132039547 2023-01-24 03:06:55.032656: step: 72/464, loss: 0.03414449095726013 2023-01-24 03:06:55.574594: step: 74/464, loss: 0.005128489341586828 2023-01-24 03:06:56.148719: step: 76/464, loss: 0.017243361100554466 2023-01-24 03:06:56.850330: step: 78/464, loss: 0.01814139261841774 2023-01-24 03:06:57.475839: step: 80/464, loss: 0.10465224832296371 2023-01-24 03:06:58.112498: step: 82/464, loss: 0.1301683783531189 2023-01-24 03:06:58.766745: step: 84/464, loss: 0.030013512820005417 2023-01-24 03:06:59.484422: step: 86/464, loss: 0.105363629758358 2023-01-24 03:07:00.076083: step: 88/464, loss: 0.045319244265556335 2023-01-24 03:07:00.687683: step: 90/464, loss: 0.015606243163347244 2023-01-24 03:07:01.292517: step: 92/464, loss: 0.001734345336444676 2023-01-24 03:07:01.910161: step: 94/464, loss: 0.018572824075818062 2023-01-24 03:07:02.534410: step: 96/464, loss: 0.044814515858888626 2023-01-24 03:07:03.076420: step: 98/464, loss: 0.04040629044175148 2023-01-24 03:07:03.753171: step: 100/464, loss: 0.14970074594020844 2023-01-24 03:07:04.365772: step: 102/464, loss: 1.2993954420089722 2023-01-24 03:07:04.990529: step: 104/464, loss: 0.05898214876651764 2023-01-24 03:07:05.606975: step: 106/464, loss: 0.6732302904129028 2023-01-24 03:07:06.260491: step: 108/464, loss: 0.10700202733278275 2023-01-24 03:07:06.863936: step: 110/464, loss: 0.06434134393930435 2023-01-24 03:07:07.512254: step: 112/464, loss: 0.06865505129098892 2023-01-24 03:07:08.121299: step: 114/464, loss: 0.028769873082637787 2023-01-24 03:07:08.730032: step: 116/464, loss: 0.04342222586274147 2023-01-24 03:07:09.354557: step: 118/464, loss: 0.22497865557670593 2023-01-24 03:07:09.956027: step: 120/464, loss: 0.19769158959388733 2023-01-24 03:07:10.560612: step: 122/464, loss: 0.007902226410806179 2023-01-24 03:07:11.186303: step: 124/464, loss: 0.05975205451250076 2023-01-24 03:07:11.829897: step: 126/464, loss: 0.11951949447393417 2023-01-24 03:07:12.412106: step: 128/464, loss: 0.19905780255794525 2023-01-24 03:07:13.014447: step: 130/464, loss: 0.08745703846216202 2023-01-24 03:07:13.634690: step: 132/464, loss: 0.07580374926328659 2023-01-24 03:07:14.241083: step: 134/464, loss: 0.05008558928966522 2023-01-24 03:07:14.863401: step: 136/464, loss: 0.09049684554338455 2023-01-24 03:07:15.558050: step: 138/464, loss: 0.08347878605127335 2023-01-24 03:07:16.186637: step: 140/464, loss: 0.06866847723722458 2023-01-24 03:07:16.840524: step: 142/464, loss: 0.12992535531520844 2023-01-24 03:07:17.447659: step: 144/464, loss: 0.07780981063842773 2023-01-24 03:07:18.102638: step: 146/464, loss: 0.17778530716896057 2023-01-24 03:07:18.713157: step: 148/464, loss: 0.030234916135668755 2023-01-24 03:07:19.349892: step: 150/464, loss: 0.040719643235206604 2023-01-24 03:07:19.924521: step: 152/464, loss: 0.09193213284015656 2023-01-24 03:07:20.461271: step: 154/464, loss: 0.003865104168653488 2023-01-24 03:07:21.067708: step: 156/464, loss: 0.033786624670028687 2023-01-24 03:07:21.704389: step: 158/464, loss: 0.014323681592941284 2023-01-24 03:07:22.320556: step: 160/464, loss: 0.011782080866396427 2023-01-24 03:07:22.916274: step: 162/464, loss: 0.07542740553617477 2023-01-24 03:07:23.453491: step: 164/464, loss: 0.05516229569911957 2023-01-24 03:07:24.066071: step: 166/464, loss: 1.0248652696609497 2023-01-24 03:07:24.705481: step: 168/464, loss: 0.4814464747905731 2023-01-24 03:07:25.430380: step: 170/464, loss: 0.037918876856565475 2023-01-24 03:07:26.050325: step: 172/464, loss: 0.11610672622919083 2023-01-24 03:07:26.703816: step: 174/464, loss: 0.047969166189432144 2023-01-24 03:07:27.402196: step: 176/464, loss: 0.03514929860830307 2023-01-24 03:07:28.045820: step: 178/464, loss: 0.04943988844752312 2023-01-24 03:07:28.750006: step: 180/464, loss: 0.05877591669559479 2023-01-24 03:07:29.428062: step: 182/464, loss: 0.07240848243236542 2023-01-24 03:07:30.143467: step: 184/464, loss: 0.07649239897727966 2023-01-24 03:07:30.766807: step: 186/464, loss: 0.023809637874364853 2023-01-24 03:07:31.419252: step: 188/464, loss: 0.11529197543859482 2023-01-24 03:07:32.051617: step: 190/464, loss: 0.060493309050798416 2023-01-24 03:07:32.648616: step: 192/464, loss: 0.020831234753131866 2023-01-24 03:07:33.265764: step: 194/464, loss: 0.023075975477695465 2023-01-24 03:07:33.979475: step: 196/464, loss: 0.0883576050400734 2023-01-24 03:07:34.578496: step: 198/464, loss: 0.045864857733249664 2023-01-24 03:07:35.212596: step: 200/464, loss: 0.27580738067626953 2023-01-24 03:07:35.863285: step: 202/464, loss: 0.20697720348834991 2023-01-24 03:07:36.439827: step: 204/464, loss: 0.014832507818937302 2023-01-24 03:07:37.088588: step: 206/464, loss: 0.15060590207576752 2023-01-24 03:07:37.741803: step: 208/464, loss: 0.10265764594078064 2023-01-24 03:07:38.396924: step: 210/464, loss: 0.06849902123212814 2023-01-24 03:07:38.985909: step: 212/464, loss: 0.03741922602057457 2023-01-24 03:07:39.605701: step: 214/464, loss: 0.022133296355605125 2023-01-24 03:07:40.308208: step: 216/464, loss: 0.09701382368803024 2023-01-24 03:07:40.931299: step: 218/464, loss: 0.11456061154603958 2023-01-24 03:07:41.527031: step: 220/464, loss: 0.03466453775763512 2023-01-24 03:07:42.143609: step: 222/464, loss: 0.03732229024171829 2023-01-24 03:07:42.794472: step: 224/464, loss: 0.0477830171585083 2023-01-24 03:07:43.408752: step: 226/464, loss: 0.03878113254904747 2023-01-24 03:07:44.018704: step: 228/464, loss: 0.10058791190385818 2023-01-24 03:07:44.638887: step: 230/464, loss: 0.012252474203705788 2023-01-24 03:07:45.248796: step: 232/464, loss: 0.047803886234760284 2023-01-24 03:07:45.917145: step: 234/464, loss: 0.047188617289066315 2023-01-24 03:07:46.488538: step: 236/464, loss: 0.04074384272098541 2023-01-24 03:07:47.012841: step: 238/464, loss: 0.23794332146644592 2023-01-24 03:07:47.671881: step: 240/464, loss: 0.057008638978004456 2023-01-24 03:07:48.349028: step: 242/464, loss: 0.047704242169857025 2023-01-24 03:07:48.943940: step: 244/464, loss: 0.04410808905959129 2023-01-24 03:07:49.576477: step: 246/464, loss: 0.06980064511299133 2023-01-24 03:07:50.194365: step: 248/464, loss: 0.06814610213041306 2023-01-24 03:07:50.803176: step: 250/464, loss: 0.09101682156324387 2023-01-24 03:07:51.403707: step: 252/464, loss: 0.026719750836491585 2023-01-24 03:07:51.997693: step: 254/464, loss: 0.029958590865135193 2023-01-24 03:07:52.670446: step: 256/464, loss: 0.08468754589557648 2023-01-24 03:07:53.260198: step: 258/464, loss: 0.06755435466766357 2023-01-24 03:07:53.853206: step: 260/464, loss: 0.11044995486736298 2023-01-24 03:07:54.442335: step: 262/464, loss: 0.037777338176965714 2023-01-24 03:07:55.066267: step: 264/464, loss: 0.048214830458164215 2023-01-24 03:07:55.686433: step: 266/464, loss: 0.04914911836385727 2023-01-24 03:07:56.313151: step: 268/464, loss: 0.13612990081310272 2023-01-24 03:07:56.912398: step: 270/464, loss: 0.05357581004500389 2023-01-24 03:07:57.536198: step: 272/464, loss: 0.38628140091896057 2023-01-24 03:07:58.257044: step: 274/464, loss: 0.2231987863779068 2023-01-24 03:07:58.875442: step: 276/464, loss: 0.07788091152906418 2023-01-24 03:07:59.579914: step: 278/464, loss: 0.00692960899323225 2023-01-24 03:08:00.206034: step: 280/464, loss: 0.02506919391453266 2023-01-24 03:08:00.817794: step: 282/464, loss: 0.046163853257894516 2023-01-24 03:08:01.503213: step: 284/464, loss: 0.09043905138969421 2023-01-24 03:08:02.134739: step: 286/464, loss: 0.990919828414917 2023-01-24 03:08:02.718169: step: 288/464, loss: 0.11400745064020157 2023-01-24 03:08:03.285427: step: 290/464, loss: 0.02036544308066368 2023-01-24 03:08:03.911609: step: 292/464, loss: 0.03672667592763901 2023-01-24 03:08:04.492679: step: 294/464, loss: 0.06968734413385391 2023-01-24 03:08:05.126298: step: 296/464, loss: 0.05790454521775246 2023-01-24 03:08:05.757516: step: 298/464, loss: 0.09719129651784897 2023-01-24 03:08:06.382922: step: 300/464, loss: 0.21586154401302338 2023-01-24 03:08:07.011092: step: 302/464, loss: 0.05399652197957039 2023-01-24 03:08:07.642346: step: 304/464, loss: 0.06565868854522705 2023-01-24 03:08:08.199063: step: 306/464, loss: 0.024197373539209366 2023-01-24 03:08:08.791567: step: 308/464, loss: 0.04468630626797676 2023-01-24 03:08:09.510743: step: 310/464, loss: 0.034479573369026184 2023-01-24 03:08:10.187451: step: 312/464, loss: 0.03586513549089432 2023-01-24 03:08:10.798143: step: 314/464, loss: 0.0927836000919342 2023-01-24 03:08:11.377889: step: 316/464, loss: 0.03038622997701168 2023-01-24 03:08:12.029240: step: 318/464, loss: 0.05650036409497261 2023-01-24 03:08:12.650819: step: 320/464, loss: 0.17589622735977173 2023-01-24 03:08:13.345943: step: 322/464, loss: 0.3455429673194885 2023-01-24 03:08:13.931841: step: 324/464, loss: 0.03918929398059845 2023-01-24 03:08:14.555619: step: 326/464, loss: 0.13918551802635193 2023-01-24 03:08:15.188592: step: 328/464, loss: 0.058665983378887177 2023-01-24 03:08:15.871387: step: 330/464, loss: 0.03296054154634476 2023-01-24 03:08:16.486549: step: 332/464, loss: 0.40283647179603577 2023-01-24 03:08:17.093873: step: 334/464, loss: 0.13812364637851715 2023-01-24 03:08:17.752334: step: 336/464, loss: 0.9251861572265625 2023-01-24 03:08:18.379711: step: 338/464, loss: 0.11263386160135269 2023-01-24 03:08:18.961903: step: 340/464, loss: 0.06823277473449707 2023-01-24 03:08:19.592514: step: 342/464, loss: 0.051416944712400436 2023-01-24 03:08:20.216502: step: 344/464, loss: 0.02385799027979374 2023-01-24 03:08:20.869017: step: 346/464, loss: 0.039055630564689636 2023-01-24 03:08:21.468291: step: 348/464, loss: 0.10177624225616455 2023-01-24 03:08:22.092073: step: 350/464, loss: 0.06820239126682281 2023-01-24 03:08:22.788780: step: 352/464, loss: 0.10517510771751404 2023-01-24 03:08:23.419269: step: 354/464, loss: 0.1108148992061615 2023-01-24 03:08:24.023632: step: 356/464, loss: 0.026672907173633575 2023-01-24 03:08:24.600524: step: 358/464, loss: 0.2044680267572403 2023-01-24 03:08:25.240829: step: 360/464, loss: 0.15214747190475464 2023-01-24 03:08:25.891747: step: 362/464, loss: 0.004370806738734245 2023-01-24 03:08:26.491280: step: 364/464, loss: 0.04464809224009514 2023-01-24 03:08:27.099994: step: 366/464, loss: 0.05915183201432228 2023-01-24 03:08:27.738262: step: 368/464, loss: 0.005321057513356209 2023-01-24 03:08:28.398334: step: 370/464, loss: 0.04195632040500641 2023-01-24 03:08:29.123077: step: 372/464, loss: 0.05455410107970238 2023-01-24 03:08:29.699886: step: 374/464, loss: 0.025054074823856354 2023-01-24 03:08:30.344503: step: 376/464, loss: 0.022881191223859787 2023-01-24 03:08:30.933590: step: 378/464, loss: 0.11177432537078857 2023-01-24 03:08:31.517967: step: 380/464, loss: 0.07313412427902222 2023-01-24 03:08:32.148538: step: 382/464, loss: 0.08014141023159027 2023-01-24 03:08:32.717353: step: 384/464, loss: 0.0690186396241188 2023-01-24 03:08:33.327484: step: 386/464, loss: 0.0153644485399127 2023-01-24 03:08:33.959540: step: 388/464, loss: 0.08579351752996445 2023-01-24 03:08:34.522548: step: 390/464, loss: 0.022128120064735413 2023-01-24 03:08:35.147065: step: 392/464, loss: 0.04024612158536911 2023-01-24 03:08:35.772790: step: 394/464, loss: 0.097946397960186 2023-01-24 03:08:36.321672: step: 396/464, loss: 0.02984456904232502 2023-01-24 03:08:36.941172: step: 398/464, loss: 0.014964022673666477 2023-01-24 03:08:37.583450: step: 400/464, loss: 0.025280293077230453 2023-01-24 03:08:38.252749: step: 402/464, loss: 0.21250039339065552 2023-01-24 03:08:38.859206: step: 404/464, loss: 0.082395538687706 2023-01-24 03:08:39.494308: step: 406/464, loss: 0.08781449496746063 2023-01-24 03:08:40.124950: step: 408/464, loss: 0.04008530080318451 2023-01-24 03:08:40.730072: step: 410/464, loss: 0.08903039246797562 2023-01-24 03:08:41.349085: step: 412/464, loss: 0.027576781809329987 2023-01-24 03:08:41.950328: step: 414/464, loss: 0.21906928718090057 2023-01-24 03:08:42.526396: step: 416/464, loss: 0.46569788455963135 2023-01-24 03:08:43.224734: step: 418/464, loss: 0.22811473906040192 2023-01-24 03:08:43.777911: step: 420/464, loss: 0.0242232084274292 2023-01-24 03:08:44.450227: step: 422/464, loss: 0.023428920656442642 2023-01-24 03:08:45.055109: step: 424/464, loss: 0.04994883015751839 2023-01-24 03:08:45.748874: step: 426/464, loss: 0.06240205466747284 2023-01-24 03:08:46.429012: step: 428/464, loss: 0.08307365328073502 2023-01-24 03:08:47.050084: step: 430/464, loss: 0.005301331635564566 2023-01-24 03:08:47.695038: step: 432/464, loss: 0.09943713247776031 2023-01-24 03:08:48.341782: step: 434/464, loss: 0.4063957631587982 2023-01-24 03:08:48.984467: step: 436/464, loss: 0.012808769941329956 2023-01-24 03:08:49.634883: step: 438/464, loss: 0.03013821877539158 2023-01-24 03:08:50.261125: step: 440/464, loss: 0.06016019359230995 2023-01-24 03:08:50.898320: step: 442/464, loss: 0.01702016219496727 2023-01-24 03:08:51.534764: step: 444/464, loss: 0.17447879910469055 2023-01-24 03:08:52.199684: step: 446/464, loss: 0.04016990214586258 2023-01-24 03:08:52.838242: step: 448/464, loss: 0.039848506450653076 2023-01-24 03:08:53.437808: step: 450/464, loss: 0.12156244367361069 2023-01-24 03:08:54.076487: step: 452/464, loss: 0.4423430263996124 2023-01-24 03:08:54.686362: step: 454/464, loss: 0.3502335548400879 2023-01-24 03:08:55.343534: step: 456/464, loss: 0.07991401851177216 2023-01-24 03:08:55.934899: step: 458/464, loss: 0.028739823028445244 2023-01-24 03:08:56.619326: step: 460/464, loss: 0.03141813725233078 2023-01-24 03:08:57.317437: step: 462/464, loss: 0.015840908512473106 2023-01-24 03:08:57.931306: step: 464/464, loss: 0.07145281136035919 2023-01-24 03:08:58.509955: step: 466/464, loss: 0.22814258933067322 2023-01-24 03:08:59.069641: step: 468/464, loss: 0.11121980100870132 2023-01-24 03:08:59.690224: step: 470/464, loss: 0.08399312198162079 2023-01-24 03:09:00.376574: step: 472/464, loss: 0.046664491295814514 2023-01-24 03:09:01.000672: step: 474/464, loss: 0.10848533362150192 2023-01-24 03:09:01.587133: step: 476/464, loss: 0.023174069821834564 2023-01-24 03:09:02.165379: step: 478/464, loss: 0.07110322266817093 2023-01-24 03:09:02.717511: step: 480/464, loss: 0.2516523003578186 2023-01-24 03:09:03.332633: step: 482/464, loss: 0.032961003482341766 2023-01-24 03:09:03.949533: step: 484/464, loss: 0.12668445706367493 2023-01-24 03:09:04.579039: step: 486/464, loss: 0.09966351091861725 2023-01-24 03:09:05.133318: step: 488/464, loss: 0.023068716749548912 2023-01-24 03:09:05.723811: step: 490/464, loss: 0.033713988959789276 2023-01-24 03:09:06.359821: step: 492/464, loss: 0.020636672154068947 2023-01-24 03:09:07.106540: step: 494/464, loss: 0.10108671337366104 2023-01-24 03:09:07.694148: step: 496/464, loss: 0.28205952048301697 2023-01-24 03:09:08.359662: step: 498/464, loss: 0.023579929023981094 2023-01-24 03:09:08.986719: step: 500/464, loss: 0.03312808275222778 2023-01-24 03:09:09.629546: step: 502/464, loss: 0.049804236739873886 2023-01-24 03:09:10.226849: step: 504/464, loss: 0.08485108613967896 2023-01-24 03:09:10.843174: step: 506/464, loss: 0.10577449202537537 2023-01-24 03:09:11.451429: step: 508/464, loss: 0.04027533903717995 2023-01-24 03:09:12.046522: step: 510/464, loss: 0.06960257887840271 2023-01-24 03:09:12.713015: step: 512/464, loss: 0.02281743660569191 2023-01-24 03:09:13.346884: step: 514/464, loss: 0.03415166586637497 2023-01-24 03:09:14.022007: step: 516/464, loss: 0.1048787534236908 2023-01-24 03:09:14.627325: step: 518/464, loss: 0.030197374522686005 2023-01-24 03:09:15.285636: step: 520/464, loss: 0.22120463848114014 2023-01-24 03:09:15.884951: step: 522/464, loss: 0.15047624707221985 2023-01-24 03:09:16.551663: step: 524/464, loss: 0.08096174150705338 2023-01-24 03:09:17.137571: step: 526/464, loss: 0.05747506394982338 2023-01-24 03:09:17.762402: step: 528/464, loss: 0.0479048490524292 2023-01-24 03:09:18.434952: step: 530/464, loss: 0.06097549945116043 2023-01-24 03:09:19.105708: step: 532/464, loss: 0.040148764848709106 2023-01-24 03:09:19.715319: step: 534/464, loss: 0.18284642696380615 2023-01-24 03:09:20.277701: step: 536/464, loss: 0.015024237334728241 2023-01-24 03:09:20.991047: step: 538/464, loss: 0.019144777208566666 2023-01-24 03:09:21.638257: step: 540/464, loss: 0.0017776531167328358 2023-01-24 03:09:22.229859: step: 542/464, loss: 0.06847799569368362 2023-01-24 03:09:22.850393: step: 544/464, loss: 0.1029786616563797 2023-01-24 03:09:23.475534: step: 546/464, loss: 0.01616619899868965 2023-01-24 03:09:24.131094: step: 548/464, loss: 0.022176573053002357 2023-01-24 03:09:24.809601: step: 550/464, loss: 0.06482881307601929 2023-01-24 03:09:25.433121: step: 552/464, loss: 0.01709182932972908 2023-01-24 03:09:26.044321: step: 554/464, loss: 0.014702457003295422 2023-01-24 03:09:26.608725: step: 556/464, loss: 0.011646861210465431 2023-01-24 03:09:27.223635: step: 558/464, loss: 0.12523825466632843 2023-01-24 03:09:27.968150: step: 560/464, loss: 0.04933774843811989 2023-01-24 03:09:28.559103: step: 562/464, loss: 0.05424141138792038 2023-01-24 03:09:29.171549: step: 564/464, loss: 0.11501363664865494 2023-01-24 03:09:29.779376: step: 566/464, loss: 0.07519600540399551 2023-01-24 03:09:30.359653: step: 568/464, loss: 0.08401346951723099 2023-01-24 03:09:31.014091: step: 570/464, loss: 0.056519828736782074 2023-01-24 03:09:31.616914: step: 572/464, loss: 0.08458318561315536 2023-01-24 03:09:32.186179: step: 574/464, loss: 0.031199859455227852 2023-01-24 03:09:32.945012: step: 576/464, loss: 0.01655229926109314 2023-01-24 03:09:33.605363: step: 578/464, loss: 0.10784925520420074 2023-01-24 03:09:34.265365: step: 580/464, loss: 0.06575610488653183 2023-01-24 03:09:34.879408: step: 582/464, loss: 0.03883712738752365 2023-01-24 03:09:35.507206: step: 584/464, loss: 0.04284549504518509 2023-01-24 03:09:36.120599: step: 586/464, loss: 0.11854876577854156 2023-01-24 03:09:36.721633: step: 588/464, loss: 0.04092574864625931 2023-01-24 03:09:37.316301: step: 590/464, loss: 0.3116703927516937 2023-01-24 03:09:37.931486: step: 592/464, loss: 0.030424591153860092 2023-01-24 03:09:38.512983: step: 594/464, loss: 0.23410645127296448 2023-01-24 03:09:39.130612: step: 596/464, loss: 0.048708029091358185 2023-01-24 03:09:39.724491: step: 598/464, loss: 0.047511957585811615 2023-01-24 03:09:40.362229: step: 600/464, loss: 0.03580872714519501 2023-01-24 03:09:41.023277: step: 602/464, loss: 0.10451705753803253 2023-01-24 03:09:41.647158: step: 604/464, loss: 0.1291225254535675 2023-01-24 03:09:42.405070: step: 606/464, loss: 0.08524785935878754 2023-01-24 03:09:43.005721: step: 608/464, loss: 0.0811164602637291 2023-01-24 03:09:43.608341: step: 610/464, loss: 0.06724970042705536 2023-01-24 03:09:44.208581: step: 612/464, loss: 0.11718723177909851 2023-01-24 03:09:44.794167: step: 614/464, loss: 0.07521151006221771 2023-01-24 03:09:45.439319: step: 616/464, loss: 0.06996964663267136 2023-01-24 03:09:46.108358: step: 618/464, loss: 0.06816425919532776 2023-01-24 03:09:46.709477: step: 620/464, loss: 0.004956142511218786 2023-01-24 03:09:47.427915: step: 622/464, loss: 0.07144085317850113 2023-01-24 03:09:48.001757: step: 624/464, loss: 0.029275184497237206 2023-01-24 03:09:48.621246: step: 626/464, loss: 0.05224230885505676 2023-01-24 03:09:49.211402: step: 628/464, loss: 0.05378033220767975 2023-01-24 03:09:49.911905: step: 630/464, loss: 0.11603333055973053 2023-01-24 03:09:50.505947: step: 632/464, loss: 0.07940942049026489 2023-01-24 03:09:51.118032: step: 634/464, loss: 0.013182312250137329 2023-01-24 03:09:51.785463: step: 636/464, loss: 0.012838025577366352 2023-01-24 03:09:52.431314: step: 638/464, loss: 0.06080120429396629 2023-01-24 03:09:53.070046: step: 640/464, loss: 0.05779057368636131 2023-01-24 03:09:53.675768: step: 642/464, loss: 0.021169716492295265 2023-01-24 03:09:54.256644: step: 644/464, loss: 0.029275264590978622 2023-01-24 03:09:54.834292: step: 646/464, loss: 0.07615438848733902 2023-01-24 03:09:55.526685: step: 648/464, loss: 0.03917456418275833 2023-01-24 03:09:56.174644: step: 650/464, loss: 0.03054221160709858 2023-01-24 03:09:56.850765: step: 652/464, loss: 0.08743109554052353 2023-01-24 03:09:57.448989: step: 654/464, loss: 0.1987110674381256 2023-01-24 03:09:58.132734: step: 656/464, loss: 0.013709792867302895 2023-01-24 03:09:58.763469: step: 658/464, loss: 0.0877533107995987 2023-01-24 03:09:59.349714: step: 660/464, loss: 0.041052624583244324 2023-01-24 03:10:00.069610: step: 662/464, loss: 0.08393299579620361 2023-01-24 03:10:00.641654: step: 664/464, loss: 0.05065491423010826 2023-01-24 03:10:01.282130: step: 666/464, loss: 0.03020860254764557 2023-01-24 03:10:01.933047: step: 668/464, loss: 0.03692087158560753 2023-01-24 03:10:02.585089: step: 670/464, loss: 0.07487764954566956 2023-01-24 03:10:03.172572: step: 672/464, loss: 0.15530121326446533 2023-01-24 03:10:03.718103: step: 674/464, loss: 0.06111391261219978 2023-01-24 03:10:04.403539: step: 676/464, loss: 0.01869584433734417 2023-01-24 03:10:04.974263: step: 678/464, loss: 0.3590307831764221 2023-01-24 03:10:05.626549: step: 680/464, loss: 0.045789625495672226 2023-01-24 03:10:06.244051: step: 682/464, loss: 0.0480433851480484 2023-01-24 03:10:06.781678: step: 684/464, loss: 0.03853193297982216 2023-01-24 03:10:07.385118: step: 686/464, loss: 0.1281861513853073 2023-01-24 03:10:07.977219: step: 688/464, loss: 0.010447696782648563 2023-01-24 03:10:08.567143: step: 690/464, loss: 0.18426543474197388 2023-01-24 03:10:09.210060: step: 692/464, loss: 0.011111344210803509 2023-01-24 03:10:09.771027: step: 694/464, loss: 0.09752097725868225 2023-01-24 03:10:10.442607: step: 696/464, loss: 0.018900105729699135 2023-01-24 03:10:11.079978: step: 698/464, loss: 0.07611563056707382 2023-01-24 03:10:11.713899: step: 700/464, loss: 0.03231906145811081 2023-01-24 03:10:12.317562: step: 702/464, loss: 0.6264570355415344 2023-01-24 03:10:12.974172: step: 704/464, loss: 0.03651156276464462 2023-01-24 03:10:13.607911: step: 706/464, loss: 0.05947602912783623 2023-01-24 03:10:14.327992: step: 708/464, loss: 1.0475647449493408 2023-01-24 03:10:14.988364: step: 710/464, loss: 0.09719257056713104 2023-01-24 03:10:15.737915: step: 712/464, loss: 0.05212196335196495 2023-01-24 03:10:16.354526: step: 714/464, loss: 0.03310827910900116 2023-01-24 03:10:17.017947: step: 716/464, loss: 0.059840813279151917 2023-01-24 03:10:17.644739: step: 718/464, loss: 0.07850253582000732 2023-01-24 03:10:18.264045: step: 720/464, loss: 0.1370672732591629 2023-01-24 03:10:18.868542: step: 722/464, loss: 0.03510038182139397 2023-01-24 03:10:19.449182: step: 724/464, loss: 0.06712029129266739 2023-01-24 03:10:20.047396: step: 726/464, loss: 0.054644204676151276 2023-01-24 03:10:20.720740: step: 728/464, loss: 0.12576636672019958 2023-01-24 03:10:21.324509: step: 730/464, loss: 0.2506440281867981 2023-01-24 03:10:21.930753: step: 732/464, loss: 0.07583318650722504 2023-01-24 03:10:22.625490: step: 734/464, loss: 0.0640006735920906 2023-01-24 03:10:23.265009: step: 736/464, loss: 0.36113810539245605 2023-01-24 03:10:23.868482: step: 738/464, loss: 0.02094738371670246 2023-01-24 03:10:24.484271: step: 740/464, loss: 0.07040239125490189 2023-01-24 03:10:25.141388: step: 742/464, loss: 0.07829161733388901 2023-01-24 03:10:25.773959: step: 744/464, loss: 0.03566007688641548 2023-01-24 03:10:26.356449: step: 746/464, loss: 0.07003956288099289 2023-01-24 03:10:26.946655: step: 748/464, loss: 0.4165613055229187 2023-01-24 03:10:27.567656: step: 750/464, loss: 1.438755989074707 2023-01-24 03:10:28.190584: step: 752/464, loss: 0.07919133454561234 2023-01-24 03:10:28.870152: step: 754/464, loss: 0.04016372188925743 2023-01-24 03:10:29.404377: step: 756/464, loss: 0.03662552312016487 2023-01-24 03:10:30.021761: step: 758/464, loss: 0.011545374058187008 2023-01-24 03:10:30.792534: step: 760/464, loss: 0.08247997611761093 2023-01-24 03:10:31.425735: step: 762/464, loss: 0.062238164246082306 2023-01-24 03:10:31.998963: step: 764/464, loss: 0.06991934031248093 2023-01-24 03:10:32.582583: step: 766/464, loss: 0.7585777640342712 2023-01-24 03:10:33.269884: step: 768/464, loss: 0.6400312781333923 2023-01-24 03:10:33.853535: step: 770/464, loss: 0.040842048823833466 2023-01-24 03:10:34.502236: step: 772/464, loss: 0.06262914836406708 2023-01-24 03:10:35.142092: step: 774/464, loss: 0.057515811175107956 2023-01-24 03:10:35.701959: step: 776/464, loss: 0.05399390310049057 2023-01-24 03:10:36.306392: step: 778/464, loss: 0.1591375321149826 2023-01-24 03:10:36.919185: step: 780/464, loss: 0.6502782106399536 2023-01-24 03:10:37.472048: step: 782/464, loss: 0.0023214572574943304 2023-01-24 03:10:38.086109: step: 784/464, loss: 0.1388404220342636 2023-01-24 03:10:38.735084: step: 786/464, loss: 0.22411896288394928 2023-01-24 03:10:39.318200: step: 788/464, loss: 0.019362160935997963 2023-01-24 03:10:39.890147: step: 790/464, loss: 0.021875949576497078 2023-01-24 03:10:40.498739: step: 792/464, loss: 0.06016132980585098 2023-01-24 03:10:41.164368: step: 794/464, loss: 0.04290073364973068 2023-01-24 03:10:41.785733: step: 796/464, loss: 0.08380314707756042 2023-01-24 03:10:42.426458: step: 798/464, loss: 0.025916090235114098 2023-01-24 03:10:43.187769: step: 800/464, loss: 0.10179685056209564 2023-01-24 03:10:43.797007: step: 802/464, loss: 0.03139950707554817 2023-01-24 03:10:44.390576: step: 804/464, loss: 0.05587141215801239 2023-01-24 03:10:44.946416: step: 806/464, loss: 0.3088460862636566 2023-01-24 03:10:45.627995: step: 808/464, loss: 0.04401913657784462 2023-01-24 03:10:46.279923: step: 810/464, loss: 0.12718059122562408 2023-01-24 03:10:46.918530: step: 812/464, loss: 0.2657465636730194 2023-01-24 03:10:47.553631: step: 814/464, loss: 0.06844300776720047 2023-01-24 03:10:48.226140: step: 816/464, loss: 0.022106116637587547 2023-01-24 03:10:48.905092: step: 818/464, loss: 0.15089477598667145 2023-01-24 03:10:49.596601: step: 820/464, loss: 0.08072828501462936 2023-01-24 03:10:50.251234: step: 822/464, loss: 0.11256927251815796 2023-01-24 03:10:50.911732: step: 824/464, loss: 0.19882658123970032 2023-01-24 03:10:51.560093: step: 826/464, loss: 0.05668610706925392 2023-01-24 03:10:52.199320: step: 828/464, loss: 0.033308397978544235 2023-01-24 03:10:52.891932: step: 830/464, loss: 0.0646291971206665 2023-01-24 03:10:53.542639: step: 832/464, loss: 0.09487643837928772 2023-01-24 03:10:54.130270: step: 834/464, loss: 0.09053507447242737 2023-01-24 03:10:54.787242: step: 836/464, loss: 0.049667082726955414 2023-01-24 03:10:55.370088: step: 838/464, loss: 0.1634913980960846 2023-01-24 03:10:55.975132: step: 840/464, loss: 0.011972474865615368 2023-01-24 03:10:56.622801: step: 842/464, loss: 0.06723528355360031 2023-01-24 03:10:57.281882: step: 844/464, loss: 0.04506109282374382 2023-01-24 03:10:57.951605: step: 846/464, loss: 0.5153870582580566 2023-01-24 03:10:58.559829: step: 848/464, loss: 0.019879765808582306 2023-01-24 03:10:59.136387: step: 850/464, loss: 0.028760865330696106 2023-01-24 03:10:59.778511: step: 852/464, loss: 0.03077036142349243 2023-01-24 03:11:00.427702: step: 854/464, loss: 0.06875479966402054 2023-01-24 03:11:01.088994: step: 856/464, loss: 0.047324683517217636 2023-01-24 03:11:01.666767: step: 858/464, loss: 0.029233718290925026 2023-01-24 03:11:02.268266: step: 860/464, loss: 0.16239085793495178 2023-01-24 03:11:02.942486: step: 862/464, loss: 0.12425319850444794 2023-01-24 03:11:03.558168: step: 864/464, loss: 0.07654102146625519 2023-01-24 03:11:04.275299: step: 866/464, loss: 0.4811508059501648 2023-01-24 03:11:04.964777: step: 868/464, loss: 0.05206020548939705 2023-01-24 03:11:05.609861: step: 870/464, loss: 0.29282277822494507 2023-01-24 03:11:06.264012: step: 872/464, loss: 0.0878470316529274 2023-01-24 03:11:06.947733: step: 874/464, loss: 0.31125855445861816 2023-01-24 03:11:07.602014: step: 876/464, loss: 0.004280728287994862 2023-01-24 03:11:08.260084: step: 878/464, loss: 0.0990959033370018 2023-01-24 03:11:08.848896: step: 880/464, loss: 0.05305367335677147 2023-01-24 03:11:09.492185: step: 882/464, loss: 0.03634254261851311 2023-01-24 03:11:10.112358: step: 884/464, loss: 0.10698814690113068 2023-01-24 03:11:10.773864: step: 886/464, loss: 0.07746051251888275 2023-01-24 03:11:11.429333: step: 888/464, loss: 0.019592829048633575 2023-01-24 03:11:12.060238: step: 890/464, loss: 0.027923477813601494 2023-01-24 03:11:12.706267: step: 892/464, loss: 0.32788199186325073 2023-01-24 03:11:13.407108: step: 894/464, loss: 0.050454281270504 2023-01-24 03:11:14.095690: step: 896/464, loss: 0.051692795008420944 2023-01-24 03:11:14.679055: step: 898/464, loss: 0.31875765323638916 2023-01-24 03:11:15.360971: step: 900/464, loss: 0.09781712293624878 2023-01-24 03:11:15.951104: step: 902/464, loss: 0.3061683773994446 2023-01-24 03:11:16.512874: step: 904/464, loss: 0.010698407888412476 2023-01-24 03:11:17.213452: step: 906/464, loss: 0.02591118961572647 2023-01-24 03:11:17.880468: step: 908/464, loss: 0.12273906171321869 2023-01-24 03:11:18.501918: step: 910/464, loss: 0.07957099378108978 2023-01-24 03:11:19.115015: step: 912/464, loss: 0.1690153181552887 2023-01-24 03:11:19.736631: step: 914/464, loss: 0.03161702677607536 2023-01-24 03:11:20.372709: step: 916/464, loss: 0.041874177753925323 2023-01-24 03:11:20.987890: step: 918/464, loss: 0.037186700850725174 2023-01-24 03:11:21.632748: step: 920/464, loss: 0.16274917125701904 2023-01-24 03:11:22.246272: step: 922/464, loss: 0.06382595747709274 2023-01-24 03:11:22.896043: step: 924/464, loss: 0.18154233694076538 2023-01-24 03:11:23.552824: step: 926/464, loss: 0.018826236948370934 2023-01-24 03:11:24.179889: step: 928/464, loss: 0.010248782113194466 2023-01-24 03:11:24.664280: step: 930/464, loss: 0.010287413373589516 ================================================== Loss: 0.105 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3282170307443366, 'r': 0.32074339816571795, 'f1': 0.3244371801023673}, 'combined': 0.23905897481227062, 'epoch': 19} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3143718087495953, 'r': 0.3080150258520851, 'f1': 0.3111609546212336}, 'combined': 0.2031413486128261, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32409417351499425, 'r': 0.33454882427354243, 'f1': 0.32923852547554977}, 'combined': 0.24259680824514193, 'epoch': 19} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32674456666966506, 'r': 0.31473189877739793, 'f1': 0.3206257545597462}, 'combined': 0.2093204408006633, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3418937073847221, 'r': 0.33021612345127804, 'f1': 0.33595346922552805}, 'combined': 0.24754466153459959, 'epoch': 19} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32687398925714756, 'r': 0.303740444061559, 'f1': 0.3148828996083623}, 'combined': 0.2055712194334386, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24113475177304963, 'r': 0.32380952380952377, 'f1': 0.2764227642276422}, 'combined': 0.18428184281842813, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27205882352941174, 'r': 0.40217391304347827, 'f1': 0.32456140350877194}, 'combined': 0.16228070175438597, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6363636363636364, 'r': 0.2413793103448276, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:14:01.084521: step: 2/464, loss: 0.11350901424884796 2023-01-24 03:14:01.748705: step: 4/464, loss: 0.018851248547434807 2023-01-24 03:14:02.379761: step: 6/464, loss: 0.09123280644416809 2023-01-24 03:14:02.991486: step: 8/464, loss: 0.9773920178413391 2023-01-24 03:14:03.612562: step: 10/464, loss: 0.09162721037864685 2023-01-24 03:14:04.415456: step: 12/464, loss: 0.09528015553951263 2023-01-24 03:14:05.073679: step: 14/464, loss: 0.06696914881467819 2023-01-24 03:14:05.653555: step: 16/464, loss: 0.14077621698379517 2023-01-24 03:14:06.317478: step: 18/464, loss: 0.11180231720209122 2023-01-24 03:14:06.886003: step: 20/464, loss: 0.03215533122420311 2023-01-24 03:14:07.520344: step: 22/464, loss: 0.0469103641808033 2023-01-24 03:14:08.226447: step: 24/464, loss: 0.03041096217930317 2023-01-24 03:14:08.935218: step: 26/464, loss: 0.050311531871557236 2023-01-24 03:14:09.640270: step: 28/464, loss: 0.03684114292263985 2023-01-24 03:14:10.231623: step: 30/464, loss: 0.11024195700883865 2023-01-24 03:14:10.891255: step: 32/464, loss: 0.042063917964696884 2023-01-24 03:14:11.572174: step: 34/464, loss: 0.11315114051103592 2023-01-24 03:14:12.246768: step: 36/464, loss: 0.21695709228515625 2023-01-24 03:14:12.898978: step: 38/464, loss: 0.062165722250938416 2023-01-24 03:14:13.554567: step: 40/464, loss: 0.01616864651441574 2023-01-24 03:14:14.193658: step: 42/464, loss: 0.025449542328715324 2023-01-24 03:14:14.773573: step: 44/464, loss: 0.008808738552033901 2023-01-24 03:14:15.394338: step: 46/464, loss: 0.035203129053115845 2023-01-24 03:14:15.981342: step: 48/464, loss: 10.598348617553711 2023-01-24 03:14:16.609457: step: 50/464, loss: 0.39950209856033325 2023-01-24 03:14:17.193916: step: 52/464, loss: 0.015635067597031593 2023-01-24 03:14:17.814183: step: 54/464, loss: 0.08349144458770752 2023-01-24 03:14:18.478765: step: 56/464, loss: 0.08045043796300888 2023-01-24 03:14:19.078748: step: 58/464, loss: 0.07628641277551651 2023-01-24 03:14:19.701082: step: 60/464, loss: 0.07498513907194138 2023-01-24 03:14:20.287595: step: 62/464, loss: 0.1008698120713234 2023-01-24 03:14:20.911580: step: 64/464, loss: 0.8809760212898254 2023-01-24 03:14:21.527438: step: 66/464, loss: 0.015431375242769718 2023-01-24 03:14:22.231309: step: 68/464, loss: 0.03336857631802559 2023-01-24 03:14:22.833014: step: 70/464, loss: 0.009299016557633877 2023-01-24 03:14:23.477571: step: 72/464, loss: 0.034576259553432465 2023-01-24 03:14:24.032530: step: 74/464, loss: 0.036344997584819794 2023-01-24 03:14:24.622816: step: 76/464, loss: 0.04858871176838875 2023-01-24 03:14:25.232885: step: 78/464, loss: 0.05398684740066528 2023-01-24 03:14:25.818076: step: 80/464, loss: 0.023192742839455605 2023-01-24 03:14:26.456825: step: 82/464, loss: 0.09319634735584259 2023-01-24 03:14:27.123642: step: 84/464, loss: 0.07040213793516159 2023-01-24 03:14:27.754003: step: 86/464, loss: 0.22368858754634857 2023-01-24 03:14:28.387656: step: 88/464, loss: 0.05666378140449524 2023-01-24 03:14:29.033551: step: 90/464, loss: 0.015081451274454594 2023-01-24 03:14:29.711182: step: 92/464, loss: 0.016412660479545593 2023-01-24 03:14:30.347623: step: 94/464, loss: 0.14707431197166443 2023-01-24 03:14:30.972447: step: 96/464, loss: 0.17913639545440674 2023-01-24 03:14:31.569037: step: 98/464, loss: 0.0637957751750946 2023-01-24 03:14:32.309637: step: 100/464, loss: 0.06730735301971436 2023-01-24 03:14:32.934619: step: 102/464, loss: 0.026260200887918472 2023-01-24 03:14:33.618181: step: 104/464, loss: 0.020162414759397507 2023-01-24 03:14:34.206348: step: 106/464, loss: 0.012230110354721546 2023-01-24 03:14:34.908124: step: 108/464, loss: 0.1642729640007019 2023-01-24 03:14:35.519263: step: 110/464, loss: 0.041497450321912766 2023-01-24 03:14:36.214138: step: 112/464, loss: 0.06851537525653839 2023-01-24 03:14:36.844591: step: 114/464, loss: 0.12393549829721451 2023-01-24 03:14:37.483698: step: 116/464, loss: 0.010810545645654202 2023-01-24 03:14:38.098510: step: 118/464, loss: 0.19591349363327026 2023-01-24 03:14:38.729480: step: 120/464, loss: 0.053106024861335754 2023-01-24 03:14:39.428852: step: 122/464, loss: 0.022138893604278564 2023-01-24 03:14:40.137676: step: 124/464, loss: 0.004090593662112951 2023-01-24 03:14:40.745835: step: 126/464, loss: 0.005117037799209356 2023-01-24 03:14:41.405287: step: 128/464, loss: 0.11544764041900635 2023-01-24 03:14:42.077756: step: 130/464, loss: 0.031940482556819916 2023-01-24 03:14:42.664851: step: 132/464, loss: 0.046688053756952286 2023-01-24 03:14:43.297765: step: 134/464, loss: 0.11258510500192642 2023-01-24 03:14:43.870824: step: 136/464, loss: 0.027288896963000298 2023-01-24 03:14:44.476004: step: 138/464, loss: 0.06842369586229324 2023-01-24 03:14:45.116652: step: 140/464, loss: 0.03138872981071472 2023-01-24 03:14:45.715190: step: 142/464, loss: 0.050660863518714905 2023-01-24 03:14:46.340881: step: 144/464, loss: 0.02836783416569233 2023-01-24 03:14:46.983391: step: 146/464, loss: 0.06767866015434265 2023-01-24 03:14:47.574144: step: 148/464, loss: 0.3799901008605957 2023-01-24 03:14:48.134931: step: 150/464, loss: 0.04470186308026314 2023-01-24 03:14:48.763091: step: 152/464, loss: 0.024545220658183098 2023-01-24 03:14:49.313634: step: 154/464, loss: 0.02867044135928154 2023-01-24 03:14:49.928404: step: 156/464, loss: 0.04654316604137421 2023-01-24 03:14:50.572849: step: 158/464, loss: 0.0218499843031168 2023-01-24 03:14:51.264141: step: 160/464, loss: 0.058538228273391724 2023-01-24 03:14:51.893537: step: 162/464, loss: 0.03959566727280617 2023-01-24 03:14:52.550265: step: 164/464, loss: 0.8627987504005432 2023-01-24 03:14:53.206372: step: 166/464, loss: 0.4328685998916626 2023-01-24 03:14:53.796247: step: 168/464, loss: 0.013413364067673683 2023-01-24 03:14:54.363506: step: 170/464, loss: 0.03929009661078453 2023-01-24 03:14:54.972877: step: 172/464, loss: 0.0497039295732975 2023-01-24 03:14:55.618045: step: 174/464, loss: 0.042862989008426666 2023-01-24 03:14:56.208231: step: 176/464, loss: 0.018839845433831215 2023-01-24 03:14:56.828967: step: 178/464, loss: 0.04663556441664696 2023-01-24 03:14:57.444321: step: 180/464, loss: 0.02066127397119999 2023-01-24 03:14:58.083065: step: 182/464, loss: 0.10703632235527039 2023-01-24 03:14:58.676091: step: 184/464, loss: 0.1171896904706955 2023-01-24 03:14:59.234531: step: 186/464, loss: 0.14891201257705688 2023-01-24 03:14:59.869029: step: 188/464, loss: 0.13603384792804718 2023-01-24 03:15:00.493410: step: 190/464, loss: 0.11203299462795258 2023-01-24 03:15:01.105763: step: 192/464, loss: 0.01096986886113882 2023-01-24 03:15:01.711325: step: 194/464, loss: 0.011121785268187523 2023-01-24 03:15:02.312407: step: 196/464, loss: 0.052049603313207626 2023-01-24 03:15:02.906833: step: 198/464, loss: 0.12041202187538147 2023-01-24 03:15:03.485349: step: 200/464, loss: 0.03418387845158577 2023-01-24 03:15:04.093089: step: 202/464, loss: 0.027816904708743095 2023-01-24 03:15:04.787258: step: 204/464, loss: 0.012873095460236073 2023-01-24 03:15:05.443798: step: 206/464, loss: 0.07279540598392487 2023-01-24 03:15:06.091295: step: 208/464, loss: 0.025248397141695023 2023-01-24 03:15:06.720682: step: 210/464, loss: 0.2562524378299713 2023-01-24 03:15:07.277183: step: 212/464, loss: 0.05504145473241806 2023-01-24 03:15:07.855557: step: 214/464, loss: 0.031437475234270096 2023-01-24 03:15:08.460552: step: 216/464, loss: 0.021800341084599495 2023-01-24 03:15:09.117433: step: 218/464, loss: 0.041105590760707855 2023-01-24 03:15:09.777276: step: 220/464, loss: 0.025019172579050064 2023-01-24 03:15:10.414739: step: 222/464, loss: 0.051166728138923645 2023-01-24 03:15:10.995340: step: 224/464, loss: 0.0837060809135437 2023-01-24 03:15:11.586562: step: 226/464, loss: 0.032398246228694916 2023-01-24 03:15:12.190702: step: 228/464, loss: 0.06584363430738449 2023-01-24 03:15:12.816905: step: 230/464, loss: 0.1002630889415741 2023-01-24 03:15:13.493784: step: 232/464, loss: 0.14658352732658386 2023-01-24 03:15:14.096917: step: 234/464, loss: 0.022812798619270325 2023-01-24 03:15:14.692523: step: 236/464, loss: 0.0083860382437706 2023-01-24 03:15:15.311623: step: 238/464, loss: 0.059458956122398376 2023-01-24 03:15:15.953928: step: 240/464, loss: 0.2803577184677124 2023-01-24 03:15:16.652889: step: 242/464, loss: 0.054438553750514984 2023-01-24 03:15:17.300731: step: 244/464, loss: 0.07082608342170715 2023-01-24 03:15:17.965252: step: 246/464, loss: 0.39088043570518494 2023-01-24 03:15:18.609590: step: 248/464, loss: 0.0758625715970993 2023-01-24 03:15:19.223393: step: 250/464, loss: 0.03512102738022804 2023-01-24 03:15:19.881234: step: 252/464, loss: 0.0235645342618227 2023-01-24 03:15:20.556551: step: 254/464, loss: 0.23970156908035278 2023-01-24 03:15:21.230757: step: 256/464, loss: 0.0481828935444355 2023-01-24 03:15:21.855190: step: 258/464, loss: 0.025949949398636818 2023-01-24 03:15:22.494632: step: 260/464, loss: 0.05415412038564682 2023-01-24 03:15:23.079680: step: 262/464, loss: 0.01755969226360321 2023-01-24 03:15:23.726042: step: 264/464, loss: 0.011041303165256977 2023-01-24 03:15:24.416759: step: 266/464, loss: 0.05071423202753067 2023-01-24 03:15:24.992324: step: 268/464, loss: 0.10481975227594376 2023-01-24 03:15:25.700807: step: 270/464, loss: 0.05235401168465614 2023-01-24 03:15:26.415130: step: 272/464, loss: 0.04715341702103615 2023-01-24 03:15:27.029232: step: 274/464, loss: 0.016234122216701508 2023-01-24 03:15:27.609637: step: 276/464, loss: 0.03238828852772713 2023-01-24 03:15:28.289569: step: 278/464, loss: 0.005319828633219004 2023-01-24 03:15:28.909986: step: 280/464, loss: 0.005704961251467466 2023-01-24 03:15:29.509341: step: 282/464, loss: 0.022617634385824203 2023-01-24 03:15:30.219753: step: 284/464, loss: 0.14760327339172363 2023-01-24 03:15:30.840603: step: 286/464, loss: 0.0545155368745327 2023-01-24 03:15:31.453627: step: 288/464, loss: 0.03985011577606201 2023-01-24 03:15:32.050848: step: 290/464, loss: 0.06032747030258179 2023-01-24 03:15:32.753644: step: 292/464, loss: 0.18031242489814758 2023-01-24 03:15:33.390361: step: 294/464, loss: 0.0449274517595768 2023-01-24 03:15:34.009855: step: 296/464, loss: 0.06015952676534653 2023-01-24 03:15:34.617464: step: 298/464, loss: 0.07454682886600494 2023-01-24 03:15:35.355025: step: 300/464, loss: 0.012659909203648567 2023-01-24 03:15:35.966891: step: 302/464, loss: 0.013314616866409779 2023-01-24 03:15:36.578081: step: 304/464, loss: 0.13898083567619324 2023-01-24 03:15:37.210563: step: 306/464, loss: 0.13636218011379242 2023-01-24 03:15:37.940300: step: 308/464, loss: 0.04414936900138855 2023-01-24 03:15:38.623252: step: 310/464, loss: 0.04620746150612831 2023-01-24 03:15:39.214306: step: 312/464, loss: 0.05739176645874977 2023-01-24 03:15:39.887715: step: 314/464, loss: 0.01256087888032198 2023-01-24 03:15:40.525230: step: 316/464, loss: 0.2560037672519684 2023-01-24 03:15:41.161805: step: 318/464, loss: 0.11795883625745773 2023-01-24 03:15:41.812011: step: 320/464, loss: 0.25905025005340576 2023-01-24 03:15:42.462946: step: 322/464, loss: 0.043898455798625946 2023-01-24 03:15:43.144811: step: 324/464, loss: 0.01372869685292244 2023-01-24 03:15:43.734730: step: 326/464, loss: 0.02278818003833294 2023-01-24 03:15:44.363626: step: 328/464, loss: 0.019648827612400055 2023-01-24 03:15:45.039098: step: 330/464, loss: 0.9100474119186401 2023-01-24 03:15:45.679000: step: 332/464, loss: 0.025974465534090996 2023-01-24 03:15:46.255645: step: 334/464, loss: 0.04663110896945 2023-01-24 03:15:46.777390: step: 336/464, loss: 0.06748991459608078 2023-01-24 03:15:47.454323: step: 338/464, loss: 0.050867773592472076 2023-01-24 03:15:48.095000: step: 340/464, loss: 3.019404411315918 2023-01-24 03:15:48.685820: step: 342/464, loss: 0.005875057075172663 2023-01-24 03:15:49.235491: step: 344/464, loss: 0.09006526321172714 2023-01-24 03:15:49.806570: step: 346/464, loss: 0.27136194705963135 2023-01-24 03:15:50.426292: step: 348/464, loss: 0.03859527409076691 2023-01-24 03:15:51.004619: step: 350/464, loss: 0.029409943148493767 2023-01-24 03:15:51.666156: step: 352/464, loss: 0.07346473634243011 2023-01-24 03:15:52.305381: step: 354/464, loss: 0.014956225641071796 2023-01-24 03:15:52.896691: step: 356/464, loss: 0.02973783016204834 2023-01-24 03:15:53.542015: step: 358/464, loss: 0.10100691020488739 2023-01-24 03:15:54.167681: step: 360/464, loss: 0.03443049639463425 2023-01-24 03:15:54.820985: step: 362/464, loss: 0.06473658233880997 2023-01-24 03:15:55.415438: step: 364/464, loss: 0.018873097375035286 2023-01-24 03:15:56.032851: step: 366/464, loss: 0.0961344912648201 2023-01-24 03:15:56.675644: step: 368/464, loss: 0.2247253954410553 2023-01-24 03:15:57.235527: step: 370/464, loss: 0.06430181860923767 2023-01-24 03:15:57.875025: step: 372/464, loss: 0.02368774078786373 2023-01-24 03:15:58.541184: step: 374/464, loss: 0.010254189372062683 2023-01-24 03:15:59.153817: step: 376/464, loss: 0.033824507147073746 2023-01-24 03:15:59.759888: step: 378/464, loss: 0.0350734181702137 2023-01-24 03:16:00.376084: step: 380/464, loss: 0.09410750865936279 2023-01-24 03:16:00.949612: step: 382/464, loss: 0.036839719861745834 2023-01-24 03:16:01.550357: step: 384/464, loss: 0.0061513688415288925 2023-01-24 03:16:02.150838: step: 386/464, loss: 0.02870376594364643 2023-01-24 03:16:02.779800: step: 388/464, loss: 0.10995577275753021 2023-01-24 03:16:03.397976: step: 390/464, loss: 0.0530233196914196 2023-01-24 03:16:04.042250: step: 392/464, loss: 0.05366725102066994 2023-01-24 03:16:04.643983: step: 394/464, loss: 0.16559071838855743 2023-01-24 03:16:05.246905: step: 396/464, loss: 0.003926682285964489 2023-01-24 03:16:05.852759: step: 398/464, loss: 0.032966699451208115 2023-01-24 03:16:06.482070: step: 400/464, loss: 0.02813999354839325 2023-01-24 03:16:07.107426: step: 402/464, loss: 0.013925126753747463 2023-01-24 03:16:07.727009: step: 404/464, loss: 0.0418202169239521 2023-01-24 03:16:08.318792: step: 406/464, loss: 0.1888405680656433 2023-01-24 03:16:08.916625: step: 408/464, loss: 0.029980331659317017 2023-01-24 03:16:09.522876: step: 410/464, loss: 0.12852568924427032 2023-01-24 03:16:10.084903: step: 412/464, loss: 0.06435182690620422 2023-01-24 03:16:10.725574: step: 414/464, loss: 0.024199888110160828 2023-01-24 03:16:11.394942: step: 416/464, loss: 0.008535795845091343 2023-01-24 03:16:12.048600: step: 418/464, loss: 0.04710658639669418 2023-01-24 03:16:12.713792: step: 420/464, loss: 0.09452218562364578 2023-01-24 03:16:13.375218: step: 422/464, loss: 0.01563073880970478 2023-01-24 03:16:14.033295: step: 424/464, loss: 0.08478792756795883 2023-01-24 03:16:14.681930: step: 426/464, loss: 0.016601169481873512 2023-01-24 03:16:15.297261: step: 428/464, loss: 0.13459746539592743 2023-01-24 03:16:15.962452: step: 430/464, loss: 0.28737953305244446 2023-01-24 03:16:16.603038: step: 432/464, loss: 0.17553038895130157 2023-01-24 03:16:17.273687: step: 434/464, loss: 0.10274675488471985 2023-01-24 03:16:17.891088: step: 436/464, loss: 0.07716778665781021 2023-01-24 03:16:18.550704: step: 438/464, loss: 0.09986363351345062 2023-01-24 03:16:19.156925: step: 440/464, loss: 1.048673391342163 2023-01-24 03:16:19.774884: step: 442/464, loss: 0.01017417199909687 2023-01-24 03:16:20.408196: step: 444/464, loss: 0.04866155609488487 2023-01-24 03:16:21.014484: step: 446/464, loss: 0.055671293288469315 2023-01-24 03:16:21.591137: step: 448/464, loss: 0.08362412452697754 2023-01-24 03:16:22.225915: step: 450/464, loss: 0.04041793569922447 2023-01-24 03:16:22.851915: step: 452/464, loss: 0.02534993179142475 2023-01-24 03:16:23.464742: step: 454/464, loss: 0.1549074947834015 2023-01-24 03:16:24.046107: step: 456/464, loss: 0.036239758133888245 2023-01-24 03:16:24.659615: step: 458/464, loss: 0.07680630683898926 2023-01-24 03:16:25.287512: step: 460/464, loss: 0.029601193964481354 2023-01-24 03:16:25.918585: step: 462/464, loss: 0.13906963169574738 2023-01-24 03:16:26.560179: step: 464/464, loss: 0.07985132932662964 2023-01-24 03:16:27.159460: step: 466/464, loss: 0.09230957180261612 2023-01-24 03:16:27.863834: step: 468/464, loss: 0.16694991290569305 2023-01-24 03:16:28.526168: step: 470/464, loss: 1.2943658828735352 2023-01-24 03:16:29.141562: step: 472/464, loss: 0.007379346992820501 2023-01-24 03:16:29.780883: step: 474/464, loss: 0.020899252966046333 2023-01-24 03:16:30.407677: step: 476/464, loss: 0.004411220550537109 2023-01-24 03:16:31.044415: step: 478/464, loss: 0.044038306921720505 2023-01-24 03:16:31.643640: step: 480/464, loss: 0.021422799676656723 2023-01-24 03:16:32.228708: step: 482/464, loss: 0.02925972454249859 2023-01-24 03:16:32.843579: step: 484/464, loss: 0.09991101175546646 2023-01-24 03:16:33.481792: step: 486/464, loss: 0.06824138760566711 2023-01-24 03:16:34.133563: step: 488/464, loss: 0.04947546496987343 2023-01-24 03:16:34.762462: step: 490/464, loss: 0.020775338634848595 2023-01-24 03:16:35.366765: step: 492/464, loss: 0.7242358922958374 2023-01-24 03:16:36.050878: step: 494/464, loss: 0.0369894877076149 2023-01-24 03:16:36.631456: step: 496/464, loss: 0.012789350003004074 2023-01-24 03:16:37.239432: step: 498/464, loss: 0.07767671346664429 2023-01-24 03:16:37.872886: step: 500/464, loss: 0.006513867061585188 2023-01-24 03:16:38.487766: step: 502/464, loss: 0.0035043805837631226 2023-01-24 03:16:39.121285: step: 504/464, loss: 0.09234146773815155 2023-01-24 03:16:39.718317: step: 506/464, loss: 0.12091848999261856 2023-01-24 03:16:40.340898: step: 508/464, loss: 0.04351840168237686 2023-01-24 03:16:41.004197: step: 510/464, loss: 0.08592573553323746 2023-01-24 03:16:41.648658: step: 512/464, loss: 0.02391248755156994 2023-01-24 03:16:42.290758: step: 514/464, loss: 0.03436162322759628 2023-01-24 03:16:42.920762: step: 516/464, loss: 0.07426901161670685 2023-01-24 03:16:43.684774: step: 518/464, loss: 0.05543963238596916 2023-01-24 03:16:44.413303: step: 520/464, loss: 0.04919443279504776 2023-01-24 03:16:45.028659: step: 522/464, loss: 0.5651112198829651 2023-01-24 03:16:45.582956: step: 524/464, loss: 0.4395751655101776 2023-01-24 03:16:46.196033: step: 526/464, loss: 0.09639833122491837 2023-01-24 03:16:46.877022: step: 528/464, loss: 0.08385375142097473 2023-01-24 03:16:47.448503: step: 530/464, loss: 0.07988838851451874 2023-01-24 03:16:48.128367: step: 532/464, loss: 0.16797766089439392 2023-01-24 03:16:48.761364: step: 534/464, loss: 0.5436436533927917 2023-01-24 03:16:49.386076: step: 536/464, loss: 0.0218205489218235 2023-01-24 03:16:50.033638: step: 538/464, loss: 0.03554167598485947 2023-01-24 03:16:50.663984: step: 540/464, loss: 0.01219553779810667 2023-01-24 03:16:51.256043: step: 542/464, loss: 0.060262829065322876 2023-01-24 03:16:51.973606: step: 544/464, loss: 0.018596313893795013 2023-01-24 03:16:52.646386: step: 546/464, loss: 1.0576659440994263 2023-01-24 03:16:53.271156: step: 548/464, loss: 0.014665772207081318 2023-01-24 03:16:53.910383: step: 550/464, loss: 0.17855624854564667 2023-01-24 03:16:54.498852: step: 552/464, loss: 0.09625093638896942 2023-01-24 03:16:55.097163: step: 554/464, loss: 0.03178940713405609 2023-01-24 03:16:55.709849: step: 556/464, loss: 0.3213636875152588 2023-01-24 03:16:56.316681: step: 558/464, loss: 0.04676403850317001 2023-01-24 03:16:56.955564: step: 560/464, loss: 0.03744920343160629 2023-01-24 03:16:57.589745: step: 562/464, loss: 0.20983579754829407 2023-01-24 03:16:58.236938: step: 564/464, loss: 0.026830270886421204 2023-01-24 03:16:58.974242: step: 566/464, loss: 0.052127353847026825 2023-01-24 03:16:59.544601: step: 568/464, loss: 0.12017125636339188 2023-01-24 03:17:00.192652: step: 570/464, loss: 0.050923123955726624 2023-01-24 03:17:00.813771: step: 572/464, loss: 0.07927900552749634 2023-01-24 03:17:01.441020: step: 574/464, loss: 0.014502828009426594 2023-01-24 03:17:02.010616: step: 576/464, loss: 0.040527619421482086 2023-01-24 03:17:02.633082: step: 578/464, loss: 0.019206058233976364 2023-01-24 03:17:03.273388: step: 580/464, loss: 0.03054519183933735 2023-01-24 03:17:03.815278: step: 582/464, loss: 0.17930087447166443 2023-01-24 03:17:04.479494: step: 584/464, loss: 0.24165508151054382 2023-01-24 03:17:05.044798: step: 586/464, loss: 0.04640674591064453 2023-01-24 03:17:05.615945: step: 588/464, loss: 0.07006865739822388 2023-01-24 03:17:06.196027: step: 590/464, loss: 0.023412950336933136 2023-01-24 03:17:06.841424: step: 592/464, loss: 1.1467721462249756 2023-01-24 03:17:07.543257: step: 594/464, loss: 0.02414711005985737 2023-01-24 03:17:08.142627: step: 596/464, loss: 0.1264013797044754 2023-01-24 03:17:08.772710: step: 598/464, loss: 7.126690864562988 2023-01-24 03:17:09.373424: step: 600/464, loss: 0.04586399346590042 2023-01-24 03:17:10.022332: step: 602/464, loss: 0.01952878199517727 2023-01-24 03:17:10.614800: step: 604/464, loss: 0.01158772874623537 2023-01-24 03:17:11.258508: step: 606/464, loss: 0.030504167079925537 2023-01-24 03:17:11.904241: step: 608/464, loss: 0.0007740100263617933 2023-01-24 03:17:12.541144: step: 610/464, loss: 0.04654207453131676 2023-01-24 03:17:13.149584: step: 612/464, loss: 0.045558538287878036 2023-01-24 03:17:13.716367: step: 614/464, loss: 0.03136839345097542 2023-01-24 03:17:14.328634: step: 616/464, loss: 0.11822475492954254 2023-01-24 03:17:14.919319: step: 618/464, loss: 0.14972276985645294 2023-01-24 03:17:15.552775: step: 620/464, loss: 0.1923346370458603 2023-01-24 03:17:16.222058: step: 622/464, loss: 0.03167210519313812 2023-01-24 03:17:16.968803: step: 624/464, loss: 0.02385994978249073 2023-01-24 03:17:17.575081: step: 626/464, loss: 0.017547806724905968 2023-01-24 03:17:18.274370: step: 628/464, loss: 0.030291052535176277 2023-01-24 03:17:18.904773: step: 630/464, loss: 0.05227775126695633 2023-01-24 03:17:19.535705: step: 632/464, loss: 0.09638462960720062 2023-01-24 03:17:20.120377: step: 634/464, loss: 0.04361134022474289 2023-01-24 03:17:20.785048: step: 636/464, loss: 0.12159812450408936 2023-01-24 03:17:21.359837: step: 638/464, loss: 0.2759445905685425 2023-01-24 03:17:21.949785: step: 640/464, loss: 0.02072410099208355 2023-01-24 03:17:22.679899: step: 642/464, loss: 0.5923712849617004 2023-01-24 03:17:23.295737: step: 644/464, loss: 0.10416293144226074 2023-01-24 03:17:23.901347: step: 646/464, loss: 0.029567325487732887 2023-01-24 03:17:24.462558: step: 648/464, loss: 0.044003792107105255 2023-01-24 03:17:25.056845: step: 650/464, loss: 0.4801039695739746 2023-01-24 03:17:25.689647: step: 652/464, loss: 0.1835222840309143 2023-01-24 03:17:26.300088: step: 654/464, loss: 0.1707906872034073 2023-01-24 03:17:26.926324: step: 656/464, loss: 0.05018517002463341 2023-01-24 03:17:27.516739: step: 658/464, loss: 0.14961649477481842 2023-01-24 03:17:28.131628: step: 660/464, loss: 0.15067699551582336 2023-01-24 03:17:28.800072: step: 662/464, loss: 5.681148052215576 2023-01-24 03:17:29.459208: step: 664/464, loss: 0.02603510022163391 2023-01-24 03:17:30.127629: step: 666/464, loss: 0.059188805520534515 2023-01-24 03:17:30.808872: step: 668/464, loss: 0.06851480901241302 2023-01-24 03:17:31.449680: step: 670/464, loss: 0.03866920247673988 2023-01-24 03:17:32.113356: step: 672/464, loss: 0.11503680050373077 2023-01-24 03:17:32.713815: step: 674/464, loss: 0.07259032875299454 2023-01-24 03:17:33.319412: step: 676/464, loss: 0.01903834007680416 2023-01-24 03:17:33.922662: step: 678/464, loss: 0.03478504717350006 2023-01-24 03:17:34.574620: step: 680/464, loss: 0.02486582100391388 2023-01-24 03:17:35.175576: step: 682/464, loss: 0.07330082356929779 2023-01-24 03:17:35.770814: step: 684/464, loss: 0.2603822946548462 2023-01-24 03:17:36.398515: step: 686/464, loss: 0.01605270616710186 2023-01-24 03:17:37.060552: step: 688/464, loss: 0.12658101320266724 2023-01-24 03:17:37.728638: step: 690/464, loss: 0.07447104901075363 2023-01-24 03:17:38.320056: step: 692/464, loss: 0.04469962790608406 2023-01-24 03:17:38.927495: step: 694/464, loss: 0.014046311378479004 2023-01-24 03:17:39.532890: step: 696/464, loss: 0.06072556599974632 2023-01-24 03:17:40.133335: step: 698/464, loss: 0.357933908700943 2023-01-24 03:17:40.778047: step: 700/464, loss: 0.4937451183795929 2023-01-24 03:17:41.424516: step: 702/464, loss: 0.04278302937746048 2023-01-24 03:17:42.064741: step: 704/464, loss: 0.0051592267118394375 2023-01-24 03:17:42.691501: step: 706/464, loss: 0.02061188779771328 2023-01-24 03:17:43.400014: step: 708/464, loss: 0.03858703374862671 2023-01-24 03:17:44.085782: step: 710/464, loss: 0.08712794631719589 2023-01-24 03:17:44.693943: step: 712/464, loss: 0.07264778763055801 2023-01-24 03:17:45.334971: step: 714/464, loss: 0.013997122645378113 2023-01-24 03:17:45.983306: step: 716/464, loss: 0.02950339764356613 2023-01-24 03:17:46.610167: step: 718/464, loss: 0.04933254048228264 2023-01-24 03:17:47.234577: step: 720/464, loss: 0.008906682021915913 2023-01-24 03:17:47.858937: step: 722/464, loss: 0.03700536489486694 2023-01-24 03:17:48.448994: step: 724/464, loss: 0.06359779834747314 2023-01-24 03:17:49.158572: step: 726/464, loss: 0.06773487478494644 2023-01-24 03:17:49.771817: step: 728/464, loss: 0.06296397745609283 2023-01-24 03:17:50.411708: step: 730/464, loss: 0.11643794178962708 2023-01-24 03:17:50.993979: step: 732/464, loss: 0.14957650005817413 2023-01-24 03:17:51.628487: step: 734/464, loss: 0.026945363730192184 2023-01-24 03:17:52.270419: step: 736/464, loss: 0.011237741447985172 2023-01-24 03:17:52.957154: step: 738/464, loss: 0.0552695207297802 2023-01-24 03:17:53.594305: step: 740/464, loss: 0.0017769387923181057 2023-01-24 03:17:54.230821: step: 742/464, loss: 0.0779266208410263 2023-01-24 03:17:54.850130: step: 744/464, loss: 0.9308744668960571 2023-01-24 03:17:55.470246: step: 746/464, loss: 0.0353737510740757 2023-01-24 03:17:56.157774: step: 748/464, loss: 0.09376434981822968 2023-01-24 03:17:56.768330: step: 750/464, loss: 0.10377569496631622 2023-01-24 03:17:57.380430: step: 752/464, loss: 0.3020147383213043 2023-01-24 03:17:58.043272: step: 754/464, loss: 0.05934539809823036 2023-01-24 03:17:58.599479: step: 756/464, loss: 0.05946807563304901 2023-01-24 03:17:59.236333: step: 758/464, loss: 0.04041888937354088 2023-01-24 03:17:59.817237: step: 760/464, loss: 0.13945092260837555 2023-01-24 03:18:00.452721: step: 762/464, loss: 0.1342869997024536 2023-01-24 03:18:01.084259: step: 764/464, loss: 0.022831939160823822 2023-01-24 03:18:01.681585: step: 766/464, loss: 0.05797601491212845 2023-01-24 03:18:02.341377: step: 768/464, loss: 0.04214341565966606 2023-01-24 03:18:02.961238: step: 770/464, loss: 0.06823202967643738 2023-01-24 03:18:03.575101: step: 772/464, loss: 0.04907310754060745 2023-01-24 03:18:04.289020: step: 774/464, loss: 0.04450121521949768 2023-01-24 03:18:04.937277: step: 776/464, loss: 0.23292294144630432 2023-01-24 03:18:05.543682: step: 778/464, loss: 0.06011654809117317 2023-01-24 03:18:06.190919: step: 780/464, loss: 0.3196452260017395 2023-01-24 03:18:06.764912: step: 782/464, loss: 0.03805053234100342 2023-01-24 03:18:07.428823: step: 784/464, loss: 0.0314154289662838 2023-01-24 03:18:08.080648: step: 786/464, loss: 0.13404737412929535 2023-01-24 03:18:08.688990: step: 788/464, loss: 0.08828222751617432 2023-01-24 03:18:09.284915: step: 790/464, loss: 0.001246851752512157 2023-01-24 03:18:09.926033: step: 792/464, loss: 0.07216297090053558 2023-01-24 03:18:10.654369: step: 794/464, loss: 0.05633767321705818 2023-01-24 03:18:11.237172: step: 796/464, loss: 0.14546740055084229 2023-01-24 03:18:11.835925: step: 798/464, loss: 0.056861281394958496 2023-01-24 03:18:12.511115: step: 800/464, loss: 0.2274370640516281 2023-01-24 03:18:13.213725: step: 802/464, loss: 0.027379153296351433 2023-01-24 03:18:13.804920: step: 804/464, loss: 0.030357034876942635 2023-01-24 03:18:14.403123: step: 806/464, loss: 0.13116410374641418 2023-01-24 03:18:15.002070: step: 808/464, loss: 0.09121730178594589 2023-01-24 03:18:15.653892: step: 810/464, loss: 0.27981850504875183 2023-01-24 03:18:16.278116: step: 812/464, loss: 0.0588790699839592 2023-01-24 03:18:16.905578: step: 814/464, loss: 0.017142634838819504 2023-01-24 03:18:17.539180: step: 816/464, loss: 0.04943757504224777 2023-01-24 03:18:18.250447: step: 818/464, loss: 0.01358798611909151 2023-01-24 03:18:18.903384: step: 820/464, loss: 0.0991472601890564 2023-01-24 03:18:19.538278: step: 822/464, loss: 0.05006462708115578 2023-01-24 03:18:20.131486: step: 824/464, loss: 0.005734651815146208 2023-01-24 03:18:20.708701: step: 826/464, loss: 0.027040397748351097 2023-01-24 03:18:21.359560: step: 828/464, loss: 0.1423128992319107 2023-01-24 03:18:21.906119: step: 830/464, loss: 0.013830373995006084 2023-01-24 03:18:22.563241: step: 832/464, loss: 0.06289157271385193 2023-01-24 03:18:23.164601: step: 834/464, loss: 0.12293533980846405 2023-01-24 03:18:23.768264: step: 836/464, loss: 0.016394013538956642 2023-01-24 03:18:24.431083: step: 838/464, loss: 0.29480549693107605 2023-01-24 03:18:25.013421: step: 840/464, loss: 0.09442153573036194 2023-01-24 03:18:25.645725: step: 842/464, loss: 0.2897118330001831 2023-01-24 03:18:26.255334: step: 844/464, loss: 0.07965207099914551 2023-01-24 03:18:26.852930: step: 846/464, loss: 0.05758389085531235 2023-01-24 03:18:27.451817: step: 848/464, loss: 0.14077042043209076 2023-01-24 03:18:28.027460: step: 850/464, loss: 0.052814725786447525 2023-01-24 03:18:28.648051: step: 852/464, loss: 0.06765639781951904 2023-01-24 03:18:29.276408: step: 854/464, loss: 0.0761323794722557 2023-01-24 03:18:29.878491: step: 856/464, loss: 0.06979372352361679 2023-01-24 03:18:30.467079: step: 858/464, loss: 0.013624468818306923 2023-01-24 03:18:31.132523: step: 860/464, loss: 0.034657832235097885 2023-01-24 03:18:31.773507: step: 862/464, loss: 0.030351106077432632 2023-01-24 03:18:32.392577: step: 864/464, loss: 0.040423404425382614 2023-01-24 03:18:33.034074: step: 866/464, loss: 0.0385703444480896 2023-01-24 03:18:33.665365: step: 868/464, loss: 0.04118971526622772 2023-01-24 03:18:34.336396: step: 870/464, loss: 0.14877402782440186 2023-01-24 03:18:34.949090: step: 872/464, loss: 0.02471744269132614 2023-01-24 03:18:35.555411: step: 874/464, loss: 0.08140549808740616 2023-01-24 03:18:36.139816: step: 876/464, loss: 0.032378822565078735 2023-01-24 03:18:36.772392: step: 878/464, loss: 0.018628351390361786 2023-01-24 03:18:37.378288: step: 880/464, loss: 1.039273738861084 2023-01-24 03:18:38.006831: step: 882/464, loss: 0.013100219890475273 2023-01-24 03:18:38.599745: step: 884/464, loss: 0.04357944428920746 2023-01-24 03:18:39.197371: step: 886/464, loss: 0.02474367432296276 2023-01-24 03:18:39.861477: step: 888/464, loss: 0.010314030572772026 2023-01-24 03:18:40.459403: step: 890/464, loss: 0.05558737367391586 2023-01-24 03:18:41.004654: step: 892/464, loss: 0.07188452780246735 2023-01-24 03:18:41.599564: step: 894/464, loss: 0.012282581068575382 2023-01-24 03:18:42.233309: step: 896/464, loss: 0.03327278420329094 2023-01-24 03:18:42.979206: step: 898/464, loss: 0.11073649674654007 2023-01-24 03:18:43.542422: step: 900/464, loss: 0.048409298062324524 2023-01-24 03:18:44.235606: step: 902/464, loss: 1.977931261062622 2023-01-24 03:18:44.914435: step: 904/464, loss: 0.010337852872908115 2023-01-24 03:18:45.535103: step: 906/464, loss: 0.0512164831161499 2023-01-24 03:18:46.201402: step: 908/464, loss: 0.051889412105083466 2023-01-24 03:18:46.905233: step: 910/464, loss: 0.04711935296654701 2023-01-24 03:18:47.534753: step: 912/464, loss: 0.023373626172542572 2023-01-24 03:18:48.223452: step: 914/464, loss: 0.0980856716632843 2023-01-24 03:18:48.860392: step: 916/464, loss: 0.07699963450431824 2023-01-24 03:18:49.417496: step: 918/464, loss: 0.11549467593431473 2023-01-24 03:18:50.070946: step: 920/464, loss: 0.014528129249811172 2023-01-24 03:18:50.715030: step: 922/464, loss: 0.08139970153570175 2023-01-24 03:18:51.335886: step: 924/464, loss: 0.019468706101179123 2023-01-24 03:18:52.034094: step: 926/464, loss: 0.36008772253990173 2023-01-24 03:18:52.657357: step: 928/464, loss: 0.02395019680261612 2023-01-24 03:18:53.130302: step: 930/464, loss: 1.4262751340866089 ================================================== Loss: 0.163 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3237444550063372, 'r': 0.3231301391524352, 'f1': 0.3234370053814499}, 'combined': 0.2383220039652789, 'epoch': 20} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3157880379064507, 'r': 0.3082416325888333, 'f1': 0.31196920582014015}, 'combined': 0.20366901519864072, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3321030417764121, 'r': 0.3377746307251554, 'f1': 0.33491482670208256}, 'combined': 0.24677934599100818, 'epoch': 20} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3221522121829989, 'r': 0.3091120122418287, 'f1': 0.31549742470128433}, 'combined': 0.20597241198114935, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3439523521610236, 'r': 0.3406890471120575, 'f1': 0.3423129224557756}, 'combined': 0.2522305744410978, 'epoch': 20} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3292787346239084, 'r': 0.30204060400244537, 'f1': 0.31507207780887875}, 'combined': 0.20569472437263586, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24911347517730495, 'r': 0.3345238095238095, 'f1': 0.2855691056910569}, 'combined': 0.19037940379403795, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.275, 'r': 0.358695652173913, 'f1': 0.3113207547169812}, 'combined': 0.1556603773584906, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:21:30.604750: step: 2/464, loss: 0.2879459857940674 2023-01-24 03:21:31.195792: step: 4/464, loss: 0.02850549854338169 2023-01-24 03:21:31.734418: step: 6/464, loss: 0.0287797749042511 2023-01-24 03:21:32.333226: step: 8/464, loss: 0.06807540357112885 2023-01-24 03:21:32.978852: step: 10/464, loss: 0.10075829178094864 2023-01-24 03:21:33.657629: step: 12/464, loss: 0.060137659311294556 2023-01-24 03:21:34.247335: step: 14/464, loss: 0.03860960528254509 2023-01-24 03:21:34.909585: step: 16/464, loss: 0.1313168853521347 2023-01-24 03:21:35.492256: step: 18/464, loss: 0.03235574811697006 2023-01-24 03:21:36.119915: step: 20/464, loss: 0.028379498049616814 2023-01-24 03:21:36.730368: step: 22/464, loss: 0.03577785566449165 2023-01-24 03:21:37.341430: step: 24/464, loss: 0.08534808456897736 2023-01-24 03:21:37.951619: step: 26/464, loss: 0.05950196087360382 2023-01-24 03:21:38.586601: step: 28/464, loss: 0.08497898280620575 2023-01-24 03:21:39.190127: step: 30/464, loss: 5.2432098388671875 2023-01-24 03:21:39.843126: step: 32/464, loss: 0.3288305401802063 2023-01-24 03:21:40.446856: step: 34/464, loss: 0.027298834174871445 2023-01-24 03:21:41.083227: step: 36/464, loss: 0.020640632137656212 2023-01-24 03:21:41.694186: step: 38/464, loss: 0.6343681812286377 2023-01-24 03:21:42.302225: step: 40/464, loss: 0.02846040204167366 2023-01-24 03:21:42.928091: step: 42/464, loss: 0.031150346621870995 2023-01-24 03:21:43.575130: step: 44/464, loss: 0.057186901569366455 2023-01-24 03:21:44.164884: step: 46/464, loss: 0.02878502942621708 2023-01-24 03:21:44.752288: step: 48/464, loss: 0.01139222364872694 2023-01-24 03:21:45.420869: step: 50/464, loss: 0.18849028646945953 2023-01-24 03:21:46.023980: step: 52/464, loss: 0.10385727137327194 2023-01-24 03:21:46.628048: step: 54/464, loss: 0.1948239952325821 2023-01-24 03:21:47.255981: step: 56/464, loss: 0.040434375405311584 2023-01-24 03:21:47.891824: step: 58/464, loss: 0.044722944498062134 2023-01-24 03:21:48.550376: step: 60/464, loss: 0.1531590223312378 2023-01-24 03:21:49.234778: step: 62/464, loss: 0.036119494587183 2023-01-24 03:21:49.832252: step: 64/464, loss: 0.17749238014221191 2023-01-24 03:21:50.409940: step: 66/464, loss: 0.09734697639942169 2023-01-24 03:21:50.983176: step: 68/464, loss: 0.035482268780469894 2023-01-24 03:21:51.636625: step: 70/464, loss: 0.017056122422218323 2023-01-24 03:21:52.282722: step: 72/464, loss: 0.009295911528170109 2023-01-24 03:21:52.923881: step: 74/464, loss: 0.23246900737285614 2023-01-24 03:21:53.543766: step: 76/464, loss: 0.1933257281780243 2023-01-24 03:21:54.162871: step: 78/464, loss: 0.0870656743645668 2023-01-24 03:21:54.774617: step: 80/464, loss: 0.022031113505363464 2023-01-24 03:21:55.437122: step: 82/464, loss: 0.027440907433629036 2023-01-24 03:21:56.107772: step: 84/464, loss: 0.128365620970726 2023-01-24 03:21:56.677829: step: 86/464, loss: 0.005240590311586857 2023-01-24 03:21:57.289762: step: 88/464, loss: 0.149997740983963 2023-01-24 03:21:57.911032: step: 90/464, loss: 0.1929507851600647 2023-01-24 03:21:58.536730: step: 92/464, loss: 0.15306395292282104 2023-01-24 03:21:59.204528: step: 94/464, loss: 0.02803787775337696 2023-01-24 03:21:59.793419: step: 96/464, loss: 0.04839000850915909 2023-01-24 03:22:00.347190: step: 98/464, loss: 0.01491641066968441 2023-01-24 03:22:00.961075: step: 100/464, loss: 0.08155541121959686 2023-01-24 03:22:01.543345: step: 102/464, loss: 0.041741661727428436 2023-01-24 03:22:02.159690: step: 104/464, loss: 0.02218620479106903 2023-01-24 03:22:02.739843: step: 106/464, loss: 0.026464005932211876 2023-01-24 03:22:03.394087: step: 108/464, loss: 0.04624956101179123 2023-01-24 03:22:03.991605: step: 110/464, loss: 0.08009933680295944 2023-01-24 03:22:04.622548: step: 112/464, loss: 0.0521833561360836 2023-01-24 03:22:05.269750: step: 114/464, loss: 0.030787384137511253 2023-01-24 03:22:05.943404: step: 116/464, loss: 0.07171545177698135 2023-01-24 03:22:06.589356: step: 118/464, loss: 0.012919297441840172 2023-01-24 03:22:07.159553: step: 120/464, loss: 0.2969965636730194 2023-01-24 03:22:07.778661: step: 122/464, loss: 0.027525799348950386 2023-01-24 03:22:08.439684: step: 124/464, loss: 0.011926285922527313 2023-01-24 03:22:08.983184: step: 126/464, loss: 0.04155060276389122 2023-01-24 03:22:09.575643: step: 128/464, loss: 0.029117681086063385 2023-01-24 03:22:10.162767: step: 130/464, loss: 0.021135665476322174 2023-01-24 03:22:10.769963: step: 132/464, loss: 0.021641455590724945 2023-01-24 03:22:11.402059: step: 134/464, loss: 0.015647945925593376 2023-01-24 03:22:12.024329: step: 136/464, loss: 0.08576782792806625 2023-01-24 03:22:12.571493: step: 138/464, loss: 0.9435380697250366 2023-01-24 03:22:13.242692: step: 140/464, loss: 0.0296559389680624 2023-01-24 03:22:13.851285: step: 142/464, loss: 0.015051405876874924 2023-01-24 03:22:14.504911: step: 144/464, loss: 3.710465908050537 2023-01-24 03:22:15.068733: step: 146/464, loss: 1.3897058963775635 2023-01-24 03:22:15.763402: step: 148/464, loss: 0.05914725735783577 2023-01-24 03:22:16.409872: step: 150/464, loss: 0.038997046649456024 2023-01-24 03:22:17.098794: step: 152/464, loss: 0.033092889934778214 2023-01-24 03:22:17.727396: step: 154/464, loss: 0.06606288254261017 2023-01-24 03:22:18.492522: step: 156/464, loss: 0.005674705840647221 2023-01-24 03:22:19.138876: step: 158/464, loss: 0.1593201756477356 2023-01-24 03:22:19.793386: step: 160/464, loss: 0.051348887383937836 2023-01-24 03:22:20.394967: step: 162/464, loss: 0.0321359857916832 2023-01-24 03:22:21.022476: step: 164/464, loss: 0.16339421272277832 2023-01-24 03:22:21.675341: step: 166/464, loss: 0.08392519503831863 2023-01-24 03:22:22.302493: step: 168/464, loss: 0.04907825589179993 2023-01-24 03:22:22.916957: step: 170/464, loss: 0.0903925821185112 2023-01-24 03:22:23.512258: step: 172/464, loss: 0.01728115975856781 2023-01-24 03:22:24.211299: step: 174/464, loss: 0.39139094948768616 2023-01-24 03:22:24.944005: step: 176/464, loss: 0.05329263582825661 2023-01-24 03:22:25.612641: step: 178/464, loss: 0.06931658834218979 2023-01-24 03:22:26.223160: step: 180/464, loss: 0.061838164925575256 2023-01-24 03:22:26.787079: step: 182/464, loss: 0.06392183154821396 2023-01-24 03:22:27.519018: step: 184/464, loss: 0.11093329638242722 2023-01-24 03:22:28.146388: step: 186/464, loss: 0.027669671922922134 2023-01-24 03:22:28.830951: step: 188/464, loss: 0.05224090442061424 2023-01-24 03:22:29.490266: step: 190/464, loss: 0.1401863545179367 2023-01-24 03:22:30.038926: step: 192/464, loss: 0.02816525287926197 2023-01-24 03:22:30.686491: step: 194/464, loss: 0.023517200723290443 2023-01-24 03:22:31.236934: step: 196/464, loss: 0.0481773279607296 2023-01-24 03:22:31.868391: step: 198/464, loss: 0.02006007730960846 2023-01-24 03:22:32.460589: step: 200/464, loss: 0.011920628137886524 2023-01-24 03:22:33.130913: step: 202/464, loss: 0.0183256845921278 2023-01-24 03:22:33.771795: step: 204/464, loss: 0.04755396768450737 2023-01-24 03:22:34.416038: step: 206/464, loss: 0.0879100114107132 2023-01-24 03:22:35.075976: step: 208/464, loss: 0.03002486377954483 2023-01-24 03:22:35.764588: step: 210/464, loss: 0.04555777460336685 2023-01-24 03:22:36.433200: step: 212/464, loss: 0.02567063271999359 2023-01-24 03:22:37.071827: step: 214/464, loss: 0.007622311823070049 2023-01-24 03:22:37.646700: step: 216/464, loss: 0.3607770502567291 2023-01-24 03:22:38.306593: step: 218/464, loss: 0.017416903749108315 2023-01-24 03:22:38.920915: step: 220/464, loss: 0.08756670355796814 2023-01-24 03:22:39.558167: step: 222/464, loss: 0.07937604188919067 2023-01-24 03:22:40.204656: step: 224/464, loss: 0.1075153797864914 2023-01-24 03:22:40.747159: step: 226/464, loss: 0.0323040634393692 2023-01-24 03:22:41.369850: step: 228/464, loss: 0.03348545730113983 2023-01-24 03:22:41.992073: step: 230/464, loss: 2.6350879669189453 2023-01-24 03:22:42.574973: step: 232/464, loss: 0.011316961608827114 2023-01-24 03:22:43.249330: step: 234/464, loss: 0.11862074583768845 2023-01-24 03:22:43.841266: step: 236/464, loss: 0.05245814472436905 2023-01-24 03:22:44.502878: step: 238/464, loss: 0.3137938380241394 2023-01-24 03:22:45.128641: step: 240/464, loss: 0.08349335193634033 2023-01-24 03:22:45.787116: step: 242/464, loss: 0.11334215849637985 2023-01-24 03:22:46.418127: step: 244/464, loss: 0.0941920280456543 2023-01-24 03:22:46.999587: step: 246/464, loss: 0.010574623011052608 2023-01-24 03:22:47.606828: step: 248/464, loss: 0.01173438224941492 2023-01-24 03:22:48.237973: step: 250/464, loss: 0.04012186452746391 2023-01-24 03:22:48.816791: step: 252/464, loss: 0.04171859472990036 2023-01-24 03:22:49.464142: step: 254/464, loss: 0.03524204343557358 2023-01-24 03:22:50.157141: step: 256/464, loss: 0.028740596026182175 2023-01-24 03:22:50.826139: step: 258/464, loss: 0.03225775808095932 2023-01-24 03:22:51.375233: step: 260/464, loss: 0.043071161955595016 2023-01-24 03:22:51.891460: step: 262/464, loss: 0.02790139988064766 2023-01-24 03:22:52.507068: step: 264/464, loss: 0.054161399602890015 2023-01-24 03:22:53.044436: step: 266/464, loss: 0.10258924216032028 2023-01-24 03:22:53.655900: step: 268/464, loss: 0.05986592546105385 2023-01-24 03:22:54.450676: step: 270/464, loss: 0.016713662073016167 2023-01-24 03:22:55.046671: step: 272/464, loss: 0.07509545236825943 2023-01-24 03:22:55.686969: step: 274/464, loss: 0.6472654342651367 2023-01-24 03:22:56.249363: step: 276/464, loss: 0.11911797523498535 2023-01-24 03:22:56.872257: step: 278/464, loss: 0.04465444013476372 2023-01-24 03:22:57.445784: step: 280/464, loss: 0.026976360008120537 2023-01-24 03:22:58.065809: step: 282/464, loss: 0.17191733419895172 2023-01-24 03:22:58.710151: step: 284/464, loss: 0.06832541525363922 2023-01-24 03:22:59.328979: step: 286/464, loss: 0.016170917078852654 2023-01-24 03:22:59.945751: step: 288/464, loss: 0.030662380158901215 2023-01-24 03:23:00.625190: step: 290/464, loss: 0.10576559603214264 2023-01-24 03:23:01.292801: step: 292/464, loss: 0.05849209055304527 2023-01-24 03:23:01.918586: step: 294/464, loss: 0.11040965467691422 2023-01-24 03:23:02.540752: step: 296/464, loss: 0.2339029610157013 2023-01-24 03:23:03.126589: step: 298/464, loss: 0.025430981069803238 2023-01-24 03:23:03.704296: step: 300/464, loss: 0.018815884366631508 2023-01-24 03:23:04.332810: step: 302/464, loss: 0.0253947451710701 2023-01-24 03:23:04.867026: step: 304/464, loss: 0.10287298262119293 2023-01-24 03:23:05.466527: step: 306/464, loss: 0.10712384432554245 2023-01-24 03:23:06.056666: step: 308/464, loss: 0.06882070004940033 2023-01-24 03:23:06.723878: step: 310/464, loss: 0.1400669515132904 2023-01-24 03:23:07.334843: step: 312/464, loss: 0.020639974623918533 2023-01-24 03:23:07.954288: step: 314/464, loss: 0.01266369316726923 2023-01-24 03:23:08.571219: step: 316/464, loss: 0.10999668389558792 2023-01-24 03:23:09.187959: step: 318/464, loss: 0.05323231965303421 2023-01-24 03:23:09.850396: step: 320/464, loss: 0.015401119366288185 2023-01-24 03:23:10.428359: step: 322/464, loss: 0.036088548600673676 2023-01-24 03:23:11.052962: step: 324/464, loss: 0.25261619687080383 2023-01-24 03:23:11.696346: step: 326/464, loss: 0.03083650954067707 2023-01-24 03:23:12.370053: step: 328/464, loss: 0.07151514291763306 2023-01-24 03:23:12.999586: step: 330/464, loss: 0.14306670427322388 2023-01-24 03:23:13.626196: step: 332/464, loss: 0.06048280745744705 2023-01-24 03:23:14.242488: step: 334/464, loss: 0.03349380940198898 2023-01-24 03:23:14.914967: step: 336/464, loss: 0.033371634781360626 2023-01-24 03:23:15.580082: step: 338/464, loss: 0.05449317395687103 2023-01-24 03:23:16.198068: step: 340/464, loss: 0.034356266260147095 2023-01-24 03:23:16.787662: step: 342/464, loss: 0.05067477002739906 2023-01-24 03:23:17.407602: step: 344/464, loss: 0.011599806137382984 2023-01-24 03:23:17.998347: step: 346/464, loss: 0.06270205974578857 2023-01-24 03:23:18.675640: step: 348/464, loss: 0.029605992138385773 2023-01-24 03:23:19.323941: step: 350/464, loss: 0.017472002655267715 2023-01-24 03:23:19.924636: step: 352/464, loss: 0.0753653347492218 2023-01-24 03:23:20.510942: step: 354/464, loss: 0.015513327904045582 2023-01-24 03:23:21.218458: step: 356/464, loss: 0.04514767974615097 2023-01-24 03:23:21.904929: step: 358/464, loss: 0.06842514127492905 2023-01-24 03:23:22.572304: step: 360/464, loss: 0.02094561979174614 2023-01-24 03:23:23.246268: step: 362/464, loss: 0.06777993589639664 2023-01-24 03:23:23.822452: step: 364/464, loss: 0.022707749158143997 2023-01-24 03:23:24.449667: step: 366/464, loss: 0.04803130403161049 2023-01-24 03:23:25.142444: step: 368/464, loss: 0.01649634726345539 2023-01-24 03:23:25.831376: step: 370/464, loss: 0.027657456696033478 2023-01-24 03:23:26.438772: step: 372/464, loss: 0.023393074050545692 2023-01-24 03:23:26.997941: step: 374/464, loss: 0.03944697603583336 2023-01-24 03:23:27.647652: step: 376/464, loss: 0.15143027901649475 2023-01-24 03:23:28.336713: step: 378/464, loss: 0.02022443525493145 2023-01-24 03:23:28.976142: step: 380/464, loss: 0.05097239837050438 2023-01-24 03:23:29.571142: step: 382/464, loss: 0.035139452666044235 2023-01-24 03:23:30.186168: step: 384/464, loss: 0.10621704906225204 2023-01-24 03:23:30.777253: step: 386/464, loss: 0.0067662461660802364 2023-01-24 03:23:31.386810: step: 388/464, loss: 0.06667128950357437 2023-01-24 03:23:32.036803: step: 390/464, loss: 0.04963650554418564 2023-01-24 03:23:32.717606: step: 392/464, loss: 0.016870509833097458 2023-01-24 03:23:33.342749: step: 394/464, loss: 0.044276162981987 2023-01-24 03:23:33.932574: step: 396/464, loss: 0.00846901349723339 2023-01-24 03:23:34.544875: step: 398/464, loss: 0.07062599807977676 2023-01-24 03:23:35.149683: step: 400/464, loss: 0.046838875859975815 2023-01-24 03:23:35.856041: step: 402/464, loss: 0.16247150301933289 2023-01-24 03:23:36.479616: step: 404/464, loss: 0.08528237789869308 2023-01-24 03:23:37.073905: step: 406/464, loss: 0.03201433643698692 2023-01-24 03:23:37.642692: step: 408/464, loss: 0.005379590671509504 2023-01-24 03:23:38.293637: step: 410/464, loss: 0.03673448786139488 2023-01-24 03:23:38.953134: step: 412/464, loss: 1.2216758728027344 2023-01-24 03:23:39.626787: step: 414/464, loss: 0.026421865448355675 2023-01-24 03:23:40.245251: step: 416/464, loss: 0.11724504083395004 2023-01-24 03:23:40.914741: step: 418/464, loss: 0.0895252674818039 2023-01-24 03:23:41.485170: step: 420/464, loss: 0.036729466170072556 2023-01-24 03:23:42.133880: step: 422/464, loss: 0.05561475083231926 2023-01-24 03:23:42.758286: step: 424/464, loss: 0.2665712535381317 2023-01-24 03:23:43.387900: step: 426/464, loss: 0.048663366585969925 2023-01-24 03:23:44.017570: step: 428/464, loss: 0.02604263462126255 2023-01-24 03:23:44.666579: step: 430/464, loss: 0.09479238837957382 2023-01-24 03:23:45.323573: step: 432/464, loss: 0.056636255234479904 2023-01-24 03:23:45.947164: step: 434/464, loss: 0.04270360246300697 2023-01-24 03:23:46.589945: step: 436/464, loss: 0.06862106919288635 2023-01-24 03:23:47.215849: step: 438/464, loss: 0.10623826831579208 2023-01-24 03:23:47.803369: step: 440/464, loss: 0.0833999365568161 2023-01-24 03:23:48.369590: step: 442/464, loss: 0.009899923577904701 2023-01-24 03:23:48.970978: step: 444/464, loss: 0.028848154470324516 2023-01-24 03:23:49.584626: step: 446/464, loss: 0.08141449093818665 2023-01-24 03:23:50.196187: step: 448/464, loss: 0.4562784433364868 2023-01-24 03:23:50.834260: step: 450/464, loss: 0.019771141931414604 2023-01-24 03:23:51.455660: step: 452/464, loss: 0.04862720146775246 2023-01-24 03:23:52.077058: step: 454/464, loss: 0.5798062086105347 2023-01-24 03:23:52.643157: step: 456/464, loss: 0.02002645842730999 2023-01-24 03:23:53.230320: step: 458/464, loss: 0.03038819320499897 2023-01-24 03:23:53.804639: step: 460/464, loss: 1.1626721620559692 2023-01-24 03:23:54.425578: step: 462/464, loss: 0.00896801520138979 2023-01-24 03:23:54.970686: step: 464/464, loss: 0.02811810001730919 2023-01-24 03:23:55.597505: step: 466/464, loss: 0.012318997643887997 2023-01-24 03:23:56.318347: step: 468/464, loss: 0.07171928137540817 2023-01-24 03:23:57.074818: step: 470/464, loss: 0.099583700299263 2023-01-24 03:23:57.636338: step: 472/464, loss: 0.03668338432908058 2023-01-24 03:23:58.234998: step: 474/464, loss: 0.05059617757797241 2023-01-24 03:23:58.853094: step: 476/464, loss: 0.07265280187129974 2023-01-24 03:23:59.411476: step: 478/464, loss: 0.011166172102093697 2023-01-24 03:24:00.070421: step: 480/464, loss: 0.03359964117407799 2023-01-24 03:24:00.635657: step: 482/464, loss: 0.019395098090171814 2023-01-24 03:24:01.287418: step: 484/464, loss: 0.05742871016263962 2023-01-24 03:24:01.853680: step: 486/464, loss: 0.020450137555599213 2023-01-24 03:24:02.491629: step: 488/464, loss: 0.03934083878993988 2023-01-24 03:24:03.154192: step: 490/464, loss: 0.08692191541194916 2023-01-24 03:24:03.796284: step: 492/464, loss: 0.02365756221115589 2023-01-24 03:24:04.393672: step: 494/464, loss: 0.03775375708937645 2023-01-24 03:24:05.022073: step: 496/464, loss: 0.05765479430556297 2023-01-24 03:24:05.579210: step: 498/464, loss: 0.0200423002243042 2023-01-24 03:24:06.240793: step: 500/464, loss: 0.03446970880031586 2023-01-24 03:24:06.829925: step: 502/464, loss: 0.03138982877135277 2023-01-24 03:24:07.435272: step: 504/464, loss: 0.02364160306751728 2023-01-24 03:24:08.062136: step: 506/464, loss: 0.03466923162341118 2023-01-24 03:24:08.753814: step: 508/464, loss: 0.10752367973327637 2023-01-24 03:24:09.380380: step: 510/464, loss: 0.05907217785716057 2023-01-24 03:24:10.036095: step: 512/464, loss: 0.5026339292526245 2023-01-24 03:24:10.670783: step: 514/464, loss: 0.019515832886099815 2023-01-24 03:24:11.341596: step: 516/464, loss: 0.025658082216978073 2023-01-24 03:24:11.996997: step: 518/464, loss: 0.08302264660596848 2023-01-24 03:24:12.567232: step: 520/464, loss: 0.3803815543651581 2023-01-24 03:24:13.218758: step: 522/464, loss: 0.0429992713034153 2023-01-24 03:24:13.804329: step: 524/464, loss: 0.027729887515306473 2023-01-24 03:24:14.377945: step: 526/464, loss: 0.007166002411395311 2023-01-24 03:24:15.047944: step: 528/464, loss: 0.9323362112045288 2023-01-24 03:24:15.697873: step: 530/464, loss: 0.04219109192490578 2023-01-24 03:24:16.351322: step: 532/464, loss: 0.0168119128793478 2023-01-24 03:24:16.952249: step: 534/464, loss: 0.06935977190732956 2023-01-24 03:24:17.563495: step: 536/464, loss: 0.015928490087389946 2023-01-24 03:24:18.195646: step: 538/464, loss: 0.0640493631362915 2023-01-24 03:24:18.888609: step: 540/464, loss: 0.005617052782326937 2023-01-24 03:24:19.518856: step: 542/464, loss: 0.05419943481683731 2023-01-24 03:24:20.162903: step: 544/464, loss: 0.023738780990242958 2023-01-24 03:24:20.774425: step: 546/464, loss: 0.14997144043445587 2023-01-24 03:24:21.358653: step: 548/464, loss: 0.18707990646362305 2023-01-24 03:24:22.095970: step: 550/464, loss: 0.825693666934967 2023-01-24 03:24:22.728339: step: 552/464, loss: 0.035482730716466904 2023-01-24 03:24:23.336587: step: 554/464, loss: 0.030714012682437897 2023-01-24 03:24:24.072970: step: 556/464, loss: 0.04303360357880592 2023-01-24 03:24:24.736994: step: 558/464, loss: 0.11482765525579453 2023-01-24 03:24:25.325041: step: 560/464, loss: 0.014046892523765564 2023-01-24 03:24:26.011267: step: 562/464, loss: 0.069298654794693 2023-01-24 03:24:26.619593: step: 564/464, loss: 0.13554075360298157 2023-01-24 03:24:27.239997: step: 566/464, loss: 0.058562301099300385 2023-01-24 03:24:27.893481: step: 568/464, loss: 0.027104122564196587 2023-01-24 03:24:28.564008: step: 570/464, loss: 0.15894848108291626 2023-01-24 03:24:29.208786: step: 572/464, loss: 0.03762518987059593 2023-01-24 03:24:29.789309: step: 574/464, loss: 0.3076055943965912 2023-01-24 03:24:30.453974: step: 576/464, loss: 0.01175174955278635 2023-01-24 03:24:31.093388: step: 578/464, loss: 0.031038088724017143 2023-01-24 03:24:31.733764: step: 580/464, loss: 0.23649537563323975 2023-01-24 03:24:32.430569: step: 582/464, loss: 0.08286372572183609 2023-01-24 03:24:33.061113: step: 584/464, loss: 0.020930882543325424 2023-01-24 03:24:33.659796: step: 586/464, loss: 0.04851653426885605 2023-01-24 03:24:34.238617: step: 588/464, loss: 0.04376620426774025 2023-01-24 03:24:34.892422: step: 590/464, loss: 0.07862431555986404 2023-01-24 03:24:35.549682: step: 592/464, loss: 0.06696991622447968 2023-01-24 03:24:36.125360: step: 594/464, loss: 0.014856110326945782 2023-01-24 03:24:36.746753: step: 596/464, loss: 0.03530821204185486 2023-01-24 03:24:37.411703: step: 598/464, loss: 0.08003076165914536 2023-01-24 03:24:38.005645: step: 600/464, loss: 0.038060080260038376 2023-01-24 03:24:38.672889: step: 602/464, loss: 0.07375941425561905 2023-01-24 03:24:39.260968: step: 604/464, loss: 0.007829632610082626 2023-01-24 03:24:39.880481: step: 606/464, loss: 0.03282972425222397 2023-01-24 03:24:40.556574: step: 608/464, loss: 0.06060962378978729 2023-01-24 03:24:41.154344: step: 610/464, loss: 0.17766328155994415 2023-01-24 03:24:41.739590: step: 612/464, loss: 0.14387981593608856 2023-01-24 03:24:42.302586: step: 614/464, loss: 0.04774000868201256 2023-01-24 03:24:42.967856: step: 616/464, loss: 0.017273657023906708 2023-01-24 03:24:43.636940: step: 618/464, loss: 0.043972667306661606 2023-01-24 03:24:44.298704: step: 620/464, loss: 0.02949305810034275 2023-01-24 03:24:44.883408: step: 622/464, loss: 0.0540006048977375 2023-01-24 03:24:45.491181: step: 624/464, loss: 0.002033422002568841 2023-01-24 03:24:46.113766: step: 626/464, loss: 0.2388991117477417 2023-01-24 03:24:46.710502: step: 628/464, loss: 0.08435311913490295 2023-01-24 03:24:47.361484: step: 630/464, loss: 0.0835963562130928 2023-01-24 03:24:48.008525: step: 632/464, loss: 0.01947946660220623 2023-01-24 03:24:48.620593: step: 634/464, loss: 0.1267274171113968 2023-01-24 03:24:49.322001: step: 636/464, loss: 0.6429212093353271 2023-01-24 03:24:49.925617: step: 638/464, loss: 0.15381783246994019 2023-01-24 03:24:50.483531: step: 640/464, loss: 0.007321540731936693 2023-01-24 03:24:51.145689: step: 642/464, loss: 0.06959162652492523 2023-01-24 03:24:51.816586: step: 644/464, loss: 0.059891972690820694 2023-01-24 03:24:52.383824: step: 646/464, loss: 0.13082417845726013 2023-01-24 03:24:52.972287: step: 648/464, loss: 0.037121932953596115 2023-01-24 03:24:53.609145: step: 650/464, loss: 0.006254613399505615 2023-01-24 03:24:54.197408: step: 652/464, loss: 0.04932815954089165 2023-01-24 03:24:54.782453: step: 654/464, loss: 0.03491489961743355 2023-01-24 03:24:55.521180: step: 656/464, loss: 0.06369685381650925 2023-01-24 03:24:56.126844: step: 658/464, loss: 0.29147234559059143 2023-01-24 03:24:56.737132: step: 660/464, loss: 0.017888322472572327 2023-01-24 03:24:57.349998: step: 662/464, loss: 0.0431828498840332 2023-01-24 03:24:58.063309: step: 664/464, loss: 0.04324536398053169 2023-01-24 03:24:58.660734: step: 666/464, loss: 0.007824858650565147 2023-01-24 03:24:59.325068: step: 668/464, loss: 0.12181359529495239 2023-01-24 03:24:59.910985: step: 670/464, loss: 0.048685409128665924 2023-01-24 03:25:00.592556: step: 672/464, loss: 0.030775291845202446 2023-01-24 03:25:01.236607: step: 674/464, loss: 0.04483339563012123 2023-01-24 03:25:01.813918: step: 676/464, loss: 0.15573513507843018 2023-01-24 03:25:02.457604: step: 678/464, loss: 0.059290602803230286 2023-01-24 03:25:03.069412: step: 680/464, loss: 0.4794975817203522 2023-01-24 03:25:03.730124: step: 682/464, loss: 0.06171262636780739 2023-01-24 03:25:04.327459: step: 684/464, loss: 0.07990762591362 2023-01-24 03:25:04.974136: step: 686/464, loss: 0.14318765699863434 2023-01-24 03:25:05.535088: step: 688/464, loss: 0.0039002220146358013 2023-01-24 03:25:06.213251: step: 690/464, loss: 0.011800544336438179 2023-01-24 03:25:06.813383: step: 692/464, loss: 0.0400354377925396 2023-01-24 03:25:07.509477: step: 694/464, loss: 0.03960973024368286 2023-01-24 03:25:08.123167: step: 696/464, loss: 0.052699748426675797 2023-01-24 03:25:08.765609: step: 698/464, loss: 0.055506426841020584 2023-01-24 03:25:09.391549: step: 700/464, loss: 0.08599307388067245 2023-01-24 03:25:10.013782: step: 702/464, loss: 0.07293307036161423 2023-01-24 03:25:10.622356: step: 704/464, loss: 0.02525770291686058 2023-01-24 03:25:11.231813: step: 706/464, loss: 0.14079737663269043 2023-01-24 03:25:11.889868: step: 708/464, loss: 0.294656902551651 2023-01-24 03:25:12.505037: step: 710/464, loss: 0.14622029662132263 2023-01-24 03:25:13.133862: step: 712/464, loss: 0.012117592617869377 2023-01-24 03:25:13.746510: step: 714/464, loss: 0.006660753861069679 2023-01-24 03:25:14.370851: step: 716/464, loss: 0.0036901962012052536 2023-01-24 03:25:14.987269: step: 718/464, loss: 0.05405588448047638 2023-01-24 03:25:15.584632: step: 720/464, loss: 0.03300733119249344 2023-01-24 03:25:16.152074: step: 722/464, loss: 0.1454676240682602 2023-01-24 03:25:16.776441: step: 724/464, loss: 0.12987717986106873 2023-01-24 03:25:17.379366: step: 726/464, loss: 0.018779395148158073 2023-01-24 03:25:17.972505: step: 728/464, loss: 0.02126624621450901 2023-01-24 03:25:18.612984: step: 730/464, loss: 0.06283842772245407 2023-01-24 03:25:19.216131: step: 732/464, loss: 0.07268655300140381 2023-01-24 03:25:19.873715: step: 734/464, loss: 0.020557576790452003 2023-01-24 03:25:20.556088: step: 736/464, loss: 0.4944629669189453 2023-01-24 03:25:21.175082: step: 738/464, loss: 0.08161593973636627 2023-01-24 03:25:21.814679: step: 740/464, loss: 0.008167529478669167 2023-01-24 03:25:22.466149: step: 742/464, loss: 0.012403919361531734 2023-01-24 03:25:23.175415: step: 744/464, loss: 0.36549267172813416 2023-01-24 03:25:23.783949: step: 746/464, loss: 0.0025342279113829136 2023-01-24 03:25:24.425211: step: 748/464, loss: 0.02981056272983551 2023-01-24 03:25:25.024024: step: 750/464, loss: 0.04026034474372864 2023-01-24 03:25:25.672604: step: 752/464, loss: 0.031751926988363266 2023-01-24 03:25:26.269381: step: 754/464, loss: 0.019904276356101036 2023-01-24 03:25:26.916990: step: 756/464, loss: 0.052797820419073105 2023-01-24 03:25:27.512476: step: 758/464, loss: 0.6545272469520569 2023-01-24 03:25:28.072042: step: 760/464, loss: 0.05094560608267784 2023-01-24 03:25:28.700107: step: 762/464, loss: 0.06578005850315094 2023-01-24 03:25:29.393755: step: 764/464, loss: 0.008793084882199764 2023-01-24 03:25:30.070628: step: 766/464, loss: 0.07369567453861237 2023-01-24 03:25:30.632950: step: 768/464, loss: 0.06409741938114166 2023-01-24 03:25:31.195125: step: 770/464, loss: 0.07845474779605865 2023-01-24 03:25:31.861623: step: 772/464, loss: 0.13869287073612213 2023-01-24 03:25:32.495987: step: 774/464, loss: 0.10635069012641907 2023-01-24 03:25:33.097577: step: 776/464, loss: 15.494712829589844 2023-01-24 03:25:33.748355: step: 778/464, loss: 0.03596947342157364 2023-01-24 03:25:34.440650: step: 780/464, loss: 0.0767352506518364 2023-01-24 03:25:35.147604: step: 782/464, loss: 0.017218533903360367 2023-01-24 03:25:35.782156: step: 784/464, loss: 0.15298594534397125 2023-01-24 03:25:36.420513: step: 786/464, loss: 0.11351388692855835 2023-01-24 03:25:37.040375: step: 788/464, loss: 0.18765272200107574 2023-01-24 03:25:37.714389: step: 790/464, loss: 0.044379230588674545 2023-01-24 03:25:38.307196: step: 792/464, loss: 0.06699885427951813 2023-01-24 03:25:38.950369: step: 794/464, loss: 0.028787074610590935 2023-01-24 03:25:39.557780: step: 796/464, loss: 0.33658385276794434 2023-01-24 03:25:40.198648: step: 798/464, loss: 0.09242162108421326 2023-01-24 03:25:40.805794: step: 800/464, loss: 0.35072797536849976 2023-01-24 03:25:41.480151: step: 802/464, loss: 0.04858367517590523 2023-01-24 03:25:42.189543: step: 804/464, loss: 0.051442377269268036 2023-01-24 03:25:42.797490: step: 806/464, loss: 0.035532910376787186 2023-01-24 03:25:43.461292: step: 808/464, loss: 0.0076722376979887486 2023-01-24 03:25:44.023372: step: 810/464, loss: 0.030712995678186417 2023-01-24 03:25:44.616218: step: 812/464, loss: 0.10349004715681076 2023-01-24 03:25:45.211416: step: 814/464, loss: 0.03708258643746376 2023-01-24 03:25:45.828191: step: 816/464, loss: 1.1700975894927979 2023-01-24 03:25:46.463853: step: 818/464, loss: 0.05340784788131714 2023-01-24 03:25:47.077454: step: 820/464, loss: 0.03580804914236069 2023-01-24 03:25:47.724964: step: 822/464, loss: 0.019508054479956627 2023-01-24 03:25:48.348018: step: 824/464, loss: 0.08856187015771866 2023-01-24 03:25:48.992537: step: 826/464, loss: 0.023758994415402412 2023-01-24 03:25:49.725922: step: 828/464, loss: 0.024007648229599 2023-01-24 03:25:50.272374: step: 830/464, loss: 0.017617687582969666 2023-01-24 03:25:50.905101: step: 832/464, loss: 0.040325313806533813 2023-01-24 03:25:51.553671: step: 834/464, loss: 0.10785327106714249 2023-01-24 03:25:52.122957: step: 836/464, loss: 0.02498418465256691 2023-01-24 03:25:52.831141: step: 838/464, loss: 0.06586012244224548 2023-01-24 03:25:53.440011: step: 840/464, loss: 0.1145709902048111 2023-01-24 03:25:54.081679: step: 842/464, loss: 0.07150621712207794 2023-01-24 03:25:54.707791: step: 844/464, loss: 0.0404898002743721 2023-01-24 03:25:55.404634: step: 846/464, loss: 0.03282487764954567 2023-01-24 03:25:56.019263: step: 848/464, loss: 0.039687447249889374 2023-01-24 03:25:56.654326: step: 850/464, loss: 1.8295704126358032 2023-01-24 03:25:57.262450: step: 852/464, loss: 0.10599999874830246 2023-01-24 03:25:57.892256: step: 854/464, loss: 0.23428945243358612 2023-01-24 03:25:58.567050: step: 856/464, loss: 0.08374352753162384 2023-01-24 03:25:59.246314: step: 858/464, loss: 2.8789405822753906 2023-01-24 03:25:59.881121: step: 860/464, loss: 0.019031409174203873 2023-01-24 03:26:00.560874: step: 862/464, loss: 0.2627542018890381 2023-01-24 03:26:01.184241: step: 864/464, loss: 0.06664364784955978 2023-01-24 03:26:01.766270: step: 866/464, loss: 0.0017015093471854925 2023-01-24 03:26:02.400838: step: 868/464, loss: 0.08811833709478378 2023-01-24 03:26:03.036711: step: 870/464, loss: 0.049786198884248734 2023-01-24 03:26:03.685433: step: 872/464, loss: 0.0974259227514267 2023-01-24 03:26:04.352855: step: 874/464, loss: 0.16216818988323212 2023-01-24 03:26:04.944245: step: 876/464, loss: 0.14738522469997406 2023-01-24 03:26:05.574575: step: 878/464, loss: 0.13370949029922485 2023-01-24 03:26:06.180496: step: 880/464, loss: 0.03261880576610565 2023-01-24 03:26:06.857186: step: 882/464, loss: 1.8354557752609253 2023-01-24 03:26:07.483762: step: 884/464, loss: 0.03743087500333786 2023-01-24 03:26:08.151388: step: 886/464, loss: 0.015178278088569641 2023-01-24 03:26:08.847387: step: 888/464, loss: 0.07287105172872543 2023-01-24 03:26:09.483836: step: 890/464, loss: 0.006585855036973953 2023-01-24 03:26:10.071817: step: 892/464, loss: 0.1828705072402954 2023-01-24 03:26:10.654745: step: 894/464, loss: 0.031428128480911255 2023-01-24 03:26:11.249738: step: 896/464, loss: 0.043859273195266724 2023-01-24 03:26:11.818746: step: 898/464, loss: 0.14311519265174866 2023-01-24 03:26:12.402249: step: 900/464, loss: 0.015120675787329674 2023-01-24 03:26:13.112815: step: 902/464, loss: 0.010816311463713646 2023-01-24 03:26:13.735476: step: 904/464, loss: 1.2248504161834717 2023-01-24 03:26:14.319005: step: 906/464, loss: 0.06228072941303253 2023-01-24 03:26:15.002108: step: 908/464, loss: 0.030728058889508247 2023-01-24 03:26:15.574557: step: 910/464, loss: 0.10771479457616806 2023-01-24 03:26:16.223024: step: 912/464, loss: 0.15603163838386536 2023-01-24 03:26:16.862490: step: 914/464, loss: 0.061756011098623276 2023-01-24 03:26:17.529578: step: 916/464, loss: 0.04426893591880798 2023-01-24 03:26:18.133035: step: 918/464, loss: 0.026621172204613686 2023-01-24 03:26:18.663312: step: 920/464, loss: 0.009261952713131905 2023-01-24 03:26:19.326439: step: 922/464, loss: 0.0934833437204361 2023-01-24 03:26:19.985291: step: 924/464, loss: 0.06076713278889656 2023-01-24 03:26:20.634473: step: 926/464, loss: 0.01151891890913248 2023-01-24 03:26:21.251092: step: 928/464, loss: 0.16116264462471008 2023-01-24 03:26:21.733716: step: 930/464, loss: 0.005820272024720907 ================================================== Loss: 0.167 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3326241332619112, 'r': 0.3073775197315954, 'f1': 0.3195028656776149}, 'combined': 0.23542316418350567, 'epoch': 21} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31956524170006595, 'r': 0.2831442031239555, 'f1': 0.3002542816753056}, 'combined': 0.19602093000563992, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165235368956743, 'r': 0.3147216951296648, 'f1': 0.31562004440215674}, 'combined': 0.23256213798053654, 'epoch': 21} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32642348776495944, 'r': 0.28622059497037805, 'f1': 0.3050029454728417}, 'combined': 0.19912109393563757, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3489951593161192, 'r': 0.31528539960944857, 'f1': 0.3312849572015698}, 'combined': 0.24410470530641984, 'epoch': 21} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33480245207493264, 'r': 0.276027389256631, 'f1': 0.30258720353774765}, 'combined': 0.1975439774391513, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25203252032520324, 'r': 0.2952380952380952, 'f1': 0.27192982456140347}, 'combined': 0.1812865497076023, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29, 'r': 0.31521739130434784, 'f1': 0.3020833333333333}, 'combined': 0.15104166666666666, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6363636363636364, 'r': 0.2413793103448276, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:28:57.579013: step: 2/464, loss: 0.006758301518857479 2023-01-24 03:28:58.178304: step: 4/464, loss: 0.14314305782318115 2023-01-24 03:28:58.910175: step: 6/464, loss: 0.0054590520448982716 2023-01-24 03:28:59.479120: step: 8/464, loss: 0.03108260966837406 2023-01-24 03:29:00.070616: step: 10/464, loss: 0.07363323867321014 2023-01-24 03:29:00.712411: step: 12/464, loss: 0.06686335802078247 2023-01-24 03:29:01.282574: step: 14/464, loss: 0.01742183230817318 2023-01-24 03:29:01.864216: step: 16/464, loss: 0.05019579827785492 2023-01-24 03:29:02.476639: step: 18/464, loss: 0.017191683873534203 2023-01-24 03:29:03.090832: step: 20/464, loss: 0.11063700169324875 2023-01-24 03:29:03.749019: step: 22/464, loss: 0.10509807616472244 2023-01-24 03:29:04.425264: step: 24/464, loss: 0.018130838871002197 2023-01-24 03:29:05.089486: step: 26/464, loss: 0.02149237133562565 2023-01-24 03:29:05.680602: step: 28/464, loss: 0.03470902144908905 2023-01-24 03:29:06.226852: step: 30/464, loss: 0.00973481498658657 2023-01-24 03:29:06.888397: step: 32/464, loss: 0.04356635734438896 2023-01-24 03:29:07.435787: step: 34/464, loss: 0.4145301878452301 2023-01-24 03:29:08.052857: step: 36/464, loss: 0.02120104245841503 2023-01-24 03:29:08.733703: step: 38/464, loss: 0.20581495761871338 2023-01-24 03:29:09.331532: step: 40/464, loss: 0.082242451608181 2023-01-24 03:29:09.956433: step: 42/464, loss: 0.05397970974445343 2023-01-24 03:29:10.564122: step: 44/464, loss: 0.011220986023545265 2023-01-24 03:29:11.228809: step: 46/464, loss: 0.160739928483963 2023-01-24 03:29:11.813172: step: 48/464, loss: 0.0300454068928957 2023-01-24 03:29:12.374233: step: 50/464, loss: 2.2988154888153076 2023-01-24 03:29:13.069183: step: 52/464, loss: 0.025502270087599754 2023-01-24 03:29:13.663492: step: 54/464, loss: 0.10452800989151001 2023-01-24 03:29:14.352865: step: 56/464, loss: 0.03143616393208504 2023-01-24 03:29:14.968051: step: 58/464, loss: 0.029270924627780914 2023-01-24 03:29:15.628579: step: 60/464, loss: 0.03443043678998947 2023-01-24 03:29:16.231022: step: 62/464, loss: 0.029024790972471237 2023-01-24 03:29:16.936502: step: 64/464, loss: 0.03446149453520775 2023-01-24 03:29:17.527526: step: 66/464, loss: 0.00391471479088068 2023-01-24 03:29:18.146283: step: 68/464, loss: 0.0015981389442458749 2023-01-24 03:29:18.693304: step: 70/464, loss: 0.0024913130328059196 2023-01-24 03:29:19.270029: step: 72/464, loss: 0.013183190487325191 2023-01-24 03:29:19.855554: step: 74/464, loss: 0.02803228795528412 2023-01-24 03:29:20.417882: step: 76/464, loss: 0.3102782070636749 2023-01-24 03:29:21.051640: step: 78/464, loss: 0.039724547415971756 2023-01-24 03:29:21.639544: step: 80/464, loss: 0.0979723408818245 2023-01-24 03:29:22.252902: step: 82/464, loss: 0.08590143918991089 2023-01-24 03:29:22.855575: step: 84/464, loss: 0.04453185573220253 2023-01-24 03:29:23.481510: step: 86/464, loss: 0.09062372148036957 2023-01-24 03:29:24.129988: step: 88/464, loss: 0.045178622007369995 2023-01-24 03:29:24.747838: step: 90/464, loss: 0.05401252210140228 2023-01-24 03:29:25.346783: step: 92/464, loss: 0.005012328736484051 2023-01-24 03:29:25.983529: step: 94/464, loss: 0.00415660860016942 2023-01-24 03:29:26.568160: step: 96/464, loss: 0.036089878529310226 2023-01-24 03:29:27.145925: step: 98/464, loss: 0.04593805968761444 2023-01-24 03:29:27.729793: step: 100/464, loss: 0.06143837049603462 2023-01-24 03:29:28.335938: step: 102/464, loss: 0.018398325890302658 2023-01-24 03:29:28.913719: step: 104/464, loss: 0.025438066571950912 2023-01-24 03:29:29.558281: step: 106/464, loss: 0.05824285000562668 2023-01-24 03:29:30.125856: step: 108/464, loss: 0.02007497102022171 2023-01-24 03:29:30.800332: step: 110/464, loss: 0.06423313170671463 2023-01-24 03:29:31.443918: step: 112/464, loss: 0.0621689036488533 2023-01-24 03:29:32.065682: step: 114/464, loss: 0.11528107523918152 2023-01-24 03:29:32.690462: step: 116/464, loss: 0.053891878575086594 2023-01-24 03:29:33.256457: step: 118/464, loss: 0.02513236738741398 2023-01-24 03:29:33.899968: step: 120/464, loss: 0.059944998472929 2023-01-24 03:29:34.566580: step: 122/464, loss: 0.023398755118250847 2023-01-24 03:29:35.192208: step: 124/464, loss: 0.47563937306404114 2023-01-24 03:29:35.817927: step: 126/464, loss: 0.01028918381780386 2023-01-24 03:29:36.401145: step: 128/464, loss: 0.010359102860093117 2023-01-24 03:29:37.050114: step: 130/464, loss: 0.4950485825538635 2023-01-24 03:29:37.680117: step: 132/464, loss: 0.010646861046552658 2023-01-24 03:29:38.373149: step: 134/464, loss: 0.019035184755921364 2023-01-24 03:29:38.922157: step: 136/464, loss: 0.12675100564956665 2023-01-24 03:29:39.550967: step: 138/464, loss: 0.008613305166363716 2023-01-24 03:29:40.198526: step: 140/464, loss: 0.009645706973969936 2023-01-24 03:29:40.835604: step: 142/464, loss: 0.03028269298374653 2023-01-24 03:29:41.448740: step: 144/464, loss: 0.004214303568005562 2023-01-24 03:29:42.118717: step: 146/464, loss: 0.0990675687789917 2023-01-24 03:29:42.767724: step: 148/464, loss: 0.04364365339279175 2023-01-24 03:29:43.406236: step: 150/464, loss: 0.06068604812026024 2023-01-24 03:29:44.016826: step: 152/464, loss: 0.05011973902583122 2023-01-24 03:29:44.659411: step: 154/464, loss: 0.0195098128169775 2023-01-24 03:29:45.253946: step: 156/464, loss: 0.011885374784469604 2023-01-24 03:29:45.894481: step: 158/464, loss: 0.021790891885757446 2023-01-24 03:29:46.485485: step: 160/464, loss: 0.0470498651266098 2023-01-24 03:29:47.123416: step: 162/464, loss: 0.0032399813644587994 2023-01-24 03:29:47.759280: step: 164/464, loss: 0.0178668275475502 2023-01-24 03:29:48.352825: step: 166/464, loss: 0.0072959973476827145 2023-01-24 03:29:48.934297: step: 168/464, loss: 0.022963957861065865 2023-01-24 03:29:49.490662: step: 170/464, loss: 0.00620347261428833 2023-01-24 03:29:50.111138: step: 172/464, loss: 0.04798974096775055 2023-01-24 03:29:50.695426: step: 174/464, loss: 0.014539512805640697 2023-01-24 03:29:51.246778: step: 176/464, loss: 0.06378328055143356 2023-01-24 03:29:51.876171: step: 178/464, loss: 0.04277843236923218 2023-01-24 03:29:52.489302: step: 180/464, loss: 1.583171010017395 2023-01-24 03:29:53.105009: step: 182/464, loss: 0.10841219872236252 2023-01-24 03:29:53.736158: step: 184/464, loss: 0.007886328734457493 2023-01-24 03:29:54.306148: step: 186/464, loss: 0.13257752358913422 2023-01-24 03:29:54.913387: step: 188/464, loss: 0.4060317575931549 2023-01-24 03:29:55.527962: step: 190/464, loss: 0.06079034507274628 2023-01-24 03:29:56.148817: step: 192/464, loss: 0.014938176609575748 2023-01-24 03:29:56.815151: step: 194/464, loss: 0.01524947490543127 2023-01-24 03:29:57.460761: step: 196/464, loss: 0.032105594873428345 2023-01-24 03:29:58.200556: step: 198/464, loss: 0.016917673870921135 2023-01-24 03:29:58.857651: step: 200/464, loss: 0.07070586830377579 2023-01-24 03:29:59.498882: step: 202/464, loss: 0.020239602774381638 2023-01-24 03:30:00.158873: step: 204/464, loss: 0.008704866282641888 2023-01-24 03:30:00.797468: step: 206/464, loss: 0.030908847227692604 2023-01-24 03:30:01.463164: step: 208/464, loss: 0.07697136700153351 2023-01-24 03:30:02.084168: step: 210/464, loss: 0.09847211837768555 2023-01-24 03:30:02.663744: step: 212/464, loss: 0.11305604875087738 2023-01-24 03:30:03.298199: step: 214/464, loss: 0.16584600508213043 2023-01-24 03:30:03.924690: step: 216/464, loss: 0.31934815645217896 2023-01-24 03:30:04.585923: step: 218/464, loss: 0.05138273909687996 2023-01-24 03:30:05.186752: step: 220/464, loss: 0.05057439208030701 2023-01-24 03:30:05.824897: step: 222/464, loss: 0.09238734841346741 2023-01-24 03:30:06.508205: step: 224/464, loss: 0.00903787650167942 2023-01-24 03:30:07.109590: step: 226/464, loss: 0.019578061997890472 2023-01-24 03:30:07.838378: step: 228/464, loss: 0.044871579855680466 2023-01-24 03:30:08.423667: step: 230/464, loss: 0.08930542320013046 2023-01-24 03:30:09.106150: step: 232/464, loss: 0.005069571081548929 2023-01-24 03:30:09.715200: step: 234/464, loss: 1.0194897651672363 2023-01-24 03:30:10.373879: step: 236/464, loss: 0.0036579384468495846 2023-01-24 03:30:10.967400: step: 238/464, loss: 0.12989561259746552 2023-01-24 03:30:11.582937: step: 240/464, loss: 0.1594226062297821 2023-01-24 03:30:12.171521: step: 242/464, loss: 0.06729419529438019 2023-01-24 03:30:12.805865: step: 244/464, loss: 0.041194040328264236 2023-01-24 03:30:13.453202: step: 246/464, loss: 0.02332022227346897 2023-01-24 03:30:14.093599: step: 248/464, loss: 0.03594561293721199 2023-01-24 03:30:14.798206: step: 250/464, loss: 0.09034476429224014 2023-01-24 03:30:15.421535: step: 252/464, loss: 0.03551686927676201 2023-01-24 03:30:16.101275: step: 254/464, loss: 0.024127595126628876 2023-01-24 03:30:16.728861: step: 256/464, loss: 0.07626382261514664 2023-01-24 03:30:17.487211: step: 258/464, loss: 0.07468894869089127 2023-01-24 03:30:18.141720: step: 260/464, loss: 0.06592415273189545 2023-01-24 03:30:18.775662: step: 262/464, loss: 0.3178122043609619 2023-01-24 03:30:19.428015: step: 264/464, loss: 0.12749071419239044 2023-01-24 03:30:20.064148: step: 266/464, loss: 0.01742948777973652 2023-01-24 03:30:20.658545: step: 268/464, loss: 0.01200178824365139 2023-01-24 03:30:21.231349: step: 270/464, loss: 0.15292149782180786 2023-01-24 03:30:21.859380: step: 272/464, loss: 0.12947878241539001 2023-01-24 03:30:22.556043: step: 274/464, loss: 0.4505672752857208 2023-01-24 03:30:23.162255: step: 276/464, loss: 0.05702408775687218 2023-01-24 03:30:23.823983: step: 278/464, loss: 0.024187006056308746 2023-01-24 03:30:24.434406: step: 280/464, loss: 0.03368527069687843 2023-01-24 03:30:25.030610: step: 282/464, loss: 0.03611384332180023 2023-01-24 03:30:25.659785: step: 284/464, loss: 0.021873539313673973 2023-01-24 03:30:26.362954: step: 286/464, loss: 0.10742607712745667 2023-01-24 03:30:27.055127: step: 288/464, loss: 0.11804650723934174 2023-01-24 03:30:27.693086: step: 290/464, loss: 0.018299585208296776 2023-01-24 03:30:28.323382: step: 292/464, loss: 0.06340126693248749 2023-01-24 03:30:28.996782: step: 294/464, loss: 0.016614476218819618 2023-01-24 03:30:29.611680: step: 296/464, loss: 0.24299855530261993 2023-01-24 03:30:30.182106: step: 298/464, loss: 0.07846701145172119 2023-01-24 03:30:30.816128: step: 300/464, loss: 0.023147309198975563 2023-01-24 03:30:31.431591: step: 302/464, loss: 0.06763315200805664 2023-01-24 03:30:32.048104: step: 304/464, loss: 0.0443316288292408 2023-01-24 03:30:32.675213: step: 306/464, loss: 0.011263721622526646 2023-01-24 03:30:33.296427: step: 308/464, loss: 0.041338883340358734 2023-01-24 03:30:33.935082: step: 310/464, loss: 0.022091420367360115 2023-01-24 03:30:34.658902: step: 312/464, loss: 0.0018858188996091485 2023-01-24 03:30:35.277339: step: 314/464, loss: 0.13889434933662415 2023-01-24 03:30:35.945374: step: 316/464, loss: 0.018320569768548012 2023-01-24 03:30:36.676576: step: 318/464, loss: 0.13306625187397003 2023-01-24 03:30:37.346290: step: 320/464, loss: 0.25399693846702576 2023-01-24 03:30:37.994661: step: 322/464, loss: 0.11977692693471909 2023-01-24 03:30:38.634884: step: 324/464, loss: 0.06183283403515816 2023-01-24 03:30:39.213483: step: 326/464, loss: 0.04578084126114845 2023-01-24 03:30:39.882172: step: 328/464, loss: 0.0503641702234745 2023-01-24 03:30:40.486829: step: 330/464, loss: 0.029564879834651947 2023-01-24 03:30:41.100609: step: 332/464, loss: 0.1057472974061966 2023-01-24 03:30:41.720959: step: 334/464, loss: 0.043896257877349854 2023-01-24 03:30:42.377928: step: 336/464, loss: 0.06871680915355682 2023-01-24 03:30:42.992355: step: 338/464, loss: 0.02590806782245636 2023-01-24 03:30:43.581105: step: 340/464, loss: 0.012619096785783768 2023-01-24 03:30:44.252531: step: 342/464, loss: 0.24375081062316895 2023-01-24 03:30:44.899656: step: 344/464, loss: 0.06244340538978577 2023-01-24 03:30:45.526513: step: 346/464, loss: 0.03981610760092735 2023-01-24 03:30:46.171984: step: 348/464, loss: 0.02591633051633835 2023-01-24 03:30:46.870649: step: 350/464, loss: 0.002215398009866476 2023-01-24 03:30:47.506333: step: 352/464, loss: 0.003011910943314433 2023-01-24 03:30:48.145074: step: 354/464, loss: 0.012786061502993107 2023-01-24 03:30:48.701444: step: 356/464, loss: 0.008177302777767181 2023-01-24 03:30:49.314691: step: 358/464, loss: 0.02352547086775303 2023-01-24 03:30:49.884583: step: 360/464, loss: 0.054179847240448 2023-01-24 03:30:50.462513: step: 362/464, loss: 0.19881106913089752 2023-01-24 03:30:51.037824: step: 364/464, loss: 0.07807371765375137 2023-01-24 03:30:51.653283: step: 366/464, loss: 0.07180866599082947 2023-01-24 03:30:52.260868: step: 368/464, loss: 0.028741007670760155 2023-01-24 03:30:52.930637: step: 370/464, loss: 0.1762828379869461 2023-01-24 03:30:53.566579: step: 372/464, loss: 0.014499752782285213 2023-01-24 03:30:54.224713: step: 374/464, loss: 0.046783238649368286 2023-01-24 03:30:54.868774: step: 376/464, loss: 0.07356592267751694 2023-01-24 03:30:55.492612: step: 378/464, loss: 0.25877201557159424 2023-01-24 03:30:56.105582: step: 380/464, loss: 0.013184488750994205 2023-01-24 03:30:56.711916: step: 382/464, loss: 0.0728127658367157 2023-01-24 03:30:57.338306: step: 384/464, loss: 0.07052990049123764 2023-01-24 03:30:57.967282: step: 386/464, loss: 0.04295302927494049 2023-01-24 03:30:58.583639: step: 388/464, loss: 0.04797111079096794 2023-01-24 03:30:59.271717: step: 390/464, loss: 0.005648870021104813 2023-01-24 03:30:59.834058: step: 392/464, loss: 0.03515315055847168 2023-01-24 03:31:00.383554: step: 394/464, loss: 0.005830351263284683 2023-01-24 03:31:01.014636: step: 396/464, loss: 0.044321294873952866 2023-01-24 03:31:01.676194: step: 398/464, loss: 0.0036008793395012617 2023-01-24 03:31:02.325956: step: 400/464, loss: 7.645439147949219 2023-01-24 03:31:02.921642: step: 402/464, loss: 0.03247757628560066 2023-01-24 03:31:03.522640: step: 404/464, loss: 0.06370621919631958 2023-01-24 03:31:04.101438: step: 406/464, loss: 0.04437775909900665 2023-01-24 03:31:04.662384: step: 408/464, loss: 0.05218346416950226 2023-01-24 03:31:05.311534: step: 410/464, loss: 0.028424391523003578 2023-01-24 03:31:05.897188: step: 412/464, loss: 0.08368080109357834 2023-01-24 03:31:06.477650: step: 414/464, loss: 0.06493217498064041 2023-01-24 03:31:07.180314: step: 416/464, loss: 0.10637064278125763 2023-01-24 03:31:07.859836: step: 418/464, loss: 0.12815283238887787 2023-01-24 03:31:08.441644: step: 420/464, loss: 0.017795566469430923 2023-01-24 03:31:09.134668: step: 422/464, loss: 0.016728708520531654 2023-01-24 03:31:09.834557: step: 424/464, loss: 0.10644500702619553 2023-01-24 03:31:10.443133: step: 426/464, loss: 0.06623958051204681 2023-01-24 03:31:11.111617: step: 428/464, loss: 0.009427705779671669 2023-01-24 03:31:11.729554: step: 430/464, loss: 0.04186146333813667 2023-01-24 03:31:12.366431: step: 432/464, loss: 0.0737660676240921 2023-01-24 03:31:13.145011: step: 434/464, loss: 0.013183049857616425 2023-01-24 03:31:13.719085: step: 436/464, loss: 0.019024258479475975 2023-01-24 03:31:14.359417: step: 438/464, loss: 0.0077477432787418365 2023-01-24 03:31:14.993413: step: 440/464, loss: 0.003322381991893053 2023-01-24 03:31:15.653473: step: 442/464, loss: 0.009420504793524742 2023-01-24 03:31:16.303426: step: 444/464, loss: 0.1055421233177185 2023-01-24 03:31:16.928466: step: 446/464, loss: 0.072256900370121 2023-01-24 03:31:17.500884: step: 448/464, loss: 0.03360395506024361 2023-01-24 03:31:18.108694: step: 450/464, loss: 0.03499939665198326 2023-01-24 03:31:18.752362: step: 452/464, loss: 0.0326056033372879 2023-01-24 03:31:19.342051: step: 454/464, loss: 0.11220617592334747 2023-01-24 03:31:20.052405: step: 456/464, loss: 0.0994848981499672 2023-01-24 03:31:20.669604: step: 458/464, loss: 0.07699523866176605 2023-01-24 03:31:21.275856: step: 460/464, loss: 0.011592340655624866 2023-01-24 03:31:21.927620: step: 462/464, loss: 0.02367353066802025 2023-01-24 03:31:22.545531: step: 464/464, loss: 0.021390561014413834 2023-01-24 03:31:23.162154: step: 466/464, loss: 0.017145728692412376 2023-01-24 03:31:23.845911: step: 468/464, loss: 2.969501495361328 2023-01-24 03:31:24.468105: step: 470/464, loss: 0.05871887877583504 2023-01-24 03:31:25.118483: step: 472/464, loss: 0.06936714798212051 2023-01-24 03:31:25.697331: step: 474/464, loss: 0.18440918624401093 2023-01-24 03:31:26.243030: step: 476/464, loss: 0.6779115796089172 2023-01-24 03:31:26.890018: step: 478/464, loss: 0.6086742281913757 2023-01-24 03:31:27.454524: step: 480/464, loss: 0.024632520973682404 2023-01-24 03:31:28.102622: step: 482/464, loss: 0.02487977221608162 2023-01-24 03:31:28.726203: step: 484/464, loss: 0.007665781769901514 2023-01-24 03:31:29.384699: step: 486/464, loss: 0.05555078759789467 2023-01-24 03:31:30.012120: step: 488/464, loss: 0.0898955911397934 2023-01-24 03:31:30.657130: step: 490/464, loss: 0.03891049697995186 2023-01-24 03:31:31.347812: step: 492/464, loss: 0.01622828096151352 2023-01-24 03:31:31.910309: step: 494/464, loss: 0.09543530642986298 2023-01-24 03:31:32.593985: step: 496/464, loss: 0.320547491312027 2023-01-24 03:31:33.182917: step: 498/464, loss: 0.034561898559331894 2023-01-24 03:31:33.804271: step: 500/464, loss: 0.030993588268756866 2023-01-24 03:31:34.401363: step: 502/464, loss: 0.009038617834448814 2023-01-24 03:31:35.120339: step: 504/464, loss: 0.07900907099246979 2023-01-24 03:31:35.669096: step: 506/464, loss: 0.007727830670773983 2023-01-24 03:31:36.269761: step: 508/464, loss: 0.06347976624965668 2023-01-24 03:31:36.861008: step: 510/464, loss: 0.0526079498231411 2023-01-24 03:31:37.562708: step: 512/464, loss: 0.11497075110673904 2023-01-24 03:31:38.249671: step: 514/464, loss: 0.03942112997174263 2023-01-24 03:31:38.896391: step: 516/464, loss: 0.038449421525001526 2023-01-24 03:31:39.518679: step: 518/464, loss: 0.01590256206691265 2023-01-24 03:31:40.122780: step: 520/464, loss: 0.043512072414159775 2023-01-24 03:31:40.719934: step: 522/464, loss: 0.021960392594337463 2023-01-24 03:31:41.322718: step: 524/464, loss: 0.031567446887493134 2023-01-24 03:31:41.866995: step: 526/464, loss: 0.007618204224854708 2023-01-24 03:31:42.555317: step: 528/464, loss: 0.024137090891599655 2023-01-24 03:31:43.258022: step: 530/464, loss: 0.061135660856962204 2023-01-24 03:31:43.888395: step: 532/464, loss: 0.07971035689115524 2023-01-24 03:31:44.458287: step: 534/464, loss: 0.048400457948446274 2023-01-24 03:31:45.112052: step: 536/464, loss: 0.05122085288167 2023-01-24 03:31:45.731736: step: 538/464, loss: 0.029268791899085045 2023-01-24 03:31:46.345237: step: 540/464, loss: 0.09635338187217712 2023-01-24 03:31:46.990427: step: 542/464, loss: 0.07422041893005371 2023-01-24 03:31:47.648664: step: 544/464, loss: 0.05977031961083412 2023-01-24 03:31:48.365123: step: 546/464, loss: 0.009733343496918678 2023-01-24 03:31:49.080971: step: 548/464, loss: 0.06108175963163376 2023-01-24 03:31:49.678258: step: 550/464, loss: 0.03537317365407944 2023-01-24 03:31:50.292426: step: 552/464, loss: 0.21923477947711945 2023-01-24 03:31:51.002793: step: 554/464, loss: 0.025603273883461952 2023-01-24 03:31:51.622084: step: 556/464, loss: 0.03309585526585579 2023-01-24 03:31:52.260183: step: 558/464, loss: 0.0017926269210875034 2023-01-24 03:31:52.882434: step: 560/464, loss: 0.04637129232287407 2023-01-24 03:31:53.501063: step: 562/464, loss: 0.0066716535948216915 2023-01-24 03:31:54.093870: step: 564/464, loss: 0.04727320373058319 2023-01-24 03:31:54.761759: step: 566/464, loss: 0.23864291608333588 2023-01-24 03:31:55.371013: step: 568/464, loss: 0.007027804851531982 2023-01-24 03:31:55.999120: step: 570/464, loss: 0.00685263192281127 2023-01-24 03:31:56.593825: step: 572/464, loss: 0.03749839961528778 2023-01-24 03:31:57.201582: step: 574/464, loss: 0.12423070520162582 2023-01-24 03:31:57.853342: step: 576/464, loss: 0.07700704783201218 2023-01-24 03:31:58.450013: step: 578/464, loss: 0.0055520497262477875 2023-01-24 03:31:59.061777: step: 580/464, loss: 0.019946932792663574 2023-01-24 03:31:59.658297: step: 582/464, loss: 0.06127611920237541 2023-01-24 03:32:00.337863: step: 584/464, loss: 0.12984861433506012 2023-01-24 03:32:00.966191: step: 586/464, loss: 0.042202942073345184 2023-01-24 03:32:01.578815: step: 588/464, loss: 0.01746554672718048 2023-01-24 03:32:02.193797: step: 590/464, loss: 0.08601154386997223 2023-01-24 03:32:02.810800: step: 592/464, loss: 0.01006026566028595 2023-01-24 03:32:03.504372: step: 594/464, loss: 0.059398457407951355 2023-01-24 03:32:04.108595: step: 596/464, loss: 0.04708977788686752 2023-01-24 03:32:04.749713: step: 598/464, loss: 0.0155387157574296 2023-01-24 03:32:05.391133: step: 600/464, loss: 0.038387056440114975 2023-01-24 03:32:06.056957: step: 602/464, loss: 0.07821520417928696 2023-01-24 03:32:06.748934: step: 604/464, loss: 0.019257348030805588 2023-01-24 03:32:07.401595: step: 606/464, loss: 0.05501368269324303 2023-01-24 03:32:08.045259: step: 608/464, loss: 0.07247531414031982 2023-01-24 03:32:08.714184: step: 610/464, loss: 1.6925289630889893 2023-01-24 03:32:09.308110: step: 612/464, loss: 0.017896315082907677 2023-01-24 03:32:09.979999: step: 614/464, loss: 0.47850364446640015 2023-01-24 03:32:10.620652: step: 616/464, loss: 0.034765344113111496 2023-01-24 03:32:11.212702: step: 618/464, loss: 0.011117692105472088 2023-01-24 03:32:11.848436: step: 620/464, loss: 0.025692759081721306 2023-01-24 03:32:12.462437: step: 622/464, loss: 0.057302094995975494 2023-01-24 03:32:13.100214: step: 624/464, loss: 0.0591789186000824 2023-01-24 03:32:13.710642: step: 626/464, loss: 0.039957478642463684 2023-01-24 03:32:14.361415: step: 628/464, loss: 0.023328104987740517 2023-01-24 03:32:14.968748: step: 630/464, loss: 0.01869240775704384 2023-01-24 03:32:15.621661: step: 632/464, loss: 0.03534715250134468 2023-01-24 03:32:16.167558: step: 634/464, loss: 0.02479608729481697 2023-01-24 03:32:16.802825: step: 636/464, loss: 0.06726644933223724 2023-01-24 03:32:17.402877: step: 638/464, loss: 0.03567817807197571 2023-01-24 03:32:18.056560: step: 640/464, loss: 0.025053132325410843 2023-01-24 03:32:18.713196: step: 642/464, loss: 0.035145752131938934 2023-01-24 03:32:19.330030: step: 644/464, loss: 0.03355040028691292 2023-01-24 03:32:19.964327: step: 646/464, loss: 0.2510853111743927 2023-01-24 03:32:20.708172: step: 648/464, loss: 1.1779260635375977 2023-01-24 03:32:21.311008: step: 650/464, loss: 0.014893234707415104 2023-01-24 03:32:21.975303: step: 652/464, loss: 0.14382338523864746 2023-01-24 03:32:22.600243: step: 654/464, loss: 0.08110351115465164 2023-01-24 03:32:23.271607: step: 656/464, loss: 0.4584517478942871 2023-01-24 03:32:23.937215: step: 658/464, loss: 0.03606909513473511 2023-01-24 03:32:24.553046: step: 660/464, loss: 0.03051850013434887 2023-01-24 03:32:25.099088: step: 662/464, loss: 0.06344317644834518 2023-01-24 03:32:25.702693: step: 664/464, loss: 0.3073681592941284 2023-01-24 03:32:26.298993: step: 666/464, loss: 0.0030211834236979485 2023-01-24 03:32:26.946185: step: 668/464, loss: 0.2713046371936798 2023-01-24 03:32:27.547786: step: 670/464, loss: 0.10655353963375092 2023-01-24 03:32:28.209170: step: 672/464, loss: 0.05543030798435211 2023-01-24 03:32:28.820703: step: 674/464, loss: 0.0740400180220604 2023-01-24 03:32:29.430935: step: 676/464, loss: 0.05888616293668747 2023-01-24 03:32:30.101776: step: 678/464, loss: 0.03706745803356171 2023-01-24 03:32:30.675778: step: 680/464, loss: 0.3182055950164795 2023-01-24 03:32:31.225733: step: 682/464, loss: 0.0079153161495924 2023-01-24 03:32:31.808263: step: 684/464, loss: 0.09043563157320023 2023-01-24 03:32:32.417996: step: 686/464, loss: 0.014447236433625221 2023-01-24 03:32:33.052787: step: 688/464, loss: 0.06912492215633392 2023-01-24 03:32:33.674167: step: 690/464, loss: 0.023338302969932556 2023-01-24 03:32:34.298228: step: 692/464, loss: 0.0038261814042925835 2023-01-24 03:32:34.935476: step: 694/464, loss: 0.15607619285583496 2023-01-24 03:32:35.566407: step: 696/464, loss: 0.040195267647504807 2023-01-24 03:32:36.115029: step: 698/464, loss: 0.0795249193906784 2023-01-24 03:32:36.685830: step: 700/464, loss: 0.10193470120429993 2023-01-24 03:32:37.325404: step: 702/464, loss: 0.033415645360946655 2023-01-24 03:32:37.945677: step: 704/464, loss: 0.04193181172013283 2023-01-24 03:32:38.540231: step: 706/464, loss: 0.0171140655875206 2023-01-24 03:32:39.173792: step: 708/464, loss: 0.033263515681028366 2023-01-24 03:32:39.793921: step: 710/464, loss: 0.0340133011341095 2023-01-24 03:32:40.483041: step: 712/464, loss: 0.08204270154237747 2023-01-24 03:32:41.156506: step: 714/464, loss: 0.06933029741048813 2023-01-24 03:32:41.790975: step: 716/464, loss: 0.01937519945204258 2023-01-24 03:32:42.363331: step: 718/464, loss: 0.017431603744626045 2023-01-24 03:32:43.001244: step: 720/464, loss: 0.009755883365869522 2023-01-24 03:32:43.547269: step: 722/464, loss: 0.06420817226171494 2023-01-24 03:32:44.107486: step: 724/464, loss: 0.009759259410202503 2023-01-24 03:32:44.748482: step: 726/464, loss: 0.00809104647487402 2023-01-24 03:32:45.380700: step: 728/464, loss: 0.07396422326564789 2023-01-24 03:32:45.984617: step: 730/464, loss: 0.29828405380249023 2023-01-24 03:32:46.552542: step: 732/464, loss: 0.0108730997890234 2023-01-24 03:32:47.126291: step: 734/464, loss: 0.02986094169318676 2023-01-24 03:32:47.827031: step: 736/464, loss: 0.05073142424225807 2023-01-24 03:32:48.532260: step: 738/464, loss: 0.02255566418170929 2023-01-24 03:32:49.189721: step: 740/464, loss: 1.2356326580047607 2023-01-24 03:32:49.864662: step: 742/464, loss: 0.0011746954405680299 2023-01-24 03:32:50.563075: step: 744/464, loss: 0.031542785465717316 2023-01-24 03:32:51.161490: step: 746/464, loss: 0.03540130704641342 2023-01-24 03:32:51.792889: step: 748/464, loss: 0.030641360208392143 2023-01-24 03:32:52.458182: step: 750/464, loss: 0.5967709422111511 2023-01-24 03:32:53.051510: step: 752/464, loss: 0.003998616710305214 2023-01-24 03:32:53.745346: step: 754/464, loss: 0.04136351868510246 2023-01-24 03:32:54.387946: step: 756/464, loss: 0.011975946836173534 2023-01-24 03:32:55.082215: step: 758/464, loss: 0.019011514261364937 2023-01-24 03:32:55.703543: step: 760/464, loss: 0.024285368621349335 2023-01-24 03:32:56.330952: step: 762/464, loss: 0.0003498998412396759 2023-01-24 03:32:56.982898: step: 764/464, loss: 0.1730339080095291 2023-01-24 03:32:57.535379: step: 766/464, loss: 0.036918770521879196 2023-01-24 03:32:58.146621: step: 768/464, loss: 0.2820606231689453 2023-01-24 03:32:58.731157: step: 770/464, loss: 0.11820968985557556 2023-01-24 03:32:59.416311: step: 772/464, loss: 0.007654739078134298 2023-01-24 03:33:00.044893: step: 774/464, loss: 0.038378458470106125 2023-01-24 03:33:00.745206: step: 776/464, loss: 0.0019242237322032452 2023-01-24 03:33:01.343860: step: 778/464, loss: 0.024111930280923843 2023-01-24 03:33:01.933856: step: 780/464, loss: 0.35401463508605957 2023-01-24 03:33:02.593953: step: 782/464, loss: 0.1398405134677887 2023-01-24 03:33:03.215934: step: 784/464, loss: 0.05183522775769234 2023-01-24 03:33:03.836572: step: 786/464, loss: 0.014426725916564465 2023-01-24 03:33:04.462940: step: 788/464, loss: 0.008511271327733994 2023-01-24 03:33:05.003856: step: 790/464, loss: 0.01709834113717079 2023-01-24 03:33:05.646146: step: 792/464, loss: 0.11454438418149948 2023-01-24 03:33:06.263365: step: 794/464, loss: 0.07856132090091705 2023-01-24 03:33:06.930616: step: 796/464, loss: 0.08450557291507721 2023-01-24 03:33:07.624949: step: 798/464, loss: 0.07357024401426315 2023-01-24 03:33:08.232734: step: 800/464, loss: 0.03462034836411476 2023-01-24 03:33:08.887294: step: 802/464, loss: 0.035732369869947433 2023-01-24 03:33:09.529288: step: 804/464, loss: 0.25203943252563477 2023-01-24 03:33:10.136556: step: 806/464, loss: 0.07085590809583664 2023-01-24 03:33:10.727555: step: 808/464, loss: 0.05034415423870087 2023-01-24 03:33:11.331789: step: 810/464, loss: 0.012539473362267017 2023-01-24 03:33:11.906778: step: 812/464, loss: 0.01923408731818199 2023-01-24 03:33:12.463941: step: 814/464, loss: 0.11244556307792664 2023-01-24 03:33:13.036659: step: 816/464, loss: 0.05045890435576439 2023-01-24 03:33:13.663924: step: 818/464, loss: 0.09219861775636673 2023-01-24 03:33:14.317680: step: 820/464, loss: 0.03292452171444893 2023-01-24 03:33:14.963622: step: 822/464, loss: 0.035826146602630615 2023-01-24 03:33:15.548645: step: 824/464, loss: 0.010722989216446877 2023-01-24 03:33:16.136813: step: 826/464, loss: 0.005713389255106449 2023-01-24 03:33:16.720737: step: 828/464, loss: 0.0340614952147007 2023-01-24 03:33:17.397155: step: 830/464, loss: 0.08456560969352722 2023-01-24 03:33:18.041100: step: 832/464, loss: 0.16549208760261536 2023-01-24 03:33:18.671777: step: 834/464, loss: 0.539658784866333 2023-01-24 03:33:19.319496: step: 836/464, loss: 0.027302242815494537 2023-01-24 03:33:19.965765: step: 838/464, loss: 0.018886419013142586 2023-01-24 03:33:20.606295: step: 840/464, loss: 0.007382436189800501 2023-01-24 03:33:21.269211: step: 842/464, loss: 13.666826248168945 2023-01-24 03:33:21.897036: step: 844/464, loss: 0.17506583034992218 2023-01-24 03:33:22.536108: step: 846/464, loss: 0.14494232833385468 2023-01-24 03:33:23.206828: step: 848/464, loss: 0.05238157510757446 2023-01-24 03:33:23.756919: step: 850/464, loss: 0.04806152358651161 2023-01-24 03:33:24.356084: step: 852/464, loss: 0.026084087789058685 2023-01-24 03:33:24.999228: step: 854/464, loss: 0.08451046794652939 2023-01-24 03:33:25.621564: step: 856/464, loss: 0.010034758597612381 2023-01-24 03:33:26.207106: step: 858/464, loss: 0.03995664045214653 2023-01-24 03:33:26.795625: step: 860/464, loss: 0.09627938270568848 2023-01-24 03:33:27.396311: step: 862/464, loss: 0.0830642431974411 2023-01-24 03:33:28.034852: step: 864/464, loss: 0.02704242244362831 2023-01-24 03:33:28.665898: step: 866/464, loss: 0.02706335112452507 2023-01-24 03:33:29.288059: step: 868/464, loss: 0.019787253811955452 2023-01-24 03:33:29.899659: step: 870/464, loss: 0.07990527153015137 2023-01-24 03:33:30.473328: step: 872/464, loss: 0.01874559558928013 2023-01-24 03:33:31.060246: step: 874/464, loss: 0.13096484541893005 2023-01-24 03:33:31.690074: step: 876/464, loss: 0.08135256171226501 2023-01-24 03:33:32.297330: step: 878/464, loss: 0.0434761568903923 2023-01-24 03:33:32.919384: step: 880/464, loss: 0.037891894578933716 2023-01-24 03:33:33.518512: step: 882/464, loss: 0.02107781358063221 2023-01-24 03:33:34.117900: step: 884/464, loss: 0.020204002037644386 2023-01-24 03:33:34.706024: step: 886/464, loss: 0.030770068988204002 2023-01-24 03:33:35.360413: step: 888/464, loss: 0.0335783027112484 2023-01-24 03:33:35.978078: step: 890/464, loss: 0.07361844927072525 2023-01-24 03:33:36.600056: step: 892/464, loss: 0.02100438065826893 2023-01-24 03:33:37.229911: step: 894/464, loss: 0.06870966404676437 2023-01-24 03:33:37.792194: step: 896/464, loss: 0.3108283579349518 2023-01-24 03:33:38.482925: step: 898/464, loss: 0.43348509073257446 2023-01-24 03:33:39.213583: step: 900/464, loss: 0.008296527899801731 2023-01-24 03:33:39.820649: step: 902/464, loss: 0.004848845303058624 2023-01-24 03:33:40.402691: step: 904/464, loss: 0.449689120054245 2023-01-24 03:33:41.021432: step: 906/464, loss: 0.03436870872974396 2023-01-24 03:33:41.623477: step: 908/464, loss: 0.05464969947934151 2023-01-24 03:33:42.268221: step: 910/464, loss: 0.013983510434627533 2023-01-24 03:33:42.908750: step: 912/464, loss: 0.027595987543463707 2023-01-24 03:33:43.543868: step: 914/464, loss: 0.14348651468753815 2023-01-24 03:33:44.201004: step: 916/464, loss: 0.0415404848754406 2023-01-24 03:33:44.855737: step: 918/464, loss: 0.009719896130263805 2023-01-24 03:33:45.526311: step: 920/464, loss: 0.03497597947716713 2023-01-24 03:33:46.156950: step: 922/464, loss: 0.04873776435852051 2023-01-24 03:33:46.826185: step: 924/464, loss: 0.07699619978666306 2023-01-24 03:33:47.425613: step: 926/464, loss: 0.07241154462099075 2023-01-24 03:33:48.190034: step: 928/464, loss: 0.6287901997566223 2023-01-24 03:33:48.748800: step: 930/464, loss: 0.029939774423837662 ================================================== Loss: 0.142 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3336655417764121, 'r': 0.33936381478587646, 'f1': 0.33649055577075615}, 'combined': 0.247940409515294, 'epoch': 22} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31476146560767226, 'r': 0.3133149515194017, 'f1': 0.3140365428402663}, 'combined': 0.20501867563665052, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32463278068644563, 'r': 0.3529688488298546, 'f1': 0.3382083333333334}, 'combined': 0.2492061403508772, 'epoch': 22} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3256956875346786, 'r': 0.3119254654514109, 'f1': 0.31866188395411743}, 'combined': 0.20803832838455336, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3317436390140472, 'r': 0.339297573868257, 'f1': 0.3354780889841865}, 'combined': 0.247194381356769, 'epoch': 22} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32780253925581626, 'r': 0.3036994113693592, 'f1': 0.31529099195597593}, 'combined': 0.20583764241685476, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2521929824561403, 'r': 0.2738095238095238, 'f1': 0.26255707762557073}, 'combined': 0.17503805175038048, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30833333333333335, 'r': 0.40217391304347827, 'f1': 0.34905660377358494}, 'combined': 0.17452830188679247, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:36:25.328687: step: 2/464, loss: 0.10644122958183289 2023-01-24 03:36:25.988433: step: 4/464, loss: 0.07475408166646957 2023-01-24 03:36:26.608865: step: 6/464, loss: 0.011993331834673882 2023-01-24 03:36:27.207576: step: 8/464, loss: 0.019554661586880684 2023-01-24 03:36:27.773781: step: 10/464, loss: 0.04917929321527481 2023-01-24 03:36:28.339379: step: 12/464, loss: 0.052631910890340805 2023-01-24 03:36:28.982565: step: 14/464, loss: 0.11645985394716263 2023-01-24 03:36:29.612882: step: 16/464, loss: 0.02561560459434986 2023-01-24 03:36:30.247959: step: 18/464, loss: 0.028937913477420807 2023-01-24 03:36:30.863979: step: 20/464, loss: 0.04139014706015587 2023-01-24 03:36:31.503573: step: 22/464, loss: 0.015588788315653801 2023-01-24 03:36:32.113760: step: 24/464, loss: 0.003131264355033636 2023-01-24 03:36:32.723311: step: 26/464, loss: 0.07951285690069199 2023-01-24 03:36:33.389837: step: 28/464, loss: 0.007127698510885239 2023-01-24 03:36:34.048547: step: 30/464, loss: 0.10955634713172913 2023-01-24 03:36:34.624624: step: 32/464, loss: 0.04282316938042641 2023-01-24 03:36:35.219366: step: 34/464, loss: 0.10839519649744034 2023-01-24 03:36:35.792702: step: 36/464, loss: 0.025897495448589325 2023-01-24 03:36:36.341289: step: 38/464, loss: 0.03440878540277481 2023-01-24 03:36:36.943154: step: 40/464, loss: 0.024650786072015762 2023-01-24 03:36:37.512271: step: 42/464, loss: 0.0286126546561718 2023-01-24 03:36:38.137255: step: 44/464, loss: 0.10195796191692352 2023-01-24 03:36:38.802894: step: 46/464, loss: 0.010873687453567982 2023-01-24 03:36:39.362334: step: 48/464, loss: 0.02549760416150093 2023-01-24 03:36:40.003669: step: 50/464, loss: 0.05368296802043915 2023-01-24 03:36:40.591171: step: 52/464, loss: 0.11347991228103638 2023-01-24 03:36:41.221276: step: 54/464, loss: 0.10324886441230774 2023-01-24 03:36:41.841846: step: 56/464, loss: 0.060572996735572815 2023-01-24 03:36:42.464567: step: 58/464, loss: 0.03887447714805603 2023-01-24 03:36:43.118246: step: 60/464, loss: 0.022154150530695915 2023-01-24 03:36:43.744558: step: 62/464, loss: 0.009307364001870155 2023-01-24 03:36:44.371750: step: 64/464, loss: 0.016641004011034966 2023-01-24 03:36:45.024372: step: 66/464, loss: 0.23638705909252167 2023-01-24 03:36:45.594673: step: 68/464, loss: 0.01452475693076849 2023-01-24 03:36:46.183587: step: 70/464, loss: 0.02883811853826046 2023-01-24 03:36:46.745222: step: 72/464, loss: 0.007868830114603043 2023-01-24 03:36:47.300304: step: 74/464, loss: 0.04357173666357994 2023-01-24 03:36:47.965840: step: 76/464, loss: 0.09538904577493668 2023-01-24 03:36:48.584046: step: 78/464, loss: 0.0018587729427963495 2023-01-24 03:36:49.225471: step: 80/464, loss: 0.055834852159023285 2023-01-24 03:36:49.896037: step: 82/464, loss: 0.17760033905506134 2023-01-24 03:36:50.456503: step: 84/464, loss: 0.0023912456817924976 2023-01-24 03:36:51.059835: step: 86/464, loss: 0.038486480712890625 2023-01-24 03:36:51.676187: step: 88/464, loss: 0.07755500823259354 2023-01-24 03:36:52.330733: step: 90/464, loss: 0.011968021281063557 2023-01-24 03:36:52.883262: step: 92/464, loss: 0.005228791851550341 2023-01-24 03:36:53.555033: step: 94/464, loss: 0.021261105313897133 2023-01-24 03:36:54.145000: step: 96/464, loss: 0.021069560199975967 2023-01-24 03:36:54.781905: step: 98/464, loss: 0.011009802110493183 2023-01-24 03:36:55.389992: step: 100/464, loss: 0.044996075332164764 2023-01-24 03:36:56.035042: step: 102/464, loss: 0.8489930033683777 2023-01-24 03:36:56.728334: step: 104/464, loss: 0.004066376946866512 2023-01-24 03:36:57.369591: step: 106/464, loss: 0.05179396644234657 2023-01-24 03:36:57.999699: step: 108/464, loss: 0.062286872416734695 2023-01-24 03:36:58.695508: step: 110/464, loss: 0.017060136422514915 2023-01-24 03:36:59.382258: step: 112/464, loss: 0.03774134814739227 2023-01-24 03:37:00.021381: step: 114/464, loss: 0.001183267217129469 2023-01-24 03:37:00.639489: step: 116/464, loss: 0.0009860256686806679 2023-01-24 03:37:01.278806: step: 118/464, loss: 0.009680584073066711 2023-01-24 03:37:01.876423: step: 120/464, loss: 0.01292145811021328 2023-01-24 03:37:02.557101: step: 122/464, loss: 0.1006164699792862 2023-01-24 03:37:03.103473: step: 124/464, loss: 0.00143814692273736 2023-01-24 03:37:03.699593: step: 126/464, loss: 0.006221970543265343 2023-01-24 03:37:04.324746: step: 128/464, loss: 0.020568178966641426 2023-01-24 03:37:04.956461: step: 130/464, loss: 0.09491739422082901 2023-01-24 03:37:06.198213: step: 132/464, loss: 0.025057677179574966 2023-01-24 03:37:06.786144: step: 134/464, loss: 0.0070098889991641045 2023-01-24 03:37:07.339943: step: 136/464, loss: 0.011948754079639912 2023-01-24 03:37:07.946942: step: 138/464, loss: 0.07747643440961838 2023-01-24 03:37:08.524842: step: 140/464, loss: 0.2736450135707855 2023-01-24 03:37:09.229015: step: 142/464, loss: 0.014300171285867691 2023-01-24 03:37:09.828516: step: 144/464, loss: 0.0038129989989101887 2023-01-24 03:37:10.522720: step: 146/464, loss: 0.01623530127108097 2023-01-24 03:37:11.152778: step: 148/464, loss: 0.08698973804712296 2023-01-24 03:37:11.823835: step: 150/464, loss: 0.14513812959194183 2023-01-24 03:37:12.412134: step: 152/464, loss: 0.0026721323374658823 2023-01-24 03:37:13.064494: step: 154/464, loss: 0.054639026522636414 2023-01-24 03:37:13.683715: step: 156/464, loss: 0.0822264775633812 2023-01-24 03:37:14.326675: step: 158/464, loss: 0.029045280069112778 2023-01-24 03:37:14.936208: step: 160/464, loss: 0.0024003051221370697 2023-01-24 03:37:15.585635: step: 162/464, loss: 0.004694780800491571 2023-01-24 03:37:16.241269: step: 164/464, loss: 0.009777367115020752 2023-01-24 03:37:16.849716: step: 166/464, loss: 0.013787111267447472 2023-01-24 03:37:17.499203: step: 168/464, loss: 0.024741878733038902 2023-01-24 03:37:18.086471: step: 170/464, loss: 0.015733882784843445 2023-01-24 03:37:18.715514: step: 172/464, loss: 0.020991239696741104 2023-01-24 03:37:19.321418: step: 174/464, loss: 0.1043391227722168 2023-01-24 03:37:19.915853: step: 176/464, loss: 0.026850463822484016 2023-01-24 03:37:20.646321: step: 178/464, loss: 0.03842521458864212 2023-01-24 03:37:21.265456: step: 180/464, loss: 0.546832263469696 2023-01-24 03:37:21.830267: step: 182/464, loss: 0.019418247044086456 2023-01-24 03:37:22.422950: step: 184/464, loss: 0.02360132336616516 2023-01-24 03:37:23.036195: step: 186/464, loss: 0.009908870793879032 2023-01-24 03:37:23.661465: step: 188/464, loss: 0.0006705268751829863 2023-01-24 03:37:24.288247: step: 190/464, loss: 0.0026790674310177565 2023-01-24 03:37:24.950095: step: 192/464, loss: 0.10986961424350739 2023-01-24 03:37:25.541203: step: 194/464, loss: 0.0328039713203907 2023-01-24 03:37:26.148780: step: 196/464, loss: 0.3907298147678375 2023-01-24 03:37:26.743260: step: 198/464, loss: 0.07607251405715942 2023-01-24 03:37:27.306363: step: 200/464, loss: 0.029756220057606697 2023-01-24 03:37:27.921483: step: 202/464, loss: 0.0925208330154419 2023-01-24 03:37:28.575948: step: 204/464, loss: 0.02148488536477089 2023-01-24 03:37:29.200111: step: 206/464, loss: 0.013174588792026043 2023-01-24 03:37:29.818970: step: 208/464, loss: 0.03021945431828499 2023-01-24 03:37:30.431540: step: 210/464, loss: 0.03675977513194084 2023-01-24 03:37:31.107330: step: 212/464, loss: 0.02435053512454033 2023-01-24 03:37:31.768929: step: 214/464, loss: 0.12001971900463104 2023-01-24 03:37:32.414222: step: 216/464, loss: 0.053732819855213165 2023-01-24 03:37:33.002231: step: 218/464, loss: 0.015495366416871548 2023-01-24 03:37:33.629997: step: 220/464, loss: 0.09464208781719208 2023-01-24 03:37:34.288148: step: 222/464, loss: 0.03359712287783623 2023-01-24 03:37:34.920783: step: 224/464, loss: 0.0051836310885846615 2023-01-24 03:37:35.544811: step: 226/464, loss: 0.01719290390610695 2023-01-24 03:37:36.166165: step: 228/464, loss: 0.036921728402376175 2023-01-24 03:37:36.824007: step: 230/464, loss: 0.038592804223299026 2023-01-24 03:37:37.412463: step: 232/464, loss: 0.04210580512881279 2023-01-24 03:37:37.992849: step: 234/464, loss: 0.01965983398258686 2023-01-24 03:37:38.606857: step: 236/464, loss: 0.012154348194599152 2023-01-24 03:37:39.228641: step: 238/464, loss: 0.024885592982172966 2023-01-24 03:37:39.874703: step: 240/464, loss: 0.013276264071464539 2023-01-24 03:37:40.498089: step: 242/464, loss: 0.11231344938278198 2023-01-24 03:37:41.106119: step: 244/464, loss: 0.0007568482542410493 2023-01-24 03:37:41.723441: step: 246/464, loss: 0.05851550027728081 2023-01-24 03:37:42.345383: step: 248/464, loss: 0.009488094598054886 2023-01-24 03:37:42.970398: step: 250/464, loss: 0.05608345568180084 2023-01-24 03:37:43.538789: step: 252/464, loss: 0.06864190101623535 2023-01-24 03:37:44.168160: step: 254/464, loss: 0.03570174798369408 2023-01-24 03:37:44.822290: step: 256/464, loss: 0.07568265497684479 2023-01-24 03:37:45.456628: step: 258/464, loss: 0.04004029929637909 2023-01-24 03:37:46.144069: step: 260/464, loss: 0.04522702470421791 2023-01-24 03:37:46.734240: step: 262/464, loss: 0.01839270070195198 2023-01-24 03:37:47.305258: step: 264/464, loss: 0.06256843358278275 2023-01-24 03:37:47.929231: step: 266/464, loss: 0.0010576738277450204 2023-01-24 03:37:48.555835: step: 268/464, loss: 0.20907624065876007 2023-01-24 03:37:49.185863: step: 270/464, loss: 1.4093396663665771 2023-01-24 03:37:49.786985: step: 272/464, loss: 0.12446978688240051 2023-01-24 03:37:50.420687: step: 274/464, loss: 0.004322616849094629 2023-01-24 03:37:51.012632: step: 276/464, loss: 0.020683592185378075 2023-01-24 03:37:51.646041: step: 278/464, loss: 0.11281999945640564 2023-01-24 03:37:52.276576: step: 280/464, loss: 0.06595201045274734 2023-01-24 03:37:52.899476: step: 282/464, loss: 0.4145338535308838 2023-01-24 03:37:53.554283: step: 284/464, loss: 0.05684361606836319 2023-01-24 03:37:54.178273: step: 286/464, loss: 1.880172848701477 2023-01-24 03:37:54.849803: step: 288/464, loss: 0.08005440980195999 2023-01-24 03:37:55.506066: step: 290/464, loss: 0.006799811031669378 2023-01-24 03:37:56.137283: step: 292/464, loss: 0.03461438789963722 2023-01-24 03:37:56.708192: step: 294/464, loss: 0.010340527631342411 2023-01-24 03:37:57.346969: step: 296/464, loss: 0.06825224310159683 2023-01-24 03:37:57.919145: step: 298/464, loss: 0.007516786921769381 2023-01-24 03:37:58.562664: step: 300/464, loss: 0.060877662152051926 2023-01-24 03:37:59.175571: step: 302/464, loss: 0.031738415360450745 2023-01-24 03:37:59.871639: step: 304/464, loss: 0.060117222368717194 2023-01-24 03:38:00.464912: step: 306/464, loss: 0.022865887731313705 2023-01-24 03:38:01.116478: step: 308/464, loss: 0.09713611006736755 2023-01-24 03:38:01.739592: step: 310/464, loss: 0.06979311257600784 2023-01-24 03:38:02.347545: step: 312/464, loss: 0.09803225100040436 2023-01-24 03:38:03.047516: step: 314/464, loss: 0.06322037428617477 2023-01-24 03:38:03.661745: step: 316/464, loss: 0.3646462559700012 2023-01-24 03:38:04.253035: step: 318/464, loss: 0.005568439140915871 2023-01-24 03:38:04.885795: step: 320/464, loss: 0.08541621267795563 2023-01-24 03:38:05.496301: step: 322/464, loss: 0.016559764742851257 2023-01-24 03:38:06.142903: step: 324/464, loss: 0.10714079439640045 2023-01-24 03:38:06.711812: step: 326/464, loss: 0.005957483313977718 2023-01-24 03:38:07.358256: step: 328/464, loss: 0.01278592087328434 2023-01-24 03:38:07.992046: step: 330/464, loss: 0.019597185775637627 2023-01-24 03:38:08.632497: step: 332/464, loss: 0.00973434280604124 2023-01-24 03:38:09.275888: step: 334/464, loss: 0.025493955239653587 2023-01-24 03:38:09.836357: step: 336/464, loss: 0.02513478696346283 2023-01-24 03:38:10.425352: step: 338/464, loss: 0.0401698537170887 2023-01-24 03:38:11.022406: step: 340/464, loss: 0.02699063904583454 2023-01-24 03:38:11.714452: step: 342/464, loss: 0.060390595346689224 2023-01-24 03:38:12.342493: step: 344/464, loss: 0.010543850250542164 2023-01-24 03:38:12.987193: step: 346/464, loss: 0.001764788175933063 2023-01-24 03:38:13.608283: step: 348/464, loss: 0.009307467378675938 2023-01-24 03:38:14.214248: step: 350/464, loss: 0.0627625361084938 2023-01-24 03:38:14.817554: step: 352/464, loss: 0.05518518015742302 2023-01-24 03:38:15.413725: step: 354/464, loss: 0.06610012799501419 2023-01-24 03:38:16.043699: step: 356/464, loss: 0.07874742150306702 2023-01-24 03:38:16.626611: step: 358/464, loss: 0.09541633725166321 2023-01-24 03:38:17.265850: step: 360/464, loss: 5.9227447509765625 2023-01-24 03:38:17.869416: step: 362/464, loss: 0.02257785201072693 2023-01-24 03:38:18.479342: step: 364/464, loss: 0.023625222966074944 2023-01-24 03:38:18.992409: step: 366/464, loss: 0.023329224437475204 2023-01-24 03:38:19.586279: step: 368/464, loss: 0.12411260604858398 2023-01-24 03:38:20.187998: step: 370/464, loss: 0.1140187606215477 2023-01-24 03:38:20.830709: step: 372/464, loss: 1.646920919418335 2023-01-24 03:38:21.584112: step: 374/464, loss: 0.29714834690093994 2023-01-24 03:38:22.266960: step: 376/464, loss: 0.5195530652999878 2023-01-24 03:38:22.894068: step: 378/464, loss: 0.03506457060575485 2023-01-24 03:38:23.555552: step: 380/464, loss: 0.035885609686374664 2023-01-24 03:38:24.142858: step: 382/464, loss: 0.01231367141008377 2023-01-24 03:38:24.729961: step: 384/464, loss: 0.014884741976857185 2023-01-24 03:38:25.363147: step: 386/464, loss: 0.004058377351611853 2023-01-24 03:38:25.972983: step: 388/464, loss: 0.026437105610966682 2023-01-24 03:38:26.691140: step: 390/464, loss: 0.058611735701560974 2023-01-24 03:38:27.326880: step: 392/464, loss: 0.4463246166706085 2023-01-24 03:38:27.947334: step: 394/464, loss: 0.0023168607149273157 2023-01-24 03:38:28.597401: step: 396/464, loss: 0.07154099643230438 2023-01-24 03:38:29.291141: step: 398/464, loss: 0.06256363540887833 2023-01-24 03:38:30.010130: step: 400/464, loss: 0.043433479964733124 2023-01-24 03:38:30.665724: step: 402/464, loss: 0.08834725618362427 2023-01-24 03:38:31.260368: step: 404/464, loss: 0.9852097630500793 2023-01-24 03:38:31.940908: step: 406/464, loss: 0.056845128536224365 2023-01-24 03:38:32.526008: step: 408/464, loss: 0.18970215320587158 2023-01-24 03:38:33.069296: step: 410/464, loss: 0.009288941510021687 2023-01-24 03:38:33.668401: step: 412/464, loss: 0.01798395812511444 2023-01-24 03:38:34.349980: step: 414/464, loss: 0.03440636768937111 2023-01-24 03:38:34.997492: step: 416/464, loss: 0.031902141869068146 2023-01-24 03:38:35.605201: step: 418/464, loss: 0.2523932456970215 2023-01-24 03:38:36.250125: step: 420/464, loss: 0.05770578980445862 2023-01-24 03:38:36.928378: step: 422/464, loss: 0.009200010448694229 2023-01-24 03:38:37.701197: step: 424/464, loss: 0.04757911339402199 2023-01-24 03:38:38.281841: step: 426/464, loss: 0.09651003032922745 2023-01-24 03:38:39.012286: step: 428/464, loss: 0.02554561011493206 2023-01-24 03:38:39.712169: step: 430/464, loss: 0.07525847852230072 2023-01-24 03:38:40.373253: step: 432/464, loss: 0.05352248623967171 2023-01-24 03:38:41.012053: step: 434/464, loss: 0.004007582552731037 2023-01-24 03:38:41.704278: step: 436/464, loss: 0.039869554340839386 2023-01-24 03:38:42.317691: step: 438/464, loss: 0.031047623604536057 2023-01-24 03:38:42.973790: step: 440/464, loss: 0.009951122105121613 2023-01-24 03:38:43.612211: step: 442/464, loss: 0.027510803192853928 2023-01-24 03:38:44.208911: step: 444/464, loss: 0.0036701064091175795 2023-01-24 03:38:44.789510: step: 446/464, loss: 0.05184917896986008 2023-01-24 03:38:45.396937: step: 448/464, loss: 0.10439484566450119 2023-01-24 03:38:46.041460: step: 450/464, loss: 0.0792376846075058 2023-01-24 03:38:46.630554: step: 452/464, loss: 0.04133224859833717 2023-01-24 03:38:47.220191: step: 454/464, loss: 0.02767222933471203 2023-01-24 03:38:47.819168: step: 456/464, loss: 0.029987365007400513 2023-01-24 03:38:48.439499: step: 458/464, loss: 0.03149545192718506 2023-01-24 03:38:49.061779: step: 460/464, loss: 0.006614563055336475 2023-01-24 03:38:49.661390: step: 462/464, loss: 0.025004135444760323 2023-01-24 03:38:50.257058: step: 464/464, loss: 0.036268845200538635 2023-01-24 03:38:50.931942: step: 466/464, loss: 0.024740692228078842 2023-01-24 03:38:51.546279: step: 468/464, loss: 0.027130400761961937 2023-01-24 03:38:52.133116: step: 470/464, loss: 0.008292856626212597 2023-01-24 03:38:52.845580: step: 472/464, loss: 0.023867568001151085 2023-01-24 03:38:53.424129: step: 474/464, loss: 0.09256128966808319 2023-01-24 03:38:54.022072: step: 476/464, loss: 0.03897581249475479 2023-01-24 03:38:54.696049: step: 478/464, loss: 0.09847109764814377 2023-01-24 03:38:55.327988: step: 480/464, loss: 0.004689089488238096 2023-01-24 03:38:55.943657: step: 482/464, loss: 0.015606822445988655 2023-01-24 03:38:56.509902: step: 484/464, loss: 0.14244210720062256 2023-01-24 03:38:57.115854: step: 486/464, loss: 0.012135368771851063 2023-01-24 03:38:57.748710: step: 488/464, loss: 0.08063202351331711 2023-01-24 03:38:58.420148: step: 490/464, loss: 0.023309670388698578 2023-01-24 03:38:58.944784: step: 492/464, loss: 0.03579302877187729 2023-01-24 03:38:59.562329: step: 494/464, loss: 0.013103240169584751 2023-01-24 03:39:00.231365: step: 496/464, loss: 0.021631481125950813 2023-01-24 03:39:00.840169: step: 498/464, loss: 0.05697460100054741 2023-01-24 03:39:01.547646: step: 500/464, loss: 0.05060938373208046 2023-01-24 03:39:02.174561: step: 502/464, loss: 0.017666872590780258 2023-01-24 03:39:02.831026: step: 504/464, loss: 0.16124327480793 2023-01-24 03:39:03.389507: step: 506/464, loss: 0.037429243326187134 2023-01-24 03:39:04.004833: step: 508/464, loss: 0.03328167274594307 2023-01-24 03:39:04.714080: step: 510/464, loss: 0.03959827497601509 2023-01-24 03:39:05.332197: step: 512/464, loss: 0.026566093787550926 2023-01-24 03:39:05.986073: step: 514/464, loss: 0.11199571192264557 2023-01-24 03:39:06.610321: step: 516/464, loss: 0.07202999293804169 2023-01-24 03:39:07.406243: step: 518/464, loss: 0.09872186183929443 2023-01-24 03:39:08.017873: step: 520/464, loss: 0.056988392025232315 2023-01-24 03:39:08.571341: step: 522/464, loss: 0.007081233896315098 2023-01-24 03:39:09.152593: step: 524/464, loss: 0.24260111153125763 2023-01-24 03:39:09.799763: step: 526/464, loss: 0.006394832860678434 2023-01-24 03:39:10.412022: step: 528/464, loss: 0.062192559242248535 2023-01-24 03:39:11.025947: step: 530/464, loss: 0.1292833387851715 2023-01-24 03:39:11.638843: step: 532/464, loss: 0.0303835216909647 2023-01-24 03:39:12.255235: step: 534/464, loss: 0.06893176585435867 2023-01-24 03:39:12.906884: step: 536/464, loss: 0.05994332581758499 2023-01-24 03:39:13.545286: step: 538/464, loss: 0.022876126691699028 2023-01-24 03:39:14.132898: step: 540/464, loss: 0.037819162011146545 2023-01-24 03:39:14.703007: step: 542/464, loss: 0.022822659462690353 2023-01-24 03:39:15.307088: step: 544/464, loss: 0.022124748677015305 2023-01-24 03:39:15.899463: step: 546/464, loss: 0.0019097479525953531 2023-01-24 03:39:16.496277: step: 548/464, loss: 0.059465598315000534 2023-01-24 03:39:17.071434: step: 550/464, loss: 0.11557956784963608 2023-01-24 03:39:17.688406: step: 552/464, loss: 0.04174748808145523 2023-01-24 03:39:18.302231: step: 554/464, loss: 0.02896018885076046 2023-01-24 03:39:18.907818: step: 556/464, loss: 0.04794749245047569 2023-01-24 03:39:19.507990: step: 558/464, loss: 0.029625695198774338 2023-01-24 03:39:20.114406: step: 560/464, loss: 0.02156044915318489 2023-01-24 03:39:20.673898: step: 562/464, loss: 0.015180575661361217 2023-01-24 03:39:21.347307: step: 564/464, loss: 0.035031288862228394 2023-01-24 03:39:21.989056: step: 566/464, loss: 0.0358675979077816 2023-01-24 03:39:22.586154: step: 568/464, loss: 0.04682903736829758 2023-01-24 03:39:23.196320: step: 570/464, loss: 0.04473865032196045 2023-01-24 03:39:23.829579: step: 572/464, loss: 0.034532204270362854 2023-01-24 03:39:24.489816: step: 574/464, loss: 0.018957484513521194 2023-01-24 03:39:25.175462: step: 576/464, loss: 0.11535761505365372 2023-01-24 03:39:25.754104: step: 578/464, loss: 0.020611165091395378 2023-01-24 03:39:26.331293: step: 580/464, loss: 0.06758003681898117 2023-01-24 03:39:26.955910: step: 582/464, loss: 0.0010374293196946383 2023-01-24 03:39:27.608423: step: 584/464, loss: 0.037798311561346054 2023-01-24 03:39:28.212455: step: 586/464, loss: 0.03922104090452194 2023-01-24 03:39:28.870456: step: 588/464, loss: 0.011756215244531631 2023-01-24 03:39:29.491884: step: 590/464, loss: 0.013177670538425446 2023-01-24 03:39:30.143187: step: 592/464, loss: 0.052139610052108765 2023-01-24 03:39:30.783571: step: 594/464, loss: 0.047864366322755814 2023-01-24 03:39:31.370085: step: 596/464, loss: 0.011701811105012894 2023-01-24 03:39:31.929278: step: 598/464, loss: 0.021516425535082817 2023-01-24 03:39:32.685258: step: 600/464, loss: 0.0323818065226078 2023-01-24 03:39:33.378826: step: 602/464, loss: 0.9882540106773376 2023-01-24 03:39:33.975375: step: 604/464, loss: 0.2516738176345825 2023-01-24 03:39:34.611471: step: 606/464, loss: 0.036247603595256805 2023-01-24 03:39:35.303352: step: 608/464, loss: 0.08317970484495163 2023-01-24 03:39:35.903973: step: 610/464, loss: 0.05322250723838806 2023-01-24 03:39:36.480798: step: 612/464, loss: 0.007082348223775625 2023-01-24 03:39:37.038532: step: 614/464, loss: 0.013145468197762966 2023-01-24 03:39:37.651483: step: 616/464, loss: 0.03757704794406891 2023-01-24 03:39:38.287005: step: 618/464, loss: 0.01598481647670269 2023-01-24 03:39:38.895366: step: 620/464, loss: 0.009986592456698418 2023-01-24 03:39:39.532286: step: 622/464, loss: 0.04212084785103798 2023-01-24 03:39:40.188814: step: 624/464, loss: 0.009833576157689095 2023-01-24 03:39:40.824009: step: 626/464, loss: 0.060698915272951126 2023-01-24 03:39:41.383108: step: 628/464, loss: 0.01126753631979227 2023-01-24 03:39:42.052677: step: 630/464, loss: 0.04587607830762863 2023-01-24 03:39:42.767365: step: 632/464, loss: 0.03733550384640694 2023-01-24 03:39:43.442891: step: 634/464, loss: 0.06836825609207153 2023-01-24 03:39:44.031564: step: 636/464, loss: 0.014130858704447746 2023-01-24 03:39:44.603407: step: 638/464, loss: 0.020342597737908363 2023-01-24 03:39:45.268671: step: 640/464, loss: 0.052543386816978455 2023-01-24 03:39:45.883637: step: 642/464, loss: 0.059955451637506485 2023-01-24 03:39:46.551247: step: 644/464, loss: 0.025845695286989212 2023-01-24 03:39:47.272513: step: 646/464, loss: 0.04973992705345154 2023-01-24 03:39:47.884920: step: 648/464, loss: 0.04457143321633339 2023-01-24 03:39:48.456764: step: 650/464, loss: 0.14433138072490692 2023-01-24 03:39:49.149817: step: 652/464, loss: 0.03164048120379448 2023-01-24 03:39:49.780282: step: 654/464, loss: 0.14378276467323303 2023-01-24 03:39:50.378180: step: 656/464, loss: 0.004824694711714983 2023-01-24 03:39:50.961121: step: 658/464, loss: 0.08035098761320114 2023-01-24 03:39:51.566114: step: 660/464, loss: 0.007931775413453579 2023-01-24 03:39:52.167829: step: 662/464, loss: 0.006059381645172834 2023-01-24 03:39:52.741286: step: 664/464, loss: 0.020819762721657753 2023-01-24 03:39:53.367706: step: 666/464, loss: 0.034322306513786316 2023-01-24 03:39:54.008354: step: 668/464, loss: 0.04108717292547226 2023-01-24 03:39:54.603385: step: 670/464, loss: 0.017097413539886475 2023-01-24 03:39:55.170778: step: 672/464, loss: 0.036327969282865524 2023-01-24 03:39:55.755468: step: 674/464, loss: 0.014165619388222694 2023-01-24 03:39:56.302205: step: 676/464, loss: 0.020011477172374725 2023-01-24 03:39:56.914150: step: 678/464, loss: 0.0024309654254466295 2023-01-24 03:39:57.558197: step: 680/464, loss: 0.019209301099181175 2023-01-24 03:39:58.198555: step: 682/464, loss: 0.025400152429938316 2023-01-24 03:39:58.849951: step: 684/464, loss: 0.028395840898156166 2023-01-24 03:39:59.492878: step: 686/464, loss: 0.1376052349805832 2023-01-24 03:40:00.113359: step: 688/464, loss: 0.01598648726940155 2023-01-24 03:40:00.721090: step: 690/464, loss: 0.049407944083213806 2023-01-24 03:40:01.337000: step: 692/464, loss: 0.0032649594359099865 2023-01-24 03:40:01.965553: step: 694/464, loss: 0.34030041098594666 2023-01-24 03:40:02.657043: step: 696/464, loss: 0.07703862339258194 2023-01-24 03:40:03.266322: step: 698/464, loss: 0.038921430706977844 2023-01-24 03:40:03.901357: step: 700/464, loss: 0.06986559927463531 2023-01-24 03:40:04.530912: step: 702/464, loss: 0.04618273675441742 2023-01-24 03:40:05.125710: step: 704/464, loss: 0.052316583693027496 2023-01-24 03:40:05.793346: step: 706/464, loss: 0.0034637299831956625 2023-01-24 03:40:06.401260: step: 708/464, loss: 0.14313380420207977 2023-01-24 03:40:06.986565: step: 710/464, loss: 0.0035805453080683947 2023-01-24 03:40:07.551766: step: 712/464, loss: 0.008872403763234615 2023-01-24 03:40:08.207646: step: 714/464, loss: 0.0516185462474823 2023-01-24 03:40:08.985068: step: 716/464, loss: 0.014751007780432701 2023-01-24 03:40:09.608519: step: 718/464, loss: 0.05501936003565788 2023-01-24 03:40:10.195549: step: 720/464, loss: 0.13606928288936615 2023-01-24 03:40:10.875897: step: 722/464, loss: 0.10207492858171463 2023-01-24 03:40:11.458675: step: 724/464, loss: 0.027091432362794876 2023-01-24 03:40:12.078774: step: 726/464, loss: 0.04187340661883354 2023-01-24 03:40:12.670736: step: 728/464, loss: 0.020310498774051666 2023-01-24 03:40:13.371857: step: 730/464, loss: 0.0079060522839427 2023-01-24 03:40:13.976342: step: 732/464, loss: 0.006408375222235918 2023-01-24 03:40:14.554776: step: 734/464, loss: 0.05852990970015526 2023-01-24 03:40:15.155657: step: 736/464, loss: 2.211858034133911 2023-01-24 03:40:15.795552: step: 738/464, loss: 0.07533301413059235 2023-01-24 03:40:16.482956: step: 740/464, loss: 0.0711047425866127 2023-01-24 03:40:17.128473: step: 742/464, loss: 0.02632574737071991 2023-01-24 03:40:17.784093: step: 744/464, loss: 0.011705012992024422 2023-01-24 03:40:18.425651: step: 746/464, loss: 0.032878875732421875 2023-01-24 03:40:19.029008: step: 748/464, loss: 0.0006193576846271753 2023-01-24 03:40:19.696213: step: 750/464, loss: 0.10979987680912018 2023-01-24 03:40:20.353846: step: 752/464, loss: 0.04243628680706024 2023-01-24 03:40:20.990661: step: 754/464, loss: 0.05799168720841408 2023-01-24 03:40:21.530763: step: 756/464, loss: 0.019762758165597916 2023-01-24 03:40:22.145435: step: 758/464, loss: 0.013637124560773373 2023-01-24 03:40:22.743110: step: 760/464, loss: 0.014164343476295471 2023-01-24 03:40:23.428162: step: 762/464, loss: 0.02152133919298649 2023-01-24 03:40:24.068653: step: 764/464, loss: 0.029147664085030556 2023-01-24 03:40:24.656711: step: 766/464, loss: 0.0378149151802063 2023-01-24 03:40:25.307761: step: 768/464, loss: 0.07730662822723389 2023-01-24 03:40:25.906450: step: 770/464, loss: 0.005782376509159803 2023-01-24 03:40:26.507036: step: 772/464, loss: 0.027559412643313408 2023-01-24 03:40:27.117933: step: 774/464, loss: 0.02795860543847084 2023-01-24 03:40:27.786740: step: 776/464, loss: 0.02722206711769104 2023-01-24 03:40:28.436793: step: 778/464, loss: 0.033019471913576126 2023-01-24 03:40:29.063453: step: 780/464, loss: 0.02790030650794506 2023-01-24 03:40:29.698853: step: 782/464, loss: 0.2831510901451111 2023-01-24 03:40:30.299048: step: 784/464, loss: 0.03222643956542015 2023-01-24 03:40:30.898062: step: 786/464, loss: 0.21533460915088654 2023-01-24 03:40:31.494952: step: 788/464, loss: 0.06409087032079697 2023-01-24 03:40:32.200400: step: 790/464, loss: 0.004153969697654247 2023-01-24 03:40:32.818498: step: 792/464, loss: 0.12987728416919708 2023-01-24 03:40:33.447252: step: 794/464, loss: 0.10713863372802734 2023-01-24 03:40:34.079521: step: 796/464, loss: 0.2646576166152954 2023-01-24 03:40:34.722731: step: 798/464, loss: 0.011718453839421272 2023-01-24 03:40:35.327120: step: 800/464, loss: 0.020255332812666893 2023-01-24 03:40:35.969317: step: 802/464, loss: 0.025662105530500412 2023-01-24 03:40:36.651547: step: 804/464, loss: 0.011995234526693821 2023-01-24 03:40:37.338122: step: 806/464, loss: 0.054274559020996094 2023-01-24 03:40:37.845492: step: 808/464, loss: 0.044902458786964417 2023-01-24 03:40:38.464968: step: 810/464, loss: 0.04050503671169281 2023-01-24 03:40:39.038410: step: 812/464, loss: 0.006103217601776123 2023-01-24 03:40:39.725986: step: 814/464, loss: 0.06136700510978699 2023-01-24 03:40:40.357585: step: 816/464, loss: 0.014977425336837769 2023-01-24 03:40:40.988615: step: 818/464, loss: 0.028046803548932076 2023-01-24 03:40:41.687905: step: 820/464, loss: 0.3037695586681366 2023-01-24 03:40:42.326910: step: 822/464, loss: 0.7152191996574402 2023-01-24 03:40:42.988518: step: 824/464, loss: 0.1377854347229004 2023-01-24 03:40:43.661877: step: 826/464, loss: 0.07740423083305359 2023-01-24 03:40:44.292721: step: 828/464, loss: 0.011711751110851765 2023-01-24 03:40:44.931671: step: 830/464, loss: 0.0030323874671012163 2023-01-24 03:40:45.530073: step: 832/464, loss: 0.04836704954504967 2023-01-24 03:40:46.086976: step: 834/464, loss: 0.0130617655813694 2023-01-24 03:40:46.698545: step: 836/464, loss: 0.015250151976943016 2023-01-24 03:40:47.326524: step: 838/464, loss: 0.008788347244262695 2023-01-24 03:40:47.941310: step: 840/464, loss: 0.056761596351861954 2023-01-24 03:40:48.560914: step: 842/464, loss: 0.0005920501425862312 2023-01-24 03:40:49.135161: step: 844/464, loss: 0.4124147295951843 2023-01-24 03:40:49.684163: step: 846/464, loss: 0.009998713620007038 2023-01-24 03:40:50.336878: step: 848/464, loss: 0.057369478046894073 2023-01-24 03:40:50.950940: step: 850/464, loss: 0.2438540756702423 2023-01-24 03:40:51.668690: step: 852/464, loss: 0.02027585729956627 2023-01-24 03:40:52.229747: step: 854/464, loss: 0.017415670678019524 2023-01-24 03:40:52.950915: step: 856/464, loss: 0.0011836671037599444 2023-01-24 03:40:53.567010: step: 858/464, loss: 0.010898541659116745 2023-01-24 03:40:54.127610: step: 860/464, loss: 0.053165942430496216 2023-01-24 03:40:54.726530: step: 862/464, loss: 0.1877359002828598 2023-01-24 03:40:55.383530: step: 864/464, loss: 0.06324882805347443 2023-01-24 03:40:56.079627: step: 866/464, loss: 0.03348288685083389 2023-01-24 03:40:56.787546: step: 868/464, loss: 0.12745660543441772 2023-01-24 03:40:57.318860: step: 870/464, loss: 0.050736475735902786 2023-01-24 03:40:58.002479: step: 872/464, loss: 0.011317925527691841 2023-01-24 03:40:58.666724: step: 874/464, loss: 0.04550086334347725 2023-01-24 03:40:59.370312: step: 876/464, loss: 0.12162181735038757 2023-01-24 03:40:59.964557: step: 878/464, loss: 0.05083238705992699 2023-01-24 03:41:00.580160: step: 880/464, loss: 0.025550205260515213 2023-01-24 03:41:01.255135: step: 882/464, loss: 0.020611297339200974 2023-01-24 03:41:01.965130: step: 884/464, loss: 0.15503253042697906 2023-01-24 03:41:02.598130: step: 886/464, loss: 0.060559242963790894 2023-01-24 03:41:03.279884: step: 888/464, loss: 0.04286627843976021 2023-01-24 03:41:03.917543: step: 890/464, loss: 0.30708542466163635 2023-01-24 03:41:04.514547: step: 892/464, loss: 0.08181405812501907 2023-01-24 03:41:05.132739: step: 894/464, loss: 0.02677612379193306 2023-01-24 03:41:05.722136: step: 896/464, loss: 0.04505239799618721 2023-01-24 03:41:06.323098: step: 898/464, loss: 0.08125241100788116 2023-01-24 03:41:06.998586: step: 900/464, loss: 0.03448351100087166 2023-01-24 03:41:07.645910: step: 902/464, loss: 0.028867723420262337 2023-01-24 03:41:08.348273: step: 904/464, loss: 0.1862575262784958 2023-01-24 03:41:08.964609: step: 906/464, loss: 0.06476181745529175 2023-01-24 03:41:09.568014: step: 908/464, loss: 0.015432149171829224 2023-01-24 03:41:10.209090: step: 910/464, loss: 0.00814574584364891 2023-01-24 03:41:10.778439: step: 912/464, loss: 0.007910625077784061 2023-01-24 03:41:11.423554: step: 914/464, loss: 0.035133056342601776 2023-01-24 03:41:12.055274: step: 916/464, loss: 0.009897212497889996 2023-01-24 03:41:12.657407: step: 918/464, loss: 0.0011734727304428816 2023-01-24 03:41:13.282885: step: 920/464, loss: 0.024619368836283684 2023-01-24 03:41:13.877945: step: 922/464, loss: 0.023495979607105255 2023-01-24 03:41:14.464623: step: 924/464, loss: 0.03111870028078556 2023-01-24 03:41:15.125542: step: 926/464, loss: 0.06664323806762695 2023-01-24 03:41:15.761229: step: 928/464, loss: 0.05593162775039673 2023-01-24 03:41:16.222153: step: 930/464, loss: 0.020524058490991592 ================================================== Loss: 0.090 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3332451276881721, 'r': 0.3136424731182796, 'f1': 0.3231467904855002}, 'combined': 0.23810816141036856, 'epoch': 23} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3163709613629447, 'r': 0.28668389783118725, 'f1': 0.3007967151954638}, 'combined': 0.19637505758874838, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3348237491877843, 'r': 0.3259290006325111, 'f1': 0.33031650641025645}, 'combined': 0.24339110998650473, 'epoch': 23} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32594901203132587, 'r': 0.28966581933970637, 'f1': 0.3067381837528113}, 'combined': 0.20025394379717215, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34302432392710175, 'r': 0.31633742206560045, 'f1': 0.3291408122972783}, 'combined': 0.24252480906115242, 'epoch': 23} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33459519441581426, 'r': 0.2884653422445163, 'f1': 0.30982259858048744}, 'combined': 0.2022676032183493, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2691441441441441, 'r': 0.2845238095238095, 'f1': 0.27662037037037035}, 'combined': 0.18441358024691357, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3203125, 'r': 0.44565217391304346, 'f1': 0.3727272727272727}, 'combined': 0.18636363636363634, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:43:52.202270: step: 2/464, loss: 0.002368804533034563 2023-01-24 03:43:52.795370: step: 4/464, loss: 0.050723202526569366 2023-01-24 03:43:53.391463: step: 6/464, loss: 0.05543776601552963 2023-01-24 03:43:53.905394: step: 8/464, loss: 0.02150196023285389 2023-01-24 03:43:54.564701: step: 10/464, loss: 0.07164845615625381 2023-01-24 03:43:55.201161: step: 12/464, loss: 0.007598825264722109 2023-01-24 03:43:55.834753: step: 14/464, loss: 0.002054923679679632 2023-01-24 03:43:56.457129: step: 16/464, loss: 0.06091110408306122 2023-01-24 03:43:57.121129: step: 18/464, loss: 0.255815327167511 2023-01-24 03:43:57.761333: step: 20/464, loss: 0.00489705428481102 2023-01-24 03:43:58.396478: step: 22/464, loss: 0.006008407566696405 2023-01-24 03:43:59.006625: step: 24/464, loss: 0.022913167253136635 2023-01-24 03:43:59.625589: step: 26/464, loss: 0.5145890712738037 2023-01-24 03:44:00.306887: step: 28/464, loss: 0.04759300500154495 2023-01-24 03:44:01.033481: step: 30/464, loss: 0.02373594418168068 2023-01-24 03:44:01.638776: step: 32/464, loss: 0.0328323096036911 2023-01-24 03:44:02.247527: step: 34/464, loss: 0.07039014995098114 2023-01-24 03:44:02.900456: step: 36/464, loss: 0.009671245701611042 2023-01-24 03:44:03.477885: step: 38/464, loss: 0.013924448750913143 2023-01-24 03:44:04.126931: step: 40/464, loss: 0.16166502237319946 2023-01-24 03:44:04.772304: step: 42/464, loss: 0.08743336796760559 2023-01-24 03:44:05.329254: step: 44/464, loss: 0.017455067485570908 2023-01-24 03:44:06.009888: step: 46/464, loss: 0.05810796469449997 2023-01-24 03:44:06.611061: step: 48/464, loss: 0.3676496744155884 2023-01-24 03:44:07.192825: step: 50/464, loss: 0.02534743957221508 2023-01-24 03:44:07.815304: step: 52/464, loss: 0.024781718850135803 2023-01-24 03:44:08.457069: step: 54/464, loss: 0.04081379622220993 2023-01-24 03:44:09.075647: step: 56/464, loss: 0.013650135137140751 2023-01-24 03:44:09.656827: step: 58/464, loss: 0.003687690244987607 2023-01-24 03:44:10.300849: step: 60/464, loss: 0.05775059387087822 2023-01-24 03:44:10.957400: step: 62/464, loss: 0.009221754968166351 2023-01-24 03:44:11.557189: step: 64/464, loss: 0.011974228546023369 2023-01-24 03:44:12.163946: step: 66/464, loss: 0.004298684652894735 2023-01-24 03:44:12.895123: step: 68/464, loss: 0.02135203778743744 2023-01-24 03:44:13.527869: step: 70/464, loss: 0.0393330343067646 2023-01-24 03:44:14.177103: step: 72/464, loss: 0.02525671012699604 2023-01-24 03:44:14.736980: step: 74/464, loss: 0.009668112732470036 2023-01-24 03:44:15.281379: step: 76/464, loss: 0.01860128343105316 2023-01-24 03:44:15.950827: step: 78/464, loss: 0.33107542991638184 2023-01-24 03:44:16.568433: step: 80/464, loss: 0.030080964788794518 2023-01-24 03:44:17.246924: step: 82/464, loss: 0.07465073466300964 2023-01-24 03:44:17.997204: step: 84/464, loss: 0.018534686416387558 2023-01-24 03:44:18.585971: step: 86/464, loss: 0.030880525708198547 2023-01-24 03:44:19.212601: step: 88/464, loss: 0.022276412695646286 2023-01-24 03:44:19.834868: step: 90/464, loss: 0.0007963493699207902 2023-01-24 03:44:20.405204: step: 92/464, loss: 0.06814160197973251 2023-01-24 03:44:21.006946: step: 94/464, loss: 0.3641512989997864 2023-01-24 03:44:21.569411: step: 96/464, loss: 0.05214928463101387 2023-01-24 03:44:22.123477: step: 98/464, loss: 0.016802603378891945 2023-01-24 03:44:22.746192: step: 100/464, loss: 0.14683029055595398 2023-01-24 03:44:23.437470: step: 102/464, loss: 0.02423812262713909 2023-01-24 03:44:24.095365: step: 104/464, loss: 0.014936227351427078 2023-01-24 03:44:24.700077: step: 106/464, loss: 0.04058907553553581 2023-01-24 03:44:25.314732: step: 108/464, loss: 0.10261441022157669 2023-01-24 03:44:25.906669: step: 110/464, loss: 0.035076435655355453 2023-01-24 03:44:26.522847: step: 112/464, loss: 0.37915679812431335 2023-01-24 03:44:27.224566: step: 114/464, loss: 0.34553590416908264 2023-01-24 03:44:27.815037: step: 116/464, loss: 0.019644953310489655 2023-01-24 03:44:28.437775: step: 118/464, loss: 0.047253821045160294 2023-01-24 03:44:29.098348: step: 120/464, loss: 0.018846411257982254 2023-01-24 03:44:29.689039: step: 122/464, loss: 0.04492904245853424 2023-01-24 03:44:30.309501: step: 124/464, loss: 0.007162538822740316 2023-01-24 03:44:30.947843: step: 126/464, loss: 0.02271394059062004 2023-01-24 03:44:31.589801: step: 128/464, loss: 0.014112776145339012 2023-01-24 03:44:32.221459: step: 130/464, loss: 0.010637849569320679 2023-01-24 03:44:32.892839: step: 132/464, loss: 0.04476896673440933 2023-01-24 03:44:33.453652: step: 134/464, loss: 0.0009231481235474348 2023-01-24 03:44:34.034854: step: 136/464, loss: 0.022022517397999763 2023-01-24 03:44:34.678209: step: 138/464, loss: 0.767135739326477 2023-01-24 03:44:35.292973: step: 140/464, loss: 0.005619126372039318 2023-01-24 03:44:35.910570: step: 142/464, loss: 0.02571299485862255 2023-01-24 03:44:36.541012: step: 144/464, loss: 0.09120732545852661 2023-01-24 03:44:37.187535: step: 146/464, loss: 0.016840722411870956 2023-01-24 03:44:37.811063: step: 148/464, loss: 0.012031922116875648 2023-01-24 03:44:38.541387: step: 150/464, loss: 0.03083711303770542 2023-01-24 03:44:39.230402: step: 152/464, loss: 0.026089321821928024 2023-01-24 03:44:39.869979: step: 154/464, loss: 0.10427644103765488 2023-01-24 03:44:40.481857: step: 156/464, loss: 0.04840332269668579 2023-01-24 03:44:41.124642: step: 158/464, loss: 0.06633774936199188 2023-01-24 03:44:41.718964: step: 160/464, loss: 0.02363380417227745 2023-01-24 03:44:42.328613: step: 162/464, loss: 0.012433268129825592 2023-01-24 03:44:42.954341: step: 164/464, loss: 0.0023826195392757654 2023-01-24 03:44:43.678737: step: 166/464, loss: 0.8820675015449524 2023-01-24 03:44:44.286146: step: 168/464, loss: 0.010968861170113087 2023-01-24 03:44:44.966834: step: 170/464, loss: 0.049012307077646255 2023-01-24 03:44:45.549419: step: 172/464, loss: 0.0016267724568024278 2023-01-24 03:44:46.179500: step: 174/464, loss: 0.08859127759933472 2023-01-24 03:44:46.868995: step: 176/464, loss: 0.08113919198513031 2023-01-24 03:44:47.476194: step: 178/464, loss: 0.2307984083890915 2023-01-24 03:44:48.103189: step: 180/464, loss: 0.017306754365563393 2023-01-24 03:44:48.660718: step: 182/464, loss: 0.03797895461320877 2023-01-24 03:44:49.297418: step: 184/464, loss: 0.034086208790540695 2023-01-24 03:44:49.933148: step: 186/464, loss: 0.01586483046412468 2023-01-24 03:44:50.663020: step: 188/464, loss: 0.5600696802139282 2023-01-24 03:44:51.283725: step: 190/464, loss: 0.048893045634031296 2023-01-24 03:44:51.840157: step: 192/464, loss: 0.0026721959002316 2023-01-24 03:44:52.479843: step: 194/464, loss: 0.05101538822054863 2023-01-24 03:44:53.128375: step: 196/464, loss: 0.026031123474240303 2023-01-24 03:44:53.871401: step: 198/464, loss: 0.05834761634469032 2023-01-24 03:44:54.481003: step: 200/464, loss: 0.008085883222520351 2023-01-24 03:44:55.134348: step: 202/464, loss: 0.0739692896604538 2023-01-24 03:44:55.754031: step: 204/464, loss: 0.00796657893806696 2023-01-24 03:44:56.403160: step: 206/464, loss: 0.009787347167730331 2023-01-24 03:44:57.053764: step: 208/464, loss: 0.05727081000804901 2023-01-24 03:44:57.717971: step: 210/464, loss: 0.03274077922105789 2023-01-24 03:44:58.294117: step: 212/464, loss: 0.05570128932595253 2023-01-24 03:44:58.974826: step: 214/464, loss: 0.8114643096923828 2023-01-24 03:44:59.514809: step: 216/464, loss: 0.0107719199731946 2023-01-24 03:45:00.124960: step: 218/464, loss: 0.006827626843005419 2023-01-24 03:45:00.785189: step: 220/464, loss: 0.006962248589843512 2023-01-24 03:45:01.353819: step: 222/464, loss: 0.00787719339132309 2023-01-24 03:45:01.982668: step: 224/464, loss: 0.049737539142370224 2023-01-24 03:45:02.656403: step: 226/464, loss: 0.06598670035600662 2023-01-24 03:45:03.246174: step: 228/464, loss: 0.021805718541145325 2023-01-24 03:45:03.862484: step: 230/464, loss: 0.04670698195695877 2023-01-24 03:45:04.438495: step: 232/464, loss: 0.00791192427277565 2023-01-24 03:45:05.071289: step: 234/464, loss: 0.003434679936617613 2023-01-24 03:45:05.761668: step: 236/464, loss: 0.07541213184595108 2023-01-24 03:45:06.491667: step: 238/464, loss: 0.037969619035720825 2023-01-24 03:45:07.219193: step: 240/464, loss: 0.06740114837884903 2023-01-24 03:45:07.821181: step: 242/464, loss: 0.030239589512348175 2023-01-24 03:45:08.457836: step: 244/464, loss: 0.025601763278245926 2023-01-24 03:45:09.042857: step: 246/464, loss: 0.017570916563272476 2023-01-24 03:45:09.647105: step: 248/464, loss: 0.10075247287750244 2023-01-24 03:45:10.262085: step: 250/464, loss: 0.030370311811566353 2023-01-24 03:45:10.881960: step: 252/464, loss: 0.6089963912963867 2023-01-24 03:45:11.471471: step: 254/464, loss: 0.02403067983686924 2023-01-24 03:45:12.091616: step: 256/464, loss: 0.09271278232336044 2023-01-24 03:45:12.734947: step: 258/464, loss: 0.07433987408876419 2023-01-24 03:45:13.460723: step: 260/464, loss: 0.08124585449695587 2023-01-24 03:45:14.153132: step: 262/464, loss: 0.011426866054534912 2023-01-24 03:45:14.835187: step: 264/464, loss: 0.289348304271698 2023-01-24 03:45:15.454759: step: 266/464, loss: 0.025414174422621727 2023-01-24 03:45:16.103940: step: 268/464, loss: 0.5135270953178406 2023-01-24 03:45:16.737210: step: 270/464, loss: 0.008043395355343819 2023-01-24 03:45:17.395045: step: 272/464, loss: 0.005079520400613546 2023-01-24 03:45:17.995385: step: 274/464, loss: 0.22397971153259277 2023-01-24 03:45:18.618464: step: 276/464, loss: 0.019696544855833054 2023-01-24 03:45:19.250005: step: 278/464, loss: 0.004111408721655607 2023-01-24 03:45:19.957459: step: 280/464, loss: 0.08541527390480042 2023-01-24 03:45:20.609776: step: 282/464, loss: 0.06891372799873352 2023-01-24 03:45:21.216014: step: 284/464, loss: 0.024619489908218384 2023-01-24 03:45:21.843957: step: 286/464, loss: 0.20153610408306122 2023-01-24 03:45:22.575516: step: 288/464, loss: 0.057165857404470444 2023-01-24 03:45:23.292956: step: 290/464, loss: 0.007611685432493687 2023-01-24 03:45:23.912712: step: 292/464, loss: 0.05858425796031952 2023-01-24 03:45:24.522398: step: 294/464, loss: 0.03117351420223713 2023-01-24 03:45:25.105477: step: 296/464, loss: 0.01491332147270441 2023-01-24 03:45:25.720988: step: 298/464, loss: 0.00705760857090354 2023-01-24 03:45:26.345941: step: 300/464, loss: 0.001687489217147231 2023-01-24 03:45:26.963731: step: 302/464, loss: 0.014096073806285858 2023-01-24 03:45:27.597646: step: 304/464, loss: 0.027246735990047455 2023-01-24 03:45:28.247638: step: 306/464, loss: 0.009920596145093441 2023-01-24 03:45:28.925349: step: 308/464, loss: 0.2069355994462967 2023-01-24 03:45:29.536461: step: 310/464, loss: 0.09881345927715302 2023-01-24 03:45:30.178378: step: 312/464, loss: 0.037157244980335236 2023-01-24 03:45:30.799925: step: 314/464, loss: 0.01551311369985342 2023-01-24 03:45:31.430562: step: 316/464, loss: 0.014572424814105034 2023-01-24 03:45:32.070791: step: 318/464, loss: 0.022611740976572037 2023-01-24 03:45:32.714208: step: 320/464, loss: 0.07246682792901993 2023-01-24 03:45:33.345271: step: 322/464, loss: 0.05923476070165634 2023-01-24 03:45:33.969325: step: 324/464, loss: 0.020231883972883224 2023-01-24 03:45:34.579887: step: 326/464, loss: 0.4922608733177185 2023-01-24 03:45:35.205073: step: 328/464, loss: 0.09566665440797806 2023-01-24 03:45:35.827748: step: 330/464, loss: 0.15764565765857697 2023-01-24 03:45:36.421100: step: 332/464, loss: 0.003560718148946762 2023-01-24 03:45:37.032483: step: 334/464, loss: 0.036379773169755936 2023-01-24 03:45:37.608697: step: 336/464, loss: 0.0053924997337162495 2023-01-24 03:45:38.224892: step: 338/464, loss: 0.06640272587537766 2023-01-24 03:45:38.778405: step: 340/464, loss: 0.012376081198453903 2023-01-24 03:45:39.400067: step: 342/464, loss: 0.1460789442062378 2023-01-24 03:45:40.053396: step: 344/464, loss: 0.045234907418489456 2023-01-24 03:45:40.646515: step: 346/464, loss: 0.03875354677438736 2023-01-24 03:45:41.241124: step: 348/464, loss: 0.014639015309512615 2023-01-24 03:45:41.894116: step: 350/464, loss: 0.03967462480068207 2023-01-24 03:45:42.516746: step: 352/464, loss: 0.00846176128834486 2023-01-24 03:45:43.212552: step: 354/464, loss: 0.08720821887254715 2023-01-24 03:45:43.883484: step: 356/464, loss: 0.05173289775848389 2023-01-24 03:45:44.503484: step: 358/464, loss: 0.12688124179840088 2023-01-24 03:45:45.137184: step: 360/464, loss: 0.07991411536931992 2023-01-24 03:45:45.717401: step: 362/464, loss: 0.004395974334329367 2023-01-24 03:45:46.350751: step: 364/464, loss: 0.0535275898873806 2023-01-24 03:45:46.962887: step: 366/464, loss: 0.032049063593149185 2023-01-24 03:45:47.589291: step: 368/464, loss: 0.04657631739974022 2023-01-24 03:45:48.232871: step: 370/464, loss: 0.22765032947063446 2023-01-24 03:45:48.871428: step: 372/464, loss: 0.1500069946050644 2023-01-24 03:45:49.425848: step: 374/464, loss: 0.2392122894525528 2023-01-24 03:45:50.020675: step: 376/464, loss: 0.0341184064745903 2023-01-24 03:45:50.638568: step: 378/464, loss: 0.14299477636814117 2023-01-24 03:45:51.346499: step: 380/464, loss: 0.030992744490504265 2023-01-24 03:45:51.966328: step: 382/464, loss: 0.07625354826450348 2023-01-24 03:45:52.566612: step: 384/464, loss: 0.004008305259048939 2023-01-24 03:45:53.252846: step: 386/464, loss: 0.054362375289201736 2023-01-24 03:45:53.938876: step: 388/464, loss: 0.029171712696552277 2023-01-24 03:45:54.586766: step: 390/464, loss: 0.15573173761367798 2023-01-24 03:45:55.241365: step: 392/464, loss: 0.002424610545858741 2023-01-24 03:45:55.861695: step: 394/464, loss: 0.09689239412546158 2023-01-24 03:45:56.432325: step: 396/464, loss: 0.012249691411852837 2023-01-24 03:45:57.071298: step: 398/464, loss: 0.07477396726608276 2023-01-24 03:45:57.752842: step: 400/464, loss: 0.0323805995285511 2023-01-24 03:45:58.348132: step: 402/464, loss: 0.03978384658694267 2023-01-24 03:45:58.923293: step: 404/464, loss: 0.10091626644134521 2023-01-24 03:45:59.559492: step: 406/464, loss: 0.022926170378923416 2023-01-24 03:46:00.141383: step: 408/464, loss: 0.005678442306816578 2023-01-24 03:46:00.800614: step: 410/464, loss: 0.03556925430893898 2023-01-24 03:46:01.375954: step: 412/464, loss: 0.07874489575624466 2023-01-24 03:46:02.037650: step: 414/464, loss: 0.008459270000457764 2023-01-24 03:46:02.630961: step: 416/464, loss: 0.04491732642054558 2023-01-24 03:46:03.336342: step: 418/464, loss: 0.009785176254808903 2023-01-24 03:46:04.014591: step: 420/464, loss: 0.022729527205228806 2023-01-24 03:46:04.654063: step: 422/464, loss: 0.03167550638318062 2023-01-24 03:46:05.271599: step: 424/464, loss: 0.012540756724774837 2023-01-24 03:46:05.904588: step: 426/464, loss: 0.13927604258060455 2023-01-24 03:46:06.494062: step: 428/464, loss: 0.020264718681573868 2023-01-24 03:46:07.076172: step: 430/464, loss: 0.0176665261387825 2023-01-24 03:46:07.738903: step: 432/464, loss: 0.050978146493434906 2023-01-24 03:46:08.400094: step: 434/464, loss: 0.03432874381542206 2023-01-24 03:46:08.996509: step: 436/464, loss: 0.2599300444126129 2023-01-24 03:46:09.567399: step: 438/464, loss: 0.03393875062465668 2023-01-24 03:46:10.159328: step: 440/464, loss: 0.010586414486169815 2023-01-24 03:46:10.767670: step: 442/464, loss: 0.09786012023687363 2023-01-24 03:46:11.321474: step: 444/464, loss: 0.020368900150060654 2023-01-24 03:46:11.953978: step: 446/464, loss: 0.04849978908896446 2023-01-24 03:46:12.568013: step: 448/464, loss: 0.06933494657278061 2023-01-24 03:46:13.199758: step: 450/464, loss: 0.0121515654027462 2023-01-24 03:46:13.816886: step: 452/464, loss: 0.2773245573043823 2023-01-24 03:46:14.494699: step: 454/464, loss: 0.021812712773680687 2023-01-24 03:46:15.087437: step: 456/464, loss: 0.04217422008514404 2023-01-24 03:46:15.717704: step: 458/464, loss: 0.09407848864793777 2023-01-24 03:46:16.332067: step: 460/464, loss: 0.0038906384725123644 2023-01-24 03:46:17.036022: step: 462/464, loss: 0.023373616859316826 2023-01-24 03:46:17.695768: step: 464/464, loss: 0.0807759091258049 2023-01-24 03:46:18.305785: step: 466/464, loss: 0.0077609531581401825 2023-01-24 03:46:18.996980: step: 468/464, loss: 0.14945146441459656 2023-01-24 03:46:19.733931: step: 470/464, loss: 0.03414865955710411 2023-01-24 03:46:20.353034: step: 472/464, loss: 0.04370049387216568 2023-01-24 03:46:20.953213: step: 474/464, loss: 0.02193155698478222 2023-01-24 03:46:21.570731: step: 476/464, loss: 0.039664387702941895 2023-01-24 03:46:22.242488: step: 478/464, loss: 0.014600432477891445 2023-01-24 03:46:22.858056: step: 480/464, loss: 0.02662612497806549 2023-01-24 03:46:23.474891: step: 482/464, loss: 0.00013828226656187326 2023-01-24 03:46:24.096673: step: 484/464, loss: 0.43025022745132446 2023-01-24 03:46:24.664574: step: 486/464, loss: 0.019535545259714127 2023-01-24 03:46:25.203101: step: 488/464, loss: 0.010172214359045029 2023-01-24 03:46:25.773600: step: 490/464, loss: 0.017740504816174507 2023-01-24 03:46:26.417017: step: 492/464, loss: 0.007563222665339708 2023-01-24 03:46:27.037980: step: 494/464, loss: 0.02568977326154709 2023-01-24 03:46:27.663843: step: 496/464, loss: 0.020611144602298737 2023-01-24 03:46:28.277836: step: 498/464, loss: 0.1862446367740631 2023-01-24 03:46:28.890622: step: 500/464, loss: 0.039029985666275024 2023-01-24 03:46:29.496367: step: 502/464, loss: 0.09535839408636093 2023-01-24 03:46:30.090282: step: 504/464, loss: 0.05537525564432144 2023-01-24 03:46:30.699109: step: 506/464, loss: 0.07750478386878967 2023-01-24 03:46:31.359523: step: 508/464, loss: 0.04838641732931137 2023-01-24 03:46:32.013297: step: 510/464, loss: 0.04079439863562584 2023-01-24 03:46:32.647530: step: 512/464, loss: 0.0039293644949793816 2023-01-24 03:46:33.262844: step: 514/464, loss: 0.0030393495690077543 2023-01-24 03:46:33.916797: step: 516/464, loss: 0.06650812178850174 2023-01-24 03:46:34.547427: step: 518/464, loss: 0.01758035458624363 2023-01-24 03:46:35.179115: step: 520/464, loss: 0.025199897587299347 2023-01-24 03:46:35.773617: step: 522/464, loss: 0.03755476325750351 2023-01-24 03:46:36.443650: step: 524/464, loss: 0.02975156530737877 2023-01-24 03:46:37.077712: step: 526/464, loss: 0.009417744353413582 2023-01-24 03:46:37.609453: step: 528/464, loss: 0.00026950312894769013 2023-01-24 03:46:38.214614: step: 530/464, loss: 0.044083017855882645 2023-01-24 03:46:38.810286: step: 532/464, loss: 0.06772328168153763 2023-01-24 03:46:39.447558: step: 534/464, loss: 0.024661093950271606 2023-01-24 03:46:40.070025: step: 536/464, loss: 0.12543563544750214 2023-01-24 03:46:40.689013: step: 538/464, loss: 0.016915543004870415 2023-01-24 03:46:41.329672: step: 540/464, loss: 0.005815993528813124 2023-01-24 03:46:41.927062: step: 542/464, loss: 0.023189205676317215 2023-01-24 03:46:42.558961: step: 544/464, loss: 0.02398007921874523 2023-01-24 03:46:43.254521: step: 546/464, loss: 0.04083281382918358 2023-01-24 03:46:43.901403: step: 548/464, loss: 0.0018307537538930774 2023-01-24 03:46:44.544188: step: 550/464, loss: 0.003622284159064293 2023-01-24 03:46:45.163115: step: 552/464, loss: 0.0152712631970644 2023-01-24 03:46:45.815666: step: 554/464, loss: 0.01601882465183735 2023-01-24 03:46:46.475972: step: 556/464, loss: 0.10541737824678421 2023-01-24 03:46:47.063699: step: 558/464, loss: 0.45145365595817566 2023-01-24 03:46:47.710725: step: 560/464, loss: 0.007181126624345779 2023-01-24 03:46:48.341028: step: 562/464, loss: 0.008781393989920616 2023-01-24 03:46:49.024943: step: 564/464, loss: 0.10936623066663742 2023-01-24 03:46:49.541713: step: 566/464, loss: 0.13772058486938477 2023-01-24 03:46:50.225270: step: 568/464, loss: 0.00794376153498888 2023-01-24 03:46:50.910813: step: 570/464, loss: 0.020956581458449364 2023-01-24 03:46:51.544418: step: 572/464, loss: 0.02092120237648487 2023-01-24 03:46:52.195116: step: 574/464, loss: 0.07100170105695724 2023-01-24 03:46:52.814299: step: 576/464, loss: 0.25848352909088135 2023-01-24 03:46:53.400102: step: 578/464, loss: 0.07166639715433121 2023-01-24 03:46:53.964203: step: 580/464, loss: 0.025764163583517075 2023-01-24 03:46:54.594365: step: 582/464, loss: 0.46929800510406494 2023-01-24 03:46:55.213309: step: 584/464, loss: 0.02144567295908928 2023-01-24 03:46:55.797295: step: 586/464, loss: 0.003281695768237114 2023-01-24 03:46:56.399318: step: 588/464, loss: 0.03542201966047287 2023-01-24 03:46:57.025919: step: 590/464, loss: 0.018880341202020645 2023-01-24 03:46:57.678603: step: 592/464, loss: 0.04871486499905586 2023-01-24 03:46:58.244297: step: 594/464, loss: 0.0028785879258066416 2023-01-24 03:46:58.808573: step: 596/464, loss: 0.007425788324326277 2023-01-24 03:46:59.391123: step: 598/464, loss: 0.008637347258627415 2023-01-24 03:47:00.009508: step: 600/464, loss: 0.022225894033908844 2023-01-24 03:47:00.624053: step: 602/464, loss: 0.04577256366610527 2023-01-24 03:47:01.311314: step: 604/464, loss: 0.03168138116598129 2023-01-24 03:47:01.920193: step: 606/464, loss: 0.04955857992172241 2023-01-24 03:47:02.519013: step: 608/464, loss: 0.04444937780499458 2023-01-24 03:47:03.225118: step: 610/464, loss: 0.061654992401599884 2023-01-24 03:47:03.835588: step: 612/464, loss: 0.12426318973302841 2023-01-24 03:47:04.448146: step: 614/464, loss: 0.02370787225663662 2023-01-24 03:47:05.062785: step: 616/464, loss: 0.029241489246487617 2023-01-24 03:47:05.676440: step: 618/464, loss: 0.016876980662345886 2023-01-24 03:47:06.311809: step: 620/464, loss: 0.12833718955516815 2023-01-24 03:47:07.020604: step: 622/464, loss: 0.009948083199560642 2023-01-24 03:47:07.627692: step: 624/464, loss: 0.0030475077219307423 2023-01-24 03:47:08.189765: step: 626/464, loss: 0.028402313590049744 2023-01-24 03:47:08.843315: step: 628/464, loss: 0.003545090788975358 2023-01-24 03:47:09.446301: step: 630/464, loss: 0.016871029511094093 2023-01-24 03:47:10.084646: step: 632/464, loss: 0.013180889189243317 2023-01-24 03:47:10.647133: step: 634/464, loss: 0.012245554476976395 2023-01-24 03:47:11.279555: step: 636/464, loss: 0.2753337025642395 2023-01-24 03:47:11.926832: step: 638/464, loss: 0.0011746642412617803 2023-01-24 03:47:12.500504: step: 640/464, loss: 0.014507484622299671 2023-01-24 03:47:13.062581: step: 642/464, loss: 0.030890408903360367 2023-01-24 03:47:13.621141: step: 644/464, loss: 0.011430202051997185 2023-01-24 03:47:14.248370: step: 646/464, loss: 0.01833469420671463 2023-01-24 03:47:14.831348: step: 648/464, loss: 0.017186565324664116 2023-01-24 03:47:15.399594: step: 650/464, loss: 0.008223423734307289 2023-01-24 03:47:16.018246: step: 652/464, loss: 0.13581836223602295 2023-01-24 03:47:16.630787: step: 654/464, loss: 0.07392071187496185 2023-01-24 03:47:17.297257: step: 656/464, loss: 0.02771533839404583 2023-01-24 03:47:17.986716: step: 658/464, loss: 0.02668609656393528 2023-01-24 03:47:18.594791: step: 660/464, loss: 0.004450938664376736 2023-01-24 03:47:19.236237: step: 662/464, loss: 0.18493559956550598 2023-01-24 03:47:19.807078: step: 664/464, loss: 0.004563276655972004 2023-01-24 03:47:20.411497: step: 666/464, loss: 0.03617537021636963 2023-01-24 03:47:21.024636: step: 668/464, loss: 0.017863882705569267 2023-01-24 03:47:21.699598: step: 670/464, loss: 0.015972094610333443 2023-01-24 03:47:22.261830: step: 672/464, loss: 0.017575904726982117 2023-01-24 03:47:22.844606: step: 674/464, loss: 0.05038394406437874 2023-01-24 03:47:23.478367: step: 676/464, loss: 0.0042044371366500854 2023-01-24 03:47:24.101785: step: 678/464, loss: 0.01900422014296055 2023-01-24 03:47:24.705782: step: 680/464, loss: 0.004950101021677256 2023-01-24 03:47:25.343847: step: 682/464, loss: 0.04155075177550316 2023-01-24 03:47:26.027521: step: 684/464, loss: 0.02320196107029915 2023-01-24 03:47:26.692685: step: 686/464, loss: 0.016851622611284256 2023-01-24 03:47:27.307147: step: 688/464, loss: 0.025884533300995827 2023-01-24 03:47:27.932149: step: 690/464, loss: 0.0692359209060669 2023-01-24 03:47:28.535516: step: 692/464, loss: 0.01677657850086689 2023-01-24 03:47:29.151805: step: 694/464, loss: 0.02548445202410221 2023-01-24 03:47:29.779660: step: 696/464, loss: 0.03491387888789177 2023-01-24 03:47:30.430173: step: 698/464, loss: 0.04632949456572533 2023-01-24 03:47:31.117739: step: 700/464, loss: 0.12078473716974258 2023-01-24 03:47:31.659273: step: 702/464, loss: 0.016643565148115158 2023-01-24 03:47:32.279028: step: 704/464, loss: 0.06165686622262001 2023-01-24 03:47:32.809115: step: 706/464, loss: 0.01805802620947361 2023-01-24 03:47:33.437355: step: 708/464, loss: 0.06611974537372589 2023-01-24 03:47:34.039818: step: 710/464, loss: 0.047510262578725815 2023-01-24 03:47:34.701111: step: 712/464, loss: 0.07157109677791595 2023-01-24 03:47:35.331415: step: 714/464, loss: 0.06719717383384705 2023-01-24 03:47:35.997065: step: 716/464, loss: 0.2461855709552765 2023-01-24 03:47:36.603147: step: 718/464, loss: 0.018671412020921707 2023-01-24 03:47:37.237560: step: 720/464, loss: 0.02307756617665291 2023-01-24 03:47:37.895318: step: 722/464, loss: 0.016722489148378372 2023-01-24 03:47:38.499025: step: 724/464, loss: 1.2357501983642578 2023-01-24 03:47:39.138116: step: 726/464, loss: 0.013528386130928993 2023-01-24 03:47:39.733107: step: 728/464, loss: 0.12979376316070557 2023-01-24 03:47:40.311657: step: 730/464, loss: 0.05797789990901947 2023-01-24 03:47:40.941307: step: 732/464, loss: 0.057885877788066864 2023-01-24 03:47:41.515965: step: 734/464, loss: 0.004529331810772419 2023-01-24 03:47:42.158796: step: 736/464, loss: 0.018771404400467873 2023-01-24 03:47:42.771018: step: 738/464, loss: 0.015658782795071602 2023-01-24 03:47:43.466902: step: 740/464, loss: 0.01374346949160099 2023-01-24 03:47:44.070340: step: 742/464, loss: 0.005287653300911188 2023-01-24 03:47:44.641210: step: 744/464, loss: 0.050519898533821106 2023-01-24 03:47:45.207540: step: 746/464, loss: 0.010387287475168705 2023-01-24 03:47:45.790551: step: 748/464, loss: 0.021677561104297638 2023-01-24 03:47:46.385883: step: 750/464, loss: 0.009915663860738277 2023-01-24 03:47:47.035912: step: 752/464, loss: 0.015420086681842804 2023-01-24 03:47:47.691289: step: 754/464, loss: 0.11248577386140823 2023-01-24 03:47:48.310014: step: 756/464, loss: 0.039718352258205414 2023-01-24 03:47:48.913331: step: 758/464, loss: 0.006666975561529398 2023-01-24 03:47:49.483382: step: 760/464, loss: 0.04726141691207886 2023-01-24 03:47:50.126027: step: 762/464, loss: 0.0873069241642952 2023-01-24 03:47:50.731570: step: 764/464, loss: 0.061604391783475876 2023-01-24 03:47:51.465426: step: 766/464, loss: 0.01452117133885622 2023-01-24 03:47:52.142880: step: 768/464, loss: 0.02040213905274868 2023-01-24 03:47:52.767465: step: 770/464, loss: 0.08517874032258987 2023-01-24 03:47:53.387620: step: 772/464, loss: 0.011451417580246925 2023-01-24 03:47:54.073716: step: 774/464, loss: 0.00817059725522995 2023-01-24 03:47:54.696809: step: 776/464, loss: 0.020757826045155525 2023-01-24 03:47:55.316808: step: 778/464, loss: 0.01110632810741663 2023-01-24 03:47:55.906146: step: 780/464, loss: 0.126845121383667 2023-01-24 03:47:56.547603: step: 782/464, loss: 0.07332803308963776 2023-01-24 03:47:57.138146: step: 784/464, loss: 0.022527750581502914 2023-01-24 03:47:57.764217: step: 786/464, loss: 0.05135509744286537 2023-01-24 03:47:58.353092: step: 788/464, loss: 0.1043723076581955 2023-01-24 03:47:58.966964: step: 790/464, loss: 0.09155040234327316 2023-01-24 03:47:59.617308: step: 792/464, loss: 0.021373983472585678 2023-01-24 03:48:00.204102: step: 794/464, loss: 0.00931843277066946 2023-01-24 03:48:00.804968: step: 796/464, loss: 0.003143107518553734 2023-01-24 03:48:01.409391: step: 798/464, loss: 0.07849517464637756 2023-01-24 03:48:01.986206: step: 800/464, loss: 0.018494488671422005 2023-01-24 03:48:02.577021: step: 802/464, loss: 0.010890285484492779 2023-01-24 03:48:03.406365: step: 804/464, loss: 0.0023902691900730133 2023-01-24 03:48:04.027323: step: 806/464, loss: 0.097561776638031 2023-01-24 03:48:04.610661: step: 808/464, loss: 0.047068167477846146 2023-01-24 03:48:05.250572: step: 810/464, loss: 0.04132939130067825 2023-01-24 03:48:05.895163: step: 812/464, loss: 0.01832829788327217 2023-01-24 03:48:06.457557: step: 814/464, loss: 0.018750792369246483 2023-01-24 03:48:07.072874: step: 816/464, loss: 0.031217556446790695 2023-01-24 03:48:07.664153: step: 818/464, loss: 0.03930818289518356 2023-01-24 03:48:08.264458: step: 820/464, loss: 0.06962235271930695 2023-01-24 03:48:08.891372: step: 822/464, loss: 0.25037118792533875 2023-01-24 03:48:09.531019: step: 824/464, loss: 0.018033863976597786 2023-01-24 03:48:10.148033: step: 826/464, loss: 0.03868522867560387 2023-01-24 03:48:10.771397: step: 828/464, loss: 0.04825150594115257 2023-01-24 03:48:11.382480: step: 830/464, loss: 0.012502568773925304 2023-01-24 03:48:12.018396: step: 832/464, loss: 0.01429159939289093 2023-01-24 03:48:12.619078: step: 834/464, loss: 0.017824998125433922 2023-01-24 03:48:13.299275: step: 836/464, loss: 0.013970798812806606 2023-01-24 03:48:13.936669: step: 838/464, loss: 0.09116805344820023 2023-01-24 03:48:14.588380: step: 840/464, loss: 0.03165208920836449 2023-01-24 03:48:15.169708: step: 842/464, loss: 0.6362419128417969 2023-01-24 03:48:15.817638: step: 844/464, loss: 0.22480525076389313 2023-01-24 03:48:16.441099: step: 846/464, loss: 0.060532331466674805 2023-01-24 03:48:17.031205: step: 848/464, loss: 0.011441945098340511 2023-01-24 03:48:17.686990: step: 850/464, loss: 0.1043558344244957 2023-01-24 03:48:18.295739: step: 852/464, loss: 0.04051947966217995 2023-01-24 03:48:18.860015: step: 854/464, loss: 0.05182785913348198 2023-01-24 03:48:19.489734: step: 856/464, loss: 0.04946213960647583 2023-01-24 03:48:20.138754: step: 858/464, loss: 0.04631351679563522 2023-01-24 03:48:20.841879: step: 860/464, loss: 0.002893391763791442 2023-01-24 03:48:21.441005: step: 862/464, loss: 0.02671853080391884 2023-01-24 03:48:22.031280: step: 864/464, loss: 0.0488939993083477 2023-01-24 03:48:22.650406: step: 866/464, loss: 0.010563675314188004 2023-01-24 03:48:23.282798: step: 868/464, loss: 0.003323337761685252 2023-01-24 03:48:23.895148: step: 870/464, loss: 0.012685799039900303 2023-01-24 03:48:24.440286: step: 872/464, loss: 0.00802539847791195 2023-01-24 03:48:25.048806: step: 874/464, loss: 0.0006253144238144159 2023-01-24 03:48:25.696950: step: 876/464, loss: 0.03252573311328888 2023-01-24 03:48:26.335364: step: 878/464, loss: 0.01457914337515831 2023-01-24 03:48:27.028010: step: 880/464, loss: 0.04448043555021286 2023-01-24 03:48:27.641283: step: 882/464, loss: 0.6970482468605042 2023-01-24 03:48:28.225589: step: 884/464, loss: 0.04112203046679497 2023-01-24 03:48:28.805136: step: 886/464, loss: 0.1385151445865631 2023-01-24 03:48:29.385097: step: 888/464, loss: 0.009903647005558014 2023-01-24 03:48:30.000011: step: 890/464, loss: 0.08298555761575699 2023-01-24 03:48:30.583790: step: 892/464, loss: 0.008226878941059113 2023-01-24 03:48:31.149675: step: 894/464, loss: 0.039297979325056076 2023-01-24 03:48:31.755277: step: 896/464, loss: 0.02497115731239319 2023-01-24 03:48:32.382784: step: 898/464, loss: 0.03017505444586277 2023-01-24 03:48:33.074439: step: 900/464, loss: 0.0020218086428940296 2023-01-24 03:48:33.712609: step: 902/464, loss: 0.5994249582290649 2023-01-24 03:48:34.294772: step: 904/464, loss: 0.07182029634714127 2023-01-24 03:48:34.943131: step: 906/464, loss: 0.032601069658994675 2023-01-24 03:48:35.573924: step: 908/464, loss: 0.03782958909869194 2023-01-24 03:48:36.181480: step: 910/464, loss: 0.0879812017083168 2023-01-24 03:48:36.856518: step: 912/464, loss: 0.014246356673538685 2023-01-24 03:48:37.439373: step: 914/464, loss: 0.0419892780482769 2023-01-24 03:48:38.069829: step: 916/464, loss: 0.06602410227060318 2023-01-24 03:48:38.674745: step: 918/464, loss: 0.5060414671897888 2023-01-24 03:48:39.274695: step: 920/464, loss: 0.04006993770599365 2023-01-24 03:48:39.991074: step: 922/464, loss: 0.31256306171417236 2023-01-24 03:48:40.570091: step: 924/464, loss: 0.12733837962150574 2023-01-24 03:48:41.156053: step: 926/464, loss: 0.2875961661338806 2023-01-24 03:48:41.816652: step: 928/464, loss: 0.05005858838558197 2023-01-24 03:48:42.365426: step: 930/464, loss: 0.001698375097475946 ================================================== Loss: 0.071 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31325134758700535, 'r': 0.3221674200989694, 'f1': 0.31764682954566303}, 'combined': 0.2340555586125938, 'epoch': 24} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31088618485808506, 'r': 0.30631012325943796, 'f1': 0.30858118997498524}, 'combined': 0.20145714993185565, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31694804710793084, 'r': 0.33619346932321315, 'f1': 0.32628721608348676}, 'combined': 0.2404221592194113, 'epoch': 24} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32553290729240664, 'r': 0.31085847080912427, 'f1': 0.31802650143013467}, 'combined': 0.20762351906837806, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33864012287334594, 'r': 0.339925284629981, 'f1': 0.33928148674242425}, 'combined': 0.24999688496810207, 'epoch': 24} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3187946656569331, 'r': 0.292155258333902, 'f1': 0.30489417951664516}, 'combined': 0.19905008610931238, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27927927927927926, 'r': 0.2952380952380952, 'f1': 0.28703703703703703}, 'combined': 0.19135802469135801, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29838709677419356, 'r': 0.40217391304347827, 'f1': 0.34259259259259267}, 'combined': 0.17129629629629634, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:51:19.797450: step: 2/464, loss: 0.01094895415008068 2023-01-24 03:51:20.403138: step: 4/464, loss: 0.14575912058353424 2023-01-24 03:51:21.042922: step: 6/464, loss: 0.024992192164063454 2023-01-24 03:51:21.721907: step: 8/464, loss: 0.04499730095267296 2023-01-24 03:51:22.295221: step: 10/464, loss: 0.02886922098696232 2023-01-24 03:51:22.879370: step: 12/464, loss: 0.00381839950568974 2023-01-24 03:51:23.450549: step: 14/464, loss: 0.024683594703674316 2023-01-24 03:51:24.095891: step: 16/464, loss: 0.09384645521640778 2023-01-24 03:51:24.705903: step: 18/464, loss: 0.01277201622724533 2023-01-24 03:51:25.328955: step: 20/464, loss: 0.037589460611343384 2023-01-24 03:51:25.946027: step: 22/464, loss: 0.20418094098567963 2023-01-24 03:51:26.557868: step: 24/464, loss: 0.06199120730161667 2023-01-24 03:51:27.165736: step: 26/464, loss: 0.0007472278084605932 2023-01-24 03:51:27.925568: step: 28/464, loss: 0.009938925504684448 2023-01-24 03:51:28.620419: step: 30/464, loss: 0.6464297771453857 2023-01-24 03:51:29.243964: step: 32/464, loss: 0.028073349967598915 2023-01-24 03:51:29.867055: step: 34/464, loss: 0.0018921191804111004 2023-01-24 03:51:30.525940: step: 36/464, loss: 0.00033368379808962345 2023-01-24 03:51:31.084403: step: 38/464, loss: 0.002079681260511279 2023-01-24 03:51:31.663073: step: 40/464, loss: 0.00684864679351449 2023-01-24 03:51:32.312165: step: 42/464, loss: 0.015362454578280449 2023-01-24 03:51:32.902921: step: 44/464, loss: 0.09487225860357285 2023-01-24 03:51:33.553034: step: 46/464, loss: 0.06227598711848259 2023-01-24 03:51:34.161711: step: 48/464, loss: 0.024862142279744148 2023-01-24 03:51:34.829601: step: 50/464, loss: 0.0038648508489131927 2023-01-24 03:51:35.446031: step: 52/464, loss: 0.005437001120299101 2023-01-24 03:51:36.035530: step: 54/464, loss: 0.030410602688789368 2023-01-24 03:51:36.614879: step: 56/464, loss: 0.009388812817633152 2023-01-24 03:51:37.231880: step: 58/464, loss: 0.0414985828101635 2023-01-24 03:51:37.889033: step: 60/464, loss: 0.08260368555784225 2023-01-24 03:51:38.470401: step: 62/464, loss: 0.013569515198469162 2023-01-24 03:51:39.064074: step: 64/464, loss: 0.004947878886014223 2023-01-24 03:51:39.665121: step: 66/464, loss: 0.052869606763124466 2023-01-24 03:51:40.303042: step: 68/464, loss: 0.060434550046920776 2023-01-24 03:51:40.933381: step: 70/464, loss: 0.02222449891269207 2023-01-24 03:51:41.574088: step: 72/464, loss: 0.008808063343167305 2023-01-24 03:51:42.245962: step: 74/464, loss: 0.01708069071173668 2023-01-24 03:51:42.917972: step: 76/464, loss: 0.005162985995411873 2023-01-24 03:51:43.574148: step: 78/464, loss: 0.003248975146561861 2023-01-24 03:51:44.144234: step: 80/464, loss: 0.014609781093895435 2023-01-24 03:51:44.731875: step: 82/464, loss: 0.04347887635231018 2023-01-24 03:51:45.326850: step: 84/464, loss: 0.0234987810254097 2023-01-24 03:51:45.978843: step: 86/464, loss: 0.1509189009666443 2023-01-24 03:51:46.617228: step: 88/464, loss: 0.019962340593338013 2023-01-24 03:51:47.292269: step: 90/464, loss: 0.03884506598114967 2023-01-24 03:51:47.899907: step: 92/464, loss: 0.0009039240540005267 2023-01-24 03:51:48.505345: step: 94/464, loss: 0.0034994767047464848 2023-01-24 03:51:49.115896: step: 96/464, loss: 0.08864002674818039 2023-01-24 03:51:49.767856: step: 98/464, loss: 0.006740335375070572 2023-01-24 03:51:50.343599: step: 100/464, loss: 0.05475465953350067 2023-01-24 03:51:50.948299: step: 102/464, loss: 0.03010905347764492 2023-01-24 03:51:51.614250: step: 104/464, loss: 0.016187317669391632 2023-01-24 03:51:52.214324: step: 106/464, loss: 0.0055516562424600124 2023-01-24 03:51:52.812196: step: 108/464, loss: 0.022660691291093826 2023-01-24 03:51:53.466232: step: 110/464, loss: 0.031852543354034424 2023-01-24 03:51:54.057948: step: 112/464, loss: 0.07544074952602386 2023-01-24 03:51:54.683801: step: 114/464, loss: 0.008051667362451553 2023-01-24 03:51:55.255651: step: 116/464, loss: 0.012353715486824512 2023-01-24 03:51:55.907090: step: 118/464, loss: 0.016197267919778824 2023-01-24 03:51:56.580364: step: 120/464, loss: 0.01565691828727722 2023-01-24 03:51:57.182470: step: 122/464, loss: 0.04089139774441719 2023-01-24 03:51:57.876316: step: 124/464, loss: 0.030935386195778847 2023-01-24 03:51:58.476735: step: 126/464, loss: 0.031809840351343155 2023-01-24 03:51:59.073044: step: 128/464, loss: 0.23842322826385498 2023-01-24 03:51:59.695072: step: 130/464, loss: 0.020492425188422203 2023-01-24 03:52:00.321012: step: 132/464, loss: 0.0230704378336668 2023-01-24 03:52:00.905605: step: 134/464, loss: 0.0029736270662397146 2023-01-24 03:52:01.558577: step: 136/464, loss: 0.05544079467654228 2023-01-24 03:52:02.162220: step: 138/464, loss: 0.02358139678835869 2023-01-24 03:52:02.785269: step: 140/464, loss: 0.017108041793107986 2023-01-24 03:52:03.403839: step: 142/464, loss: 0.0015132430708035827 2023-01-24 03:52:04.049872: step: 144/464, loss: 0.012823511846363544 2023-01-24 03:52:04.647955: step: 146/464, loss: 0.025987349450588226 2023-01-24 03:52:05.297286: step: 148/464, loss: 0.04257926717400551 2023-01-24 03:52:05.882002: step: 150/464, loss: 0.04711727797985077 2023-01-24 03:52:06.465357: step: 152/464, loss: 0.07601583749055862 2023-01-24 03:52:07.074224: step: 154/464, loss: 0.001698075095191598 2023-01-24 03:52:07.729876: step: 156/464, loss: 0.002336803823709488 2023-01-24 03:52:08.313437: step: 158/464, loss: 0.001236848533153534 2023-01-24 03:52:08.958229: step: 160/464, loss: 0.007346364203840494 2023-01-24 03:52:09.597407: step: 162/464, loss: 0.00528703723102808 2023-01-24 03:52:10.222096: step: 164/464, loss: 0.05930967628955841 2023-01-24 03:52:10.778782: step: 166/464, loss: 0.011992568150162697 2023-01-24 03:52:11.428281: step: 168/464, loss: 0.042673785239458084 2023-01-24 03:52:12.152342: step: 170/464, loss: 0.2894831895828247 2023-01-24 03:52:12.798405: step: 172/464, loss: 0.021240130066871643 2023-01-24 03:52:13.351926: step: 174/464, loss: 0.007897155359387398 2023-01-24 03:52:13.897158: step: 176/464, loss: 0.013241810724139214 2023-01-24 03:52:14.486474: step: 178/464, loss: 0.0017146612517535686 2023-01-24 03:52:15.092171: step: 180/464, loss: 0.052760347723960876 2023-01-24 03:52:15.714303: step: 182/464, loss: 0.02053925022482872 2023-01-24 03:52:16.327334: step: 184/464, loss: 0.019012922421097755 2023-01-24 03:52:17.003361: step: 186/464, loss: 0.08293021470308304 2023-01-24 03:52:17.595542: step: 188/464, loss: 0.05099023878574371 2023-01-24 03:52:18.189770: step: 190/464, loss: 0.011305413208901882 2023-01-24 03:52:18.779652: step: 192/464, loss: 0.017405983060598373 2023-01-24 03:52:19.400344: step: 194/464, loss: 0.046850383281707764 2023-01-24 03:52:20.014633: step: 196/464, loss: 0.0115335863083601 2023-01-24 03:52:20.635954: step: 198/464, loss: 0.028845172375440598 2023-01-24 03:52:21.237923: step: 200/464, loss: 0.015684882178902626 2023-01-24 03:52:21.929436: step: 202/464, loss: 0.005955643951892853 2023-01-24 03:52:22.575351: step: 204/464, loss: 0.03082350827753544 2023-01-24 03:52:23.132742: step: 206/464, loss: 0.001468464732170105 2023-01-24 03:52:23.717622: step: 208/464, loss: 0.13009512424468994 2023-01-24 03:52:24.328110: step: 210/464, loss: 0.020539836958050728 2023-01-24 03:52:24.934008: step: 212/464, loss: 0.13391415774822235 2023-01-24 03:52:25.548030: step: 214/464, loss: 0.013976972550153732 2023-01-24 03:52:26.169680: step: 216/464, loss: 0.1380707025527954 2023-01-24 03:52:26.787210: step: 218/464, loss: 0.02879462204873562 2023-01-24 03:52:27.399839: step: 220/464, loss: 0.01533366460353136 2023-01-24 03:52:28.013731: step: 222/464, loss: 1.0113894939422607 2023-01-24 03:52:28.606297: step: 224/464, loss: 0.0025485767982900143 2023-01-24 03:52:29.312360: step: 226/464, loss: 0.05745875835418701 2023-01-24 03:52:29.878579: step: 228/464, loss: 0.0076041747815907 2023-01-24 03:52:30.445377: step: 230/464, loss: 0.08491874486207962 2023-01-24 03:52:31.047638: step: 232/464, loss: 0.029457198455929756 2023-01-24 03:52:31.686646: step: 234/464, loss: 0.05527511239051819 2023-01-24 03:52:32.331872: step: 236/464, loss: 0.005635358393192291 2023-01-24 03:52:33.012468: step: 238/464, loss: 0.010738439857959747 2023-01-24 03:52:33.640284: step: 240/464, loss: 0.0373210646212101 2023-01-24 03:52:34.277488: step: 242/464, loss: 0.24425114691257477 2023-01-24 03:52:34.878933: step: 244/464, loss: 0.02821933850646019 2023-01-24 03:52:35.480693: step: 246/464, loss: 0.009423431940376759 2023-01-24 03:52:36.174304: step: 248/464, loss: 0.036287467926740646 2023-01-24 03:52:36.769752: step: 250/464, loss: 0.05840136110782623 2023-01-24 03:52:37.405335: step: 252/464, loss: 0.0367143414914608 2023-01-24 03:52:38.002556: step: 254/464, loss: 0.09120728075504303 2023-01-24 03:52:38.581038: step: 256/464, loss: 0.06416139751672745 2023-01-24 03:52:39.228512: step: 258/464, loss: 0.2775331139564514 2023-01-24 03:52:39.843511: step: 260/464, loss: 0.01628378964960575 2023-01-24 03:52:40.399287: step: 262/464, loss: 0.03143422678112984 2023-01-24 03:52:40.959284: step: 264/464, loss: 0.04143984988331795 2023-01-24 03:52:41.678942: step: 266/464, loss: 0.0008044016431085765 2023-01-24 03:52:42.308425: step: 268/464, loss: 0.44265875220298767 2023-01-24 03:52:42.919535: step: 270/464, loss: 0.052031636238098145 2023-01-24 03:52:43.533420: step: 272/464, loss: 0.3623453676700592 2023-01-24 03:52:44.160979: step: 274/464, loss: 0.018070541322231293 2023-01-24 03:52:44.841274: step: 276/464, loss: 0.03422160446643829 2023-01-24 03:52:45.453093: step: 278/464, loss: 0.09279845654964447 2023-01-24 03:52:46.089324: step: 280/464, loss: 0.009126405231654644 2023-01-24 03:52:46.691609: step: 282/464, loss: 0.045235250145196915 2023-01-24 03:52:47.345083: step: 284/464, loss: 0.23937299847602844 2023-01-24 03:52:48.054158: step: 286/464, loss: 0.018642853945493698 2023-01-24 03:52:48.710528: step: 288/464, loss: 0.0056993127800524235 2023-01-24 03:52:49.278299: step: 290/464, loss: 0.007555719930678606 2023-01-24 03:52:49.882533: step: 292/464, loss: 0.03261446952819824 2023-01-24 03:52:50.502550: step: 294/464, loss: 0.0309117641299963 2023-01-24 03:52:51.112417: step: 296/464, loss: 0.013928813859820366 2023-01-24 03:52:51.696609: step: 298/464, loss: 0.0032964213751256466 2023-01-24 03:52:52.324336: step: 300/464, loss: 0.03079323098063469 2023-01-24 03:52:52.974424: step: 302/464, loss: 0.06833625584840775 2023-01-24 03:52:53.549956: step: 304/464, loss: 0.024271182715892792 2023-01-24 03:52:54.197774: step: 306/464, loss: 0.1986471265554428 2023-01-24 03:52:54.845840: step: 308/464, loss: 0.0008915074868127704 2023-01-24 03:52:55.569804: step: 310/464, loss: 0.016452452167868614 2023-01-24 03:52:56.162500: step: 312/464, loss: 0.052120424807071686 2023-01-24 03:52:56.919950: step: 314/464, loss: 0.050676923245191574 2023-01-24 03:52:57.528907: step: 316/464, loss: 0.0032034774776548147 2023-01-24 03:52:58.139573: step: 318/464, loss: 0.03368563950061798 2023-01-24 03:52:58.705936: step: 320/464, loss: 0.00951187964528799 2023-01-24 03:52:59.314438: step: 322/464, loss: 0.05167800188064575 2023-01-24 03:53:00.017263: step: 324/464, loss: 0.03427198529243469 2023-01-24 03:53:00.693535: step: 326/464, loss: 0.046183791011571884 2023-01-24 03:53:01.257482: step: 328/464, loss: 0.0038602089043706656 2023-01-24 03:53:01.911560: step: 330/464, loss: 0.4236118793487549 2023-01-24 03:53:02.605090: step: 332/464, loss: 0.015052303671836853 2023-01-24 03:53:03.260152: step: 334/464, loss: 0.023639438673853874 2023-01-24 03:53:03.902507: step: 336/464, loss: 0.019682051613926888 2023-01-24 03:53:04.522140: step: 338/464, loss: 0.13966712355613708 2023-01-24 03:53:05.219688: step: 340/464, loss: 0.0024587088264524937 2023-01-24 03:53:05.866335: step: 342/464, loss: 0.02569568157196045 2023-01-24 03:53:06.462977: step: 344/464, loss: 0.04826827347278595 2023-01-24 03:53:07.111426: step: 346/464, loss: 0.03584090992808342 2023-01-24 03:53:07.756967: step: 348/464, loss: 0.15560881793498993 2023-01-24 03:53:08.391917: step: 350/464, loss: 0.01604614406824112 2023-01-24 03:53:09.056217: step: 352/464, loss: 0.06136579066514969 2023-01-24 03:53:09.754128: step: 354/464, loss: 0.04448382183909416 2023-01-24 03:53:10.348349: step: 356/464, loss: 0.0005365906399674714 2023-01-24 03:53:10.974950: step: 358/464, loss: 0.09390643984079361 2023-01-24 03:53:11.612615: step: 360/464, loss: 0.034720465540885925 2023-01-24 03:53:12.185012: step: 362/464, loss: 0.9380174279212952 2023-01-24 03:53:12.743849: step: 364/464, loss: 0.026559194549918175 2023-01-24 03:53:13.361518: step: 366/464, loss: 0.01126338355243206 2023-01-24 03:53:14.053088: step: 368/464, loss: 0.04304554685950279 2023-01-24 03:53:14.611079: step: 370/464, loss: 0.014592224732041359 2023-01-24 03:53:15.212377: step: 372/464, loss: 0.043677981942892075 2023-01-24 03:53:15.803220: step: 374/464, loss: 0.005666018463671207 2023-01-24 03:53:16.402109: step: 376/464, loss: 0.03042283095419407 2023-01-24 03:53:16.998520: step: 378/464, loss: 0.01923169009387493 2023-01-24 03:53:17.608098: step: 380/464, loss: 0.019020728766918182 2023-01-24 03:53:18.221306: step: 382/464, loss: 0.0346912145614624 2023-01-24 03:53:18.832848: step: 384/464, loss: 0.04588151350617409 2023-01-24 03:53:19.508358: step: 386/464, loss: 0.0751832127571106 2023-01-24 03:53:20.094338: step: 388/464, loss: 0.004094639793038368 2023-01-24 03:53:20.783046: step: 390/464, loss: 0.022207781672477722 2023-01-24 03:53:21.407777: step: 392/464, loss: 0.06836681813001633 2023-01-24 03:53:22.013608: step: 394/464, loss: 0.0121278902515769 2023-01-24 03:53:22.610510: step: 396/464, loss: 0.6992283463478088 2023-01-24 03:53:23.205366: step: 398/464, loss: 0.0797678679227829 2023-01-24 03:53:23.842959: step: 400/464, loss: 0.003167911432683468 2023-01-24 03:53:24.464559: step: 402/464, loss: 0.011715085245668888 2023-01-24 03:53:25.111367: step: 404/464, loss: 0.052617043256759644 2023-01-24 03:53:25.741684: step: 406/464, loss: 0.019754866138100624 2023-01-24 03:53:26.319843: step: 408/464, loss: 0.0014015489723533392 2023-01-24 03:53:26.959127: step: 410/464, loss: 0.0034204889088869095 2023-01-24 03:53:27.635005: step: 412/464, loss: 0.3143950402736664 2023-01-24 03:53:28.280509: step: 414/464, loss: 0.02351572923362255 2023-01-24 03:53:28.914163: step: 416/464, loss: 0.05147172138094902 2023-01-24 03:53:29.529667: step: 418/464, loss: 1.0159331560134888 2023-01-24 03:53:30.189074: step: 420/464, loss: 0.23005777597427368 2023-01-24 03:53:30.754341: step: 422/464, loss: 0.01528138481080532 2023-01-24 03:53:31.375824: step: 424/464, loss: 0.032140932977199554 2023-01-24 03:53:31.988143: step: 426/464, loss: 0.0063538933172822 2023-01-24 03:53:32.591742: step: 428/464, loss: 0.02749541401863098 2023-01-24 03:53:33.275464: step: 430/464, loss: 0.058274418115615845 2023-01-24 03:53:33.845165: step: 432/464, loss: 0.024172352626919746 2023-01-24 03:53:34.473381: step: 434/464, loss: 0.08550658077001572 2023-01-24 03:53:35.072939: step: 436/464, loss: 0.04020044207572937 2023-01-24 03:53:35.760990: step: 438/464, loss: 1.4247195720672607 2023-01-24 03:53:36.405466: step: 440/464, loss: 0.0749029591679573 2023-01-24 03:53:37.064514: step: 442/464, loss: 0.1032380685210228 2023-01-24 03:53:37.686460: step: 444/464, loss: 0.01930875889956951 2023-01-24 03:53:38.311815: step: 446/464, loss: 0.002203272422775626 2023-01-24 03:53:38.963024: step: 448/464, loss: 0.3476521670818329 2023-01-24 03:53:39.577409: step: 450/464, loss: 0.018893271684646606 2023-01-24 03:53:40.144660: step: 452/464, loss: 0.03659308701753616 2023-01-24 03:53:40.766913: step: 454/464, loss: 0.03388116881251335 2023-01-24 03:53:41.395939: step: 456/464, loss: 0.06619930267333984 2023-01-24 03:53:42.004181: step: 458/464, loss: 0.003419067244976759 2023-01-24 03:53:42.632925: step: 460/464, loss: 0.06586448848247528 2023-01-24 03:53:43.212318: step: 462/464, loss: 0.006851766724139452 2023-01-24 03:53:43.786983: step: 464/464, loss: 0.42624276876449585 2023-01-24 03:53:44.451243: step: 466/464, loss: 0.21237380802631378 2023-01-24 03:53:45.078901: step: 468/464, loss: 0.11286122351884842 2023-01-24 03:53:45.674965: step: 470/464, loss: 0.04325117543339729 2023-01-24 03:53:46.324761: step: 472/464, loss: 0.06070108339190483 2023-01-24 03:53:46.999161: step: 474/464, loss: 0.023442333564162254 2023-01-24 03:53:47.607291: step: 476/464, loss: 0.04634851962327957 2023-01-24 03:53:48.218108: step: 478/464, loss: 0.00919394101947546 2023-01-24 03:53:48.817804: step: 480/464, loss: 0.019036393612623215 2023-01-24 03:53:49.515576: step: 482/464, loss: 0.022216234356164932 2023-01-24 03:53:50.201435: step: 484/464, loss: 0.030017098411917686 2023-01-24 03:53:50.880346: step: 486/464, loss: 0.0024047254119068384 2023-01-24 03:53:51.467241: step: 488/464, loss: 0.09450496733188629 2023-01-24 03:53:52.092849: step: 490/464, loss: 0.04067765921354294 2023-01-24 03:53:52.727681: step: 492/464, loss: 0.07102949917316437 2023-01-24 03:53:53.404934: step: 494/464, loss: 0.06277811527252197 2023-01-24 03:53:54.128751: step: 496/464, loss: 0.023850714787840843 2023-01-24 03:53:54.737731: step: 498/464, loss: 0.005263129249215126 2023-01-24 03:53:55.357233: step: 500/464, loss: 0.2657757103443146 2023-01-24 03:53:55.967752: step: 502/464, loss: 0.13945844769477844 2023-01-24 03:53:56.664102: step: 504/464, loss: 0.04101903364062309 2023-01-24 03:53:57.269408: step: 506/464, loss: 0.07458826899528503 2023-01-24 03:53:57.905554: step: 508/464, loss: 0.06257973611354828 2023-01-24 03:53:58.544551: step: 510/464, loss: 0.03478274866938591 2023-01-24 03:53:59.155109: step: 512/464, loss: 0.011548043228685856 2023-01-24 03:53:59.790029: step: 514/464, loss: 0.02769150212407112 2023-01-24 03:54:00.472126: step: 516/464, loss: 0.017998792231082916 2023-01-24 03:54:01.104626: step: 518/464, loss: 0.1678367555141449 2023-01-24 03:54:01.765654: step: 520/464, loss: 1.509641170501709 2023-01-24 03:54:02.366367: step: 522/464, loss: 0.07102823257446289 2023-01-24 03:54:02.957286: step: 524/464, loss: 0.055757131427526474 2023-01-24 03:54:03.684291: step: 526/464, loss: 0.01679256744682789 2023-01-24 03:54:04.311442: step: 528/464, loss: 0.04048018902540207 2023-01-24 03:54:05.089663: step: 530/464, loss: 0.04043349251151085 2023-01-24 03:54:05.681524: step: 532/464, loss: 0.005710093304514885 2023-01-24 03:54:06.301372: step: 534/464, loss: 0.0880838930606842 2023-01-24 03:54:06.886082: step: 536/464, loss: 0.07711517810821533 2023-01-24 03:54:07.501753: step: 538/464, loss: 0.0020625698380172253 2023-01-24 03:54:08.068862: step: 540/464, loss: 0.0283072330057621 2023-01-24 03:54:08.675533: step: 542/464, loss: 0.03311380743980408 2023-01-24 03:54:09.264374: step: 544/464, loss: 0.018642157316207886 2023-01-24 03:54:09.859183: step: 546/464, loss: 0.0008091746713034809 2023-01-24 03:54:10.445956: step: 548/464, loss: 0.006606127135455608 2023-01-24 03:54:11.069422: step: 550/464, loss: 0.13055063784122467 2023-01-24 03:54:11.752509: step: 552/464, loss: 0.06652757525444031 2023-01-24 03:54:12.351822: step: 554/464, loss: 0.016496941447257996 2023-01-24 03:54:12.973640: step: 556/464, loss: 0.021097218617796898 2023-01-24 03:54:13.637850: step: 558/464, loss: 0.044590700417757034 2023-01-24 03:54:14.343892: step: 560/464, loss: 0.10488364100456238 2023-01-24 03:54:14.941229: step: 562/464, loss: 0.09305278956890106 2023-01-24 03:54:15.602676: step: 564/464, loss: 0.0067495619878172874 2023-01-24 03:54:16.219409: step: 566/464, loss: 0.010041550733149052 2023-01-24 03:54:16.827810: step: 568/464, loss: 0.01691114529967308 2023-01-24 03:54:17.451983: step: 570/464, loss: 0.06539332866668701 2023-01-24 03:54:18.102078: step: 572/464, loss: 0.08897537738084793 2023-01-24 03:54:18.669601: step: 574/464, loss: 0.014962859451770782 2023-01-24 03:54:19.327998: step: 576/464, loss: 0.009278004057705402 2023-01-24 03:54:19.949506: step: 578/464, loss: 0.04308057576417923 2023-01-24 03:54:20.528306: step: 580/464, loss: 0.024415653198957443 2023-01-24 03:54:21.115907: step: 582/464, loss: 0.07050704210996628 2023-01-24 03:54:21.727334: step: 584/464, loss: 0.04006562381982803 2023-01-24 03:54:22.380527: step: 586/464, loss: 0.019665470346808434 2023-01-24 03:54:22.986875: step: 588/464, loss: 0.016614586114883423 2023-01-24 03:54:23.579319: step: 590/464, loss: 0.0010337578132748604 2023-01-24 03:54:24.219014: step: 592/464, loss: 0.17083393037319183 2023-01-24 03:54:24.882106: step: 594/464, loss: 0.07128389924764633 2023-01-24 03:54:25.489137: step: 596/464, loss: 0.04955251142382622 2023-01-24 03:54:26.106792: step: 598/464, loss: 0.03497675061225891 2023-01-24 03:54:26.699074: step: 600/464, loss: 0.004128795117139816 2023-01-24 03:54:27.233252: step: 602/464, loss: 0.014348876662552357 2023-01-24 03:54:27.826340: step: 604/464, loss: 0.04881225898861885 2023-01-24 03:54:28.485199: step: 606/464, loss: 0.027765098959207535 2023-01-24 03:54:29.053779: step: 608/464, loss: 0.03792242333292961 2023-01-24 03:54:29.640336: step: 610/464, loss: 0.00026430690195411444 2023-01-24 03:54:30.235463: step: 612/464, loss: 0.05211463198065758 2023-01-24 03:54:30.824557: step: 614/464, loss: 0.0476502850651741 2023-01-24 03:54:31.510080: step: 616/464, loss: 0.01162576675415039 2023-01-24 03:54:32.174958: step: 618/464, loss: 0.07381214946508408 2023-01-24 03:54:32.832959: step: 620/464, loss: 0.027710363268852234 2023-01-24 03:54:33.418023: step: 622/464, loss: 0.1238107681274414 2023-01-24 03:54:34.059654: step: 624/464, loss: 0.3625965714454651 2023-01-24 03:54:34.687377: step: 626/464, loss: 0.10400807857513428 2023-01-24 03:54:35.224086: step: 628/464, loss: 0.005050726234912872 2023-01-24 03:54:35.810441: step: 630/464, loss: 0.03973061591386795 2023-01-24 03:54:36.449717: step: 632/464, loss: 0.06549963355064392 2023-01-24 03:54:37.016388: step: 634/464, loss: 0.15217608213424683 2023-01-24 03:54:37.621500: step: 636/464, loss: 0.030870968475937843 2023-01-24 03:54:38.230943: step: 638/464, loss: 0.010599116794764996 2023-01-24 03:54:38.858228: step: 640/464, loss: 0.18119975924491882 2023-01-24 03:54:39.510104: step: 642/464, loss: 0.0009294974734075367 2023-01-24 03:54:40.170940: step: 644/464, loss: 0.02494949661195278 2023-01-24 03:54:40.719291: step: 646/464, loss: 0.03424317017197609 2023-01-24 03:54:41.330572: step: 648/464, loss: 0.06232265755534172 2023-01-24 03:54:41.960487: step: 650/464, loss: 0.08077694475650787 2023-01-24 03:54:42.590974: step: 652/464, loss: 0.04820266366004944 2023-01-24 03:54:43.166529: step: 654/464, loss: 0.03566118702292442 2023-01-24 03:54:43.791657: step: 656/464, loss: 0.08889000862836838 2023-01-24 03:54:44.420436: step: 658/464, loss: 0.07314766198396683 2023-01-24 03:54:45.001340: step: 660/464, loss: 0.03364182636141777 2023-01-24 03:54:45.665645: step: 662/464, loss: 0.03165220841765404 2023-01-24 03:54:46.295246: step: 664/464, loss: 0.05097051337361336 2023-01-24 03:54:46.883532: step: 666/464, loss: 0.04210424795746803 2023-01-24 03:54:47.567908: step: 668/464, loss: 0.06841326504945755 2023-01-24 03:54:48.184115: step: 670/464, loss: 0.04322848841547966 2023-01-24 03:54:48.818167: step: 672/464, loss: 0.06704898923635483 2023-01-24 03:54:49.456918: step: 674/464, loss: 0.09451629221439362 2023-01-24 03:54:50.040109: step: 676/464, loss: 0.02983608841896057 2023-01-24 03:54:50.696019: step: 678/464, loss: 0.02176210656762123 2023-01-24 03:54:51.332081: step: 680/464, loss: 0.024272693321108818 2023-01-24 03:54:51.967622: step: 682/464, loss: 0.05879077687859535 2023-01-24 03:54:52.600984: step: 684/464, loss: 0.03125687316060066 2023-01-24 03:54:53.229644: step: 686/464, loss: 0.023755548521876335 2023-01-24 03:54:53.784892: step: 688/464, loss: 0.011906933039426804 2023-01-24 03:54:54.408802: step: 690/464, loss: 0.03976349160075188 2023-01-24 03:54:55.020332: step: 692/464, loss: 0.2375657856464386 2023-01-24 03:54:55.596119: step: 694/464, loss: 0.011740381829440594 2023-01-24 03:54:56.225163: step: 696/464, loss: 0.02481149137020111 2023-01-24 03:54:56.859160: step: 698/464, loss: 0.23874157667160034 2023-01-24 03:54:57.457559: step: 700/464, loss: 0.040182825177907944 2023-01-24 03:54:58.137606: step: 702/464, loss: 0.031606245785951614 2023-01-24 03:54:58.784459: step: 704/464, loss: 0.01072809100151062 2023-01-24 03:54:59.396848: step: 706/464, loss: 0.012844547629356384 2023-01-24 03:55:00.054009: step: 708/464, loss: 0.023855801671743393 2023-01-24 03:55:00.685205: step: 710/464, loss: 0.025959152728319168 2023-01-24 03:55:01.311838: step: 712/464, loss: 0.05024197697639465 2023-01-24 03:55:01.999090: step: 714/464, loss: 0.02967996522784233 2023-01-24 03:55:02.548706: step: 716/464, loss: 0.022867849096655846 2023-01-24 03:55:03.184974: step: 718/464, loss: 0.02212836965918541 2023-01-24 03:55:03.753321: step: 720/464, loss: 0.02384709008038044 2023-01-24 03:55:04.369896: step: 722/464, loss: 0.01609223149716854 2023-01-24 03:55:05.002499: step: 724/464, loss: 0.03944723308086395 2023-01-24 03:55:05.604751: step: 726/464, loss: 0.32151544094085693 2023-01-24 03:55:06.251894: step: 728/464, loss: 0.03509880602359772 2023-01-24 03:55:06.961151: step: 730/464, loss: 0.0048608374781906605 2023-01-24 03:55:07.582592: step: 732/464, loss: 0.050102487206459045 2023-01-24 03:55:08.110862: step: 734/464, loss: 0.09356710314750671 2023-01-24 03:55:08.708575: step: 736/464, loss: 0.010913309641182423 2023-01-24 03:55:09.317128: step: 738/464, loss: 0.0152819212526083 2023-01-24 03:55:09.953649: step: 740/464, loss: 0.03910623863339424 2023-01-24 03:55:10.614300: step: 742/464, loss: 0.04414622113108635 2023-01-24 03:55:11.185339: step: 744/464, loss: 0.011421327479183674 2023-01-24 03:55:11.774234: step: 746/464, loss: 0.01073770597577095 2023-01-24 03:55:12.424629: step: 748/464, loss: 0.023593632504343987 2023-01-24 03:55:13.004953: step: 750/464, loss: 0.044311657547950745 2023-01-24 03:55:13.670671: step: 752/464, loss: 0.06402845680713654 2023-01-24 03:55:14.242482: step: 754/464, loss: 0.002189035527408123 2023-01-24 03:55:14.873687: step: 756/464, loss: 0.07352970540523529 2023-01-24 03:55:15.510290: step: 758/464, loss: 0.04176979884505272 2023-01-24 03:55:16.169187: step: 760/464, loss: 0.02098209597170353 2023-01-24 03:55:16.747437: step: 762/464, loss: 0.07031551003456116 2023-01-24 03:55:17.336984: step: 764/464, loss: 0.005239306483417749 2023-01-24 03:55:17.923477: step: 766/464, loss: 0.006717341020703316 2023-01-24 03:55:18.609452: step: 768/464, loss: 0.20943881571292877 2023-01-24 03:55:19.205795: step: 770/464, loss: 0.03284173086285591 2023-01-24 03:55:19.846981: step: 772/464, loss: 0.016307709738612175 2023-01-24 03:55:20.453983: step: 774/464, loss: 0.06885068863630295 2023-01-24 03:55:21.038383: step: 776/464, loss: 0.000454139692010358 2023-01-24 03:55:21.649593: step: 778/464, loss: 0.02808069810271263 2023-01-24 03:55:22.350275: step: 780/464, loss: 0.1787603348493576 2023-01-24 03:55:22.987557: step: 782/464, loss: 0.02926480397582054 2023-01-24 03:55:23.549049: step: 784/464, loss: 0.04911215603351593 2023-01-24 03:55:24.165534: step: 786/464, loss: 0.046737734228372574 2023-01-24 03:55:24.828062: step: 788/464, loss: 0.002738171024248004 2023-01-24 03:55:25.484350: step: 790/464, loss: 0.29980382323265076 2023-01-24 03:55:26.146192: step: 792/464, loss: 0.05199276655912399 2023-01-24 03:55:26.711325: step: 794/464, loss: 0.005134768784046173 2023-01-24 03:55:27.378897: step: 796/464, loss: 0.08432340621948242 2023-01-24 03:55:27.957476: step: 798/464, loss: 0.058853939175605774 2023-01-24 03:55:28.587172: step: 800/464, loss: 0.05134215205907822 2023-01-24 03:55:29.238509: step: 802/464, loss: 0.7040004134178162 2023-01-24 03:55:29.897002: step: 804/464, loss: 0.01902609132230282 2023-01-24 03:55:30.524752: step: 806/464, loss: 0.016261931508779526 2023-01-24 03:55:31.116624: step: 808/464, loss: 0.020014706999063492 2023-01-24 03:55:31.771938: step: 810/464, loss: 0.048233408480882645 2023-01-24 03:55:32.447768: step: 812/464, loss: 0.03912653028964996 2023-01-24 03:55:33.085920: step: 814/464, loss: 0.01740916632115841 2023-01-24 03:55:33.703418: step: 816/464, loss: 0.16831934452056885 2023-01-24 03:55:34.315564: step: 818/464, loss: 0.03522450849413872 2023-01-24 03:55:34.961606: step: 820/464, loss: 0.0683288425207138 2023-01-24 03:55:35.601542: step: 822/464, loss: 0.07296296209096909 2023-01-24 03:55:36.300534: step: 824/464, loss: 0.22791068255901337 2023-01-24 03:55:36.917972: step: 826/464, loss: 0.032518237829208374 2023-01-24 03:55:37.542224: step: 828/464, loss: 0.11278972774744034 2023-01-24 03:55:38.165774: step: 830/464, loss: 0.038358110934495926 2023-01-24 03:55:38.821627: step: 832/464, loss: 0.05259322375059128 2023-01-24 03:55:39.478144: step: 834/464, loss: 0.016430115327239037 2023-01-24 03:55:40.077859: step: 836/464, loss: 0.014489209279417992 2023-01-24 03:55:40.674920: step: 838/464, loss: 0.006181823089718819 2023-01-24 03:55:41.403722: step: 840/464, loss: 0.02740761823952198 2023-01-24 03:55:42.051229: step: 842/464, loss: 0.0018187963869422674 2023-01-24 03:55:42.699451: step: 844/464, loss: 0.038892749696969986 2023-01-24 03:55:43.306287: step: 846/464, loss: 0.016636164858937263 2023-01-24 03:55:43.952757: step: 848/464, loss: 0.0516558475792408 2023-01-24 03:55:44.566005: step: 850/464, loss: 0.02258567325770855 2023-01-24 03:55:45.264647: step: 852/464, loss: 0.055364180356264114 2023-01-24 03:55:45.883138: step: 854/464, loss: 0.06610779464244843 2023-01-24 03:55:46.481113: step: 856/464, loss: 0.1411096751689911 2023-01-24 03:55:47.086032: step: 858/464, loss: 0.03036370873451233 2023-01-24 03:55:47.677337: step: 860/464, loss: 0.037055253982543945 2023-01-24 03:55:48.307446: step: 862/464, loss: 0.029586972668766975 2023-01-24 03:55:48.920119: step: 864/464, loss: 0.028649387881159782 2023-01-24 03:55:49.528711: step: 866/464, loss: 0.021050531417131424 2023-01-24 03:55:50.163286: step: 868/464, loss: 0.01597771607339382 2023-01-24 03:55:50.852298: step: 870/464, loss: 0.014619365334510803 2023-01-24 03:55:51.465142: step: 872/464, loss: 0.069264717400074 2023-01-24 03:55:52.100403: step: 874/464, loss: 0.022397087886929512 2023-01-24 03:55:52.741860: step: 876/464, loss: 0.00017047250003088266 2023-01-24 03:55:53.430182: step: 878/464, loss: 0.0019862446933984756 2023-01-24 03:55:54.048808: step: 880/464, loss: 0.010659612715244293 2023-01-24 03:55:54.645073: step: 882/464, loss: 0.04737301543354988 2023-01-24 03:55:55.303109: step: 884/464, loss: 0.07188910245895386 2023-01-24 03:55:55.880869: step: 886/464, loss: 0.06555438041687012 2023-01-24 03:55:56.532984: step: 888/464, loss: 0.017469795420765877 2023-01-24 03:55:57.111251: step: 890/464, loss: 0.05681777000427246 2023-01-24 03:55:57.714349: step: 892/464, loss: 0.011965092271566391 2023-01-24 03:55:58.360708: step: 894/464, loss: 0.10202238708734512 2023-01-24 03:55:59.052987: step: 896/464, loss: 0.04688293859362602 2023-01-24 03:55:59.656910: step: 898/464, loss: 0.033308885991573334 2023-01-24 03:56:00.282664: step: 900/464, loss: 0.015246191993355751 2023-01-24 03:56:00.841994: step: 902/464, loss: 0.21349254250526428 2023-01-24 03:56:01.448394: step: 904/464, loss: 0.09325052052736282 2023-01-24 03:56:02.064391: step: 906/464, loss: 0.0010594649938866496 2023-01-24 03:56:02.720164: step: 908/464, loss: 0.016955086961388588 2023-01-24 03:56:03.351471: step: 910/464, loss: 0.05161542445421219 2023-01-24 03:56:03.946995: step: 912/464, loss: 0.12337085604667664 2023-01-24 03:56:04.563873: step: 914/464, loss: 0.016918625682592392 2023-01-24 03:56:05.102381: step: 916/464, loss: 0.028833843767642975 2023-01-24 03:56:05.690156: step: 918/464, loss: 0.0705912858247757 2023-01-24 03:56:06.359870: step: 920/464, loss: 0.1489098221063614 2023-01-24 03:56:07.030570: step: 922/464, loss: 0.02580682933330536 2023-01-24 03:56:07.664476: step: 924/464, loss: 0.25764748454093933 2023-01-24 03:56:08.307000: step: 926/464, loss: 0.01660194620490074 2023-01-24 03:56:08.957031: step: 928/464, loss: 0.0014980026753619313 2023-01-24 03:56:09.467670: step: 930/464, loss: 0.002765296958386898 ================================================== Loss: 0.068 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3250271570453135, 'r': 0.33119465528146746, 'f1': 0.3280819235588973}, 'combined': 0.24174457525392432, 'epoch': 25} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3243084128742867, 'r': 0.3037410594842814, 'f1': 0.313687966510582}, 'combined': 0.2047911076701209, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.329285123345367, 'r': 0.3461555186590765, 'f1': 0.3375096361393771}, 'combined': 0.24869131083954102, 'epoch': 25} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.339781799299745, 'r': 0.3053972565923191, 'f1': 0.32167327317427413}, 'combined': 0.21000431305678, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3437203510436433, 'r': 0.3437203510436433, 'f1': 0.3437203510436433}, 'combined': 0.2532676270847898, 'epoch': 25} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33822211125299084, 'r': 0.2956334814352889, 'f1': 0.31549703560725284}, 'combined': 0.20597215796121168, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.274074074074074, 'r': 0.35238095238095235, 'f1': 0.3083333333333333}, 'combined': 0.20555555555555552, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3203125, 'r': 0.44565217391304346, 'f1': 0.3727272727272727}, 'combined': 0.18636363636363634, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:58:46.375641: step: 2/464, loss: 0.04391373693943024 2023-01-24 03:58:47.049772: step: 4/464, loss: 0.007874233648180962 2023-01-24 03:58:47.686806: step: 6/464, loss: 0.02224157564342022 2023-01-24 03:58:48.321799: step: 8/464, loss: 0.019984586164355278 2023-01-24 03:58:48.963049: step: 10/464, loss: 0.003371479921042919 2023-01-24 03:58:49.597157: step: 12/464, loss: 0.01139430794864893 2023-01-24 03:58:50.226826: step: 14/464, loss: 0.03944086283445358 2023-01-24 03:58:50.769389: step: 16/464, loss: 0.08040128648281097 2023-01-24 03:58:51.449042: step: 18/464, loss: 0.21492762863636017 2023-01-24 03:58:52.066197: step: 20/464, loss: 0.01825053244829178 2023-01-24 03:58:52.742492: step: 22/464, loss: 0.026273366063833237 2023-01-24 03:58:53.313405: step: 24/464, loss: 0.0006889184005558491 2023-01-24 03:58:53.997457: step: 26/464, loss: 0.01594449020922184 2023-01-24 03:58:54.628006: step: 28/464, loss: 0.040616974234580994 2023-01-24 03:58:55.292122: step: 30/464, loss: 0.002110840752720833 2023-01-24 03:58:55.825640: step: 32/464, loss: 0.007855681702494621 2023-01-24 03:58:56.444504: step: 34/464, loss: 0.018313631415367126 2023-01-24 03:58:57.027440: step: 36/464, loss: 0.013031150214374065 2023-01-24 03:58:57.670537: step: 38/464, loss: 0.014306176453828812 2023-01-24 03:58:58.304097: step: 40/464, loss: 0.009338781237602234 2023-01-24 03:58:58.973080: step: 42/464, loss: 0.022448761388659477 2023-01-24 03:58:59.591846: step: 44/464, loss: 0.012604963965713978 2023-01-24 03:59:00.219965: step: 46/464, loss: 0.03372041508555412 2023-01-24 03:59:00.884566: step: 48/464, loss: 0.017403187230229378 2023-01-24 03:59:01.444628: step: 50/464, loss: 0.014642245136201382 2023-01-24 03:59:02.067428: step: 52/464, loss: 0.007306413725018501 2023-01-24 03:59:02.624437: step: 54/464, loss: 0.012175238691270351 2023-01-24 03:59:03.255299: step: 56/464, loss: 0.013652827590703964 2023-01-24 03:59:03.812543: step: 58/464, loss: 0.12427759170532227 2023-01-24 03:59:04.474032: step: 60/464, loss: 0.012083382345736027 2023-01-24 03:59:05.076138: step: 62/464, loss: 0.009524141438305378 2023-01-24 03:59:05.693773: step: 64/464, loss: 0.7988739609718323 2023-01-24 03:59:06.303102: step: 66/464, loss: 0.021220678463578224 2023-01-24 03:59:06.897357: step: 68/464, loss: 0.004771019332110882 2023-01-24 03:59:07.531508: step: 70/464, loss: 0.03755776584148407 2023-01-24 03:59:08.257522: step: 72/464, loss: 0.11564905196428299 2023-01-24 03:59:08.878682: step: 74/464, loss: 0.004108037333935499 2023-01-24 03:59:09.527170: step: 76/464, loss: 0.01695541851222515 2023-01-24 03:59:10.215611: step: 78/464, loss: 0.06649809330701828 2023-01-24 03:59:10.974688: step: 80/464, loss: 0.019438304007053375 2023-01-24 03:59:11.612460: step: 82/464, loss: 0.026070335879921913 2023-01-24 03:59:12.314102: step: 84/464, loss: 0.008641884662210941 2023-01-24 03:59:12.962615: step: 86/464, loss: 0.0016679865075275302 2023-01-24 03:59:13.648121: step: 88/464, loss: 0.1379016488790512 2023-01-24 03:59:14.191147: step: 90/464, loss: 0.09687651693820953 2023-01-24 03:59:14.806920: step: 92/464, loss: 0.12728893756866455 2023-01-24 03:59:15.440383: step: 94/464, loss: 0.0065714772790670395 2023-01-24 03:59:16.008366: step: 96/464, loss: 0.03216441348195076 2023-01-24 03:59:16.609659: step: 98/464, loss: 0.1337531954050064 2023-01-24 03:59:17.235878: step: 100/464, loss: 0.027989579364657402 2023-01-24 03:59:17.865667: step: 102/464, loss: 0.05962573364377022 2023-01-24 03:59:18.531345: step: 104/464, loss: 0.033715952187776566 2023-01-24 03:59:19.131866: step: 106/464, loss: 0.005919927265495062 2023-01-24 03:59:19.750330: step: 108/464, loss: 0.010916773229837418 2023-01-24 03:59:20.344426: step: 110/464, loss: 0.05076739192008972 2023-01-24 03:59:20.938207: step: 112/464, loss: 0.02558835968375206 2023-01-24 03:59:21.588943: step: 114/464, loss: 0.02433537319302559 2023-01-24 03:59:22.252793: step: 116/464, loss: 0.040127020329236984 2023-01-24 03:59:22.842325: step: 118/464, loss: 0.004513516556471586 2023-01-24 03:59:23.479704: step: 120/464, loss: 0.006538499612361193 2023-01-24 03:59:24.047278: step: 122/464, loss: 0.01373555138707161 2023-01-24 03:59:24.656273: step: 124/464, loss: 0.007735065184533596 2023-01-24 03:59:25.242667: step: 126/464, loss: 0.003255724674090743 2023-01-24 03:59:25.882954: step: 128/464, loss: 0.028008991852402687 2023-01-24 03:59:26.548621: step: 130/464, loss: 0.03875065594911575 2023-01-24 03:59:27.116880: step: 132/464, loss: 0.04180491715669632 2023-01-24 03:59:27.689878: step: 134/464, loss: 0.002450503408908844 2023-01-24 03:59:28.300780: step: 136/464, loss: 0.039049725979566574 2023-01-24 03:59:28.988902: step: 138/464, loss: 0.009984776377677917 2023-01-24 03:59:29.658587: step: 140/464, loss: 0.017390163615345955 2023-01-24 03:59:30.301562: step: 142/464, loss: 0.04249592870473862 2023-01-24 03:59:30.868935: step: 144/464, loss: 0.00951914582401514 2023-01-24 03:59:31.458329: step: 146/464, loss: 0.02458665519952774 2023-01-24 03:59:32.138684: step: 148/464, loss: 0.02037750743329525 2023-01-24 03:59:32.727014: step: 150/464, loss: 0.0328526496887207 2023-01-24 03:59:33.374571: step: 152/464, loss: 0.052238550037145615 2023-01-24 03:59:34.018585: step: 154/464, loss: 0.23137396574020386 2023-01-24 03:59:34.613220: step: 156/464, loss: 0.005670635029673576 2023-01-24 03:59:35.242437: step: 158/464, loss: 0.07691482454538345 2023-01-24 03:59:35.891101: step: 160/464, loss: 0.1446666568517685 2023-01-24 03:59:36.494179: step: 162/464, loss: 0.033114343881607056 2023-01-24 03:59:37.108145: step: 164/464, loss: 0.0020079005043953657 2023-01-24 03:59:37.720682: step: 166/464, loss: 0.0011540675768628716 2023-01-24 03:59:38.326776: step: 168/464, loss: 0.02620735578238964 2023-01-24 03:59:38.920931: step: 170/464, loss: 0.014304089359939098 2023-01-24 03:59:39.562109: step: 172/464, loss: 0.027400551363825798 2023-01-24 03:59:40.250506: step: 174/464, loss: 0.002438499126583338 2023-01-24 03:59:40.885382: step: 176/464, loss: 0.00013663464051205665 2023-01-24 03:59:41.468265: step: 178/464, loss: 0.009929274208843708 2023-01-24 03:59:42.114406: step: 180/464, loss: 0.550064742565155 2023-01-24 03:59:42.815180: step: 182/464, loss: 0.015332071110606194 2023-01-24 03:59:43.428799: step: 184/464, loss: 0.009523403830826283 2023-01-24 03:59:44.033550: step: 186/464, loss: 0.02809010073542595 2023-01-24 03:59:44.695957: step: 188/464, loss: 0.19071964919567108 2023-01-24 03:59:45.282179: step: 190/464, loss: 0.6839189529418945 2023-01-24 03:59:45.938016: step: 192/464, loss: 0.045062173157930374 2023-01-24 03:59:46.661089: step: 194/464, loss: 0.022391490638256073 2023-01-24 03:59:47.307004: step: 196/464, loss: 0.0592818409204483 2023-01-24 03:59:47.933591: step: 198/464, loss: 0.4567221999168396 2023-01-24 03:59:48.517971: step: 200/464, loss: 0.00868324562907219 2023-01-24 03:59:49.103123: step: 202/464, loss: 0.014107579365372658 2023-01-24 03:59:49.726955: step: 204/464, loss: 0.044665493071079254 2023-01-24 03:59:50.371043: step: 206/464, loss: 0.0018825248116627336 2023-01-24 03:59:51.033316: step: 208/464, loss: 0.02518109232187271 2023-01-24 03:59:51.678790: step: 210/464, loss: 0.016200797632336617 2023-01-24 03:59:52.281832: step: 212/464, loss: 0.008192908018827438 2023-01-24 03:59:52.876307: step: 214/464, loss: 0.019114751368761063 2023-01-24 03:59:53.576805: step: 216/464, loss: 0.014841261319816113 2023-01-24 03:59:54.159183: step: 218/464, loss: 0.019637972116470337 2023-01-24 03:59:54.817386: step: 220/464, loss: 0.004951437469571829 2023-01-24 03:59:55.494785: step: 222/464, loss: 0.01096371840685606 2023-01-24 03:59:56.085068: step: 224/464, loss: 0.00991129782050848 2023-01-24 03:59:56.741847: step: 226/464, loss: 0.020300405099987984 2023-01-24 03:59:57.344554: step: 228/464, loss: 0.005053536035120487 2023-01-24 03:59:58.044484: step: 230/464, loss: 0.08775275945663452 2023-01-24 03:59:58.725393: step: 232/464, loss: 0.010210863314568996 2023-01-24 03:59:59.300988: step: 234/464, loss: 0.06775163114070892 2023-01-24 03:59:59.983633: step: 236/464, loss: 0.14004534482955933 2023-01-24 04:00:00.570503: step: 238/464, loss: 0.042682547122240067 2023-01-24 04:00:01.164244: step: 240/464, loss: 0.004201119765639305 2023-01-24 04:00:01.740651: step: 242/464, loss: 0.004126168787479401 2023-01-24 04:00:02.379357: step: 244/464, loss: 0.005134413484483957 2023-01-24 04:00:03.044334: step: 246/464, loss: 0.08357443660497665 2023-01-24 04:00:03.632702: step: 248/464, loss: 0.06599913537502289 2023-01-24 04:00:04.259820: step: 250/464, loss: 0.019962815567851067 2023-01-24 04:00:04.889964: step: 252/464, loss: 0.05692172423005104 2023-01-24 04:00:05.597016: step: 254/464, loss: 0.05064583569765091 2023-01-24 04:00:06.173807: step: 256/464, loss: 0.01198429986834526 2023-01-24 04:00:06.859631: step: 258/464, loss: 0.06167212128639221 2023-01-24 04:00:07.498155: step: 260/464, loss: 0.27730756998062134 2023-01-24 04:00:08.160782: step: 262/464, loss: 0.030600082129240036 2023-01-24 04:00:08.737993: step: 264/464, loss: 0.00288955494761467 2023-01-24 04:00:09.413351: step: 266/464, loss: 0.02289651893079281 2023-01-24 04:00:10.038425: step: 268/464, loss: 0.011433032341301441 2023-01-24 04:00:10.669539: step: 270/464, loss: 0.22712191939353943 2023-01-24 04:00:11.296829: step: 272/464, loss: 0.015967974439263344 2023-01-24 04:00:11.880247: step: 274/464, loss: 0.009424269199371338 2023-01-24 04:00:12.512717: step: 276/464, loss: 0.008342879824340343 2023-01-24 04:00:13.150212: step: 278/464, loss: 0.16277647018432617 2023-01-24 04:00:13.730115: step: 280/464, loss: 0.0248686745762825 2023-01-24 04:00:14.344837: step: 282/464, loss: 0.016462594270706177 2023-01-24 04:00:14.956860: step: 284/464, loss: 0.03282509371638298 2023-01-24 04:00:15.672661: step: 286/464, loss: 0.033736176788806915 2023-01-24 04:00:16.377491: step: 288/464, loss: 0.012655275873839855 2023-01-24 04:00:16.993956: step: 290/464, loss: 0.06633565574884415 2023-01-24 04:00:17.679237: step: 292/464, loss: 0.032862331718206406 2023-01-24 04:00:18.352077: step: 294/464, loss: 0.028874710202217102 2023-01-24 04:00:18.979402: step: 296/464, loss: 0.5041033625602722 2023-01-24 04:00:19.576192: step: 298/464, loss: 0.0024245341774076223 2023-01-24 04:00:20.199926: step: 300/464, loss: 0.013810448348522186 2023-01-24 04:00:20.784756: step: 302/464, loss: 0.10798203200101852 2023-01-24 04:00:21.397780: step: 304/464, loss: 0.051899854093790054 2023-01-24 04:00:22.019746: step: 306/464, loss: 0.060129180550575256 2023-01-24 04:00:22.625625: step: 308/464, loss: 0.009245732799172401 2023-01-24 04:00:23.213126: step: 310/464, loss: 0.05325021967291832 2023-01-24 04:00:23.811441: step: 312/464, loss: 0.005145874805748463 2023-01-24 04:00:24.414736: step: 314/464, loss: 0.03422572836279869 2023-01-24 04:00:25.046575: step: 316/464, loss: 0.011272044852375984 2023-01-24 04:00:25.663674: step: 318/464, loss: 0.05873195827007294 2023-01-24 04:00:26.271374: step: 320/464, loss: 0.020659292116761208 2023-01-24 04:00:26.923198: step: 322/464, loss: 0.025461694225668907 2023-01-24 04:00:27.637751: step: 324/464, loss: 0.15957403182983398 2023-01-24 04:00:28.297933: step: 326/464, loss: 0.049867674708366394 2023-01-24 04:00:28.929586: step: 328/464, loss: 0.049304038286209106 2023-01-24 04:00:29.547277: step: 330/464, loss: 0.0015535791171714664 2023-01-24 04:00:30.125865: step: 332/464, loss: 0.02825341187417507 2023-01-24 04:00:30.705988: step: 334/464, loss: 0.10240758955478668 2023-01-24 04:00:31.274963: step: 336/464, loss: 0.06264287978410721 2023-01-24 04:00:31.892369: step: 338/464, loss: 0.035395149141550064 2023-01-24 04:00:32.498913: step: 340/464, loss: 0.02415728010237217 2023-01-24 04:00:33.168394: step: 342/464, loss: 0.04024200513958931 2023-01-24 04:00:33.790221: step: 344/464, loss: 0.005539848934859037 2023-01-24 04:00:34.321923: step: 346/464, loss: 0.006253221072256565 2023-01-24 04:00:34.949288: step: 348/464, loss: 0.05701465159654617 2023-01-24 04:00:35.573555: step: 350/464, loss: 0.041590526700019836 2023-01-24 04:00:36.150266: step: 352/464, loss: 0.02236504666507244 2023-01-24 04:00:36.808274: step: 354/464, loss: 0.035344481468200684 2023-01-24 04:00:37.394838: step: 356/464, loss: 0.0006936495774425566 2023-01-24 04:00:38.008555: step: 358/464, loss: 0.07155963033437729 2023-01-24 04:00:38.585175: step: 360/464, loss: 0.03924102708697319 2023-01-24 04:00:39.235555: step: 362/464, loss: 0.03008159250020981 2023-01-24 04:00:39.895244: step: 364/464, loss: 0.018457040190696716 2023-01-24 04:00:40.482679: step: 366/464, loss: 0.11442568153142929 2023-01-24 04:00:41.122602: step: 368/464, loss: 0.0151005107909441 2023-01-24 04:00:41.771903: step: 370/464, loss: 0.02877574786543846 2023-01-24 04:00:42.441458: step: 372/464, loss: 0.011581357568502426 2023-01-24 04:00:43.076918: step: 374/464, loss: 0.019876640290021896 2023-01-24 04:00:43.780218: step: 376/464, loss: 0.009046114049851894 2023-01-24 04:00:44.445839: step: 378/464, loss: 0.03848227113485336 2023-01-24 04:00:45.081304: step: 380/464, loss: 0.005663391202688217 2023-01-24 04:00:45.796063: step: 382/464, loss: 0.03750398010015488 2023-01-24 04:00:46.429392: step: 384/464, loss: 0.007229901850223541 2023-01-24 04:00:47.070951: step: 386/464, loss: 0.01637895777821541 2023-01-24 04:00:47.692943: step: 388/464, loss: 0.11286875605583191 2023-01-24 04:00:48.300561: step: 390/464, loss: 0.01437292154878378 2023-01-24 04:00:48.970471: step: 392/464, loss: 0.024987051263451576 2023-01-24 04:00:49.637365: step: 394/464, loss: 0.000298120838124305 2023-01-24 04:00:50.237709: step: 396/464, loss: 0.060187749564647675 2023-01-24 04:00:50.808103: step: 398/464, loss: 0.008325144648551941 2023-01-24 04:00:51.381802: step: 400/464, loss: 0.002636404475197196 2023-01-24 04:00:51.994386: step: 402/464, loss: 0.010944833047688007 2023-01-24 04:00:52.665143: step: 404/464, loss: 0.06787846237421036 2023-01-24 04:00:53.241090: step: 406/464, loss: 0.035629112273454666 2023-01-24 04:00:53.816488: step: 408/464, loss: 0.02141980081796646 2023-01-24 04:00:54.357150: step: 410/464, loss: 0.0010935988975688815 2023-01-24 04:00:55.017991: step: 412/464, loss: 0.02366950921714306 2023-01-24 04:00:55.624931: step: 414/464, loss: 0.0024041745346039534 2023-01-24 04:00:56.277691: step: 416/464, loss: 0.0168649572879076 2023-01-24 04:00:56.858557: step: 418/464, loss: 0.022741887718439102 2023-01-24 04:00:57.535414: step: 420/464, loss: 0.0457460917532444 2023-01-24 04:00:58.159971: step: 422/464, loss: 0.01615896075963974 2023-01-24 04:00:58.763508: step: 424/464, loss: 0.0027021560817956924 2023-01-24 04:00:59.352877: step: 426/464, loss: 0.02107520028948784 2023-01-24 04:00:59.958619: step: 428/464, loss: 0.0010393020929768682 2023-01-24 04:01:00.573786: step: 430/464, loss: 0.03457380831241608 2023-01-24 04:01:01.160073: step: 432/464, loss: 0.006112591363489628 2023-01-24 04:01:01.771282: step: 434/464, loss: 0.017656736075878143 2023-01-24 04:01:02.379623: step: 436/464, loss: 0.0700252577662468 2023-01-24 04:01:03.019107: step: 438/464, loss: 0.019659023731946945 2023-01-24 04:01:03.680289: step: 440/464, loss: 0.018227603286504745 2023-01-24 04:01:04.315998: step: 442/464, loss: 0.002650787588208914 2023-01-24 04:01:04.976561: step: 444/464, loss: 0.03067925199866295 2023-01-24 04:01:05.584269: step: 446/464, loss: 0.045789267867803574 2023-01-24 04:01:06.164264: step: 448/464, loss: 0.04718998074531555 2023-01-24 04:01:06.836742: step: 450/464, loss: 0.03693093731999397 2023-01-24 04:01:07.444944: step: 452/464, loss: 0.05785346403717995 2023-01-24 04:01:08.005105: step: 454/464, loss: 0.012178266420960426 2023-01-24 04:01:08.586105: step: 456/464, loss: 0.0007852213457226753 2023-01-24 04:01:09.164044: step: 458/464, loss: 0.03168824315071106 2023-01-24 04:01:09.734838: step: 460/464, loss: 0.0010413274867460132 2023-01-24 04:01:10.418167: step: 462/464, loss: 0.060342393815517426 2023-01-24 04:01:11.050342: step: 464/464, loss: 0.00010225496225757524 2023-01-24 04:01:11.670074: step: 466/464, loss: 0.11511260271072388 2023-01-24 04:01:12.274526: step: 468/464, loss: 0.23531371355056763 2023-01-24 04:01:12.924501: step: 470/464, loss: 0.031879253685474396 2023-01-24 04:01:13.540956: step: 472/464, loss: 0.020632173866033554 2023-01-24 04:01:14.199637: step: 474/464, loss: 0.017486222088336945 2023-01-24 04:01:14.835577: step: 476/464, loss: 0.02898748219013214 2023-01-24 04:01:15.441415: step: 478/464, loss: 0.0013614544877782464 2023-01-24 04:01:16.127731: step: 480/464, loss: 0.03617943823337555 2023-01-24 04:01:16.733063: step: 482/464, loss: 0.0786338523030281 2023-01-24 04:01:17.332950: step: 484/464, loss: 0.018358413130044937 2023-01-24 04:01:17.930926: step: 486/464, loss: 0.020440151914954185 2023-01-24 04:01:18.543014: step: 488/464, loss: 0.0033799484372138977 2023-01-24 04:01:19.146798: step: 490/464, loss: 0.007676076143980026 2023-01-24 04:01:19.727001: step: 492/464, loss: 0.015695925801992416 2023-01-24 04:01:20.371162: step: 494/464, loss: 0.015165206044912338 2023-01-24 04:01:21.018425: step: 496/464, loss: 0.014235205017030239 2023-01-24 04:01:21.674184: step: 498/464, loss: 0.7016212940216064 2023-01-24 04:01:22.303177: step: 500/464, loss: 0.025006012991070747 2023-01-24 04:01:22.957401: step: 502/464, loss: 0.005251850001513958 2023-01-24 04:01:23.559937: step: 504/464, loss: 0.2685335874557495 2023-01-24 04:01:24.239302: step: 506/464, loss: 0.0096308384090662 2023-01-24 04:01:24.845368: step: 508/464, loss: 0.01977044716477394 2023-01-24 04:01:25.459457: step: 510/464, loss: 0.008932722732424736 2023-01-24 04:01:26.093573: step: 512/464, loss: 0.002780807903036475 2023-01-24 04:01:26.696264: step: 514/464, loss: 0.08242969214916229 2023-01-24 04:01:27.316834: step: 516/464, loss: 0.02369045838713646 2023-01-24 04:01:27.941335: step: 518/464, loss: 0.007691401522606611 2023-01-24 04:01:28.582135: step: 520/464, loss: 0.07566172629594803 2023-01-24 04:01:29.230441: step: 522/464, loss: 0.05063261091709137 2023-01-24 04:01:29.905177: step: 524/464, loss: 0.013660422526299953 2023-01-24 04:01:30.537936: step: 526/464, loss: 0.24020728468894958 2023-01-24 04:01:31.087613: step: 528/464, loss: 0.01348627358675003 2023-01-24 04:01:31.660068: step: 530/464, loss: 0.007197367958724499 2023-01-24 04:01:32.242038: step: 532/464, loss: 0.010228368453681469 2023-01-24 04:01:32.916403: step: 534/464, loss: 0.023895204067230225 2023-01-24 04:01:33.491172: step: 536/464, loss: 0.021335279569029808 2023-01-24 04:01:34.146850: step: 538/464, loss: 0.08497224748134613 2023-01-24 04:01:34.719353: step: 540/464, loss: 0.022408263757824898 2023-01-24 04:01:35.327062: step: 542/464, loss: 0.02243008278310299 2023-01-24 04:01:35.869732: step: 544/464, loss: 0.02225842885673046 2023-01-24 04:01:36.478953: step: 546/464, loss: 0.035303566604852676 2023-01-24 04:01:37.015135: step: 548/464, loss: 0.000824810762424022 2023-01-24 04:01:37.602052: step: 550/464, loss: 0.2665346562862396 2023-01-24 04:01:38.265021: step: 552/464, loss: 0.012323501519858837 2023-01-24 04:01:39.012156: step: 554/464, loss: 0.1625223010778427 2023-01-24 04:01:39.642192: step: 556/464, loss: 0.06891467422246933 2023-01-24 04:01:40.280112: step: 558/464, loss: 0.01002768985927105 2023-01-24 04:01:40.851599: step: 560/464, loss: 0.033113449811935425 2023-01-24 04:01:41.427640: step: 562/464, loss: 0.0017599809216335416 2023-01-24 04:01:42.016169: step: 564/464, loss: 0.007383772637695074 2023-01-24 04:01:42.616194: step: 566/464, loss: 0.05605030059814453 2023-01-24 04:01:43.260165: step: 568/464, loss: 0.013501882553100586 2023-01-24 04:01:43.839851: step: 570/464, loss: 0.1326901763677597 2023-01-24 04:01:44.494269: step: 572/464, loss: 0.013902074657380581 2023-01-24 04:01:45.060657: step: 574/464, loss: 0.0038822093047201633 2023-01-24 04:01:45.720934: step: 576/464, loss: 0.06847728043794632 2023-01-24 04:01:46.357133: step: 578/464, loss: 0.2082699090242386 2023-01-24 04:01:46.966812: step: 580/464, loss: 0.03914272040128708 2023-01-24 04:01:47.701773: step: 582/464, loss: 0.02024165354669094 2023-01-24 04:01:48.311903: step: 584/464, loss: 0.14907458424568176 2023-01-24 04:01:48.975546: step: 586/464, loss: 0.00691397488117218 2023-01-24 04:01:49.625639: step: 588/464, loss: 0.04703788459300995 2023-01-24 04:01:50.269392: step: 590/464, loss: 0.004772043786942959 2023-01-24 04:01:50.899551: step: 592/464, loss: 0.023602420464158058 2023-01-24 04:01:51.483315: step: 594/464, loss: 0.007593000307679176 2023-01-24 04:01:52.087337: step: 596/464, loss: 0.03346647694706917 2023-01-24 04:01:52.715922: step: 598/464, loss: 0.03485536202788353 2023-01-24 04:01:53.356818: step: 600/464, loss: 0.02620747685432434 2023-01-24 04:01:53.962496: step: 602/464, loss: 0.005401256028562784 2023-01-24 04:01:54.673588: step: 604/464, loss: 0.18123404681682587 2023-01-24 04:01:55.271040: step: 606/464, loss: 0.01579710841178894 2023-01-24 04:01:55.883436: step: 608/464, loss: 0.028616640716791153 2023-01-24 04:01:56.471202: step: 610/464, loss: 0.0857577919960022 2023-01-24 04:01:57.058778: step: 612/464, loss: 0.0643952488899231 2023-01-24 04:01:57.680266: step: 614/464, loss: 0.027972189709544182 2023-01-24 04:01:58.447350: step: 616/464, loss: 0.036930255591869354 2023-01-24 04:01:59.101091: step: 618/464, loss: 0.010757374577224255 2023-01-24 04:01:59.685811: step: 620/464, loss: 0.006771758198738098 2023-01-24 04:02:00.358495: step: 622/464, loss: 0.022771861404180527 2023-01-24 04:02:00.968377: step: 624/464, loss: 0.004120378289371729 2023-01-24 04:02:01.639529: step: 626/464, loss: 0.03058917075395584 2023-01-24 04:02:02.281251: step: 628/464, loss: 0.05850667878985405 2023-01-24 04:02:02.879275: step: 630/464, loss: 0.013523498550057411 2023-01-24 04:02:03.499177: step: 632/464, loss: 0.2245532125234604 2023-01-24 04:02:04.296451: step: 634/464, loss: 0.0036853866185992956 2023-01-24 04:02:04.949795: step: 636/464, loss: 0.02361578680574894 2023-01-24 04:02:05.589075: step: 638/464, loss: 0.015455491840839386 2023-01-24 04:02:06.176784: step: 640/464, loss: 0.024254411458969116 2023-01-24 04:02:06.892805: step: 642/464, loss: 0.04180416092276573 2023-01-24 04:02:07.586690: step: 644/464, loss: 0.03187788277864456 2023-01-24 04:02:08.209149: step: 646/464, loss: 0.0003483604232314974 2023-01-24 04:02:08.833338: step: 648/464, loss: 0.00013609840243589133 2023-01-24 04:02:09.479664: step: 650/464, loss: 0.04429443180561066 2023-01-24 04:02:10.073176: step: 652/464, loss: 0.04185933619737625 2023-01-24 04:02:10.645209: step: 654/464, loss: 0.006211341358721256 2023-01-24 04:02:11.239366: step: 656/464, loss: 0.034331731498241425 2023-01-24 04:02:11.845030: step: 658/464, loss: 0.08382061123847961 2023-01-24 04:02:12.434326: step: 660/464, loss: 0.005517153535038233 2023-01-24 04:02:13.106390: step: 662/464, loss: 0.011968887411057949 2023-01-24 04:02:13.705447: step: 664/464, loss: 0.002495914464816451 2023-01-24 04:02:14.289718: step: 666/464, loss: 0.015255164355039597 2023-01-24 04:02:14.868216: step: 668/464, loss: 0.026585770770907402 2023-01-24 04:02:15.484527: step: 670/464, loss: 0.05654933676123619 2023-01-24 04:02:16.060465: step: 672/464, loss: 0.0011084601283073425 2023-01-24 04:02:16.647233: step: 674/464, loss: 0.05362668260931969 2023-01-24 04:02:17.259452: step: 676/464, loss: 0.03868886083364487 2023-01-24 04:02:17.883890: step: 678/464, loss: 0.016717160120606422 2023-01-24 04:02:18.486725: step: 680/464, loss: 0.03761445730924606 2023-01-24 04:02:19.121879: step: 682/464, loss: 0.037883222103118896 2023-01-24 04:02:19.708237: step: 684/464, loss: 0.16490760445594788 2023-01-24 04:02:20.276484: step: 686/464, loss: 0.019592376425862312 2023-01-24 04:02:20.831377: step: 688/464, loss: 0.01852923259139061 2023-01-24 04:02:21.428776: step: 690/464, loss: 0.0750657320022583 2023-01-24 04:02:22.060791: step: 692/464, loss: 0.03591347485780716 2023-01-24 04:02:22.707384: step: 694/464, loss: 0.030060309916734695 2023-01-24 04:02:23.383355: step: 696/464, loss: 0.03932882100343704 2023-01-24 04:02:24.026981: step: 698/464, loss: 0.10937557369470596 2023-01-24 04:02:24.657055: step: 700/464, loss: 0.03864862024784088 2023-01-24 04:02:25.301874: step: 702/464, loss: 0.02068173885345459 2023-01-24 04:02:25.916077: step: 704/464, loss: 0.0007597276126034558 2023-01-24 04:02:26.537632: step: 706/464, loss: 0.0005308131221681833 2023-01-24 04:02:27.150700: step: 708/464, loss: 0.005687220022082329 2023-01-24 04:02:27.790220: step: 710/464, loss: 0.027716923505067825 2023-01-24 04:02:28.401385: step: 712/464, loss: 0.017588965594768524 2023-01-24 04:02:28.989567: step: 714/464, loss: 0.042746786028146744 2023-01-24 04:02:29.612901: step: 716/464, loss: 0.11261896789073944 2023-01-24 04:02:30.216783: step: 718/464, loss: 0.01578381098806858 2023-01-24 04:02:30.796705: step: 720/464, loss: 0.04451392590999603 2023-01-24 04:02:31.462881: step: 722/464, loss: 0.6705551147460938 2023-01-24 04:02:32.081146: step: 724/464, loss: 0.8189102411270142 2023-01-24 04:02:32.700910: step: 726/464, loss: 0.011479933746159077 2023-01-24 04:02:33.342260: step: 728/464, loss: 0.015497986227273941 2023-01-24 04:02:33.969045: step: 730/464, loss: 0.09625409543514252 2023-01-24 04:02:34.614484: step: 732/464, loss: 0.03274373710155487 2023-01-24 04:02:35.162168: step: 734/464, loss: 0.040542371571063995 2023-01-24 04:02:35.774531: step: 736/464, loss: 0.010151658207178116 2023-01-24 04:02:36.444307: step: 738/464, loss: 0.009617464616894722 2023-01-24 04:02:37.083422: step: 740/464, loss: 0.00679465476423502 2023-01-24 04:02:37.704332: step: 742/464, loss: 0.013655357994139194 2023-01-24 04:02:38.341064: step: 744/464, loss: 0.007171467877924442 2023-01-24 04:02:38.949177: step: 746/464, loss: 0.0001739412546157837 2023-01-24 04:02:39.582577: step: 748/464, loss: 0.02507726289331913 2023-01-24 04:02:40.136876: step: 750/464, loss: 0.01518337707966566 2023-01-24 04:02:40.735366: step: 752/464, loss: 0.04084146022796631 2023-01-24 04:02:41.373500: step: 754/464, loss: 0.0035083615221083164 2023-01-24 04:02:42.043269: step: 756/464, loss: 0.012889510951936245 2023-01-24 04:02:42.640731: step: 758/464, loss: 0.9736987948417664 2023-01-24 04:02:43.239505: step: 760/464, loss: 0.07425379753112793 2023-01-24 04:02:43.890596: step: 762/464, loss: 0.05876192823052406 2023-01-24 04:02:44.442533: step: 764/464, loss: 0.005951893515884876 2023-01-24 04:02:45.050393: step: 766/464, loss: 0.02159908041357994 2023-01-24 04:02:45.835076: step: 768/464, loss: 0.038079917430877686 2023-01-24 04:02:46.455683: step: 770/464, loss: 0.09124045819044113 2023-01-24 04:02:47.060156: step: 772/464, loss: 0.023096373304724693 2023-01-24 04:02:47.658509: step: 774/464, loss: 0.0024730029981583357 2023-01-24 04:02:48.335628: step: 776/464, loss: 0.050720226019620895 2023-01-24 04:02:48.940103: step: 778/464, loss: 0.024673130363225937 2023-01-24 04:02:49.591012: step: 780/464, loss: 0.03335406631231308 2023-01-24 04:02:50.223360: step: 782/464, loss: 0.004928721114993095 2023-01-24 04:02:50.827283: step: 784/464, loss: 0.024341443553566933 2023-01-24 04:02:51.367969: step: 786/464, loss: 0.01013362966477871 2023-01-24 04:02:51.984247: step: 788/464, loss: 0.006453828886151314 2023-01-24 04:02:52.588899: step: 790/464, loss: 0.16598333418369293 2023-01-24 04:02:53.213553: step: 792/464, loss: 0.03524326533079147 2023-01-24 04:02:53.887431: step: 794/464, loss: 0.1181488037109375 2023-01-24 04:02:54.512070: step: 796/464, loss: 0.09138718247413635 2023-01-24 04:02:55.159407: step: 798/464, loss: 0.5004169940948486 2023-01-24 04:02:55.790524: step: 800/464, loss: 0.07259329408407211 2023-01-24 04:02:56.379425: step: 802/464, loss: 0.015081651508808136 2023-01-24 04:02:56.979933: step: 804/464, loss: 0.038550931960344315 2023-01-24 04:02:57.731124: step: 806/464, loss: 0.12904329597949982 2023-01-24 04:02:58.298578: step: 808/464, loss: 0.40018248558044434 2023-01-24 04:02:58.917392: step: 810/464, loss: 0.04147998243570328 2023-01-24 04:02:59.510120: step: 812/464, loss: 0.02219959907233715 2023-01-24 04:03:00.099776: step: 814/464, loss: 0.042695704847574234 2023-01-24 04:03:00.784267: step: 816/464, loss: 0.015478466637432575 2023-01-24 04:03:01.406293: step: 818/464, loss: 0.04031830653548241 2023-01-24 04:03:01.962613: step: 820/464, loss: 0.002473577158525586 2023-01-24 04:03:02.594076: step: 822/464, loss: 0.013560102321207523 2023-01-24 04:03:03.277291: step: 824/464, loss: 0.021678712218999863 2023-01-24 04:03:03.878893: step: 826/464, loss: 0.003429161384701729 2023-01-24 04:03:04.509340: step: 828/464, loss: 0.031064260751008987 2023-01-24 04:03:05.098696: step: 830/464, loss: 0.016433386132121086 2023-01-24 04:03:05.781081: step: 832/464, loss: 0.025123601779341698 2023-01-24 04:03:06.347152: step: 834/464, loss: 0.02761431410908699 2023-01-24 04:03:07.066876: step: 836/464, loss: 0.006232484709471464 2023-01-24 04:03:07.708682: step: 838/464, loss: 0.00811823084950447 2023-01-24 04:03:08.327657: step: 840/464, loss: 0.12833282351493835 2023-01-24 04:03:08.952522: step: 842/464, loss: 0.009222570806741714 2023-01-24 04:03:09.522395: step: 844/464, loss: 0.07020155340433121 2023-01-24 04:03:10.100824: step: 846/464, loss: 0.002051841700449586 2023-01-24 04:03:10.743425: step: 848/464, loss: 0.03661072626709938 2023-01-24 04:03:11.387747: step: 850/464, loss: 0.026866046711802483 2023-01-24 04:03:11.973023: step: 852/464, loss: 0.007825160399079323 2023-01-24 04:03:12.553899: step: 854/464, loss: 0.04166368395090103 2023-01-24 04:03:13.183264: step: 856/464, loss: 0.0006112029659561813 2023-01-24 04:03:13.768235: step: 858/464, loss: 0.05629737675189972 2023-01-24 04:03:14.460984: step: 860/464, loss: 0.014596642926335335 2023-01-24 04:03:15.103420: step: 862/464, loss: 0.09020011126995087 2023-01-24 04:03:15.775166: step: 864/464, loss: 0.0339503139257431 2023-01-24 04:03:16.368688: step: 866/464, loss: 0.022090664133429527 2023-01-24 04:03:16.990268: step: 868/464, loss: 0.04064103960990906 2023-01-24 04:03:17.590465: step: 870/464, loss: 0.00135325628798455 2023-01-24 04:03:18.363951: step: 872/464, loss: 0.011642823927104473 2023-01-24 04:03:18.968713: step: 874/464, loss: 0.05684065818786621 2023-01-24 04:03:19.628018: step: 876/464, loss: 0.008805211633443832 2023-01-24 04:03:20.238629: step: 878/464, loss: 0.04149453341960907 2023-01-24 04:03:20.836357: step: 880/464, loss: 0.021980678662657738 2023-01-24 04:03:21.443125: step: 882/464, loss: 0.4108825922012329 2023-01-24 04:03:22.083691: step: 884/464, loss: 0.051128219813108444 2023-01-24 04:03:22.702465: step: 886/464, loss: 0.0010531533043831587 2023-01-24 04:03:23.321338: step: 888/464, loss: 0.0466160923242569 2023-01-24 04:03:23.919066: step: 890/464, loss: 0.07535409182310104 2023-01-24 04:03:24.718595: step: 892/464, loss: 0.029860571026802063 2023-01-24 04:03:25.424371: step: 894/464, loss: 0.15788963437080383 2023-01-24 04:03:26.093364: step: 896/464, loss: 0.01945146918296814 2023-01-24 04:03:26.704210: step: 898/464, loss: 0.02043139562010765 2023-01-24 04:03:27.326269: step: 900/464, loss: 0.06940814107656479 2023-01-24 04:03:27.964240: step: 902/464, loss: 0.010013815015554428 2023-01-24 04:03:28.597525: step: 904/464, loss: 0.5424590110778809 2023-01-24 04:03:29.235856: step: 906/464, loss: 0.01136374194175005 2023-01-24 04:03:29.813347: step: 908/464, loss: 0.03829554468393326 2023-01-24 04:03:30.396754: step: 910/464, loss: 0.0687684416770935 2023-01-24 04:03:30.999918: step: 912/464, loss: 0.002499011345207691 2023-01-24 04:03:31.643754: step: 914/464, loss: 0.13825161755084991 2023-01-24 04:03:32.167170: step: 916/464, loss: 0.024322301149368286 2023-01-24 04:03:32.818506: step: 918/464, loss: 0.27383631467819214 2023-01-24 04:03:33.409797: step: 920/464, loss: 0.0559036023914814 2023-01-24 04:03:33.963141: step: 922/464, loss: 0.09064502269029617 2023-01-24 04:03:34.589256: step: 924/464, loss: 0.05530287325382233 2023-01-24 04:03:35.179767: step: 926/464, loss: 0.08000970631837845 2023-01-24 04:03:35.779120: step: 928/464, loss: 0.014721273444592953 2023-01-24 04:03:36.248311: step: 930/464, loss: 0.032917868345975876 ================================================== Loss: 0.055 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3337202925917449, 'r': 0.3254881032109239, 'f1': 0.3295527961424724}, 'combined': 0.24282837610497965, 'epoch': 26} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33164524265794326, 'r': 0.29960959364314654, 'f1': 0.31481452710497854}, 'combined': 0.2055265824623176, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3298002981795355, 'r': 0.3323035262492094, 'f1': 0.33104718021424073}, 'combined': 0.24392950121049317, 'epoch': 26} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3420296501696091, 'r': 0.3026978136735639, 'f1': 0.3211640053325173}, 'combined': 0.20967183767822373, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35757190113807763, 'r': 0.33585975533842205, 'f1': 0.34637591206134716}, 'combined': 0.2552243562557295, 'epoch': 26} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.34082703877774, 'r': 0.295048982051383, 'f1': 0.3162901809564629}, 'combined': 0.2064899626969654, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27192982456140347, 'r': 0.2952380952380952, 'f1': 0.28310502283105016}, 'combined': 0.18873668188736675, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30833333333333335, 'r': 0.40217391304347827, 'f1': 0.34905660377358494}, 'combined': 0.17452830188679247, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:06:12.153721: step: 2/464, loss: 0.03520013764500618 2023-01-24 04:06:12.745763: step: 4/464, loss: 0.01785721816122532 2023-01-24 04:06:13.367998: step: 6/464, loss: 0.16269555687904358 2023-01-24 04:06:13.970626: step: 8/464, loss: 0.07691093534231186 2023-01-24 04:06:14.623456: step: 10/464, loss: 0.007392262574285269 2023-01-24 04:06:15.245445: step: 12/464, loss: 0.18206527829170227 2023-01-24 04:06:15.915819: step: 14/464, loss: 0.07249999046325684 2023-01-24 04:06:16.555263: step: 16/464, loss: 0.021117065101861954 2023-01-24 04:06:17.183517: step: 18/464, loss: 0.030323537066578865 2023-01-24 04:06:17.791671: step: 20/464, loss: 0.02338639833033085 2023-01-24 04:06:18.377661: step: 22/464, loss: 0.1333690583705902 2023-01-24 04:06:18.991744: step: 24/464, loss: 0.0010341749293729663 2023-01-24 04:06:19.697313: step: 26/464, loss: 0.007895023562014103 2023-01-24 04:06:20.389083: step: 28/464, loss: 2.761321783065796 2023-01-24 04:06:21.077107: step: 30/464, loss: 0.035957738757133484 2023-01-24 04:06:21.670288: step: 32/464, loss: 0.028074631467461586 2023-01-24 04:06:22.243177: step: 34/464, loss: 0.011213401332497597 2023-01-24 04:06:22.860300: step: 36/464, loss: 0.005153039935976267 2023-01-24 04:06:23.532270: step: 38/464, loss: 0.002318080049008131 2023-01-24 04:06:24.160014: step: 40/464, loss: 0.006432653870433569 2023-01-24 04:06:24.799417: step: 42/464, loss: 0.007253899239003658 2023-01-24 04:06:25.387315: step: 44/464, loss: 0.12813512980937958 2023-01-24 04:06:25.979318: step: 46/464, loss: 0.08304651826620102 2023-01-24 04:06:26.687057: step: 48/464, loss: 0.007072602864354849 2023-01-24 04:06:27.298527: step: 50/464, loss: 0.003953339997678995 2023-01-24 04:06:27.925531: step: 52/464, loss: 0.03858869522809982 2023-01-24 04:06:28.556485: step: 54/464, loss: 1.0059211254119873 2023-01-24 04:06:29.171703: step: 56/464, loss: 0.07535743713378906 2023-01-24 04:06:29.888491: step: 58/464, loss: 0.005572025198489428 2023-01-24 04:06:30.528230: step: 60/464, loss: 0.011123955249786377 2023-01-24 04:06:31.120371: step: 62/464, loss: 0.002408726839348674 2023-01-24 04:06:31.688738: step: 64/464, loss: 0.006805627141147852 2023-01-24 04:06:32.280389: step: 66/464, loss: 0.018881412222981453 2023-01-24 04:06:32.831218: step: 68/464, loss: 0.02203463576734066 2023-01-24 04:06:33.392723: step: 70/464, loss: 0.026272814720869064 2023-01-24 04:06:34.048363: step: 72/464, loss: 0.012682809494435787 2023-01-24 04:06:34.615593: step: 74/464, loss: 0.05023544281721115 2023-01-24 04:06:35.270635: step: 76/464, loss: 0.006426983047276735 2023-01-24 04:06:35.943424: step: 78/464, loss: 0.0036636111326515675 2023-01-24 04:06:36.538794: step: 80/464, loss: 0.026904908940196037 2023-01-24 04:06:37.125739: step: 82/464, loss: 0.024857092648744583 2023-01-24 04:06:37.736486: step: 84/464, loss: 0.07651542872190475 2023-01-24 04:06:38.439259: step: 86/464, loss: 0.159203439950943 2023-01-24 04:06:39.120605: step: 88/464, loss: 0.002764118369668722 2023-01-24 04:06:39.743559: step: 90/464, loss: 0.010748973116278648 2023-01-24 04:06:40.399489: step: 92/464, loss: 0.7456044554710388 2023-01-24 04:06:41.061254: step: 94/464, loss: 0.0021835712250322104 2023-01-24 04:06:41.765597: step: 96/464, loss: 0.000798621098510921 2023-01-24 04:06:42.408523: step: 98/464, loss: 0.016292361542582512 2023-01-24 04:06:43.136404: step: 100/464, loss: 0.043895792216062546 2023-01-24 04:06:43.701239: step: 102/464, loss: 0.059375472366809845 2023-01-24 04:06:44.355588: step: 104/464, loss: 0.025569267570972443 2023-01-24 04:06:45.028014: step: 106/464, loss: 0.016790715977549553 2023-01-24 04:06:45.662758: step: 108/464, loss: 0.09495478123426437 2023-01-24 04:06:46.286986: step: 110/464, loss: 0.04769524931907654 2023-01-24 04:06:46.898973: step: 112/464, loss: 0.006700258236378431 2023-01-24 04:06:47.437789: step: 114/464, loss: 0.0009436507825739682 2023-01-24 04:06:48.101246: step: 116/464, loss: 0.08529170602560043 2023-01-24 04:06:48.750791: step: 118/464, loss: 0.06692947447299957 2023-01-24 04:06:49.373957: step: 120/464, loss: 0.01741936057806015 2023-01-24 04:06:50.035108: step: 122/464, loss: 0.06238299608230591 2023-01-24 04:06:50.668494: step: 124/464, loss: 0.013601443730294704 2023-01-24 04:06:51.236064: step: 126/464, loss: 0.005797897465527058 2023-01-24 04:06:51.832930: step: 128/464, loss: 0.02559986710548401 2023-01-24 04:06:52.479183: step: 130/464, loss: 0.0006582848145626485 2023-01-24 04:06:53.066445: step: 132/464, loss: 0.010648852214217186 2023-01-24 04:06:53.667342: step: 134/464, loss: 0.0007578640943393111 2023-01-24 04:06:54.286929: step: 136/464, loss: 0.020853610709309578 2023-01-24 04:06:54.973624: step: 138/464, loss: 1.2599151134490967 2023-01-24 04:06:55.577310: step: 140/464, loss: 0.0707487240433693 2023-01-24 04:06:56.217584: step: 142/464, loss: 0.20164254307746887 2023-01-24 04:06:56.832712: step: 144/464, loss: 0.01755233108997345 2023-01-24 04:06:57.416005: step: 146/464, loss: 0.0019922242499887943 2023-01-24 04:06:58.020190: step: 148/464, loss: 0.011276560835540295 2023-01-24 04:06:58.637479: step: 150/464, loss: 0.001740350155159831 2023-01-24 04:06:59.252818: step: 152/464, loss: 0.024404587224125862 2023-01-24 04:06:59.911438: step: 154/464, loss: 0.029476886615157127 2023-01-24 04:07:00.523409: step: 156/464, loss: 0.002116193762049079 2023-01-24 04:07:01.233163: step: 158/464, loss: 0.020922478288412094 2023-01-24 04:07:01.950610: step: 160/464, loss: 0.22654925286769867 2023-01-24 04:07:02.560044: step: 162/464, loss: 0.0761280506849289 2023-01-24 04:07:03.177186: step: 164/464, loss: 0.049508724361658096 2023-01-24 04:07:03.757543: step: 166/464, loss: 0.00400913693010807 2023-01-24 04:07:04.398273: step: 168/464, loss: 0.0035668325144797564 2023-01-24 04:07:05.079864: step: 170/464, loss: 0.15200269222259521 2023-01-24 04:07:05.704331: step: 172/464, loss: 0.0035742297768592834 2023-01-24 04:07:06.301674: step: 174/464, loss: 0.017629073932766914 2023-01-24 04:07:06.850837: step: 176/464, loss: 0.008834774605929852 2023-01-24 04:07:07.458036: step: 178/464, loss: 0.002919742139056325 2023-01-24 04:07:08.042000: step: 180/464, loss: 0.005628170445561409 2023-01-24 04:07:08.681356: step: 182/464, loss: 0.0009138965397141874 2023-01-24 04:07:09.316889: step: 184/464, loss: 0.010701551102101803 2023-01-24 04:07:09.932171: step: 186/464, loss: 0.01624779775738716 2023-01-24 04:07:10.506628: step: 188/464, loss: 0.02109084278345108 2023-01-24 04:07:11.081473: step: 190/464, loss: 0.006756091956049204 2023-01-24 04:07:11.789224: step: 192/464, loss: 0.01783033274114132 2023-01-24 04:07:12.421002: step: 194/464, loss: 0.005625900812447071 2023-01-24 04:07:13.057047: step: 196/464, loss: 0.0433078333735466 2023-01-24 04:07:13.701304: step: 198/464, loss: 0.10533846914768219 2023-01-24 04:07:14.382564: step: 200/464, loss: 0.03561583533883095 2023-01-24 04:07:15.083106: step: 202/464, loss: 0.029630184173583984 2023-01-24 04:07:15.696486: step: 204/464, loss: 0.007734477519989014 2023-01-24 04:07:16.283872: step: 206/464, loss: 0.010086983442306519 2023-01-24 04:07:16.829338: step: 208/464, loss: 0.011256583966314793 2023-01-24 04:07:17.410248: step: 210/464, loss: 0.010693291202187538 2023-01-24 04:07:18.008305: step: 212/464, loss: 0.011730333790183067 2023-01-24 04:07:18.676803: step: 214/464, loss: 0.07052070647478104 2023-01-24 04:07:19.353095: step: 216/464, loss: 0.026574324816465378 2023-01-24 04:07:19.940440: step: 218/464, loss: 0.03399469703435898 2023-01-24 04:07:20.563612: step: 220/464, loss: 0.030823074281215668 2023-01-24 04:07:21.214109: step: 222/464, loss: 0.051768504083156586 2023-01-24 04:07:21.839360: step: 224/464, loss: 0.09838014841079712 2023-01-24 04:07:22.564185: step: 226/464, loss: 0.04117704927921295 2023-01-24 04:07:23.188796: step: 228/464, loss: 0.012569655664265156 2023-01-24 04:07:23.773631: step: 230/464, loss: 0.011671887710690498 2023-01-24 04:07:24.394575: step: 232/464, loss: 0.0007638675742782652 2023-01-24 04:07:25.061376: step: 234/464, loss: 0.051818255335092545 2023-01-24 04:07:25.731243: step: 236/464, loss: 0.0054818205535411835 2023-01-24 04:07:26.317077: step: 238/464, loss: 0.0012856718385592103 2023-01-24 04:07:26.907315: step: 240/464, loss: 0.0016444082139059901 2023-01-24 04:07:27.524374: step: 242/464, loss: 0.002486496465280652 2023-01-24 04:07:28.157570: step: 244/464, loss: 0.0032496124040335417 2023-01-24 04:07:28.856886: step: 246/464, loss: 0.009185628965497017 2023-01-24 04:07:29.515059: step: 248/464, loss: 0.0025085736997425556 2023-01-24 04:07:30.197667: step: 250/464, loss: 0.04779759421944618 2023-01-24 04:07:30.933609: step: 252/464, loss: 0.1278035044670105 2023-01-24 04:07:31.517005: step: 254/464, loss: 0.029156673699617386 2023-01-24 04:07:32.164713: step: 256/464, loss: 0.004071261268109083 2023-01-24 04:07:32.757102: step: 258/464, loss: 0.0031680958345532417 2023-01-24 04:07:33.345008: step: 260/464, loss: 0.037706535309553146 2023-01-24 04:07:33.977609: step: 262/464, loss: 0.05441688373684883 2023-01-24 04:07:34.583831: step: 264/464, loss: 0.00813133455812931 2023-01-24 04:07:35.249846: step: 266/464, loss: 0.0021063615567982197 2023-01-24 04:07:35.864193: step: 268/464, loss: 0.008211891166865826 2023-01-24 04:07:36.638676: step: 270/464, loss: 0.0014769110130146146 2023-01-24 04:07:37.209306: step: 272/464, loss: 0.10388434678316116 2023-01-24 04:07:37.885844: step: 274/464, loss: 0.0014674770645797253 2023-01-24 04:07:38.507971: step: 276/464, loss: 0.005743144545704126 2023-01-24 04:07:39.061324: step: 278/464, loss: 0.03033411130309105 2023-01-24 04:07:39.703972: step: 280/464, loss: 0.04292159900069237 2023-01-24 04:07:40.394663: step: 282/464, loss: 0.005484298337250948 2023-01-24 04:07:41.125921: step: 284/464, loss: 0.07534424960613251 2023-01-24 04:07:41.783018: step: 286/464, loss: 0.020643752068281174 2023-01-24 04:07:42.390490: step: 288/464, loss: 0.007683439180254936 2023-01-24 04:07:43.076891: step: 290/464, loss: 0.03788389265537262 2023-01-24 04:07:43.724799: step: 292/464, loss: 0.02440880984067917 2023-01-24 04:07:44.362921: step: 294/464, loss: 0.04807315021753311 2023-01-24 04:07:45.015129: step: 296/464, loss: 0.08031445741653442 2023-01-24 04:07:45.709149: step: 298/464, loss: 0.004971335642039776 2023-01-24 04:07:46.344300: step: 300/464, loss: 0.06626083701848984 2023-01-24 04:07:46.938031: step: 302/464, loss: 0.008229141123592854 2023-01-24 04:07:47.567293: step: 304/464, loss: 0.012430696748197079 2023-01-24 04:07:48.230980: step: 306/464, loss: 0.015409497544169426 2023-01-24 04:07:48.777624: step: 308/464, loss: 0.03891903534531593 2023-01-24 04:07:49.420041: step: 310/464, loss: 0.003821104532107711 2023-01-24 04:07:50.038278: step: 312/464, loss: 0.01957610994577408 2023-01-24 04:07:50.640186: step: 314/464, loss: 0.004578686784952879 2023-01-24 04:07:51.278048: step: 316/464, loss: 0.021410878747701645 2023-01-24 04:07:51.821393: step: 318/464, loss: 0.002462534699589014 2023-01-24 04:07:52.403914: step: 320/464, loss: 0.028250494971871376 2023-01-24 04:07:53.111997: step: 322/464, loss: 0.0746573656797409 2023-01-24 04:07:53.738503: step: 324/464, loss: 0.011263798922300339 2023-01-24 04:07:54.288639: step: 326/464, loss: 0.02622237429022789 2023-01-24 04:07:54.895463: step: 328/464, loss: 0.006810697726905346 2023-01-24 04:07:55.508447: step: 330/464, loss: 0.02033045142889023 2023-01-24 04:07:56.105049: step: 332/464, loss: 0.028290104120969772 2023-01-24 04:07:56.740090: step: 334/464, loss: 0.006970271468162537 2023-01-24 04:07:57.353090: step: 336/464, loss: 0.10415491461753845 2023-01-24 04:07:58.014077: step: 338/464, loss: 0.03805846348404884 2023-01-24 04:07:58.634756: step: 340/464, loss: 0.008582512848079205 2023-01-24 04:07:59.296486: step: 342/464, loss: 0.02193283848464489 2023-01-24 04:07:59.912671: step: 344/464, loss: 0.00040405866457149386 2023-01-24 04:08:00.489708: step: 346/464, loss: 0.005250776186585426 2023-01-24 04:08:01.104685: step: 348/464, loss: 0.316387414932251 2023-01-24 04:08:01.796771: step: 350/464, loss: 0.021355492994189262 2023-01-24 04:08:02.410010: step: 352/464, loss: 0.0944470763206482 2023-01-24 04:08:02.999208: step: 354/464, loss: 0.005789034068584442 2023-01-24 04:08:03.540665: step: 356/464, loss: 0.03233187273144722 2023-01-24 04:08:04.182643: step: 358/464, loss: 0.013468354009091854 2023-01-24 04:08:04.772400: step: 360/464, loss: 0.007178623229265213 2023-01-24 04:08:05.385504: step: 362/464, loss: 0.02026214264333248 2023-01-24 04:08:06.018141: step: 364/464, loss: 0.014081430621445179 2023-01-24 04:08:06.614831: step: 366/464, loss: 0.00034141322248615324 2023-01-24 04:08:07.228336: step: 368/464, loss: 0.009255238808691502 2023-01-24 04:08:07.820745: step: 370/464, loss: 0.0002265293151140213 2023-01-24 04:08:08.387459: step: 372/464, loss: 0.02084532380104065 2023-01-24 04:08:09.117944: step: 374/464, loss: 0.01657322235405445 2023-01-24 04:08:09.763999: step: 376/464, loss: 0.01808655634522438 2023-01-24 04:08:10.399528: step: 378/464, loss: 0.026209594681859016 2023-01-24 04:08:11.062783: step: 380/464, loss: 0.03522813320159912 2023-01-24 04:08:11.716757: step: 382/464, loss: 0.4924827516078949 2023-01-24 04:08:12.301508: step: 384/464, loss: 0.04399174824357033 2023-01-24 04:08:12.953123: step: 386/464, loss: 0.11709188669919968 2023-01-24 04:08:13.579615: step: 388/464, loss: 0.011554055847227573 2023-01-24 04:08:14.238827: step: 390/464, loss: 0.059646569192409515 2023-01-24 04:08:14.839240: step: 392/464, loss: 0.02467474900186062 2023-01-24 04:08:15.491475: step: 394/464, loss: 0.010192793793976307 2023-01-24 04:08:16.111646: step: 396/464, loss: 0.01305388007313013 2023-01-24 04:08:16.785048: step: 398/464, loss: 0.11766955256462097 2023-01-24 04:08:17.318456: step: 400/464, loss: 0.02509024366736412 2023-01-24 04:08:17.919981: step: 402/464, loss: 0.0038161729462444782 2023-01-24 04:08:18.553856: step: 404/464, loss: 0.003525168402120471 2023-01-24 04:08:19.169827: step: 406/464, loss: 0.022189294919371605 2023-01-24 04:08:19.826987: step: 408/464, loss: 0.015227475203573704 2023-01-24 04:08:20.428891: step: 410/464, loss: 0.029578283429145813 2023-01-24 04:08:21.031945: step: 412/464, loss: 0.04139862209558487 2023-01-24 04:08:21.643516: step: 414/464, loss: 0.0006235550390556455 2023-01-24 04:08:22.266231: step: 416/464, loss: 0.00983441062271595 2023-01-24 04:08:22.951742: step: 418/464, loss: 0.007791872136294842 2023-01-24 04:08:23.578784: step: 420/464, loss: 0.0033019613474607468 2023-01-24 04:08:24.263589: step: 422/464, loss: 0.04036351293325424 2023-01-24 04:08:24.902665: step: 424/464, loss: 0.051174964755773544 2023-01-24 04:08:25.480732: step: 426/464, loss: 0.06642129272222519 2023-01-24 04:08:26.022926: step: 428/464, loss: 0.04952915012836456 2023-01-24 04:08:26.786819: step: 430/464, loss: 0.00039676425512880087 2023-01-24 04:08:27.345876: step: 432/464, loss: 0.002911365358158946 2023-01-24 04:08:27.961809: step: 434/464, loss: 0.009221483021974564 2023-01-24 04:08:28.553113: step: 436/464, loss: 0.03754451125860214 2023-01-24 04:08:29.174770: step: 438/464, loss: 0.05024517700076103 2023-01-24 04:08:29.810457: step: 440/464, loss: 0.0031745005398988724 2023-01-24 04:08:30.426031: step: 442/464, loss: 0.007519016973674297 2023-01-24 04:08:31.011634: step: 444/464, loss: 0.06694741547107697 2023-01-24 04:08:31.605322: step: 446/464, loss: 0.007199062965810299 2023-01-24 04:08:32.250748: step: 448/464, loss: 0.03117830865085125 2023-01-24 04:08:32.842423: step: 450/464, loss: 0.009340462274849415 2023-01-24 04:08:33.524221: step: 452/464, loss: 0.006021823268383741 2023-01-24 04:08:34.151943: step: 454/464, loss: 0.0030698971822857857 2023-01-24 04:08:34.697654: step: 456/464, loss: 0.008624221198260784 2023-01-24 04:08:35.356583: step: 458/464, loss: 0.059307970106601715 2023-01-24 04:08:36.002783: step: 460/464, loss: 0.011224465444684029 2023-01-24 04:08:36.757590: step: 462/464, loss: 0.03303281217813492 2023-01-24 04:08:37.399414: step: 464/464, loss: 0.008707696571946144 2023-01-24 04:08:38.032874: step: 466/464, loss: 0.02195972390472889 2023-01-24 04:08:38.644212: step: 468/464, loss: 0.021515917032957077 2023-01-24 04:08:39.350321: step: 470/464, loss: 0.01887495443224907 2023-01-24 04:08:40.028822: step: 472/464, loss: 0.007549591362476349 2023-01-24 04:08:40.683444: step: 474/464, loss: 0.012140336446464062 2023-01-24 04:08:41.339446: step: 476/464, loss: 0.0239426139742136 2023-01-24 04:08:42.038777: step: 478/464, loss: 0.0699852779507637 2023-01-24 04:08:42.653182: step: 480/464, loss: 0.032511156052351 2023-01-24 04:08:43.380576: step: 482/464, loss: 0.034606803208589554 2023-01-24 04:08:43.978348: step: 484/464, loss: 0.8022106885910034 2023-01-24 04:08:44.545017: step: 486/464, loss: 0.004343557637184858 2023-01-24 04:08:45.321668: step: 488/464, loss: 0.018259840086102486 2023-01-24 04:08:45.947439: step: 490/464, loss: 0.004218821879476309 2023-01-24 04:08:46.560094: step: 492/464, loss: 0.020487593486905098 2023-01-24 04:08:47.212247: step: 494/464, loss: 0.004174598027020693 2023-01-24 04:08:47.882367: step: 496/464, loss: 0.015948574990034103 2023-01-24 04:08:48.630560: step: 498/464, loss: 0.025023166090250015 2023-01-24 04:08:49.220085: step: 500/464, loss: 0.007463703863322735 2023-01-24 04:08:49.829924: step: 502/464, loss: 0.023295581340789795 2023-01-24 04:08:50.529617: step: 504/464, loss: 0.035140346735715866 2023-01-24 04:08:51.197325: step: 506/464, loss: 0.04719272628426552 2023-01-24 04:08:51.842131: step: 508/464, loss: 0.024057535454630852 2023-01-24 04:08:52.478815: step: 510/464, loss: 0.02392573654651642 2023-01-24 04:08:53.122385: step: 512/464, loss: 0.004249283578246832 2023-01-24 04:08:53.726721: step: 514/464, loss: 0.015805572271347046 2023-01-24 04:08:54.327205: step: 516/464, loss: 0.05258805304765701 2023-01-24 04:08:54.955819: step: 518/464, loss: 0.01823163963854313 2023-01-24 04:08:55.591574: step: 520/464, loss: 0.03767416626214981 2023-01-24 04:08:56.187801: step: 522/464, loss: 0.041998717933893204 2023-01-24 04:08:56.828311: step: 524/464, loss: 0.045012932270765305 2023-01-24 04:08:57.422214: step: 526/464, loss: 0.01743290200829506 2023-01-24 04:08:58.034200: step: 528/464, loss: 0.028209581971168518 2023-01-24 04:08:58.704175: step: 530/464, loss: 0.029669439420104027 2023-01-24 04:08:59.308962: step: 532/464, loss: 0.025862492620944977 2023-01-24 04:08:59.935682: step: 534/464, loss: 0.025721795856952667 2023-01-24 04:09:00.550020: step: 536/464, loss: 0.010643397457897663 2023-01-24 04:09:01.152541: step: 538/464, loss: 0.0009918089490383863 2023-01-24 04:09:01.823607: step: 540/464, loss: 0.0919719785451889 2023-01-24 04:09:02.457261: step: 542/464, loss: 0.0036847067531198263 2023-01-24 04:09:03.007198: step: 544/464, loss: 0.03340010717511177 2023-01-24 04:09:03.816633: step: 546/464, loss: 1.4899811744689941 2023-01-24 04:09:04.408204: step: 548/464, loss: 0.013500018045306206 2023-01-24 04:09:05.010239: step: 550/464, loss: 0.027026327326893806 2023-01-24 04:09:05.592844: step: 552/464, loss: 0.03297542780637741 2023-01-24 04:09:06.213370: step: 554/464, loss: 0.04438992589712143 2023-01-24 04:09:06.849573: step: 556/464, loss: 0.02105231210589409 2023-01-24 04:09:07.484509: step: 558/464, loss: 0.00616777129471302 2023-01-24 04:09:08.122222: step: 560/464, loss: 0.17797285318374634 2023-01-24 04:09:08.705456: step: 562/464, loss: 0.45834028720855713 2023-01-24 04:09:09.266831: step: 564/464, loss: 0.0040410347282886505 2023-01-24 04:09:09.920988: step: 566/464, loss: 0.01322061289101839 2023-01-24 04:09:10.546590: step: 568/464, loss: 0.018731502816081047 2023-01-24 04:09:11.148775: step: 570/464, loss: 0.044756487011909485 2023-01-24 04:09:11.846711: step: 572/464, loss: 0.0435539074242115 2023-01-24 04:09:12.507380: step: 574/464, loss: 0.951160192489624 2023-01-24 04:09:13.105115: step: 576/464, loss: 0.0037094554863870144 2023-01-24 04:09:13.714521: step: 578/464, loss: 0.022865185514092445 2023-01-24 04:09:14.301687: step: 580/464, loss: 0.07532264292240143 2023-01-24 04:09:14.958964: step: 582/464, loss: 0.0011326372623443604 2023-01-24 04:09:15.540178: step: 584/464, loss: 0.11389389634132385 2023-01-24 04:09:16.200143: step: 586/464, loss: 0.23151782155036926 2023-01-24 04:09:16.806752: step: 588/464, loss: 0.025923380628228188 2023-01-24 04:09:17.454534: step: 590/464, loss: 0.009010151959955692 2023-01-24 04:09:18.047181: step: 592/464, loss: 0.002571831690147519 2023-01-24 04:09:18.718134: step: 594/464, loss: 0.04727496579289436 2023-01-24 04:09:19.316037: step: 596/464, loss: 0.015008511021733284 2023-01-24 04:09:19.891357: step: 598/464, loss: 0.006229538936167955 2023-01-24 04:09:20.506179: step: 600/464, loss: 1.0692181587219238 2023-01-24 04:09:21.103258: step: 602/464, loss: 0.0024318471550941467 2023-01-24 04:09:21.724439: step: 604/464, loss: 0.04745035246014595 2023-01-24 04:09:22.316455: step: 606/464, loss: 0.013820142485201359 2023-01-24 04:09:22.906948: step: 608/464, loss: 0.008597586303949356 2023-01-24 04:09:23.463957: step: 610/464, loss: 0.0037027972284704447 2023-01-24 04:09:24.068028: step: 612/464, loss: 0.02084978111088276 2023-01-24 04:09:24.652586: step: 614/464, loss: 0.003373411949723959 2023-01-24 04:09:25.290408: step: 616/464, loss: 0.034973613917827606 2023-01-24 04:09:25.892670: step: 618/464, loss: 1.1798455715179443 2023-01-24 04:09:26.482617: step: 620/464, loss: 0.001084931311197579 2023-01-24 04:09:27.119253: step: 622/464, loss: 0.009436777792870998 2023-01-24 04:09:27.739190: step: 624/464, loss: 0.0111102145165205 2023-01-24 04:09:28.278032: step: 626/464, loss: 0.042641185224056244 2023-01-24 04:09:28.807936: step: 628/464, loss: 1.43836510181427 2023-01-24 04:09:29.425026: step: 630/464, loss: 0.003855292685329914 2023-01-24 04:09:29.959970: step: 632/464, loss: 0.013502503745257854 2023-01-24 04:09:30.620232: step: 634/464, loss: 0.23189158737659454 2023-01-24 04:09:31.247762: step: 636/464, loss: 0.17636005580425262 2023-01-24 04:09:31.816584: step: 638/464, loss: 0.047434959560632706 2023-01-24 04:09:32.476319: step: 640/464, loss: 0.07551106810569763 2023-01-24 04:09:33.077283: step: 642/464, loss: 0.0022081949282437563 2023-01-24 04:09:33.661436: step: 644/464, loss: 0.011727227829396725 2023-01-24 04:09:34.279917: step: 646/464, loss: 0.01832597889006138 2023-01-24 04:09:34.891694: step: 648/464, loss: 0.011980734765529633 2023-01-24 04:09:35.504363: step: 650/464, loss: 0.35407641530036926 2023-01-24 04:09:36.093256: step: 652/464, loss: 0.00555523531511426 2023-01-24 04:09:36.785705: step: 654/464, loss: 0.009562639519572258 2023-01-24 04:09:37.374371: step: 656/464, loss: 0.5297197699546814 2023-01-24 04:09:38.028417: step: 658/464, loss: 0.04865730553865433 2023-01-24 04:09:38.588592: step: 660/464, loss: 0.04232815280556679 2023-01-24 04:09:39.186323: step: 662/464, loss: 0.01379953883588314 2023-01-24 04:09:39.747459: step: 664/464, loss: 0.002938771154731512 2023-01-24 04:09:40.410077: step: 666/464, loss: 0.00024933667737059295 2023-01-24 04:09:41.021162: step: 668/464, loss: 0.04576539248228073 2023-01-24 04:09:41.667205: step: 670/464, loss: 0.030819378793239594 2023-01-24 04:09:42.305483: step: 672/464, loss: 0.2565461993217468 2023-01-24 04:09:42.908548: step: 674/464, loss: 0.014622553251683712 2023-01-24 04:09:43.533123: step: 676/464, loss: 0.030066970735788345 2023-01-24 04:09:44.236098: step: 678/464, loss: 0.028679603710770607 2023-01-24 04:09:44.858356: step: 680/464, loss: 0.009739887900650501 2023-01-24 04:09:45.477038: step: 682/464, loss: 0.03857453912496567 2023-01-24 04:09:46.141956: step: 684/464, loss: 0.04997088387608528 2023-01-24 04:09:46.710081: step: 686/464, loss: 0.1312706023454666 2023-01-24 04:09:47.286625: step: 688/464, loss: 0.007372731808573008 2023-01-24 04:09:47.903723: step: 690/464, loss: 0.0016976733459159732 2023-01-24 04:09:48.515327: step: 692/464, loss: 0.04558177664875984 2023-01-24 04:09:49.126338: step: 694/464, loss: 0.011670437641441822 2023-01-24 04:09:49.714340: step: 696/464, loss: 0.04827771708369255 2023-01-24 04:09:50.340332: step: 698/464, loss: 0.0075889453291893005 2023-01-24 04:09:51.001520: step: 700/464, loss: 0.06361132860183716 2023-01-24 04:09:51.558573: step: 702/464, loss: 0.003011963563039899 2023-01-24 04:09:52.223440: step: 704/464, loss: 0.02163800224661827 2023-01-24 04:09:52.833494: step: 706/464, loss: 0.13394685089588165 2023-01-24 04:09:53.440887: step: 708/464, loss: 0.06432411074638367 2023-01-24 04:09:54.088242: step: 710/464, loss: 0.013955528847873211 2023-01-24 04:09:54.671003: step: 712/464, loss: 0.008491216227412224 2023-01-24 04:09:55.221140: step: 714/464, loss: 0.11980558931827545 2023-01-24 04:09:55.825615: step: 716/464, loss: 0.016868501901626587 2023-01-24 04:09:56.418568: step: 718/464, loss: 0.1915367841720581 2023-01-24 04:09:57.070616: step: 720/464, loss: 0.004546544048935175 2023-01-24 04:09:57.632383: step: 722/464, loss: 0.0005246453802101314 2023-01-24 04:09:58.314453: step: 724/464, loss: 0.018939411267638206 2023-01-24 04:09:58.940836: step: 726/464, loss: 0.10874909162521362 2023-01-24 04:09:59.531100: step: 728/464, loss: 0.004177314229309559 2023-01-24 04:10:00.152456: step: 730/464, loss: 0.04618740826845169 2023-01-24 04:10:00.780599: step: 732/464, loss: 0.008233455941081047 2023-01-24 04:10:01.368054: step: 734/464, loss: 0.004605564288794994 2023-01-24 04:10:02.105492: step: 736/464, loss: 0.18734845519065857 2023-01-24 04:10:02.753101: step: 738/464, loss: 0.0029743590857833624 2023-01-24 04:10:03.387853: step: 740/464, loss: 0.0211980901658535 2023-01-24 04:10:04.027526: step: 742/464, loss: 0.02819528616964817 2023-01-24 04:10:04.586410: step: 744/464, loss: 0.05711721256375313 2023-01-24 04:10:05.173399: step: 746/464, loss: 0.04029686003923416 2023-01-24 04:10:05.750747: step: 748/464, loss: 0.049593035131692886 2023-01-24 04:10:06.379364: step: 750/464, loss: 0.025946997106075287 2023-01-24 04:10:07.068436: step: 752/464, loss: 0.01631246879696846 2023-01-24 04:10:07.738894: step: 754/464, loss: 0.08335703611373901 2023-01-24 04:10:08.363689: step: 756/464, loss: 0.017137154936790466 2023-01-24 04:10:08.932533: step: 758/464, loss: 0.03726530447602272 2023-01-24 04:10:09.480261: step: 760/464, loss: 0.011103518307209015 2023-01-24 04:10:10.090629: step: 762/464, loss: 0.025130685418844223 2023-01-24 04:10:10.784070: step: 764/464, loss: 0.021800890564918518 2023-01-24 04:10:11.399699: step: 766/464, loss: 0.015432463027536869 2023-01-24 04:10:11.970039: step: 768/464, loss: 0.4171266555786133 2023-01-24 04:10:12.525239: step: 770/464, loss: 0.0417230986058712 2023-01-24 04:10:13.185891: step: 772/464, loss: 0.0015053371898829937 2023-01-24 04:10:13.809825: step: 774/464, loss: 0.021203722804784775 2023-01-24 04:10:14.404329: step: 776/464, loss: 0.024750908836722374 2023-01-24 04:10:14.997296: step: 778/464, loss: 0.06008352339267731 2023-01-24 04:10:15.550049: step: 780/464, loss: 0.0718572586774826 2023-01-24 04:10:16.182936: step: 782/464, loss: 0.004066292196512222 2023-01-24 04:10:16.818887: step: 784/464, loss: 0.006483875680714846 2023-01-24 04:10:17.418577: step: 786/464, loss: 0.020831121131777763 2023-01-24 04:10:18.054356: step: 788/464, loss: 0.04707172140479088 2023-01-24 04:10:18.643965: step: 790/464, loss: 0.011199343018233776 2023-01-24 04:10:19.261791: step: 792/464, loss: 0.145524799823761 2023-01-24 04:10:19.942024: step: 794/464, loss: 0.0018645271193236113 2023-01-24 04:10:20.563588: step: 796/464, loss: 0.41246137022972107 2023-01-24 04:10:21.184836: step: 798/464, loss: 0.007940721698105335 2023-01-24 04:10:21.869315: step: 800/464, loss: 0.015825096517801285 2023-01-24 04:10:22.489875: step: 802/464, loss: 0.03498406708240509 2023-01-24 04:10:23.126623: step: 804/464, loss: 0.010817916132509708 2023-01-24 04:10:23.764827: step: 806/464, loss: 0.021481124684214592 2023-01-24 04:10:24.344991: step: 808/464, loss: 0.00514302309602499 2023-01-24 04:10:24.951241: step: 810/464, loss: 0.001514037256129086 2023-01-24 04:10:25.559237: step: 812/464, loss: 0.0049459426663815975 2023-01-24 04:10:26.182283: step: 814/464, loss: 0.023048903793096542 2023-01-24 04:10:26.876112: step: 816/464, loss: 0.058704547584056854 2023-01-24 04:10:27.470086: step: 818/464, loss: 0.006693670060485601 2023-01-24 04:10:28.099249: step: 820/464, loss: 0.03921428322792053 2023-01-24 04:10:28.767695: step: 822/464, loss: 0.004509300924837589 2023-01-24 04:10:29.397554: step: 824/464, loss: 0.1113775372505188 2023-01-24 04:10:30.005564: step: 826/464, loss: 0.0070705353282392025 2023-01-24 04:10:30.667076: step: 828/464, loss: 0.008664405904710293 2023-01-24 04:10:31.262741: step: 830/464, loss: 0.0015526512870565057 2023-01-24 04:10:31.895134: step: 832/464, loss: 0.0028195951599627733 2023-01-24 04:10:32.481965: step: 834/464, loss: 0.0036235605366528034 2023-01-24 04:10:33.124318: step: 836/464, loss: 0.015041164122521877 2023-01-24 04:10:33.685926: step: 838/464, loss: 0.06809394806623459 2023-01-24 04:10:34.266089: step: 840/464, loss: 0.048990003764629364 2023-01-24 04:10:34.930483: step: 842/464, loss: 0.013416965492069721 2023-01-24 04:10:35.557716: step: 844/464, loss: 0.11701352894306183 2023-01-24 04:10:36.192434: step: 846/464, loss: 0.15656238794326782 2023-01-24 04:10:36.746801: step: 848/464, loss: 0.7169525623321533 2023-01-24 04:10:37.386502: step: 850/464, loss: 0.01056838221848011 2023-01-24 04:10:38.013790: step: 852/464, loss: 0.012577138841152191 2023-01-24 04:10:38.628439: step: 854/464, loss: 0.060538213700056076 2023-01-24 04:10:39.248879: step: 856/464, loss: 0.02752896212041378 2023-01-24 04:10:39.844701: step: 858/464, loss: 0.02793855592608452 2023-01-24 04:10:40.472552: step: 860/464, loss: 0.40132588148117065 2023-01-24 04:10:41.100640: step: 862/464, loss: 0.06512241810560226 2023-01-24 04:10:41.739524: step: 864/464, loss: 0.003781597362831235 2023-01-24 04:10:42.377185: step: 866/464, loss: 0.06454236805438995 2023-01-24 04:10:42.993516: step: 868/464, loss: 0.014481059275567532 2023-01-24 04:10:43.601530: step: 870/464, loss: 0.021092643961310387 2023-01-24 04:10:44.206188: step: 872/464, loss: 0.010052897967398167 2023-01-24 04:10:44.770776: step: 874/464, loss: 0.08746406435966492 2023-01-24 04:10:45.406525: step: 876/464, loss: 0.011426975019276142 2023-01-24 04:10:46.034424: step: 878/464, loss: 0.009791023097932339 2023-01-24 04:10:46.651860: step: 880/464, loss: 0.11707156896591187 2023-01-24 04:10:47.395726: step: 882/464, loss: 0.07980296015739441 2023-01-24 04:10:48.048107: step: 884/464, loss: 0.03473206236958504 2023-01-24 04:10:48.669141: step: 886/464, loss: 0.012438970617949963 2023-01-24 04:10:49.323705: step: 888/464, loss: 0.01267226878553629 2023-01-24 04:10:49.930165: step: 890/464, loss: 0.002416931791231036 2023-01-24 04:10:50.603095: step: 892/464, loss: 0.001159779028967023 2023-01-24 04:10:51.234720: step: 894/464, loss: 0.0028834762051701546 2023-01-24 04:10:51.918492: step: 896/464, loss: 0.045279476791620255 2023-01-24 04:10:52.535934: step: 898/464, loss: 0.015352551825344563 2023-01-24 04:10:53.165546: step: 900/464, loss: 0.1051325798034668 2023-01-24 04:10:53.811532: step: 902/464, loss: 0.03135009855031967 2023-01-24 04:10:54.438964: step: 904/464, loss: 0.12836480140686035 2023-01-24 04:10:55.028192: step: 906/464, loss: 0.003560137003660202 2023-01-24 04:10:55.741847: step: 908/464, loss: 0.18377456068992615 2023-01-24 04:10:56.334028: step: 910/464, loss: 0.019912661984562874 2023-01-24 04:10:56.965644: step: 912/464, loss: 0.021315833553671837 2023-01-24 04:10:57.614668: step: 914/464, loss: 0.154340922832489 2023-01-24 04:10:58.227958: step: 916/464, loss: 0.006456819362938404 2023-01-24 04:10:58.834313: step: 918/464, loss: 0.02382693812251091 2023-01-24 04:10:59.446073: step: 920/464, loss: 0.033757977187633514 2023-01-24 04:11:00.061498: step: 922/464, loss: 0.0026924435514956713 2023-01-24 04:11:00.652484: step: 924/464, loss: 0.0012073888210579753 2023-01-24 04:11:01.234296: step: 926/464, loss: 0.019931938499212265 2023-01-24 04:11:01.868732: step: 928/464, loss: 0.05094848573207855 2023-01-24 04:11:02.356306: step: 930/464, loss: 0.0012850743951275945 ================================================== Loss: 0.068 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32727019720101785, 'r': 0.32540717900063254, 'f1': 0.32633602917856014}, 'combined': 0.24045812676314957, 'epoch': 27} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3262211574663956, 'r': 0.3004352939166621, 'f1': 0.31279770314002714}, 'combined': 0.20420989945929233, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32970574593473756, 'r': 0.3372132771514678, 'f1': 0.33341725526983773}, 'combined': 0.24567587230409094, 'epoch': 27} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.34436094225572494, 'r': 0.3041643984446247, 'f1': 0.3230169502271856}, 'combined': 0.21088153227267037, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33737649914334666, 'r': 0.3296943018193995, 'f1': 0.3334911651800836}, 'combined': 0.24573033223795632, 'epoch': 27} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3335208247695564, 'r': 0.29340032151444906, 'f1': 0.31217679543647786}, 'combined': 0.2038045400258871, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2982456140350877, 'r': 0.32380952380952377, 'f1': 0.3105022831050228}, 'combined': 0.2070015220700152, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2890625, 'r': 0.40217391304347827, 'f1': 0.33636363636363636}, 'combined': 0.16818181818181818, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:13:39.000033: step: 2/464, loss: 0.01975341886281967 2023-01-24 04:13:39.572547: step: 4/464, loss: 0.01888517662882805 2023-01-24 04:13:40.245064: step: 6/464, loss: 0.011520719155669212 2023-01-24 04:13:40.901260: step: 8/464, loss: 0.6255261301994324 2023-01-24 04:13:41.492324: step: 10/464, loss: 0.03990140184760094 2023-01-24 04:13:42.136677: step: 12/464, loss: 0.025017209351062775 2023-01-24 04:13:42.823563: step: 14/464, loss: 0.026359407231211662 2023-01-24 04:13:43.493449: step: 16/464, loss: 0.005490477662533522 2023-01-24 04:13:44.119918: step: 18/464, loss: 0.053964029997587204 2023-01-24 04:13:44.754195: step: 20/464, loss: 0.008139424957334995 2023-01-24 04:13:45.361553: step: 22/464, loss: 0.016445273533463478 2023-01-24 04:13:45.957018: step: 24/464, loss: 0.041273459792137146 2023-01-24 04:13:46.585934: step: 26/464, loss: 0.0012901576701551676 2023-01-24 04:13:47.245378: step: 28/464, loss: 0.014636056497693062 2023-01-24 04:13:47.887228: step: 30/464, loss: 0.04501689225435257 2023-01-24 04:13:48.548529: step: 32/464, loss: 0.05421580374240875 2023-01-24 04:13:49.121237: step: 34/464, loss: 0.0032574422657489777 2023-01-24 04:13:49.734969: step: 36/464, loss: 0.050775010138750076 2023-01-24 04:13:50.324118: step: 38/464, loss: 0.0015177004970610142 2023-01-24 04:13:50.961014: step: 40/464, loss: 0.0023237792775034904 2023-01-24 04:13:51.537691: step: 42/464, loss: 0.06346136331558228 2023-01-24 04:13:52.089908: step: 44/464, loss: 0.0019482603529468179 2023-01-24 04:13:52.707275: step: 46/464, loss: 0.021754002198576927 2023-01-24 04:13:53.262280: step: 48/464, loss: 0.10162035375833511 2023-01-24 04:13:53.911936: step: 50/464, loss: 0.13380345702171326 2023-01-24 04:13:54.509098: step: 52/464, loss: 0.005131959915161133 2023-01-24 04:13:55.084153: step: 54/464, loss: 6.972807022975758e-05 2023-01-24 04:13:55.689405: step: 56/464, loss: 0.063968226313591 2023-01-24 04:13:56.282138: step: 58/464, loss: 0.05545021966099739 2023-01-24 04:13:56.840285: step: 60/464, loss: 0.00356881832703948 2023-01-24 04:13:57.453198: step: 62/464, loss: 0.03784513846039772 2023-01-24 04:13:58.081241: step: 64/464, loss: 0.03425294905900955 2023-01-24 04:13:58.674580: step: 66/464, loss: 0.10109174996614456 2023-01-24 04:13:59.325393: step: 68/464, loss: 0.013180552050471306 2023-01-24 04:13:59.998187: step: 70/464, loss: 0.0019827696960419416 2023-01-24 04:14:00.630978: step: 72/464, loss: 0.01931658200919628 2023-01-24 04:14:01.402128: step: 74/464, loss: 0.05979328230023384 2023-01-24 04:14:02.047655: step: 76/464, loss: 0.059981685131788254 2023-01-24 04:14:02.695898: step: 78/464, loss: 0.027963552623987198 2023-01-24 04:14:03.339213: step: 80/464, loss: 0.01616048812866211 2023-01-24 04:14:03.885298: step: 82/464, loss: 0.014934813603758812 2023-01-24 04:14:04.456393: step: 84/464, loss: 0.014981115236878395 2023-01-24 04:14:05.136467: step: 86/464, loss: 0.07403749227523804 2023-01-24 04:14:05.886997: step: 88/464, loss: 0.07628615945577621 2023-01-24 04:14:06.582164: step: 90/464, loss: 0.05683693662285805 2023-01-24 04:14:07.188407: step: 92/464, loss: 0.014785525389015675 2023-01-24 04:14:07.918730: step: 94/464, loss: 0.0014446486020460725 2023-01-24 04:14:08.485303: step: 96/464, loss: 0.01637359708547592 2023-01-24 04:14:09.115919: step: 98/464, loss: 0.5492805242538452 2023-01-24 04:14:09.702171: step: 100/464, loss: 0.04689208045601845 2023-01-24 04:14:10.376273: step: 102/464, loss: 0.017917821183800697 2023-01-24 04:14:10.971585: step: 104/464, loss: 0.18207520246505737 2023-01-24 04:14:11.631828: step: 106/464, loss: 0.028904229402542114 2023-01-24 04:14:12.187311: step: 108/464, loss: 0.08580604940652847 2023-01-24 04:14:12.773761: step: 110/464, loss: 0.06547223031520844 2023-01-24 04:14:13.407897: step: 112/464, loss: 0.012034471146762371 2023-01-24 04:14:13.979461: step: 114/464, loss: 0.011492324993014336 2023-01-24 04:14:14.542913: step: 116/464, loss: 0.012730980291962624 2023-01-24 04:14:15.185735: step: 118/464, loss: 0.01702752150595188 2023-01-24 04:14:15.796698: step: 120/464, loss: 0.003987180069088936 2023-01-24 04:14:16.397131: step: 122/464, loss: 0.01272681262344122 2023-01-24 04:14:17.009077: step: 124/464, loss: 0.003189630573615432 2023-01-24 04:14:17.639521: step: 126/464, loss: 0.012792550027370453 2023-01-24 04:14:18.220648: step: 128/464, loss: 0.033105701208114624 2023-01-24 04:14:18.900422: step: 130/464, loss: 0.3838423490524292 2023-01-24 04:14:19.547708: step: 132/464, loss: 0.012216437608003616 2023-01-24 04:14:20.268094: step: 134/464, loss: 0.1442188322544098 2023-01-24 04:14:20.910646: step: 136/464, loss: 0.027723701670765877 2023-01-24 04:14:21.492568: step: 138/464, loss: 0.027388494461774826 2023-01-24 04:14:22.151597: step: 140/464, loss: 0.028884828090667725 2023-01-24 04:14:22.784683: step: 142/464, loss: 0.002762697171419859 2023-01-24 04:14:23.441573: step: 144/464, loss: 0.017410848289728165 2023-01-24 04:14:24.095351: step: 146/464, loss: 0.24691028892993927 2023-01-24 04:14:24.703679: step: 148/464, loss: 0.016644123941659927 2023-01-24 04:14:25.251952: step: 150/464, loss: 0.0072594149969518185 2023-01-24 04:14:25.966821: step: 152/464, loss: 0.02167939767241478 2023-01-24 04:14:26.524236: step: 154/464, loss: 0.00871216133236885 2023-01-24 04:14:27.104408: step: 156/464, loss: 0.0033676326274871826 2023-01-24 04:14:27.785122: step: 158/464, loss: 0.20985104143619537 2023-01-24 04:14:28.417258: step: 160/464, loss: 0.005667650140821934 2023-01-24 04:14:29.033953: step: 162/464, loss: 0.0029226827900856733 2023-01-24 04:14:29.612034: step: 164/464, loss: 0.07242726534605026 2023-01-24 04:14:30.282006: step: 166/464, loss: 0.34875330328941345 2023-01-24 04:14:30.935093: step: 168/464, loss: 0.008606811985373497 2023-01-24 04:14:31.593631: step: 170/464, loss: 0.008142728358507156 2023-01-24 04:14:32.210185: step: 172/464, loss: 0.007337086368352175 2023-01-24 04:14:32.825332: step: 174/464, loss: 0.005187767092138529 2023-01-24 04:14:33.480169: step: 176/464, loss: 0.02520335279405117 2023-01-24 04:14:34.093937: step: 178/464, loss: 0.007265692111104727 2023-01-24 04:14:34.735564: step: 180/464, loss: 0.010373773984611034 2023-01-24 04:14:35.370848: step: 182/464, loss: 0.046069592237472534 2023-01-24 04:14:35.999144: step: 184/464, loss: 0.026207102462649345 2023-01-24 04:14:36.618883: step: 186/464, loss: 0.018650302663445473 2023-01-24 04:14:37.272984: step: 188/464, loss: 0.0171427670866251 2023-01-24 04:14:37.858272: step: 190/464, loss: 0.03326038271188736 2023-01-24 04:14:38.458826: step: 192/464, loss: 0.01100210566073656 2023-01-24 04:14:39.180917: step: 194/464, loss: 0.032505106180906296 2023-01-24 04:14:39.783464: step: 196/464, loss: 0.01049841195344925 2023-01-24 04:14:40.426903: step: 198/464, loss: 0.04443327710032463 2023-01-24 04:14:41.006724: step: 200/464, loss: 0.0108529943972826 2023-01-24 04:14:41.641864: step: 202/464, loss: 0.008697934448719025 2023-01-24 04:14:42.228763: step: 204/464, loss: 0.02991078607738018 2023-01-24 04:14:42.892304: step: 206/464, loss: 0.017947711050510406 2023-01-24 04:14:43.500760: step: 208/464, loss: 0.028416339308023453 2023-01-24 04:14:44.147820: step: 210/464, loss: 0.027255281805992126 2023-01-24 04:14:44.744396: step: 212/464, loss: 0.04304584860801697 2023-01-24 04:14:45.438530: step: 214/464, loss: 0.02375870756804943 2023-01-24 04:14:46.042961: step: 216/464, loss: 0.04932486638426781 2023-01-24 04:14:46.645840: step: 218/464, loss: 0.018953483551740646 2023-01-24 04:14:47.245464: step: 220/464, loss: 0.025900574401021004 2023-01-24 04:14:47.901786: step: 222/464, loss: 0.004785214085131884 2023-01-24 04:14:48.492405: step: 224/464, loss: 0.007400230038911104 2023-01-24 04:14:49.157464: step: 226/464, loss: 0.04882337525486946 2023-01-24 04:14:49.769820: step: 228/464, loss: 0.000596017693169415 2023-01-24 04:14:50.426498: step: 230/464, loss: 0.042570240795612335 2023-01-24 04:14:51.050808: step: 232/464, loss: 0.056120481342077255 2023-01-24 04:14:51.636905: step: 234/464, loss: 0.003333766246214509 2023-01-24 04:14:52.249146: step: 236/464, loss: 0.02945108525454998 2023-01-24 04:14:52.851920: step: 238/464, loss: 0.02998744510114193 2023-01-24 04:14:53.499407: step: 240/464, loss: 0.24635662138462067 2023-01-24 04:14:54.082154: step: 242/464, loss: 0.032885901629924774 2023-01-24 04:14:54.731590: step: 244/464, loss: 0.015478289686143398 2023-01-24 04:14:55.302507: step: 246/464, loss: 0.009795871563255787 2023-01-24 04:14:55.922671: step: 248/464, loss: 0.03254992142319679 2023-01-24 04:14:56.555129: step: 250/464, loss: 0.0371343195438385 2023-01-24 04:14:57.171564: step: 252/464, loss: 0.02613062411546707 2023-01-24 04:14:57.798143: step: 254/464, loss: 0.028173061087727547 2023-01-24 04:14:58.386045: step: 256/464, loss: 0.010123823769390583 2023-01-24 04:14:58.940429: step: 258/464, loss: 0.007183664478361607 2023-01-24 04:14:59.528476: step: 260/464, loss: 0.014079362154006958 2023-01-24 04:15:00.158605: step: 262/464, loss: 0.009176979772746563 2023-01-24 04:15:00.748920: step: 264/464, loss: 0.06820368766784668 2023-01-24 04:15:01.344182: step: 266/464, loss: 0.009957423433661461 2023-01-24 04:15:01.974188: step: 268/464, loss: 0.01703375019133091 2023-01-24 04:15:02.599108: step: 270/464, loss: 0.05943138897418976 2023-01-24 04:15:03.226236: step: 272/464, loss: 0.02275952324271202 2023-01-24 04:15:03.855299: step: 274/464, loss: 0.03507295250892639 2023-01-24 04:15:04.475036: step: 276/464, loss: 0.4227091073989868 2023-01-24 04:15:05.094678: step: 278/464, loss: 0.023873407393693924 2023-01-24 04:15:05.715524: step: 280/464, loss: 0.020889200270175934 2023-01-24 04:15:06.352480: step: 282/464, loss: 0.006823091302067041 2023-01-24 04:15:06.902412: step: 284/464, loss: 0.02021654322743416 2023-01-24 04:15:07.572315: step: 286/464, loss: 0.019388005137443542 2023-01-24 04:15:08.176270: step: 288/464, loss: 0.04393423721194267 2023-01-24 04:15:08.752758: step: 290/464, loss: 0.1124618798494339 2023-01-24 04:15:09.409040: step: 292/464, loss: 0.06299597769975662 2023-01-24 04:15:10.112789: step: 294/464, loss: 0.005311224143952131 2023-01-24 04:15:10.666588: step: 296/464, loss: 0.013554797507822514 2023-01-24 04:15:11.227802: step: 298/464, loss: 0.051145315170288086 2023-01-24 04:15:11.773444: step: 300/464, loss: 0.003098517656326294 2023-01-24 04:15:12.369897: step: 302/464, loss: 0.009821097366511822 2023-01-24 04:15:12.995324: step: 304/464, loss: 0.02515912801027298 2023-01-24 04:15:13.567270: step: 306/464, loss: 0.004545064643025398 2023-01-24 04:15:14.183266: step: 308/464, loss: 0.020346971228718758 2023-01-24 04:15:14.817016: step: 310/464, loss: 0.03274424001574516 2023-01-24 04:15:15.439736: step: 312/464, loss: 0.012141672894358635 2023-01-24 04:15:16.052922: step: 314/464, loss: 0.0037430974189192057 2023-01-24 04:15:16.632948: step: 316/464, loss: 0.010012580081820488 2023-01-24 04:15:17.244866: step: 318/464, loss: 0.23996806144714355 2023-01-24 04:15:17.824622: step: 320/464, loss: 0.0048306286334991455 2023-01-24 04:15:18.423477: step: 322/464, loss: 0.028304725885391235 2023-01-24 04:15:19.051102: step: 324/464, loss: 0.007011666893959045 2023-01-24 04:15:19.714693: step: 326/464, loss: 0.026433467864990234 2023-01-24 04:15:20.348348: step: 328/464, loss: 0.014295603148639202 2023-01-24 04:15:20.979407: step: 330/464, loss: 0.0036247014068067074 2023-01-24 04:15:21.589761: step: 332/464, loss: 0.03816038370132446 2023-01-24 04:15:22.192449: step: 334/464, loss: 0.32058051228523254 2023-01-24 04:15:22.826902: step: 336/464, loss: 0.006642151158303022 2023-01-24 04:15:23.480555: step: 338/464, loss: 0.09794043749570847 2023-01-24 04:15:24.064992: step: 340/464, loss: 0.04090559482574463 2023-01-24 04:15:24.628191: step: 342/464, loss: 0.010088179260492325 2023-01-24 04:15:25.228743: step: 344/464, loss: 0.0022901766933500767 2023-01-24 04:15:25.860598: step: 346/464, loss: 0.030262116342782974 2023-01-24 04:15:26.507873: step: 348/464, loss: 0.024959390982985497 2023-01-24 04:15:27.181483: step: 350/464, loss: 0.038163553923368454 2023-01-24 04:15:27.779681: step: 352/464, loss: 0.03988020494580269 2023-01-24 04:15:28.362212: step: 354/464, loss: 0.00906051229685545 2023-01-24 04:15:29.017120: step: 356/464, loss: 0.17054124176502228 2023-01-24 04:15:29.633361: step: 358/464, loss: 0.018445579335093498 2023-01-24 04:15:30.261679: step: 360/464, loss: 0.01864020898938179 2023-01-24 04:15:30.918273: step: 362/464, loss: 0.003797942539677024 2023-01-24 04:15:31.513792: step: 364/464, loss: 0.04716122895479202 2023-01-24 04:15:32.111457: step: 366/464, loss: 0.020112771540880203 2023-01-24 04:15:32.760888: step: 368/464, loss: 0.15953640639781952 2023-01-24 04:15:33.430617: step: 370/464, loss: 0.018856249749660492 2023-01-24 04:15:34.105661: step: 372/464, loss: 0.01189302746206522 2023-01-24 04:15:34.688312: step: 374/464, loss: 0.014004247263073921 2023-01-24 04:15:35.342798: step: 376/464, loss: 0.05325167998671532 2023-01-24 04:15:35.952710: step: 378/464, loss: 0.06514342874288559 2023-01-24 04:15:36.594004: step: 380/464, loss: 0.03061126358807087 2023-01-24 04:15:37.250776: step: 382/464, loss: 0.11431025713682175 2023-01-24 04:15:37.794144: step: 384/464, loss: 0.020242024213075638 2023-01-24 04:15:38.359494: step: 386/464, loss: 0.005004480481147766 2023-01-24 04:15:39.001997: step: 388/464, loss: 0.004809098783880472 2023-01-24 04:15:39.661094: step: 390/464, loss: 0.06833072751760483 2023-01-24 04:15:40.262822: step: 392/464, loss: 0.05740839242935181 2023-01-24 04:15:40.847472: step: 394/464, loss: 0.013233033008873463 2023-01-24 04:15:41.523583: step: 396/464, loss: 0.04136299714446068 2023-01-24 04:15:42.118937: step: 398/464, loss: 0.06713368743658066 2023-01-24 04:15:42.757631: step: 400/464, loss: 0.053051579743623734 2023-01-24 04:15:43.391804: step: 402/464, loss: 0.034219738095998764 2023-01-24 04:15:44.014488: step: 404/464, loss: 0.017612462863326073 2023-01-24 04:15:44.680836: step: 406/464, loss: 0.00702511565759778 2023-01-24 04:15:45.352449: step: 408/464, loss: 0.003682539099827409 2023-01-24 04:15:45.967879: step: 410/464, loss: 0.013064548373222351 2023-01-24 04:15:46.585185: step: 412/464, loss: 0.0051510087214410305 2023-01-24 04:15:47.268137: step: 414/464, loss: 0.01193144265562296 2023-01-24 04:15:47.930667: step: 416/464, loss: 8.737286567687988 2023-01-24 04:15:48.522951: step: 418/464, loss: 0.04521564766764641 2023-01-24 04:15:49.171641: step: 420/464, loss: 0.08151457458734512 2023-01-24 04:15:49.729625: step: 422/464, loss: 0.09461862593889236 2023-01-24 04:15:50.342213: step: 424/464, loss: 0.0011021374957635999 2023-01-24 04:15:50.996033: step: 426/464, loss: 0.1207464188337326 2023-01-24 04:15:51.647055: step: 428/464, loss: 0.008433245122432709 2023-01-24 04:15:52.296905: step: 430/464, loss: 0.06080570071935654 2023-01-24 04:15:53.030118: step: 432/464, loss: 0.06965363770723343 2023-01-24 04:15:53.735989: step: 434/464, loss: 0.03309911489486694 2023-01-24 04:15:54.384740: step: 436/464, loss: 0.012495990842580795 2023-01-24 04:15:54.944588: step: 438/464, loss: 0.019360091537237167 2023-01-24 04:15:55.518380: step: 440/464, loss: 0.27905771136283875 2023-01-24 04:15:56.096619: step: 442/464, loss: 0.0017130867345258594 2023-01-24 04:15:56.752062: step: 444/464, loss: 0.10627894848585129 2023-01-24 04:15:57.392443: step: 446/464, loss: 0.006129761692136526 2023-01-24 04:15:58.057132: step: 448/464, loss: 0.018852759152650833 2023-01-24 04:15:58.740888: step: 450/464, loss: 0.015564347617328167 2023-01-24 04:15:59.357956: step: 452/464, loss: 0.06356043368577957 2023-01-24 04:15:59.901045: step: 454/464, loss: 0.02592889964580536 2023-01-24 04:16:00.537859: step: 456/464, loss: 0.007412649691104889 2023-01-24 04:16:01.131167: step: 458/464, loss: 0.027132531628012657 2023-01-24 04:16:01.746674: step: 460/464, loss: 0.0023493319749832153 2023-01-24 04:16:02.325715: step: 462/464, loss: 0.0009935208363458514 2023-01-24 04:16:02.984123: step: 464/464, loss: 0.0038652773946523666 2023-01-24 04:16:03.614329: step: 466/464, loss: 0.002001130022108555 2023-01-24 04:16:04.220681: step: 468/464, loss: 0.009971032850444317 2023-01-24 04:16:04.769790: step: 470/464, loss: 0.027622388675808907 2023-01-24 04:16:05.376432: step: 472/464, loss: 0.014356517232954502 2023-01-24 04:16:06.017075: step: 474/464, loss: 0.06259731203317642 2023-01-24 04:16:06.626669: step: 476/464, loss: 0.007335342466831207 2023-01-24 04:16:07.274719: step: 478/464, loss: 0.03115548938512802 2023-01-24 04:16:07.927055: step: 480/464, loss: 0.023620828986167908 2023-01-24 04:16:08.591213: step: 482/464, loss: 0.00033822975819930434 2023-01-24 04:16:09.248772: step: 484/464, loss: 0.01832190714776516 2023-01-24 04:16:09.796055: step: 486/464, loss: 0.000453435379313305 2023-01-24 04:16:10.384096: step: 488/464, loss: 0.05982121825218201 2023-01-24 04:16:11.045854: step: 490/464, loss: 0.23867210745811462 2023-01-24 04:16:11.719695: step: 492/464, loss: 0.037391725927591324 2023-01-24 04:16:12.346757: step: 494/464, loss: 0.06826174259185791 2023-01-24 04:16:13.057520: step: 496/464, loss: 0.025316348299384117 2023-01-24 04:16:13.652948: step: 498/464, loss: 0.06300205737352371 2023-01-24 04:16:14.332293: step: 500/464, loss: 0.015299019403755665 2023-01-24 04:16:14.939645: step: 502/464, loss: 0.009448371827602386 2023-01-24 04:16:15.543452: step: 504/464, loss: 0.07433760166168213 2023-01-24 04:16:16.176656: step: 506/464, loss: 0.013551932759582996 2023-01-24 04:16:16.775123: step: 508/464, loss: 0.0019346019253134727 2023-01-24 04:16:17.429329: step: 510/464, loss: 0.013648522086441517 2023-01-24 04:16:18.065214: step: 512/464, loss: 0.03566650673747063 2023-01-24 04:16:18.743895: step: 514/464, loss: 0.014076773077249527 2023-01-24 04:16:19.374792: step: 516/464, loss: 0.0006180580821819603 2023-01-24 04:16:20.001613: step: 518/464, loss: 0.03681022673845291 2023-01-24 04:16:20.627086: step: 520/464, loss: 0.0250330101698637 2023-01-24 04:16:21.235252: step: 522/464, loss: 0.038896046578884125 2023-01-24 04:16:21.837349: step: 524/464, loss: 0.006692049093544483 2023-01-24 04:16:22.450131: step: 526/464, loss: 0.07458885759115219 2023-01-24 04:16:23.058671: step: 528/464, loss: 0.041242264211177826 2023-01-24 04:16:23.685804: step: 530/464, loss: 0.20899175107479095 2023-01-24 04:16:24.376022: step: 532/464, loss: 0.04120601713657379 2023-01-24 04:16:25.082712: step: 534/464, loss: 0.0675627663731575 2023-01-24 04:16:25.665630: step: 536/464, loss: 0.00530249485746026 2023-01-24 04:16:26.292645: step: 538/464, loss: 0.05314037576317787 2023-01-24 04:16:26.846724: step: 540/464, loss: 0.007039431016892195 2023-01-24 04:16:27.523099: step: 542/464, loss: 0.002162415534257889 2023-01-24 04:16:28.116635: step: 544/464, loss: 0.03981251269578934 2023-01-24 04:16:28.716870: step: 546/464, loss: 0.013181759044528008 2023-01-24 04:16:29.295297: step: 548/464, loss: 0.017265036702156067 2023-01-24 04:16:29.937185: step: 550/464, loss: 0.0014363001100718975 2023-01-24 04:16:30.603242: step: 552/464, loss: 0.05713506042957306 2023-01-24 04:16:31.242473: step: 554/464, loss: 0.1750723123550415 2023-01-24 04:16:31.828648: step: 556/464, loss: 0.0009091845713555813 2023-01-24 04:16:32.467672: step: 558/464, loss: 0.01886255480349064 2023-01-24 04:16:33.024348: step: 560/464, loss: 0.0021657454781234264 2023-01-24 04:16:33.651125: step: 562/464, loss: 0.029084540903568268 2023-01-24 04:16:34.252180: step: 564/464, loss: 0.03066820092499256 2023-01-24 04:16:34.912793: step: 566/464, loss: 0.021754052489995956 2023-01-24 04:16:35.451501: step: 568/464, loss: 0.0076250056736171246 2023-01-24 04:16:36.078370: step: 570/464, loss: 0.09450612962245941 2023-01-24 04:16:36.749861: step: 572/464, loss: 0.031405095010995865 2023-01-24 04:16:37.274056: step: 574/464, loss: 0.02798873744904995 2023-01-24 04:16:37.840499: step: 576/464, loss: 0.11750727891921997 2023-01-24 04:16:38.418660: step: 578/464, loss: 0.022053493186831474 2023-01-24 04:16:39.035102: step: 580/464, loss: 0.0028090430423617363 2023-01-24 04:16:39.716754: step: 582/464, loss: 0.001743658329360187 2023-01-24 04:16:40.272151: step: 584/464, loss: 0.12413953244686127 2023-01-24 04:16:40.873389: step: 586/464, loss: 0.07030326128005981 2023-01-24 04:16:41.559871: step: 588/464, loss: 0.12284321337938309 2023-01-24 04:16:42.151810: step: 590/464, loss: 0.03481021523475647 2023-01-24 04:16:42.803070: step: 592/464, loss: 1.2914907932281494 2023-01-24 04:16:43.454978: step: 594/464, loss: 0.00812312588095665 2023-01-24 04:16:43.986898: step: 596/464, loss: 0.0014134242665022612 2023-01-24 04:16:44.648837: step: 598/464, loss: 0.020025134086608887 2023-01-24 04:16:45.244975: step: 600/464, loss: 0.06840483099222183 2023-01-24 04:16:45.903578: step: 602/464, loss: 0.10379452258348465 2023-01-24 04:16:46.556345: step: 604/464, loss: 0.01580253802239895 2023-01-24 04:16:47.176317: step: 606/464, loss: 0.051684606820344925 2023-01-24 04:16:47.814825: step: 608/464, loss: 0.21527676284313202 2023-01-24 04:16:48.525218: step: 610/464, loss: 0.013423663564026356 2023-01-24 04:16:49.062863: step: 612/464, loss: 0.005301930010318756 2023-01-24 04:16:49.814917: step: 614/464, loss: 0.27966079115867615 2023-01-24 04:16:50.464483: step: 616/464, loss: 0.07844717800617218 2023-01-24 04:16:51.106868: step: 618/464, loss: 0.015785658732056618 2023-01-24 04:16:51.766952: step: 620/464, loss: 0.05178140103816986 2023-01-24 04:16:52.426283: step: 622/464, loss: 0.003366190241649747 2023-01-24 04:16:53.096986: step: 624/464, loss: 0.2321268618106842 2023-01-24 04:16:53.709091: step: 626/464, loss: 0.04104392230510712 2023-01-24 04:16:54.334857: step: 628/464, loss: 0.019443267956376076 2023-01-24 04:16:54.903219: step: 630/464, loss: 0.026274191215634346 2023-01-24 04:16:55.576104: step: 632/464, loss: 0.026117945089936256 2023-01-24 04:16:56.219765: step: 634/464, loss: 0.003952042665332556 2023-01-24 04:16:56.791008: step: 636/464, loss: 0.0019362128805369139 2023-01-24 04:16:57.377054: step: 638/464, loss: 0.05736561492085457 2023-01-24 04:16:57.996243: step: 640/464, loss: 0.42328062653541565 2023-01-24 04:16:58.582335: step: 642/464, loss: 0.03586931154131889 2023-01-24 04:16:59.298993: step: 644/464, loss: 0.0722898617386818 2023-01-24 04:16:59.963334: step: 646/464, loss: 0.00392839265987277 2023-01-24 04:17:00.637545: step: 648/464, loss: 0.015032708644866943 2023-01-24 04:17:01.310549: step: 650/464, loss: 0.008987652137875557 2023-01-24 04:17:01.916864: step: 652/464, loss: 0.02345702238380909 2023-01-24 04:17:02.498849: step: 654/464, loss: 0.000292833661660552 2023-01-24 04:17:03.128306: step: 656/464, loss: 0.00433304812759161 2023-01-24 04:17:03.702089: step: 658/464, loss: 0.02030690759420395 2023-01-24 04:17:04.351269: step: 660/464, loss: 0.005063401069492102 2023-01-24 04:17:05.020565: step: 662/464, loss: 0.01505844946950674 2023-01-24 04:17:05.616291: step: 664/464, loss: 0.002417447743937373 2023-01-24 04:17:06.279256: step: 666/464, loss: 0.1500273495912552 2023-01-24 04:17:06.881379: step: 668/464, loss: 0.03942999243736267 2023-01-24 04:17:07.542948: step: 670/464, loss: 0.03954707458615303 2023-01-24 04:17:08.183691: step: 672/464, loss: 0.0012041820446029305 2023-01-24 04:17:08.785922: step: 674/464, loss: 0.016088049858808517 2023-01-24 04:17:09.439011: step: 676/464, loss: 0.019116047769784927 2023-01-24 04:17:10.113868: step: 678/464, loss: 0.03320910409092903 2023-01-24 04:17:10.702493: step: 680/464, loss: 0.004189823288470507 2023-01-24 04:17:11.367766: step: 682/464, loss: 0.18053016066551208 2023-01-24 04:17:11.949231: step: 684/464, loss: 0.009326234459877014 2023-01-24 04:17:12.537147: step: 686/464, loss: 0.012497087940573692 2023-01-24 04:17:13.174570: step: 688/464, loss: 0.10844147205352783 2023-01-24 04:17:13.736886: step: 690/464, loss: 0.008908872492611408 2023-01-24 04:17:14.345073: step: 692/464, loss: 0.020197900012135506 2023-01-24 04:17:14.952303: step: 694/464, loss: 0.041542936116456985 2023-01-24 04:17:15.563135: step: 696/464, loss: 0.010704525746405125 2023-01-24 04:17:16.194886: step: 698/464, loss: 0.0005475578946061432 2023-01-24 04:17:16.843556: step: 700/464, loss: 0.05417340248823166 2023-01-24 04:17:17.491267: step: 702/464, loss: 0.014135652221739292 2023-01-24 04:17:18.189233: step: 704/464, loss: 0.018218714743852615 2023-01-24 04:17:18.835998: step: 706/464, loss: 0.030065085738897324 2023-01-24 04:17:19.456888: step: 708/464, loss: 0.0010269619524478912 2023-01-24 04:17:20.087060: step: 710/464, loss: 0.0035107091534882784 2023-01-24 04:17:20.793577: step: 712/464, loss: 0.010718021541833878 2023-01-24 04:17:21.437481: step: 714/464, loss: 0.01341515127569437 2023-01-24 04:17:22.072219: step: 716/464, loss: 0.029262878000736237 2023-01-24 04:17:22.699395: step: 718/464, loss: 0.02531573548913002 2023-01-24 04:17:23.294005: step: 720/464, loss: 0.01895340532064438 2023-01-24 04:17:23.901665: step: 722/464, loss: 0.023700516670942307 2023-01-24 04:17:24.512322: step: 724/464, loss: 0.00893034040927887 2023-01-24 04:17:25.076725: step: 726/464, loss: 0.09975457936525345 2023-01-24 04:17:25.704827: step: 728/464, loss: 0.07586190849542618 2023-01-24 04:17:26.339229: step: 730/464, loss: 0.07693785429000854 2023-01-24 04:17:26.906724: step: 732/464, loss: 0.02519100159406662 2023-01-24 04:17:27.541948: step: 734/464, loss: 0.016743384301662445 2023-01-24 04:17:28.163228: step: 736/464, loss: 0.02917795442044735 2023-01-24 04:17:28.789795: step: 738/464, loss: 0.01388038881123066 2023-01-24 04:17:29.427492: step: 740/464, loss: 0.0042625125497579575 2023-01-24 04:17:30.058123: step: 742/464, loss: 0.0025034870486706495 2023-01-24 04:17:30.694978: step: 744/464, loss: 0.11787613481283188 2023-01-24 04:17:31.278250: step: 746/464, loss: 0.014147958718240261 2023-01-24 04:17:31.909917: step: 748/464, loss: 0.003653917694464326 2023-01-24 04:17:32.501214: step: 750/464, loss: 0.03221333026885986 2023-01-24 04:17:33.138331: step: 752/464, loss: 0.021750828251242638 2023-01-24 04:17:33.740213: step: 754/464, loss: 0.0009350177133455873 2023-01-24 04:17:34.357550: step: 756/464, loss: 0.06062895804643631 2023-01-24 04:17:34.951786: step: 758/464, loss: 0.030687013640999794 2023-01-24 04:17:35.621892: step: 760/464, loss: 0.016679823398590088 2023-01-24 04:17:36.252892: step: 762/464, loss: 0.0034643004182726145 2023-01-24 04:17:37.003166: step: 764/464, loss: 0.0504496768116951 2023-01-24 04:17:37.599145: step: 766/464, loss: 0.018822191283106804 2023-01-24 04:17:38.194073: step: 768/464, loss: 0.045075658708810806 2023-01-24 04:17:38.715115: step: 770/464, loss: 0.07747234404087067 2023-01-24 04:17:39.287070: step: 772/464, loss: 0.002193465130403638 2023-01-24 04:17:39.908815: step: 774/464, loss: 0.0380885973572731 2023-01-24 04:17:40.507406: step: 776/464, loss: 0.0033210322726517916 2023-01-24 04:17:41.114803: step: 778/464, loss: 0.027617856860160828 2023-01-24 04:17:41.704983: step: 780/464, loss: 0.04869770258665085 2023-01-24 04:17:42.299827: step: 782/464, loss: 0.037156715989112854 2023-01-24 04:17:42.927348: step: 784/464, loss: 0.046691734343767166 2023-01-24 04:17:43.489915: step: 786/464, loss: 0.013460393995046616 2023-01-24 04:17:44.089427: step: 788/464, loss: 0.040278829634189606 2023-01-24 04:17:44.754607: step: 790/464, loss: 0.09691322594881058 2023-01-24 04:17:45.349502: step: 792/464, loss: 0.03089674934744835 2023-01-24 04:17:45.916697: step: 794/464, loss: 0.002035632263869047 2023-01-24 04:17:46.528215: step: 796/464, loss: 0.013593480922281742 2023-01-24 04:17:47.151259: step: 798/464, loss: 0.0064804041758179665 2023-01-24 04:17:47.783618: step: 800/464, loss: 0.016399724408984184 2023-01-24 04:17:48.377707: step: 802/464, loss: 0.009268310852348804 2023-01-24 04:17:49.023600: step: 804/464, loss: 0.014182067476212978 2023-01-24 04:17:49.621326: step: 806/464, loss: 0.0030377162620425224 2023-01-24 04:17:50.190513: step: 808/464, loss: 0.015894349664449692 2023-01-24 04:17:50.817651: step: 810/464, loss: 0.003473340068012476 2023-01-24 04:17:51.435038: step: 812/464, loss: 0.02591157890856266 2023-01-24 04:17:52.061138: step: 814/464, loss: 0.058492448180913925 2023-01-24 04:17:52.836270: step: 816/464, loss: 0.24168823659420013 2023-01-24 04:17:53.464696: step: 818/464, loss: 0.00014129285409580916 2023-01-24 04:17:54.139536: step: 820/464, loss: 0.015311875380575657 2023-01-24 04:17:54.723315: step: 822/464, loss: 0.14531171321868896 2023-01-24 04:17:55.335981: step: 824/464, loss: 0.01077133696526289 2023-01-24 04:17:55.892010: step: 826/464, loss: 0.003518162528052926 2023-01-24 04:17:56.541024: step: 828/464, loss: 0.004680001176893711 2023-01-24 04:17:57.152079: step: 830/464, loss: 0.7380512952804565 2023-01-24 04:17:57.894796: step: 832/464, loss: 0.009875763207674026 2023-01-24 04:17:58.555751: step: 834/464, loss: 0.00943849328905344 2023-01-24 04:17:59.185732: step: 836/464, loss: 0.03605659678578377 2023-01-24 04:17:59.791252: step: 838/464, loss: 0.049411047250032425 2023-01-24 04:18:00.430553: step: 840/464, loss: 0.012199982069432735 2023-01-24 04:18:01.019817: step: 842/464, loss: 0.03667812421917915 2023-01-24 04:18:01.651077: step: 844/464, loss: 0.026908107101917267 2023-01-24 04:18:02.188969: step: 846/464, loss: 0.02269853465259075 2023-01-24 04:18:02.795579: step: 848/464, loss: 0.0023813126608729362 2023-01-24 04:18:03.387952: step: 850/464, loss: 0.0199296772480011 2023-01-24 04:18:04.017997: step: 852/464, loss: 0.02812843583524227 2023-01-24 04:18:04.693918: step: 854/464, loss: 0.033090393990278244 2023-01-24 04:18:05.295242: step: 856/464, loss: 0.0023359793704003096 2023-01-24 04:18:05.879159: step: 858/464, loss: 0.08649339526891708 2023-01-24 04:18:06.547758: step: 860/464, loss: 0.0031652937177568674 2023-01-24 04:18:07.176601: step: 862/464, loss: 0.03506851941347122 2023-01-24 04:18:07.831682: step: 864/464, loss: 0.023619432002305984 2023-01-24 04:18:08.459970: step: 866/464, loss: 0.0031147352419793606 2023-01-24 04:18:09.014942: step: 868/464, loss: 0.004032780881971121 2023-01-24 04:18:09.630281: step: 870/464, loss: 0.03890826925635338 2023-01-24 04:18:10.273643: step: 872/464, loss: 0.030549127608537674 2023-01-24 04:18:10.957264: step: 874/464, loss: 0.08162281662225723 2023-01-24 04:18:11.529927: step: 876/464, loss: 0.0021298760548233986 2023-01-24 04:18:12.195471: step: 878/464, loss: 0.007051995489746332 2023-01-24 04:18:12.759935: step: 880/464, loss: 0.02699892781674862 2023-01-24 04:18:13.344500: step: 882/464, loss: 0.007444900926202536 2023-01-24 04:18:13.977808: step: 884/464, loss: 0.018984658643603325 2023-01-24 04:18:14.735895: step: 886/464, loss: 0.27770155668258667 2023-01-24 04:18:15.360980: step: 888/464, loss: 0.004165395628660917 2023-01-24 04:18:16.049609: step: 890/464, loss: 0.4924483895301819 2023-01-24 04:18:16.650378: step: 892/464, loss: 0.051497023552656174 2023-01-24 04:18:17.234474: step: 894/464, loss: 0.004653936717659235 2023-01-24 04:18:17.899096: step: 896/464, loss: 0.009835487231612206 2023-01-24 04:18:18.463234: step: 898/464, loss: 0.002643953077495098 2023-01-24 04:18:19.098471: step: 900/464, loss: 0.004552288446575403 2023-01-24 04:18:19.759092: step: 902/464, loss: 0.15693362057209015 2023-01-24 04:18:20.309374: step: 904/464, loss: 0.009014283306896687 2023-01-24 04:18:20.901773: step: 906/464, loss: 0.030812203884124756 2023-01-24 04:18:21.492529: step: 908/464, loss: 0.012034276500344276 2023-01-24 04:18:22.108099: step: 910/464, loss: 0.0003978584718424827 2023-01-24 04:18:22.736359: step: 912/464, loss: 0.030327772721648216 2023-01-24 04:18:23.397954: step: 914/464, loss: 0.1456509679555893 2023-01-24 04:18:23.998223: step: 916/464, loss: 0.007402684073895216 2023-01-24 04:18:24.644887: step: 918/464, loss: 0.029965035617351532 2023-01-24 04:18:25.304936: step: 920/464, loss: 0.018859058618545532 2023-01-24 04:18:25.893195: step: 922/464, loss: 0.008814035914838314 2023-01-24 04:18:26.559889: step: 924/464, loss: 0.07245197892189026 2023-01-24 04:18:27.202197: step: 926/464, loss: 0.06136466562747955 2023-01-24 04:18:27.818437: step: 928/464, loss: 0.0006068818620406091 2023-01-24 04:18:28.320662: step: 930/464, loss: 0.0022186944261193275 ================================================== Loss: 0.066 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32571417297979804, 'r': 0.3263322264389627, 'f1': 0.326022906793049}, 'combined': 0.24022740500540452, 'epoch': 28} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3133436341414434, 'r': 0.2923003174051735, 'f1': 0.30245639697232135}, 'combined': 0.19745858040680048, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3280239247597773, 'r': 0.3442072682963129, 'f1': 0.3359207970225127}, 'combined': 0.24752058727974618, 'epoch': 28} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3312369638758671, 'r': 0.30137383373802445, 'f1': 0.31560053687209305}, 'combined': 0.20603972873514884, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3318382851176054, 'r': 0.3387647009359995, 'f1': 0.3352657228042661}, 'combined': 0.24703790101366974, 'epoch': 28} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3262203835273351, 'r': 0.28900665072384885, 'f1': 0.30648802862129143}, 'combined': 0.20009063008436642, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2624113475177305, 'r': 0.35238095238095235, 'f1': 0.30081300813008127}, 'combined': 0.20054200542005418, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3106060606060606, 'r': 0.44565217391304346, 'f1': 0.3660714285714286}, 'combined': 0.1830357142857143, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6363636363636364, 'r': 0.2413793103448276, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:21:04.814780: step: 2/464, loss: 0.20718348026275635 2023-01-24 04:21:05.402929: step: 4/464, loss: 0.00092584069352597 2023-01-24 04:21:06.050499: step: 6/464, loss: 0.00072711386019364 2023-01-24 04:21:06.650361: step: 8/464, loss: 0.2181786745786667 2023-01-24 04:21:07.322057: step: 10/464, loss: 0.02145509421825409 2023-01-24 04:21:07.959739: step: 12/464, loss: 0.031037023290991783 2023-01-24 04:21:08.585338: step: 14/464, loss: 0.021666940301656723 2023-01-24 04:21:09.202861: step: 16/464, loss: 0.005939903669059277 2023-01-24 04:21:09.823611: step: 18/464, loss: 0.019667789340019226 2023-01-24 04:21:10.427040: step: 20/464, loss: 0.802329421043396 2023-01-24 04:21:11.058341: step: 22/464, loss: 0.001183252315968275 2023-01-24 04:21:11.818191: step: 24/464, loss: 0.002743531484156847 2023-01-24 04:21:12.405641: step: 26/464, loss: 0.039093371480703354 2023-01-24 04:21:13.154401: step: 28/464, loss: 0.021364763379096985 2023-01-24 04:21:13.700202: step: 30/464, loss: 0.0051937587559223175 2023-01-24 04:21:14.284622: step: 32/464, loss: 0.04549663886427879 2023-01-24 04:21:14.886740: step: 34/464, loss: 0.03777649998664856 2023-01-24 04:21:15.471484: step: 36/464, loss: 0.009439066983759403 2023-01-24 04:21:16.089505: step: 38/464, loss: 0.021647615358233452 2023-01-24 04:21:16.679095: step: 40/464, loss: 0.050232235342264175 2023-01-24 04:21:17.246875: step: 42/464, loss: 0.0005928269238211215 2023-01-24 04:21:17.890457: step: 44/464, loss: 0.021608717739582062 2023-01-24 04:21:18.694964: step: 46/464, loss: 0.00045615588896907866 2023-01-24 04:21:19.340519: step: 48/464, loss: 0.006590969394892454 2023-01-24 04:21:19.993144: step: 50/464, loss: 0.016590625047683716 2023-01-24 04:21:20.629623: step: 52/464, loss: 0.007587079890072346 2023-01-24 04:21:21.252027: step: 54/464, loss: 0.021061744540929794 2023-01-24 04:21:21.888499: step: 56/464, loss: 0.07708916813135147 2023-01-24 04:21:22.474959: step: 58/464, loss: 0.01860615983605385 2023-01-24 04:21:23.122895: step: 60/464, loss: 0.034726161509752274 2023-01-24 04:21:23.776716: step: 62/464, loss: 0.1536983698606491 2023-01-24 04:21:24.432834: step: 64/464, loss: 0.0010793593246489763 2023-01-24 04:21:25.004252: step: 66/464, loss: 0.038470372557640076 2023-01-24 04:21:25.585379: step: 68/464, loss: 0.033768318593502045 2023-01-24 04:21:26.208753: step: 70/464, loss: 0.1774447113275528 2023-01-24 04:21:26.838175: step: 72/464, loss: 0.01382715068757534 2023-01-24 04:21:27.502876: step: 74/464, loss: 0.0067392922937870026 2023-01-24 04:21:28.114384: step: 76/464, loss: 0.24856817722320557 2023-01-24 04:21:28.836597: step: 78/464, loss: 0.10166781395673752 2023-01-24 04:21:29.426775: step: 80/464, loss: 0.023978451266884804 2023-01-24 04:21:30.122164: step: 82/464, loss: 0.03615910932421684 2023-01-24 04:21:30.783878: step: 84/464, loss: 0.009203365072607994 2023-01-24 04:21:31.390670: step: 86/464, loss: 0.024670295417308807 2023-01-24 04:21:31.979026: step: 88/464, loss: 0.11750826239585876 2023-01-24 04:21:32.633096: step: 90/464, loss: 0.0012939282460138202 2023-01-24 04:21:33.329326: step: 92/464, loss: 0.024970868602395058 2023-01-24 04:21:33.920505: step: 94/464, loss: 0.022319048643112183 2023-01-24 04:21:34.573590: step: 96/464, loss: 0.0035962723195552826 2023-01-24 04:21:35.174765: step: 98/464, loss: 0.00042239887989126146 2023-01-24 04:21:35.798066: step: 100/464, loss: 0.012578553520143032 2023-01-24 04:21:36.407337: step: 102/464, loss: 0.0005949849728494883 2023-01-24 04:21:37.020589: step: 104/464, loss: 0.004973983392119408 2023-01-24 04:21:37.602231: step: 106/464, loss: 0.0001440097257727757 2023-01-24 04:21:38.271102: step: 108/464, loss: 0.0024634215515106916 2023-01-24 04:21:38.856014: step: 110/464, loss: 0.024518417194485664 2023-01-24 04:21:39.519725: step: 112/464, loss: 0.032566480338573456 2023-01-24 04:21:40.150863: step: 114/464, loss: 0.02598331868648529 2023-01-24 04:21:40.727750: step: 116/464, loss: 0.023710399866104126 2023-01-24 04:21:41.324514: step: 118/464, loss: 0.011170746758580208 2023-01-24 04:21:41.933893: step: 120/464, loss: 0.004629192873835564 2023-01-24 04:21:42.588923: step: 122/464, loss: 0.016308680176734924 2023-01-24 04:21:43.173863: step: 124/464, loss: 0.017663175240159035 2023-01-24 04:21:43.781643: step: 126/464, loss: 0.07340771704912186 2023-01-24 04:21:44.387484: step: 128/464, loss: 0.010101350955665112 2023-01-24 04:21:44.939955: step: 130/464, loss: 0.20134709775447845 2023-01-24 04:21:45.534467: step: 132/464, loss: 0.014041811227798462 2023-01-24 04:21:46.173842: step: 134/464, loss: 0.0008428994915448129 2023-01-24 04:21:46.746382: step: 136/464, loss: 0.0007383274496532977 2023-01-24 04:21:47.394792: step: 138/464, loss: 0.013044522143900394 2023-01-24 04:21:47.994632: step: 140/464, loss: 0.019865253940224648 2023-01-24 04:21:48.542380: step: 142/464, loss: 0.006404084153473377 2023-01-24 04:21:49.112254: step: 144/464, loss: 0.027074342593550682 2023-01-24 04:21:49.696487: step: 146/464, loss: 0.011449289508163929 2023-01-24 04:21:50.312156: step: 148/464, loss: 0.10458236932754517 2023-01-24 04:21:50.973876: step: 150/464, loss: 0.0007995071937330067 2023-01-24 04:21:51.619265: step: 152/464, loss: 0.010486981831490993 2023-01-24 04:21:52.230323: step: 154/464, loss: 0.0464809276163578 2023-01-24 04:21:52.827988: step: 156/464, loss: 0.011342362500727177 2023-01-24 04:21:53.467149: step: 158/464, loss: 0.042877405881881714 2023-01-24 04:21:54.079571: step: 160/464, loss: 0.0033706913236528635 2023-01-24 04:21:54.716986: step: 162/464, loss: 0.015807805582880974 2023-01-24 04:21:55.318244: step: 164/464, loss: 0.013546344824135303 2023-01-24 04:21:55.922705: step: 166/464, loss: 0.05058148130774498 2023-01-24 04:21:56.454592: step: 168/464, loss: 3.490782910375856e-05 2023-01-24 04:21:57.117966: step: 170/464, loss: 0.01310047972947359 2023-01-24 04:21:57.701155: step: 172/464, loss: 0.018710266798734665 2023-01-24 04:21:58.334543: step: 174/464, loss: 0.000846777344122529 2023-01-24 04:21:59.051704: step: 176/464, loss: 1.2617532014846802 2023-01-24 04:21:59.664063: step: 178/464, loss: 0.008382102474570274 2023-01-24 04:22:00.279199: step: 180/464, loss: 9.83451354841236e-06 2023-01-24 04:22:00.871784: step: 182/464, loss: 0.044426459819078445 2023-01-24 04:22:01.439170: step: 184/464, loss: 0.04976968094706535 2023-01-24 04:22:02.066916: step: 186/464, loss: 0.0009115879656746984 2023-01-24 04:22:02.669034: step: 188/464, loss: 0.02329590730369091 2023-01-24 04:22:03.256652: step: 190/464, loss: 0.0021927249617874622 2023-01-24 04:22:03.896256: step: 192/464, loss: 0.00851105060428381 2023-01-24 04:22:04.492417: step: 194/464, loss: 0.011024788953363895 2023-01-24 04:22:05.077570: step: 196/464, loss: 0.020354608073830605 2023-01-24 04:22:05.710199: step: 198/464, loss: 0.21923001110553741 2023-01-24 04:22:06.301051: step: 200/464, loss: 0.4580487310886383 2023-01-24 04:22:07.003733: step: 202/464, loss: 0.05740246921777725 2023-01-24 04:22:07.590409: step: 204/464, loss: 0.021749237552285194 2023-01-24 04:22:08.206015: step: 206/464, loss: 0.0008607152849435806 2023-01-24 04:22:08.811685: step: 208/464, loss: 0.004498638678342104 2023-01-24 04:22:09.398206: step: 210/464, loss: 0.001176126766949892 2023-01-24 04:22:10.018045: step: 212/464, loss: 0.014383724890649319 2023-01-24 04:22:10.651126: step: 214/464, loss: 0.004588214214891195 2023-01-24 04:22:11.246939: step: 216/464, loss: 0.004308333154767752 2023-01-24 04:22:11.897321: step: 218/464, loss: 0.0035217590630054474 2023-01-24 04:22:12.470160: step: 220/464, loss: 0.2619556486606598 2023-01-24 04:22:13.149389: step: 222/464, loss: 0.01845749095082283 2023-01-24 04:22:13.787340: step: 224/464, loss: 0.343423455953598 2023-01-24 04:22:14.346078: step: 226/464, loss: 0.00013274258526507765 2023-01-24 04:22:15.057644: step: 228/464, loss: 0.04242993891239166 2023-01-24 04:22:15.677341: step: 230/464, loss: 0.004917399492114782 2023-01-24 04:22:16.220100: step: 232/464, loss: 0.05042678490281105 2023-01-24 04:22:16.834009: step: 234/464, loss: 0.015125767327845097 2023-01-24 04:22:17.542610: step: 236/464, loss: 0.023845607414841652 2023-01-24 04:22:18.141527: step: 238/464, loss: 0.002912424271926284 2023-01-24 04:22:18.782100: step: 240/464, loss: 0.030322978273034096 2023-01-24 04:22:19.454858: step: 242/464, loss: 0.04087292402982712 2023-01-24 04:22:20.060217: step: 244/464, loss: 0.024132249876856804 2023-01-24 04:22:20.708346: step: 246/464, loss: 0.00927796121686697 2023-01-24 04:22:21.329324: step: 248/464, loss: 0.04161551967263222 2023-01-24 04:22:21.941709: step: 250/464, loss: 0.010051172226667404 2023-01-24 04:22:22.541780: step: 252/464, loss: 0.01845390349626541 2023-01-24 04:22:23.124942: step: 254/464, loss: 0.00814163126051426 2023-01-24 04:22:23.722511: step: 256/464, loss: 0.011661508120596409 2023-01-24 04:22:24.348806: step: 258/464, loss: 0.0111940186470747 2023-01-24 04:22:24.929412: step: 260/464, loss: 0.17088641226291656 2023-01-24 04:22:25.564878: step: 262/464, loss: 0.013552245683968067 2023-01-24 04:22:26.161664: step: 264/464, loss: 0.0031578170601278543 2023-01-24 04:22:26.886808: step: 266/464, loss: 0.8463234305381775 2023-01-24 04:22:27.510532: step: 268/464, loss: 0.05910234898328781 2023-01-24 04:22:28.114433: step: 270/464, loss: 0.01681624911725521 2023-01-24 04:22:28.708263: step: 272/464, loss: 0.0013331277295947075 2023-01-24 04:22:29.298670: step: 274/464, loss: 0.0015316865174099803 2023-01-24 04:22:29.830997: step: 276/464, loss: 0.0023572868667542934 2023-01-24 04:22:30.493073: step: 278/464, loss: 0.015970777720212936 2023-01-24 04:22:31.147261: step: 280/464, loss: 0.010668879374861717 2023-01-24 04:22:31.858467: step: 282/464, loss: 0.041640013456344604 2023-01-24 04:22:32.499039: step: 284/464, loss: 0.013012656942009926 2023-01-24 04:22:33.063992: step: 286/464, loss: 0.00033443470601923764 2023-01-24 04:22:33.685403: step: 288/464, loss: 0.007970185950398445 2023-01-24 04:22:34.241452: step: 290/464, loss: 0.03468017280101776 2023-01-24 04:22:34.914605: step: 292/464, loss: 0.0018570302054286003 2023-01-24 04:22:35.493649: step: 294/464, loss: 0.003946130629628897 2023-01-24 04:22:36.155114: step: 296/464, loss: 0.033612918108701706 2023-01-24 04:22:36.776263: step: 298/464, loss: 0.0026805405505001545 2023-01-24 04:22:37.373227: step: 300/464, loss: 0.053611185401678085 2023-01-24 04:22:38.063481: step: 302/464, loss: 0.06214655190706253 2023-01-24 04:22:38.621984: step: 304/464, loss: 0.05859425291419029 2023-01-24 04:22:39.200213: step: 306/464, loss: 0.05776157230138779 2023-01-24 04:22:39.798251: step: 308/464, loss: 0.012165514752268791 2023-01-24 04:22:40.449066: step: 310/464, loss: 0.0843651294708252 2023-01-24 04:22:41.132426: step: 312/464, loss: 0.05880401283502579 2023-01-24 04:22:41.775886: step: 314/464, loss: 0.07659254223108292 2023-01-24 04:22:42.332583: step: 316/464, loss: 2.4264578819274902 2023-01-24 04:22:42.971608: step: 318/464, loss: 0.00882178358733654 2023-01-24 04:22:43.571358: step: 320/464, loss: 0.0015311307506635785 2023-01-24 04:22:44.167774: step: 322/464, loss: 0.031146302819252014 2023-01-24 04:22:44.780490: step: 324/464, loss: 0.6656548380851746 2023-01-24 04:22:45.483240: step: 326/464, loss: 0.018226604908704758 2023-01-24 04:22:46.085396: step: 328/464, loss: 0.012589401565492153 2023-01-24 04:22:46.723005: step: 330/464, loss: 0.021131210029125214 2023-01-24 04:22:47.316326: step: 332/464, loss: 0.0200370941311121 2023-01-24 04:22:47.929519: step: 334/464, loss: 0.029083983972668648 2023-01-24 04:22:48.550287: step: 336/464, loss: 0.009164446033537388 2023-01-24 04:22:49.166987: step: 338/464, loss: 0.006219303701072931 2023-01-24 04:22:49.745179: step: 340/464, loss: 0.003315514186397195 2023-01-24 04:22:50.300843: step: 342/464, loss: 0.011149341240525246 2023-01-24 04:22:50.891002: step: 344/464, loss: 0.02427523024380207 2023-01-24 04:22:51.507167: step: 346/464, loss: 0.031062254682183266 2023-01-24 04:22:52.103761: step: 348/464, loss: 0.00999541487544775 2023-01-24 04:22:52.801737: step: 350/464, loss: 0.06719443202018738 2023-01-24 04:22:53.415430: step: 352/464, loss: 0.006315178237855434 2023-01-24 04:22:54.105853: step: 354/464, loss: 0.022876959294080734 2023-01-24 04:22:54.664475: step: 356/464, loss: 0.026826461777091026 2023-01-24 04:22:55.291368: step: 358/464, loss: 0.005475881043821573 2023-01-24 04:22:55.897963: step: 360/464, loss: 0.01010300312191248 2023-01-24 04:22:56.493210: step: 362/464, loss: 0.014562626369297504 2023-01-24 04:22:57.113619: step: 364/464, loss: 0.01299215853214264 2023-01-24 04:22:57.727086: step: 366/464, loss: 0.0004880430060438812 2023-01-24 04:22:58.358520: step: 368/464, loss: 0.005313422996550798 2023-01-24 04:22:59.034721: step: 370/464, loss: 0.004638213198632002 2023-01-24 04:22:59.623200: step: 372/464, loss: 0.04647836461663246 2023-01-24 04:23:00.212283: step: 374/464, loss: 0.00019831047393381596 2023-01-24 04:23:00.747800: step: 376/464, loss: 0.02199379913508892 2023-01-24 04:23:01.377063: step: 378/464, loss: 0.006296336650848389 2023-01-24 04:23:02.073961: step: 380/464, loss: 0.011403193697333336 2023-01-24 04:23:02.690687: step: 382/464, loss: 0.012966877780854702 2023-01-24 04:23:03.311609: step: 384/464, loss: 0.007062949705868959 2023-01-24 04:23:03.886266: step: 386/464, loss: 0.09825027734041214 2023-01-24 04:23:04.484755: step: 388/464, loss: 0.02858874760568142 2023-01-24 04:23:05.093733: step: 390/464, loss: 0.018788516521453857 2023-01-24 04:23:05.720367: step: 392/464, loss: 0.005923361051827669 2023-01-24 04:23:06.390246: step: 394/464, loss: 0.1265401393175125 2023-01-24 04:23:07.082436: step: 396/464, loss: 0.012780094519257545 2023-01-24 04:23:07.655899: step: 398/464, loss: 0.00039305436075665057 2023-01-24 04:23:08.299259: step: 400/464, loss: 0.02331719361245632 2023-01-24 04:23:08.933909: step: 402/464, loss: 0.10353614389896393 2023-01-24 04:23:09.550454: step: 404/464, loss: 0.006237336900085211 2023-01-24 04:23:10.181488: step: 406/464, loss: 0.028251267969608307 2023-01-24 04:23:10.786381: step: 408/464, loss: 0.019966894760727882 2023-01-24 04:23:11.417945: step: 410/464, loss: 0.02114271931350231 2023-01-24 04:23:12.013539: step: 412/464, loss: 0.018811846151947975 2023-01-24 04:23:12.646309: step: 414/464, loss: 0.01861034333705902 2023-01-24 04:23:13.308800: step: 416/464, loss: 0.12198765575885773 2023-01-24 04:23:13.988939: step: 418/464, loss: 0.012938577681779861 2023-01-24 04:23:14.688612: step: 420/464, loss: 0.025013018399477005 2023-01-24 04:23:15.279560: step: 422/464, loss: 0.016791654750704765 2023-01-24 04:23:15.872927: step: 424/464, loss: 0.01789095811545849 2023-01-24 04:23:16.492131: step: 426/464, loss: 0.0005331274005584419 2023-01-24 04:23:17.113656: step: 428/464, loss: 0.007982496172189713 2023-01-24 04:23:17.699628: step: 430/464, loss: 0.0011905856663361192 2023-01-24 04:23:18.288240: step: 432/464, loss: 0.016070405021309853 2023-01-24 04:23:19.061800: step: 434/464, loss: 0.013353471644222736 2023-01-24 04:23:19.643774: step: 436/464, loss: 0.023610210046172142 2023-01-24 04:23:20.318055: step: 438/464, loss: 0.0031759832054376602 2023-01-24 04:23:20.915530: step: 440/464, loss: 0.015572638250887394 2023-01-24 04:23:21.568607: step: 442/464, loss: 0.015378996729850769 2023-01-24 04:23:22.186453: step: 444/464, loss: 0.008406156674027443 2023-01-24 04:23:22.885348: step: 446/464, loss: 0.0370088592171669 2023-01-24 04:23:23.545118: step: 448/464, loss: 0.5323182344436646 2023-01-24 04:23:24.131630: step: 450/464, loss: 0.0011735439766198397 2023-01-24 04:23:24.811934: step: 452/464, loss: 0.001455902587622404 2023-01-24 04:23:25.396323: step: 454/464, loss: 0.009804188273847103 2023-01-24 04:23:26.059553: step: 456/464, loss: 0.0053962101228535175 2023-01-24 04:23:26.722204: step: 458/464, loss: 0.05759469047188759 2023-01-24 04:23:27.389704: step: 460/464, loss: 0.003220080863684416 2023-01-24 04:23:28.014200: step: 462/464, loss: 0.0031053987331688404 2023-01-24 04:23:28.669744: step: 464/464, loss: 0.2687501013278961 2023-01-24 04:23:29.287123: step: 466/464, loss: 0.07093895971775055 2023-01-24 04:23:29.962619: step: 468/464, loss: 0.00012292155588511378 2023-01-24 04:23:30.543247: step: 470/464, loss: 0.03172307088971138 2023-01-24 04:23:31.194216: step: 472/464, loss: 0.09837689250707626 2023-01-24 04:23:31.746294: step: 474/464, loss: 0.017058294266462326 2023-01-24 04:23:32.414374: step: 476/464, loss: 0.011396778747439384 2023-01-24 04:23:32.975734: step: 478/464, loss: 0.003416843479499221 2023-01-24 04:23:33.646992: step: 480/464, loss: 0.0012493301182985306 2023-01-24 04:23:34.388217: step: 482/464, loss: 0.8681263327598572 2023-01-24 04:23:35.096640: step: 484/464, loss: 0.006136356852948666 2023-01-24 04:23:35.747333: step: 486/464, loss: 8.88936483534053e-06 2023-01-24 04:23:36.355321: step: 488/464, loss: 0.008741669356822968 2023-01-24 04:23:36.929625: step: 490/464, loss: 0.01587357372045517 2023-01-24 04:23:37.517293: step: 492/464, loss: 0.005742009729146957 2023-01-24 04:23:38.203879: step: 494/464, loss: 0.008913460187613964 2023-01-24 04:23:38.744749: step: 496/464, loss: 0.018264610320329666 2023-01-24 04:23:39.400947: step: 498/464, loss: 0.0014765068190172315 2023-01-24 04:23:40.005037: step: 500/464, loss: 0.05725078657269478 2023-01-24 04:23:40.579177: step: 502/464, loss: 0.01852232776582241 2023-01-24 04:23:41.190258: step: 504/464, loss: 0.03316880390048027 2023-01-24 04:23:41.839043: step: 506/464, loss: 0.04357683286070824 2023-01-24 04:23:42.463217: step: 508/464, loss: 9.290242451243103e-05 2023-01-24 04:23:43.092589: step: 510/464, loss: 0.029402637854218483 2023-01-24 04:23:43.701460: step: 512/464, loss: 0.036833539605140686 2023-01-24 04:23:44.260513: step: 514/464, loss: 0.007631601300090551 2023-01-24 04:23:44.814646: step: 516/464, loss: 0.04601089283823967 2023-01-24 04:23:45.456429: step: 518/464, loss: 0.04939999058842659 2023-01-24 04:23:46.114952: step: 520/464, loss: 0.0015252236044034362 2023-01-24 04:23:46.733236: step: 522/464, loss: 0.0031517392490059137 2023-01-24 04:23:47.333951: step: 524/464, loss: 0.025582026690244675 2023-01-24 04:23:47.951183: step: 526/464, loss: 0.011233438737690449 2023-01-24 04:23:48.592375: step: 528/464, loss: 0.0003041566233150661 2023-01-24 04:23:49.190509: step: 530/464, loss: 0.04653728008270264 2023-01-24 04:23:49.780899: step: 532/464, loss: 0.015551424585282803 2023-01-24 04:23:50.417622: step: 534/464, loss: 0.045204851776361465 2023-01-24 04:23:51.018888: step: 536/464, loss: 0.03499005362391472 2023-01-24 04:23:51.673889: step: 538/464, loss: 0.03213276341557503 2023-01-24 04:23:52.247466: step: 540/464, loss: 0.045557327568531036 2023-01-24 04:23:52.903167: step: 542/464, loss: 0.017820246517658234 2023-01-24 04:23:53.536039: step: 544/464, loss: 0.030133241787552834 2023-01-24 04:23:54.180317: step: 546/464, loss: 0.07815069705247879 2023-01-24 04:23:54.828755: step: 548/464, loss: 0.013082635588943958 2023-01-24 04:23:55.423522: step: 550/464, loss: 0.041477177292108536 2023-01-24 04:23:55.996022: step: 552/464, loss: 0.017172643914818764 2023-01-24 04:23:56.696836: step: 554/464, loss: 0.04386192187666893 2023-01-24 04:23:57.355664: step: 556/464, loss: 0.0157090462744236 2023-01-24 04:23:57.971609: step: 558/464, loss: 0.0031350385397672653 2023-01-24 04:23:58.584245: step: 560/464, loss: 0.4393223822116852 2023-01-24 04:23:59.268664: step: 562/464, loss: 0.013955345377326012 2023-01-24 04:23:59.835475: step: 564/464, loss: 0.038877204060554504 2023-01-24 04:24:00.478647: step: 566/464, loss: 0.02907596156001091 2023-01-24 04:24:01.095767: step: 568/464, loss: 0.03757504001259804 2023-01-24 04:24:01.723461: step: 570/464, loss: 0.010801984928548336 2023-01-24 04:24:02.321330: step: 572/464, loss: 0.02427094802260399 2023-01-24 04:24:02.934387: step: 574/464, loss: 0.09442934393882751 2023-01-24 04:24:03.566939: step: 576/464, loss: 0.00747566157951951 2023-01-24 04:24:04.258304: step: 578/464, loss: 0.0013725977623835206 2023-01-24 04:24:04.813999: step: 580/464, loss: 0.0008823683601804078 2023-01-24 04:24:05.396019: step: 582/464, loss: 0.002390040084719658 2023-01-24 04:24:06.009792: step: 584/464, loss: 0.12122202664613724 2023-01-24 04:24:06.644255: step: 586/464, loss: 0.046643223613500595 2023-01-24 04:24:07.342250: step: 588/464, loss: 0.06922060251235962 2023-01-24 04:24:07.936918: step: 590/464, loss: 0.0049684857949614525 2023-01-24 04:24:08.484732: step: 592/464, loss: 0.04633360728621483 2023-01-24 04:24:09.102705: step: 594/464, loss: 0.004706260748207569 2023-01-24 04:24:09.765036: step: 596/464, loss: 0.030159030109643936 2023-01-24 04:24:10.420280: step: 598/464, loss: 0.0060491920448839664 2023-01-24 04:24:11.038312: step: 600/464, loss: 0.031114358454942703 2023-01-24 04:24:11.690930: step: 602/464, loss: 0.016858017072081566 2023-01-24 04:24:12.340814: step: 604/464, loss: 0.005272059701383114 2023-01-24 04:24:13.009353: step: 606/464, loss: 0.0002044775610556826 2023-01-24 04:24:13.592025: step: 608/464, loss: 6.030996799468994 2023-01-24 04:24:14.215802: step: 610/464, loss: 0.02099800668656826 2023-01-24 04:24:14.829609: step: 612/464, loss: 0.006913003511726856 2023-01-24 04:24:15.408610: step: 614/464, loss: 0.004233603831380606 2023-01-24 04:24:16.078917: step: 616/464, loss: 0.025346634909510612 2023-01-24 04:24:16.663266: step: 618/464, loss: 0.06679163873195648 2023-01-24 04:24:17.287354: step: 620/464, loss: 0.004545097704976797 2023-01-24 04:24:17.885845: step: 622/464, loss: 0.0001765040506143123 2023-01-24 04:24:18.585082: step: 624/464, loss: 0.003950684797018766 2023-01-24 04:24:19.283552: step: 626/464, loss: 0.01283080130815506 2023-01-24 04:24:19.942798: step: 628/464, loss: 0.04789276793599129 2023-01-24 04:24:20.606527: step: 630/464, loss: 0.20595882833003998 2023-01-24 04:24:21.209645: step: 632/464, loss: 0.03244363144040108 2023-01-24 04:24:21.812123: step: 634/464, loss: 0.7603281140327454 2023-01-24 04:24:22.445623: step: 636/464, loss: 0.006186482030898333 2023-01-24 04:24:23.114579: step: 638/464, loss: 0.019982969388365746 2023-01-24 04:24:23.713753: step: 640/464, loss: 0.006591130048036575 2023-01-24 04:24:24.333290: step: 642/464, loss: 0.012419759295880795 2023-01-24 04:24:24.931330: step: 644/464, loss: 0.006895511411130428 2023-01-24 04:24:25.532404: step: 646/464, loss: 0.07272930443286896 2023-01-24 04:24:26.211822: step: 648/464, loss: 0.015621660277247429 2023-01-24 04:24:26.793909: step: 650/464, loss: 0.009616071358323097 2023-01-24 04:24:27.379962: step: 652/464, loss: 0.0020380548667162657 2023-01-24 04:24:28.006307: step: 654/464, loss: 0.0068487850949168205 2023-01-24 04:24:28.674786: step: 656/464, loss: 1.3829022645950317 2023-01-24 04:24:29.295452: step: 658/464, loss: 0.11870887130498886 2023-01-24 04:24:29.935153: step: 660/464, loss: 0.021173374727368355 2023-01-24 04:24:30.556419: step: 662/464, loss: 0.00863352045416832 2023-01-24 04:24:31.188880: step: 664/464, loss: 0.005785847082734108 2023-01-24 04:24:31.787425: step: 666/464, loss: 0.004254731349647045 2023-01-24 04:24:32.406523: step: 668/464, loss: 0.01884510926902294 2023-01-24 04:24:33.013052: step: 670/464, loss: 0.02798466570675373 2023-01-24 04:24:33.709383: step: 672/464, loss: 0.03392200171947479 2023-01-24 04:24:34.318300: step: 674/464, loss: 0.03376045823097229 2023-01-24 04:24:34.960232: step: 676/464, loss: 0.020718907937407494 2023-01-24 04:24:35.575011: step: 678/464, loss: 0.014963822439312935 2023-01-24 04:24:36.150774: step: 680/464, loss: 0.020511649549007416 2023-01-24 04:24:36.819758: step: 682/464, loss: 0.22455890476703644 2023-01-24 04:24:37.476768: step: 684/464, loss: 0.2074844390153885 2023-01-24 04:24:38.134039: step: 686/464, loss: 0.011452744714915752 2023-01-24 04:24:38.780634: step: 688/464, loss: 0.004009000025689602 2023-01-24 04:24:39.401896: step: 690/464, loss: 0.000250319397309795 2023-01-24 04:24:40.003388: step: 692/464, loss: 0.04512270167469978 2023-01-24 04:24:40.672277: step: 694/464, loss: 0.009123125113546848 2023-01-24 04:24:41.346048: step: 696/464, loss: 0.014779305085539818 2023-01-24 04:24:41.932599: step: 698/464, loss: 0.009732699953019619 2023-01-24 04:24:42.669076: step: 700/464, loss: 0.02503376267850399 2023-01-24 04:24:43.320160: step: 702/464, loss: 0.0029226504266262054 2023-01-24 04:24:43.979751: step: 704/464, loss: 0.007843797095119953 2023-01-24 04:24:44.545002: step: 706/464, loss: 0.021176619455218315 2023-01-24 04:24:45.236415: step: 708/464, loss: 0.04709320142865181 2023-01-24 04:24:45.852972: step: 710/464, loss: 0.00755814416334033 2023-01-24 04:24:46.493673: step: 712/464, loss: 0.13147243857383728 2023-01-24 04:24:47.056637: step: 714/464, loss: 0.023370176553726196 2023-01-24 04:24:47.713634: step: 716/464, loss: 0.03681657835841179 2023-01-24 04:24:48.363234: step: 718/464, loss: 0.012402649037539959 2023-01-24 04:24:49.034407: step: 720/464, loss: 0.01318159606307745 2023-01-24 04:24:49.685816: step: 722/464, loss: 0.021943572908639908 2023-01-24 04:24:50.334098: step: 724/464, loss: 0.001379357068799436 2023-01-24 04:24:50.958690: step: 726/464, loss: 0.033673420548439026 2023-01-24 04:24:51.630814: step: 728/464, loss: 0.004524344112724066 2023-01-24 04:24:52.190290: step: 730/464, loss: 0.014040261507034302 2023-01-24 04:24:52.759222: step: 732/464, loss: 0.004589345771819353 2023-01-24 04:24:53.514625: step: 734/464, loss: 0.02422994375228882 2023-01-24 04:24:54.117626: step: 736/464, loss: 0.012395837344229221 2023-01-24 04:24:54.712580: step: 738/464, loss: 0.07498691976070404 2023-01-24 04:24:55.347693: step: 740/464, loss: 0.01698930375277996 2023-01-24 04:24:55.952549: step: 742/464, loss: 0.2359449863433838 2023-01-24 04:24:56.560398: step: 744/464, loss: 0.16284212470054626 2023-01-24 04:24:57.227908: step: 746/464, loss: 0.0486256368458271 2023-01-24 04:24:57.845891: step: 748/464, loss: 0.013262578286230564 2023-01-24 04:24:58.513857: step: 750/464, loss: 0.054008420556783676 2023-01-24 04:24:59.183223: step: 752/464, loss: 0.010377590544521809 2023-01-24 04:24:59.749435: step: 754/464, loss: 0.011460366658866405 2023-01-24 04:25:00.451453: step: 756/464, loss: 0.004750390071421862 2023-01-24 04:25:01.057315: step: 758/464, loss: 0.01390005275607109 2023-01-24 04:25:01.629893: step: 760/464, loss: 0.05716053768992424 2023-01-24 04:25:02.271157: step: 762/464, loss: 0.014838357456028461 2023-01-24 04:25:02.913167: step: 764/464, loss: 0.034498848021030426 2023-01-24 04:25:03.557112: step: 766/464, loss: 0.06667480617761612 2023-01-24 04:25:04.189032: step: 768/464, loss: 0.0033385204151272774 2023-01-24 04:25:04.839953: step: 770/464, loss: 0.10323575884103775 2023-01-24 04:25:05.533824: step: 772/464, loss: 0.06185237690806389 2023-01-24 04:25:06.171032: step: 774/464, loss: 0.0025028877425938845 2023-01-24 04:25:06.770033: step: 776/464, loss: 0.01816844753921032 2023-01-24 04:25:07.367258: step: 778/464, loss: 0.07951855659484863 2023-01-24 04:25:07.978441: step: 780/464, loss: 0.06930948048830032 2023-01-24 04:25:08.638374: step: 782/464, loss: 0.029431408271193504 2023-01-24 04:25:09.337586: step: 784/464, loss: 0.03639020770788193 2023-01-24 04:25:10.011119: step: 786/464, loss: 0.028380228206515312 2023-01-24 04:25:10.648165: step: 788/464, loss: 0.07341472059488297 2023-01-24 04:25:11.283386: step: 790/464, loss: 0.14059926569461823 2023-01-24 04:25:11.952193: step: 792/464, loss: 0.02317161299288273 2023-01-24 04:25:12.578107: step: 794/464, loss: 0.0962318554520607 2023-01-24 04:25:13.185493: step: 796/464, loss: 0.023787975311279297 2023-01-24 04:25:13.837533: step: 798/464, loss: 0.016284290701150894 2023-01-24 04:25:14.476431: step: 800/464, loss: 0.004960604943335056 2023-01-24 04:25:15.123137: step: 802/464, loss: 0.3363596498966217 2023-01-24 04:25:15.742075: step: 804/464, loss: 0.018801333382725716 2023-01-24 04:25:16.416238: step: 806/464, loss: 0.02355259843170643 2023-01-24 04:25:16.934460: step: 808/464, loss: 0.017150631174445152 2023-01-24 04:25:17.560522: step: 810/464, loss: 0.003461694810539484 2023-01-24 04:25:18.223804: step: 812/464, loss: 0.7432679533958435 2023-01-24 04:25:18.876588: step: 814/464, loss: 0.016445057466626167 2023-01-24 04:25:19.496644: step: 816/464, loss: 0.2394285649061203 2023-01-24 04:25:20.106108: step: 818/464, loss: 0.17523111402988434 2023-01-24 04:25:20.676193: step: 820/464, loss: 0.0672837644815445 2023-01-24 04:25:21.296487: step: 822/464, loss: 0.002954719355329871 2023-01-24 04:25:21.868080: step: 824/464, loss: 0.06920049339532852 2023-01-24 04:25:22.473959: step: 826/464, loss: 0.07915281504392624 2023-01-24 04:25:23.135130: step: 828/464, loss: 0.010470702312886715 2023-01-24 04:25:23.722047: step: 830/464, loss: 0.009583774022758007 2023-01-24 04:25:24.337063: step: 832/464, loss: 0.009306436404585838 2023-01-24 04:25:24.887696: step: 834/464, loss: 0.006306438706815243 2023-01-24 04:25:25.484605: step: 836/464, loss: 0.0045156157575547695 2023-01-24 04:25:26.056083: step: 838/464, loss: 0.0021650197450071573 2023-01-24 04:25:26.656684: step: 840/464, loss: 0.05883180722594261 2023-01-24 04:25:27.311141: step: 842/464, loss: 0.08518681675195694 2023-01-24 04:25:27.924225: step: 844/464, loss: 0.2572651207447052 2023-01-24 04:25:28.520808: step: 846/464, loss: 0.008243846707046032 2023-01-24 04:25:29.104339: step: 848/464, loss: 0.004368755035102367 2023-01-24 04:25:29.777984: step: 850/464, loss: 0.09649864584207535 2023-01-24 04:25:30.414780: step: 852/464, loss: 0.402221143245697 2023-01-24 04:25:31.030200: step: 854/464, loss: 0.04158594086766243 2023-01-24 04:25:31.645153: step: 856/464, loss: 0.01596343144774437 2023-01-24 04:25:32.291492: step: 858/464, loss: 0.0030320510268211365 2023-01-24 04:25:32.916583: step: 860/464, loss: 0.009376936592161655 2023-01-24 04:25:33.546789: step: 862/464, loss: 0.030942099168896675 2023-01-24 04:25:34.157765: step: 864/464, loss: 0.06399189680814743 2023-01-24 04:25:34.838179: step: 866/464, loss: 0.012965050525963306 2023-01-24 04:25:35.430296: step: 868/464, loss: 0.006405264604836702 2023-01-24 04:25:36.050011: step: 870/464, loss: 0.010999604128301144 2023-01-24 04:25:36.638789: step: 872/464, loss: 0.016261886805295944 2023-01-24 04:25:37.270283: step: 874/464, loss: 0.024095727130770683 2023-01-24 04:25:37.975983: step: 876/464, loss: 0.2649255394935608 2023-01-24 04:25:38.598601: step: 878/464, loss: 0.00711466558277607 2023-01-24 04:25:39.190388: step: 880/464, loss: 0.1561778485774994 2023-01-24 04:25:39.779374: step: 882/464, loss: 0.006854875944554806 2023-01-24 04:25:40.446761: step: 884/464, loss: 0.005609060171991587 2023-01-24 04:25:41.103332: step: 886/464, loss: 0.026153935119509697 2023-01-24 04:25:41.771406: step: 888/464, loss: 0.0005563063896261156 2023-01-24 04:25:42.418909: step: 890/464, loss: 0.02071342244744301 2023-01-24 04:25:43.064584: step: 892/464, loss: 0.007495345547795296 2023-01-24 04:25:43.749982: step: 894/464, loss: 0.022217558696866035 2023-01-24 04:25:44.438460: step: 896/464, loss: 0.016546782106161118 2023-01-24 04:25:45.115638: step: 898/464, loss: 0.008249156177043915 2023-01-24 04:25:45.721851: step: 900/464, loss: 0.02281145751476288 2023-01-24 04:25:46.351204: step: 902/464, loss: 0.053221337497234344 2023-01-24 04:25:46.959260: step: 904/464, loss: 0.00181183114182204 2023-01-24 04:25:47.571297: step: 906/464, loss: 0.06797477602958679 2023-01-24 04:25:48.178340: step: 908/464, loss: 0.01370300818234682 2023-01-24 04:25:48.798218: step: 910/464, loss: 0.06573604792356491 2023-01-24 04:25:49.367816: step: 912/464, loss: 0.006355880293995142 2023-01-24 04:25:49.980487: step: 914/464, loss: 0.04955657199025154 2023-01-24 04:25:50.623545: step: 916/464, loss: 0.03783131390810013 2023-01-24 04:25:51.260087: step: 918/464, loss: 0.05432085320353508 2023-01-24 04:25:51.886870: step: 920/464, loss: 0.18720349669456482 2023-01-24 04:25:52.577488: step: 922/464, loss: 0.0176202692091465 2023-01-24 04:25:53.139498: step: 924/464, loss: 0.008661163039505482 2023-01-24 04:25:53.685481: step: 926/464, loss: 0.02440672367811203 2023-01-24 04:25:54.263284: step: 928/464, loss: 0.007154208607971668 2023-01-24 04:25:54.814328: step: 930/464, loss: 0.006525322794914246 ================================================== Loss: 0.071 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3524325728233681, 'r': 0.3250137199092161, 'f1': 0.338168273232294}, 'combined': 0.24917662238169028, 'epoch': 29} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3310935394880281, 'r': 0.29271471154369366, 'f1': 0.3107235268046826}, 'combined': 0.20285577397611404, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3512365861837693, 'r': 0.3312420936116382, 'f1': 0.3409464518229167}, 'combined': 0.25122370134320177, 'epoch': 29} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3476866475246937, 'r': 0.2984283736343507, 'f1': 0.32117984370350416}, 'combined': 0.2096821777546193, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36762921940928267, 'r': 0.33065702087286525, 'f1': 0.34816433566433563}, 'combined': 0.2565421420684578, 'epoch': 29} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3474685639497804, 'r': 0.287372804959386, 'f1': 0.31457627290116075}, 'combined': 0.20537103826707903, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2634803921568627, 'r': 0.25595238095238093, 'f1': 0.25966183574879226}, 'combined': 0.17310789049919484, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.32608695652173914, 'f1': 0.2830188679245283}, 'combined': 0.14150943396226415, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:28:31.056358: step: 2/464, loss: 0.015284399501979351 2023-01-24 04:28:31.737998: step: 4/464, loss: 0.0022311429493129253 2023-01-24 04:28:32.367641: step: 6/464, loss: 0.03886798024177551 2023-01-24 04:28:32.958008: step: 8/464, loss: 0.002904294990003109 2023-01-24 04:28:33.564827: step: 10/464, loss: 0.00875371228903532 2023-01-24 04:28:34.187555: step: 12/464, loss: 0.011434866115450859 2023-01-24 04:28:34.778484: step: 14/464, loss: 0.03433942422270775 2023-01-24 04:28:35.366766: step: 16/464, loss: 0.00443413807079196 2023-01-24 04:28:35.955339: step: 18/464, loss: 0.0009977881563827395 2023-01-24 04:28:36.700885: step: 20/464, loss: 0.014764295890927315 2023-01-24 04:28:37.294183: step: 22/464, loss: 0.004287984687834978 2023-01-24 04:28:37.911370: step: 24/464, loss: 0.049406565725803375 2023-01-24 04:28:38.465164: step: 26/464, loss: 0.040634434670209885 2023-01-24 04:28:39.091602: step: 28/464, loss: 0.003474750090390444 2023-01-24 04:28:39.710365: step: 30/464, loss: 0.014196655713021755 2023-01-24 04:28:40.301185: step: 32/464, loss: 0.0022962952498346567 2023-01-24 04:28:40.907319: step: 34/464, loss: 0.01741405576467514 2023-01-24 04:28:41.601877: step: 36/464, loss: 0.0005106744938530028 2023-01-24 04:28:42.230598: step: 38/464, loss: 0.020914586260914803 2023-01-24 04:28:42.856262: step: 40/464, loss: 0.004347099456936121 2023-01-24 04:28:43.440476: step: 42/464, loss: 0.004731880035251379 2023-01-24 04:28:44.015478: step: 44/464, loss: 0.00010120868682861328 2023-01-24 04:28:44.672170: step: 46/464, loss: 0.021717039868235588 2023-01-24 04:28:45.268981: step: 48/464, loss: 0.010191944427788258 2023-01-24 04:28:45.910019: step: 50/464, loss: 0.02108851820230484 2023-01-24 04:28:46.514276: step: 52/464, loss: 0.008559471927583218 2023-01-24 04:28:47.098766: step: 54/464, loss: 0.03864532709121704 2023-01-24 04:28:47.664733: step: 56/464, loss: 0.005444041453301907 2023-01-24 04:28:48.313400: step: 58/464, loss: 0.007448922842741013 2023-01-24 04:28:48.868897: step: 60/464, loss: 0.002818542066961527 2023-01-24 04:28:49.463562: step: 62/464, loss: 0.00016934421728365123 2023-01-24 04:28:50.094365: step: 64/464, loss: 0.0505225844681263 2023-01-24 04:28:50.685591: step: 66/464, loss: 0.009668254293501377 2023-01-24 04:28:51.336375: step: 68/464, loss: 0.000820213055703789 2023-01-24 04:28:51.991959: step: 70/464, loss: 0.020397549495100975 2023-01-24 04:28:52.672966: step: 72/464, loss: 0.0032460566144436598 2023-01-24 04:28:53.292273: step: 74/464, loss: 5.213002077653073e-05 2023-01-24 04:28:53.976539: step: 76/464, loss: 0.09260469675064087 2023-01-24 04:28:54.570419: step: 78/464, loss: 0.021970590576529503 2023-01-24 04:28:55.143887: step: 80/464, loss: 0.008407541550695896 2023-01-24 04:28:55.707129: step: 82/464, loss: 0.04296931251883507 2023-01-24 04:28:56.333810: step: 84/464, loss: 0.007767929695546627 2023-01-24 04:28:56.919384: step: 86/464, loss: 0.0032091503962874413 2023-01-24 04:28:57.524367: step: 88/464, loss: 0.006214894820004702 2023-01-24 04:28:58.102119: step: 90/464, loss: 0.03251410275697708 2023-01-24 04:28:58.671295: step: 92/464, loss: 0.013027353212237358 2023-01-24 04:28:59.250699: step: 94/464, loss: 0.008885751478374004 2023-01-24 04:28:59.877428: step: 96/464, loss: 0.029318206012248993 2023-01-24 04:29:00.469877: step: 98/464, loss: 0.0012817407259717584 2023-01-24 04:29:01.102437: step: 100/464, loss: 0.4874168932437897 2023-01-24 04:29:01.829453: step: 102/464, loss: 0.3966943919658661 2023-01-24 04:29:02.410125: step: 104/464, loss: 0.42609456181526184 2023-01-24 04:29:03.001112: step: 106/464, loss: 0.02490398660302162 2023-01-24 04:29:03.635124: step: 108/464, loss: 0.2202860713005066 2023-01-24 04:29:04.232043: step: 110/464, loss: 0.004081774968653917 2023-01-24 04:29:04.857493: step: 112/464, loss: 0.0020068904850631952 2023-01-24 04:29:05.490331: step: 114/464, loss: 0.008217177353799343 2023-01-24 04:29:06.093905: step: 116/464, loss: 0.0032490852754563093 2023-01-24 04:29:06.779382: step: 118/464, loss: 0.045100998133420944 2023-01-24 04:29:07.360717: step: 120/464, loss: 0.027695056051015854 2023-01-24 04:29:07.958994: step: 122/464, loss: 0.0003648280689958483 2023-01-24 04:29:08.662211: step: 124/464, loss: 0.10975858569145203 2023-01-24 04:29:09.247497: step: 126/464, loss: 0.00019776269618887454 2023-01-24 04:29:09.841857: step: 128/464, loss: 0.007610243279486895 2023-01-24 04:29:10.454710: step: 130/464, loss: 0.037492718547582626 2023-01-24 04:29:11.040637: step: 132/464, loss: 0.026891810819506645 2023-01-24 04:29:11.633123: step: 134/464, loss: 0.07546871155500412 2023-01-24 04:29:12.267084: step: 136/464, loss: 0.0031861874740570784 2023-01-24 04:29:12.917017: step: 138/464, loss: 0.0006391544593498111 2023-01-24 04:29:13.566692: step: 140/464, loss: 0.012248599901795387 2023-01-24 04:29:14.176158: step: 142/464, loss: 0.007616680581122637 2023-01-24 04:29:14.757388: step: 144/464, loss: 0.01309220027178526 2023-01-24 04:29:15.428364: step: 146/464, loss: 0.018922271206974983 2023-01-24 04:29:16.018927: step: 148/464, loss: 0.010868867859244347 2023-01-24 04:29:16.589073: step: 150/464, loss: 0.014443072490394115 2023-01-24 04:29:17.215153: step: 152/464, loss: 0.04655880481004715 2023-01-24 04:29:17.813898: step: 154/464, loss: 0.0075754448771476746 2023-01-24 04:29:18.542014: step: 156/464, loss: 0.012672694399952888 2023-01-24 04:29:19.191597: step: 158/464, loss: 0.008323279209434986 2023-01-24 04:29:19.816047: step: 160/464, loss: 0.01113010011613369 2023-01-24 04:29:20.408160: step: 162/464, loss: 0.004104962106794119 2023-01-24 04:29:21.011109: step: 164/464, loss: 0.004299768712371588 2023-01-24 04:29:21.619238: step: 166/464, loss: 0.0036474387161433697 2023-01-24 04:29:22.300593: step: 168/464, loss: 0.024346238002181053 2023-01-24 04:29:22.959926: step: 170/464, loss: 0.009751499630510807 2023-01-24 04:29:23.678511: step: 172/464, loss: 0.017550071701407433 2023-01-24 04:29:24.260983: step: 174/464, loss: 0.01081873383373022 2023-01-24 04:29:24.865304: step: 176/464, loss: 0.004264844581484795 2023-01-24 04:29:25.497158: step: 178/464, loss: 0.028513511642813683 2023-01-24 04:29:26.160200: step: 180/464, loss: 0.029072461649775505 2023-01-24 04:29:26.793998: step: 182/464, loss: 0.0013808414805680513 2023-01-24 04:29:27.395838: step: 184/464, loss: 0.0019550700671970844 2023-01-24 04:29:28.002521: step: 186/464, loss: 0.04768887162208557 2023-01-24 04:29:28.636806: step: 188/464, loss: 0.01719737984240055 2023-01-24 04:29:29.260591: step: 190/464, loss: 0.018912989646196365 2023-01-24 04:29:29.881878: step: 192/464, loss: 0.006397397257387638 2023-01-24 04:29:30.490229: step: 194/464, loss: 0.0028882757760584354 2023-01-24 04:29:31.182946: step: 196/464, loss: 0.0076980902813375 2023-01-24 04:29:31.764690: step: 198/464, loss: 0.0008096446981653571 2023-01-24 04:29:32.361540: step: 200/464, loss: 0.043125562369823456 2023-01-24 04:29:32.967914: step: 202/464, loss: 0.010093793272972107 2023-01-24 04:29:33.528353: step: 204/464, loss: 0.0005989423370920122 2023-01-24 04:29:34.158371: step: 206/464, loss: 0.0317736454308033 2023-01-24 04:29:34.753763: step: 208/464, loss: 0.04524993523955345 2023-01-24 04:29:35.376622: step: 210/464, loss: 0.0008322806097567081 2023-01-24 04:29:36.012509: step: 212/464, loss: 0.00863353069871664 2023-01-24 04:29:36.696703: step: 214/464, loss: 0.012559205293655396 2023-01-24 04:29:37.306549: step: 216/464, loss: 0.006640761159360409 2023-01-24 04:29:37.893300: step: 218/464, loss: 0.014305575750768185 2023-01-24 04:29:38.532969: step: 220/464, loss: 0.005848568864166737 2023-01-24 04:29:39.142315: step: 222/464, loss: 0.020689699798822403 2023-01-24 04:29:39.749586: step: 224/464, loss: 0.017257021740078926 2023-01-24 04:29:40.431895: step: 226/464, loss: 0.010369797237217426 2023-01-24 04:29:41.075092: step: 228/464, loss: 0.41616204380989075 2023-01-24 04:29:41.698836: step: 230/464, loss: 0.05756490305066109 2023-01-24 04:29:42.356208: step: 232/464, loss: 0.003744078567251563 2023-01-24 04:29:42.954612: step: 234/464, loss: 0.16744425892829895 2023-01-24 04:29:43.568295: step: 236/464, loss: 0.00409188587218523 2023-01-24 04:29:44.171726: step: 238/464, loss: 0.05485359579324722 2023-01-24 04:29:44.811398: step: 240/464, loss: 0.03245936334133148 2023-01-24 04:29:45.375932: step: 242/464, loss: 0.0014707011869177222 2023-01-24 04:29:45.991417: step: 244/464, loss: 0.028101148083806038 2023-01-24 04:29:46.624514: step: 246/464, loss: 0.025969622656702995 2023-01-24 04:29:47.220999: step: 248/464, loss: 0.03521401435136795 2023-01-24 04:29:47.819125: step: 250/464, loss: 0.001902177231386304 2023-01-24 04:29:48.463443: step: 252/464, loss: 0.0075007411651313305 2023-01-24 04:29:49.114561: step: 254/464, loss: 0.025483937934041023 2023-01-24 04:29:49.737208: step: 256/464, loss: 0.013025188818573952 2023-01-24 04:29:50.320628: step: 258/464, loss: 0.0023038771469146013 2023-01-24 04:29:50.909289: step: 260/464, loss: 0.011823596432805061 2023-01-24 04:29:51.514150: step: 262/464, loss: 0.0017983571160584688 2023-01-24 04:29:52.206225: step: 264/464, loss: 0.009744478389620781 2023-01-24 04:29:52.753320: step: 266/464, loss: 0.0004119996738154441 2023-01-24 04:29:53.312156: step: 268/464, loss: 0.004429759457707405 2023-01-24 04:29:53.910313: step: 270/464, loss: 0.001029555220156908 2023-01-24 04:29:54.540580: step: 272/464, loss: 0.009982489980757236 2023-01-24 04:29:55.217548: step: 274/464, loss: 0.062253210693597794 2023-01-24 04:29:55.822931: step: 276/464, loss: 0.008633971214294434 2023-01-24 04:29:56.454702: step: 278/464, loss: 0.0017187268240377307 2023-01-24 04:29:57.149555: step: 280/464, loss: 0.02328815683722496 2023-01-24 04:29:57.768972: step: 282/464, loss: 0.00636210897937417 2023-01-24 04:29:58.478976: step: 284/464, loss: 0.007372157648205757 2023-01-24 04:29:59.069766: step: 286/464, loss: 0.015651492401957512 2023-01-24 04:29:59.675946: step: 288/464, loss: 0.030818969011306763 2023-01-24 04:30:00.222926: step: 290/464, loss: 0.0004586923460010439 2023-01-24 04:30:00.853113: step: 292/464, loss: 0.023854084312915802 2023-01-24 04:30:01.375348: step: 294/464, loss: 0.015337026678025723 2023-01-24 04:30:02.012973: step: 296/464, loss: 0.029326308518648148 2023-01-24 04:30:02.627804: step: 298/464, loss: 0.03470027819275856 2023-01-24 04:30:03.238719: step: 300/464, loss: 0.048934027552604675 2023-01-24 04:30:03.915553: step: 302/464, loss: 0.0003046983329113573 2023-01-24 04:30:04.510817: step: 304/464, loss: 0.038852933794260025 2023-01-24 04:30:05.125151: step: 306/464, loss: 0.04453202709555626 2023-01-24 04:30:05.715157: step: 308/464, loss: 0.002494563115760684 2023-01-24 04:30:06.395323: step: 310/464, loss: 0.006276068277657032 2023-01-24 04:30:07.044588: step: 312/464, loss: 0.03738127648830414 2023-01-24 04:30:07.631598: step: 314/464, loss: 0.0026037623174488544 2023-01-24 04:30:08.261843: step: 316/464, loss: 0.00884893536567688 2023-01-24 04:30:08.929928: step: 318/464, loss: 0.010780254378914833 2023-01-24 04:30:09.633934: step: 320/464, loss: 0.00928980391472578 2023-01-24 04:30:10.261813: step: 322/464, loss: 0.017196692526340485 2023-01-24 04:30:10.894910: step: 324/464, loss: 0.00023449427681043744 2023-01-24 04:30:11.549807: step: 326/464, loss: 0.01587003655731678 2023-01-24 04:30:12.161564: step: 328/464, loss: 0.027382345870137215 2023-01-24 04:30:12.819534: step: 330/464, loss: 0.32194334268569946 2023-01-24 04:30:13.454702: step: 332/464, loss: 0.011561702936887741 2023-01-24 04:30:14.112737: step: 334/464, loss: 0.22262977063655853 2023-01-24 04:30:14.762637: step: 336/464, loss: 0.02064697816967964 2023-01-24 04:30:15.355667: step: 338/464, loss: 0.021129708737134933 2023-01-24 04:30:15.956767: step: 340/464, loss: 0.012493270449340343 2023-01-24 04:30:16.550093: step: 342/464, loss: 0.038638342171907425 2023-01-24 04:30:17.172596: step: 344/464, loss: 0.15898793935775757 2023-01-24 04:30:17.831430: step: 346/464, loss: 0.030613379552960396 2023-01-24 04:30:18.439845: step: 348/464, loss: 0.042543865740299225 2023-01-24 04:30:19.070809: step: 350/464, loss: 0.006682658568024635 2023-01-24 04:30:19.730625: step: 352/464, loss: 0.03325079381465912 2023-01-24 04:30:20.469101: step: 354/464, loss: 0.047726865857839584 2023-01-24 04:30:21.018010: step: 356/464, loss: 0.012037553824484348 2023-01-24 04:30:21.652686: step: 358/464, loss: 0.03466491773724556 2023-01-24 04:30:22.294305: step: 360/464, loss: 0.016195526346564293 2023-01-24 04:30:22.967798: step: 362/464, loss: 0.03511173278093338 2023-01-24 04:30:23.560597: step: 364/464, loss: 0.0003981849877163768 2023-01-24 04:30:24.146190: step: 366/464, loss: 0.07734028995037079 2023-01-24 04:30:24.808598: step: 368/464, loss: 0.07165095210075378 2023-01-24 04:30:25.431108: step: 370/464, loss: 0.020368773490190506 2023-01-24 04:30:26.047576: step: 372/464, loss: 0.04683218523859978 2023-01-24 04:30:26.655212: step: 374/464, loss: 0.00047499319771304727 2023-01-24 04:30:27.230259: step: 376/464, loss: 0.0017459866357967257 2023-01-24 04:30:27.863891: step: 378/464, loss: 0.0060515995137393475 2023-01-24 04:30:28.514052: step: 380/464, loss: 0.0008434664341621101 2023-01-24 04:30:29.113500: step: 382/464, loss: 0.01529900822788477 2023-01-24 04:30:29.751223: step: 384/464, loss: 0.01310059241950512 2023-01-24 04:30:30.401090: step: 386/464, loss: 0.04771070182323456 2023-01-24 04:30:30.974639: step: 388/464, loss: 0.0028760803397744894 2023-01-24 04:30:31.597247: step: 390/464, loss: 0.013585273176431656 2023-01-24 04:30:32.164034: step: 392/464, loss: 0.0009268809226341546 2023-01-24 04:30:32.792629: step: 394/464, loss: 0.028596773743629456 2023-01-24 04:30:33.381284: step: 396/464, loss: 0.02000209130346775 2023-01-24 04:30:33.998779: step: 398/464, loss: 0.008988582529127598 2023-01-24 04:30:34.701193: step: 400/464, loss: 0.015802456066012383 2023-01-24 04:30:35.307187: step: 402/464, loss: 0.04091927409172058 2023-01-24 04:30:35.899415: step: 404/464, loss: 0.005606489256024361 2023-01-24 04:30:36.493589: step: 406/464, loss: 0.00022503657964989543 2023-01-24 04:30:37.156661: step: 408/464, loss: 0.01612733118236065 2023-01-24 04:30:37.706670: step: 410/464, loss: 0.03441927209496498 2023-01-24 04:30:38.367344: step: 412/464, loss: 0.007009692490100861 2023-01-24 04:30:38.976740: step: 414/464, loss: 0.006589479278773069 2023-01-24 04:30:39.652416: step: 416/464, loss: 0.028384795412421227 2023-01-24 04:30:40.368179: step: 418/464, loss: 0.0031787222251296043 2023-01-24 04:30:40.876552: step: 420/464, loss: 0.05162772163748741 2023-01-24 04:30:41.533749: step: 422/464, loss: 0.003943289164453745 2023-01-24 04:30:42.287764: step: 424/464, loss: 0.0566033273935318 2023-01-24 04:30:42.931977: step: 426/464, loss: 0.0016180764650925994 2023-01-24 04:30:43.547224: step: 428/464, loss: 0.023380430415272713 2023-01-24 04:30:44.189671: step: 430/464, loss: 0.0003730784519575536 2023-01-24 04:30:44.773967: step: 432/464, loss: 0.014182335697114468 2023-01-24 04:30:45.350114: step: 434/464, loss: 0.015464498661458492 2023-01-24 04:30:45.918336: step: 436/464, loss: 0.025532225146889687 2023-01-24 04:30:46.565970: step: 438/464, loss: 0.02354058437049389 2023-01-24 04:30:47.202498: step: 440/464, loss: 0.00013290946662891656 2023-01-24 04:30:47.817095: step: 442/464, loss: 0.0007013229769654572 2023-01-24 04:30:48.422083: step: 444/464, loss: 2.71716507995734e-05 2023-01-24 04:30:49.052108: step: 446/464, loss: 0.06851676851511002 2023-01-24 04:30:49.703219: step: 448/464, loss: 0.00036148345679976046 2023-01-24 04:30:50.325866: step: 450/464, loss: 0.004605370108038187 2023-01-24 04:30:51.014619: step: 452/464, loss: 0.013162552379071712 2023-01-24 04:30:51.622537: step: 454/464, loss: 0.007672559469938278 2023-01-24 04:30:52.271415: step: 456/464, loss: 0.08680587261915207 2023-01-24 04:30:52.925978: step: 458/464, loss: 0.007985075935721397 2023-01-24 04:30:53.516583: step: 460/464, loss: 0.0062067508697509766 2023-01-24 04:30:54.064273: step: 462/464, loss: 0.04797374829649925 2023-01-24 04:30:54.675481: step: 464/464, loss: 0.018111487850546837 2023-01-24 04:30:55.371952: step: 466/464, loss: 0.006130396854132414 2023-01-24 04:30:56.035369: step: 468/464, loss: 0.0072482856921851635 2023-01-24 04:30:56.733057: step: 470/464, loss: 0.00020309189858380705 2023-01-24 04:30:57.406180: step: 472/464, loss: 0.023160506039857864 2023-01-24 04:30:58.021730: step: 474/464, loss: 0.06689438223838806 2023-01-24 04:30:58.684608: step: 476/464, loss: 0.03167044743895531 2023-01-24 04:30:59.325357: step: 478/464, loss: 0.009050305932760239 2023-01-24 04:30:59.965701: step: 480/464, loss: 0.04196888580918312 2023-01-24 04:31:00.594911: step: 482/464, loss: 0.13746626675128937 2023-01-24 04:31:01.229390: step: 484/464, loss: 0.047761742025613785 2023-01-24 04:31:01.844616: step: 486/464, loss: 0.008654715493321419 2023-01-24 04:31:02.462664: step: 488/464, loss: 0.005094234831631184 2023-01-24 04:31:03.044233: step: 490/464, loss: 0.06825714558362961 2023-01-24 04:31:03.674022: step: 492/464, loss: 0.01563999429345131 2023-01-24 04:31:04.228685: step: 494/464, loss: 0.04438317194581032 2023-01-24 04:31:04.893343: step: 496/464, loss: 0.031057126820087433 2023-01-24 04:31:05.479281: step: 498/464, loss: 0.0031860892195254564 2023-01-24 04:31:06.066450: step: 500/464, loss: 0.03213706985116005 2023-01-24 04:31:06.653186: step: 502/464, loss: 0.0020905076526105404 2023-01-24 04:31:07.245681: step: 504/464, loss: 0.0006013476522639394 2023-01-24 04:31:07.850684: step: 506/464, loss: 0.0025591696612536907 2023-01-24 04:31:08.433282: step: 508/464, loss: 0.03588062524795532 2023-01-24 04:31:09.074264: step: 510/464, loss: 0.3627071678638458 2023-01-24 04:31:09.652002: step: 512/464, loss: 0.0039060432463884354 2023-01-24 04:31:10.284136: step: 514/464, loss: 0.02504384145140648 2023-01-24 04:31:10.944807: step: 516/464, loss: 0.014837171882390976 2023-01-24 04:31:11.589444: step: 518/464, loss: 0.016154740005731583 2023-01-24 04:31:12.171895: step: 520/464, loss: 0.5221549868583679 2023-01-24 04:31:12.809463: step: 522/464, loss: 0.03457175940275192 2023-01-24 04:31:13.445277: step: 524/464, loss: 0.006636395119130611 2023-01-24 04:31:14.017734: step: 526/464, loss: 0.003287211060523987 2023-01-24 04:31:14.693111: step: 528/464, loss: 0.01604713499546051 2023-01-24 04:31:15.312210: step: 530/464, loss: 0.04508698731660843 2023-01-24 04:31:15.888005: step: 532/464, loss: 0.036552201956510544 2023-01-24 04:31:16.522585: step: 534/464, loss: 0.03101583756506443 2023-01-24 04:31:17.167985: step: 536/464, loss: 0.035786811262369156 2023-01-24 04:31:17.760200: step: 538/464, loss: 0.0007765466580167413 2023-01-24 04:31:18.378115: step: 540/464, loss: 0.010097292251884937 2023-01-24 04:31:19.019389: step: 542/464, loss: 0.004103609826415777 2023-01-24 04:31:19.647372: step: 544/464, loss: 0.02460542879998684 2023-01-24 04:31:20.340453: step: 546/464, loss: 0.029425041750073433 2023-01-24 04:31:20.928841: step: 548/464, loss: 0.002205929020419717 2023-01-24 04:31:21.554551: step: 550/464, loss: 0.004628791008144617 2023-01-24 04:31:22.205936: step: 552/464, loss: 0.005380065180361271 2023-01-24 04:31:22.882854: step: 554/464, loss: 0.041205957531929016 2023-01-24 04:31:23.502678: step: 556/464, loss: 0.005021595396101475 2023-01-24 04:31:24.180327: step: 558/464, loss: 0.018770398572087288 2023-01-24 04:31:24.877830: step: 560/464, loss: 0.040609098970890045 2023-01-24 04:31:25.488624: step: 562/464, loss: 0.029900405555963516 2023-01-24 04:31:26.069455: step: 564/464, loss: 0.023872636258602142 2023-01-24 04:31:26.657047: step: 566/464, loss: 0.04181007668375969 2023-01-24 04:31:27.255837: step: 568/464, loss: 0.038093894720077515 2023-01-24 04:31:27.877145: step: 570/464, loss: 0.03319160267710686 2023-01-24 04:31:28.490816: step: 572/464, loss: 0.0010064254747703671 2023-01-24 04:31:29.113903: step: 574/464, loss: 0.012854035943746567 2023-01-24 04:31:29.693492: step: 576/464, loss: 0.034058213233947754 2023-01-24 04:31:30.263520: step: 578/464, loss: 0.018953103572130203 2023-01-24 04:31:30.821222: step: 580/464, loss: 0.00026215752586722374 2023-01-24 04:31:31.394854: step: 582/464, loss: 0.005657382775098085 2023-01-24 04:31:32.027516: step: 584/464, loss: 0.03003401681780815 2023-01-24 04:31:32.707197: step: 586/464, loss: 0.005930361337959766 2023-01-24 04:31:33.297157: step: 588/464, loss: 0.037126656621694565 2023-01-24 04:31:33.882032: step: 590/464, loss: 0.01343232486397028 2023-01-24 04:31:34.491365: step: 592/464, loss: 0.002364499494433403 2023-01-24 04:31:35.124607: step: 594/464, loss: 0.2112520933151245 2023-01-24 04:31:35.712608: step: 596/464, loss: 0.016033155843615532 2023-01-24 04:31:36.310498: step: 598/464, loss: 0.029524585232138634 2023-01-24 04:31:36.904546: step: 600/464, loss: 0.008080963045358658 2023-01-24 04:31:37.552049: step: 602/464, loss: 0.030004560947418213 2023-01-24 04:31:38.139646: step: 604/464, loss: 0.029935169965028763 2023-01-24 04:31:38.873571: step: 606/464, loss: 0.0037073304411023855 2023-01-24 04:31:39.482657: step: 608/464, loss: 0.031628526747226715 2023-01-24 04:31:40.121111: step: 610/464, loss: 0.4366910755634308 2023-01-24 04:31:40.772844: step: 612/464, loss: 0.0004109439323656261 2023-01-24 04:31:41.404053: step: 614/464, loss: 0.031441304832696915 2023-01-24 04:31:42.046372: step: 616/464, loss: 0.007856231182813644 2023-01-24 04:31:42.780344: step: 618/464, loss: 0.032301947474479675 2023-01-24 04:31:43.395996: step: 620/464, loss: 0.0006108984816819429 2023-01-24 04:31:43.948362: step: 622/464, loss: 0.028849845752120018 2023-01-24 04:31:44.573899: step: 624/464, loss: 0.036408454179763794 2023-01-24 04:31:45.166232: step: 626/464, loss: 0.003924418706446886 2023-01-24 04:31:45.827257: step: 628/464, loss: 0.0049882857128977776 2023-01-24 04:31:46.397213: step: 630/464, loss: 0.01432995218783617 2023-01-24 04:31:47.001456: step: 632/464, loss: 0.01632368192076683 2023-01-24 04:31:47.610164: step: 634/464, loss: 0.06306029856204987 2023-01-24 04:31:48.280903: step: 636/464, loss: 0.02596416138112545 2023-01-24 04:31:48.916701: step: 638/464, loss: 0.0005985710886307061 2023-01-24 04:31:49.558788: step: 640/464, loss: 0.03676480054855347 2023-01-24 04:31:50.229648: step: 642/464, loss: 0.008700869046151638 2023-01-24 04:31:50.791592: step: 644/464, loss: 0.026712315157055855 2023-01-24 04:31:51.351826: step: 646/464, loss: 0.00017385899263899773 2023-01-24 04:31:51.998934: step: 648/464, loss: 0.0314791165292263 2023-01-24 04:31:52.586578: step: 650/464, loss: 0.002847842639312148 2023-01-24 04:31:53.214506: step: 652/464, loss: 0.003370642429217696 2023-01-24 04:31:53.841983: step: 654/464, loss: 0.022391922771930695 2023-01-24 04:31:54.513198: step: 656/464, loss: 0.001920976908877492 2023-01-24 04:31:55.121195: step: 658/464, loss: 4.001665365649387e-05 2023-01-24 04:31:55.687392: step: 660/464, loss: 0.09575840830802917 2023-01-24 04:31:56.275418: step: 662/464, loss: 0.0033561275340616703 2023-01-24 04:31:56.810888: step: 664/464, loss: 0.007227852009236813 2023-01-24 04:31:57.442528: step: 666/464, loss: 0.016562720760703087 2023-01-24 04:31:58.037030: step: 668/464, loss: 0.04740128293633461 2023-01-24 04:31:58.722648: step: 670/464, loss: 0.01998371258378029 2023-01-24 04:31:59.304037: step: 672/464, loss: 0.010545728728175163 2023-01-24 04:31:59.933815: step: 674/464, loss: 0.029419176280498505 2023-01-24 04:32:00.603624: step: 676/464, loss: 0.00707714119926095 2023-01-24 04:32:01.193098: step: 678/464, loss: 0.0011200368171557784 2023-01-24 04:32:01.834461: step: 680/464, loss: 0.4056033790111542 2023-01-24 04:32:02.477398: step: 682/464, loss: 0.2395782321691513 2023-01-24 04:32:03.135800: step: 684/464, loss: 0.02981843799352646 2023-01-24 04:32:03.779192: step: 686/464, loss: 0.0024933991953730583 2023-01-24 04:32:04.359945: step: 688/464, loss: 0.0015063255559653044 2023-01-24 04:32:05.001187: step: 690/464, loss: 0.1298820823431015 2023-01-24 04:32:05.608924: step: 692/464, loss: 0.011537283658981323 2023-01-24 04:32:06.190021: step: 694/464, loss: 0.025633297860622406 2023-01-24 04:32:06.820748: step: 696/464, loss: 0.18669280409812927 2023-01-24 04:32:07.489868: step: 698/464, loss: 0.0602891631424427 2023-01-24 04:32:08.053085: step: 700/464, loss: 0.018188297748565674 2023-01-24 04:32:08.654645: step: 702/464, loss: 0.07520942389965057 2023-01-24 04:32:09.241331: step: 704/464, loss: 0.017319330945611 2023-01-24 04:32:09.843553: step: 706/464, loss: 0.034891169518232346 2023-01-24 04:32:10.429662: step: 708/464, loss: 0.0018907490884885192 2023-01-24 04:32:11.091460: step: 710/464, loss: 0.005784806329756975 2023-01-24 04:32:11.650236: step: 712/464, loss: 0.0198514387011528 2023-01-24 04:32:12.247412: step: 714/464, loss: 0.018424250185489655 2023-01-24 04:32:12.929571: step: 716/464, loss: 0.02576695941388607 2023-01-24 04:32:13.571017: step: 718/464, loss: 0.020191747695207596 2023-01-24 04:32:14.168143: step: 720/464, loss: 0.006152929272502661 2023-01-24 04:32:14.729246: step: 722/464, loss: 0.022174587473273277 2023-01-24 04:32:15.277105: step: 724/464, loss: 0.002321321051567793 2023-01-24 04:32:15.926931: step: 726/464, loss: 0.0443158783018589 2023-01-24 04:32:16.670782: step: 728/464, loss: 0.031350310891866684 2023-01-24 04:32:17.269843: step: 730/464, loss: 0.13132141530513763 2023-01-24 04:32:17.947620: step: 732/464, loss: 0.12540392577648163 2023-01-24 04:32:18.546597: step: 734/464, loss: 3.791404378716834e-05 2023-01-24 04:32:19.147385: step: 736/464, loss: 0.0021358360536396503 2023-01-24 04:32:19.786756: step: 738/464, loss: 0.044767990708351135 2023-01-24 04:32:20.374323: step: 740/464, loss: 0.03425537794828415 2023-01-24 04:32:20.992124: step: 742/464, loss: 0.004247912671416998 2023-01-24 04:32:21.598018: step: 744/464, loss: 0.09336188435554504 2023-01-24 04:32:22.233435: step: 746/464, loss: 0.01353756058961153 2023-01-24 04:32:22.940588: step: 748/464, loss: 0.0011425215052440763 2023-01-24 04:32:23.605932: step: 750/464, loss: 0.03342825174331665 2023-01-24 04:32:24.242676: step: 752/464, loss: 0.03215102478861809 2023-01-24 04:32:24.876873: step: 754/464, loss: 0.0020427191630005836 2023-01-24 04:32:25.528775: step: 756/464, loss: 0.040538668632507324 2023-01-24 04:32:26.198794: step: 758/464, loss: 0.03943773731589317 2023-01-24 04:32:26.858919: step: 760/464, loss: 0.05264481529593468 2023-01-24 04:32:27.465419: step: 762/464, loss: 0.011125179007649422 2023-01-24 04:32:28.167694: step: 764/464, loss: 0.008942645974457264 2023-01-24 04:32:28.792433: step: 766/464, loss: 0.02644633874297142 2023-01-24 04:32:29.467446: step: 768/464, loss: 0.018786994740366936 2023-01-24 04:32:30.070340: step: 770/464, loss: 0.010282132774591446 2023-01-24 04:32:30.712530: step: 772/464, loss: 0.010151483118534088 2023-01-24 04:32:31.343420: step: 774/464, loss: 0.032033149152994156 2023-01-24 04:32:31.932919: step: 776/464, loss: 0.009923784993588924 2023-01-24 04:32:32.612812: step: 778/464, loss: 0.0010054961312562227 2023-01-24 04:32:33.223403: step: 780/464, loss: 0.013663525693118572 2023-01-24 04:32:33.904615: step: 782/464, loss: 0.01674613729119301 2023-01-24 04:32:34.520400: step: 784/464, loss: 0.003713731886819005 2023-01-24 04:32:35.193321: step: 786/464, loss: 0.006112845614552498 2023-01-24 04:32:35.815651: step: 788/464, loss: 0.04470454528927803 2023-01-24 04:32:36.480915: step: 790/464, loss: 0.0272509828209877 2023-01-24 04:32:37.182734: step: 792/464, loss: 0.02032126858830452 2023-01-24 04:32:37.832740: step: 794/464, loss: 0.11515185981988907 2023-01-24 04:32:38.419748: step: 796/464, loss: 0.0023762150667607784 2023-01-24 04:32:39.035946: step: 798/464, loss: 0.02724236063659191 2023-01-24 04:32:39.633150: step: 800/464, loss: 0.008190082386136055 2023-01-24 04:32:40.193611: step: 802/464, loss: 0.000902436557225883 2023-01-24 04:32:40.841500: step: 804/464, loss: 0.2506117522716522 2023-01-24 04:32:41.454243: step: 806/464, loss: 0.019741175696253777 2023-01-24 04:32:42.111296: step: 808/464, loss: 0.0019921723287552595 2023-01-24 04:32:42.753030: step: 810/464, loss: 0.04300304129719734 2023-01-24 04:32:43.392066: step: 812/464, loss: 0.01057429052889347 2023-01-24 04:32:43.987564: step: 814/464, loss: 0.015183776617050171 2023-01-24 04:32:44.607007: step: 816/464, loss: 0.05513448268175125 2023-01-24 04:32:45.209310: step: 818/464, loss: 0.014224525541067123 2023-01-24 04:32:45.822498: step: 820/464, loss: 0.029979640617966652 2023-01-24 04:32:46.421688: step: 822/464, loss: 0.017401661723852158 2023-01-24 04:32:47.032160: step: 824/464, loss: 0.03680138662457466 2023-01-24 04:32:47.606670: step: 826/464, loss: 0.04820271208882332 2023-01-24 04:32:48.237592: step: 828/464, loss: 0.02106683887541294 2023-01-24 04:32:48.833351: step: 830/464, loss: 0.0020527937449514866 2023-01-24 04:32:49.458611: step: 832/464, loss: 0.023780956864356995 2023-01-24 04:32:50.035851: step: 834/464, loss: 0.004647578578442335 2023-01-24 04:32:50.686577: step: 836/464, loss: 0.00931186880916357 2023-01-24 04:32:51.286426: step: 838/464, loss: 0.04925067722797394 2023-01-24 04:32:51.901612: step: 840/464, loss: 0.05159972980618477 2023-01-24 04:32:52.557523: step: 842/464, loss: 0.018002033233642578 2023-01-24 04:32:53.142224: step: 844/464, loss: 0.01309546921402216 2023-01-24 04:32:53.781035: step: 846/464, loss: 0.20000101625919342 2023-01-24 04:32:54.370059: step: 848/464, loss: 0.027313530445098877 2023-01-24 04:32:54.955791: step: 850/464, loss: 0.039358749985694885 2023-01-24 04:32:55.761731: step: 852/464, loss: 0.02418423257768154 2023-01-24 04:32:56.381829: step: 854/464, loss: 0.2089696079492569 2023-01-24 04:32:57.199979: step: 856/464, loss: 0.005666371434926987 2023-01-24 04:32:57.867753: step: 858/464, loss: 0.3731476366519928 2023-01-24 04:32:58.459714: step: 860/464, loss: 0.007269079331308603 2023-01-24 04:32:59.116013: step: 862/464, loss: 0.0024322133976966143 2023-01-24 04:32:59.752685: step: 864/464, loss: 0.07621672749519348 2023-01-24 04:33:00.317938: step: 866/464, loss: 0.0018724174005910754 2023-01-24 04:33:00.921283: step: 868/464, loss: 0.16458921134471893 2023-01-24 04:33:01.564568: step: 870/464, loss: 0.008762835524976254 2023-01-24 04:33:02.199956: step: 872/464, loss: 0.035141076892614365 2023-01-24 04:33:02.818710: step: 874/464, loss: 0.008137132972478867 2023-01-24 04:33:03.381862: step: 876/464, loss: 0.045945920050144196 2023-01-24 04:33:04.005311: step: 878/464, loss: 0.009523745626211166 2023-01-24 04:33:04.651892: step: 880/464, loss: 0.0029311825055629015 2023-01-24 04:33:05.302406: step: 882/464, loss: 0.002755506196990609 2023-01-24 04:33:05.950358: step: 884/464, loss: 0.030550362542271614 2023-01-24 04:33:06.577520: step: 886/464, loss: 0.0654936134815216 2023-01-24 04:33:07.162515: step: 888/464, loss: 0.02397008426487446 2023-01-24 04:33:07.823370: step: 890/464, loss: 0.0029233875684440136 2023-01-24 04:33:08.409403: step: 892/464, loss: 0.011992864310741425 2023-01-24 04:33:09.061391: step: 894/464, loss: 0.036842696368694305 2023-01-24 04:33:09.623591: step: 896/464, loss: 0.004817049019038677 2023-01-24 04:33:10.278439: step: 898/464, loss: 0.003490469418466091 2023-01-24 04:33:10.882245: step: 900/464, loss: 0.07046039402484894 2023-01-24 04:33:11.608308: step: 902/464, loss: 0.0031182370148599148 2023-01-24 04:33:12.188331: step: 904/464, loss: 0.011086697690188885 2023-01-24 04:33:12.874933: step: 906/464, loss: 0.004499399568885565 2023-01-24 04:33:13.484495: step: 908/464, loss: 0.0021340511739253998 2023-01-24 04:33:14.168459: step: 910/464, loss: 0.03975437954068184 2023-01-24 04:33:14.894036: step: 912/464, loss: 0.0034305299632251263 2023-01-24 04:33:15.503465: step: 914/464, loss: 0.0036547647323459387 2023-01-24 04:33:16.075816: step: 916/464, loss: 0.017508070915937424 2023-01-24 04:33:16.745076: step: 918/464, loss: 0.050328031182289124 2023-01-24 04:33:17.401937: step: 920/464, loss: 0.07117703557014465 2023-01-24 04:33:18.026488: step: 922/464, loss: 0.005616291426122189 2023-01-24 04:33:18.638201: step: 924/464, loss: 0.0036431937478482723 2023-01-24 04:33:19.414196: step: 926/464, loss: 0.012959247455000877 2023-01-24 04:33:20.052292: step: 928/464, loss: 0.062261439859867096 2023-01-24 04:33:20.530341: step: 930/464, loss: 0.007322330493479967 ================================================== Loss: 0.034 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3258186181669638, 'r': 0.32643686981433945, 'f1': 0.3261274509803922}, 'combined': 0.2403044375644995, 'epoch': 30} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3344190435968166, 'r': 0.3159768169278745, 'f1': 0.3249364620203473}, 'combined': 0.21213468504955316, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3261256105006105, 'r': 0.33788345983554713, 'f1': 0.3319004349176763}, 'combined': 0.24455821520249832, 'epoch': 30} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3443311329703288, 'r': 0.3196229191969289, 'f1': 0.33151728355635657}, 'combined': 0.21643097268446077, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34406190019193855, 'r': 0.3401446869070209, 'f1': 0.34209208015267173}, 'combined': 0.2520678485335476, 'epoch': 30} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3434493810905109, 'r': 0.3058500468220925, 'f1': 0.32356107143125495}, 'combined': 0.21123676165978303, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.274074074074074, 'r': 0.35238095238095235, 'f1': 0.3083333333333333}, 'combined': 0.20555555555555552, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2661290322580645, 'r': 0.358695652173913, 'f1': 0.30555555555555547}, 'combined': 0.15277777777777773, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:35:56.366842: step: 2/464, loss: 0.012801757082343102 2023-01-24 04:35:56.939321: step: 4/464, loss: 0.0031697582453489304 2023-01-24 04:35:57.568954: step: 6/464, loss: 0.20902244746685028 2023-01-24 04:35:58.198526: step: 8/464, loss: 0.008833828382194042 2023-01-24 04:35:58.819602: step: 10/464, loss: 0.0024788305163383484 2023-01-24 04:35:59.434069: step: 12/464, loss: 0.011560603976249695 2023-01-24 04:36:00.062278: step: 14/464, loss: 0.0013002302730455995 2023-01-24 04:36:00.662058: step: 16/464, loss: 0.000876986188814044 2023-01-24 04:36:01.271134: step: 18/464, loss: 0.008125225082039833 2023-01-24 04:36:01.855066: step: 20/464, loss: 0.004483926109969616 2023-01-24 04:36:02.486197: step: 22/464, loss: 0.3768042325973511 2023-01-24 04:36:03.126600: step: 24/464, loss: 0.0006819473928771913 2023-01-24 04:36:03.683754: step: 26/464, loss: 0.0023310992401093245 2023-01-24 04:36:04.288002: step: 28/464, loss: 0.021799221634864807 2023-01-24 04:36:04.886129: step: 30/464, loss: 0.0017524950671941042 2023-01-24 04:36:05.466747: step: 32/464, loss: 0.08368723094463348 2023-01-24 04:36:06.135837: step: 34/464, loss: 0.025992590934038162 2023-01-24 04:36:06.732661: step: 36/464, loss: 0.001605651923455298 2023-01-24 04:36:07.343400: step: 38/464, loss: 0.03241652250289917 2023-01-24 04:36:08.011203: step: 40/464, loss: 0.025575609877705574 2023-01-24 04:36:08.669998: step: 42/464, loss: 0.04545750468969345 2023-01-24 04:36:09.259618: step: 44/464, loss: 0.08779991418123245 2023-01-24 04:36:09.887897: step: 46/464, loss: 0.0172318946570158 2023-01-24 04:36:10.547916: step: 48/464, loss: 0.0016695542726665735 2023-01-24 04:36:11.158124: step: 50/464, loss: 0.002183513715863228 2023-01-24 04:36:11.808370: step: 52/464, loss: 0.01340510230511427 2023-01-24 04:36:12.381820: step: 54/464, loss: 0.003130425699055195 2023-01-24 04:36:13.102490: step: 56/464, loss: 0.004296897444874048 2023-01-24 04:36:13.723115: step: 58/464, loss: 0.007053975015878677 2023-01-24 04:36:14.394488: step: 60/464, loss: 0.022943077608942986 2023-01-24 04:36:14.976573: step: 62/464, loss: 0.03398842364549637 2023-01-24 04:36:15.610331: step: 64/464, loss: 0.0008252724655903876 2023-01-24 04:36:16.207571: step: 66/464, loss: 0.00969845149666071 2023-01-24 04:36:16.781786: step: 68/464, loss: 0.004528787452727556 2023-01-24 04:36:17.400586: step: 70/464, loss: 0.004442054778337479 2023-01-24 04:36:18.031277: step: 72/464, loss: 0.024034662172198296 2023-01-24 04:36:18.658988: step: 74/464, loss: 0.008690690621733665 2023-01-24 04:36:19.282994: step: 76/464, loss: 0.07943525910377502 2023-01-24 04:36:19.884172: step: 78/464, loss: 0.16995173692703247 2023-01-24 04:36:20.502429: step: 80/464, loss: 0.00436097476631403 2023-01-24 04:36:21.058406: step: 82/464, loss: 0.0009877000702545047 2023-01-24 04:36:21.712560: step: 84/464, loss: 0.03591643646359444 2023-01-24 04:36:22.301224: step: 86/464, loss: 0.006157433148473501 2023-01-24 04:36:22.995961: step: 88/464, loss: 0.054600995033979416 2023-01-24 04:36:23.589561: step: 90/464, loss: 0.007441329304128885 2023-01-24 04:36:24.189540: step: 92/464, loss: 0.02023962140083313 2023-01-24 04:36:24.797621: step: 94/464, loss: 0.028564514592289925 2023-01-24 04:36:25.417861: step: 96/464, loss: 0.01373264566063881 2023-01-24 04:36:26.097562: step: 98/464, loss: 0.0034415097907185555 2023-01-24 04:36:26.682953: step: 100/464, loss: 0.0004931734292767942 2023-01-24 04:36:27.241863: step: 102/464, loss: 0.654776930809021 2023-01-24 04:36:27.877263: step: 104/464, loss: 0.0618969202041626 2023-01-24 04:36:28.468918: step: 106/464, loss: 0.01492062583565712 2023-01-24 04:36:29.054452: step: 108/464, loss: 0.01146434061229229 2023-01-24 04:36:29.661311: step: 110/464, loss: 0.3723164498806 2023-01-24 04:36:30.281673: step: 112/464, loss: 0.025970859453082085 2023-01-24 04:36:30.961696: step: 114/464, loss: 0.0003878195711877197 2023-01-24 04:36:31.573010: step: 116/464, loss: 0.01065858080983162 2023-01-24 04:36:32.261394: step: 118/464, loss: 0.0033834916539490223 2023-01-24 04:36:32.839079: step: 120/464, loss: 0.025389622896909714 2023-01-24 04:36:33.457886: step: 122/464, loss: 0.0264760572463274 2023-01-24 04:36:34.089289: step: 124/464, loss: 8.322868234245107e-05 2023-01-24 04:36:34.669587: step: 126/464, loss: 0.0043555330485105515 2023-01-24 04:36:35.302693: step: 128/464, loss: 0.024566804990172386 2023-01-24 04:36:35.910824: step: 130/464, loss: 0.0017508940072730184 2023-01-24 04:36:36.534329: step: 132/464, loss: 0.023766344413161278 2023-01-24 04:36:37.190073: step: 134/464, loss: 0.008885723538696766 2023-01-24 04:36:37.779399: step: 136/464, loss: 0.012811705470085144 2023-01-24 04:36:38.368772: step: 138/464, loss: 0.011576555669307709 2023-01-24 04:36:39.031534: step: 140/464, loss: 0.0056231520138680935 2023-01-24 04:36:39.685292: step: 142/464, loss: 0.02179417386651039 2023-01-24 04:36:40.278011: step: 144/464, loss: 0.0001759190927259624 2023-01-24 04:36:40.903525: step: 146/464, loss: 1.1809196472167969 2023-01-24 04:36:41.485624: step: 148/464, loss: 0.015272276476025581 2023-01-24 04:36:42.149263: step: 150/464, loss: 0.005801384802907705 2023-01-24 04:36:42.787111: step: 152/464, loss: 0.003080572932958603 2023-01-24 04:36:43.400483: step: 154/464, loss: 0.003637011395767331 2023-01-24 04:36:44.048570: step: 156/464, loss: 0.03135967627167702 2023-01-24 04:36:44.767374: step: 158/464, loss: 0.18661335110664368 2023-01-24 04:36:45.379124: step: 160/464, loss: 0.03001299314200878 2023-01-24 04:36:46.007172: step: 162/464, loss: 0.017466213554143906 2023-01-24 04:36:46.590662: step: 164/464, loss: 0.03128621727228165 2023-01-24 04:36:47.237190: step: 166/464, loss: 0.006187606602907181 2023-01-24 04:36:47.781913: step: 168/464, loss: 0.00043134059524163604 2023-01-24 04:36:48.381212: step: 170/464, loss: 0.006160136312246323 2023-01-24 04:36:49.045509: step: 172/464, loss: 0.019473716616630554 2023-01-24 04:36:49.736030: step: 174/464, loss: 0.010255226865410805 2023-01-24 04:36:50.409808: step: 176/464, loss: 0.06173697113990784 2023-01-24 04:36:51.008213: step: 178/464, loss: 0.03466442972421646 2023-01-24 04:36:51.596936: step: 180/464, loss: 0.014254294335842133 2023-01-24 04:36:52.179845: step: 182/464, loss: 0.010866356082260609 2023-01-24 04:36:52.820058: step: 184/464, loss: 0.026608051732182503 2023-01-24 04:36:53.376922: step: 186/464, loss: 0.09039905667304993 2023-01-24 04:36:53.965631: step: 188/464, loss: 0.005921590141952038 2023-01-24 04:36:54.630171: step: 190/464, loss: 0.06350155919790268 2023-01-24 04:36:55.185407: step: 192/464, loss: 0.0011387598933652043 2023-01-24 04:36:55.718254: step: 194/464, loss: 0.8153964877128601 2023-01-24 04:36:56.320640: step: 196/464, loss: 0.01737302727997303 2023-01-24 04:36:56.932486: step: 198/464, loss: 0.00207641557790339 2023-01-24 04:36:57.538332: step: 200/464, loss: 0.003981293644756079 2023-01-24 04:36:58.184378: step: 202/464, loss: 0.11159694194793701 2023-01-24 04:36:58.740718: step: 204/464, loss: 0.02585158683359623 2023-01-24 04:36:59.312174: step: 206/464, loss: 0.2752548158168793 2023-01-24 04:36:59.973528: step: 208/464, loss: 0.00791968870908022 2023-01-24 04:37:00.564815: step: 210/464, loss: 0.00585110392421484 2023-01-24 04:37:01.185194: step: 212/464, loss: 0.011281449347734451 2023-01-24 04:37:01.756594: step: 214/464, loss: 0.1474539339542389 2023-01-24 04:37:02.334492: step: 216/464, loss: 0.016068890690803528 2023-01-24 04:37:03.001423: step: 218/464, loss: 0.45983371138572693 2023-01-24 04:37:03.582387: step: 220/464, loss: 0.01438911259174347 2023-01-24 04:37:04.252409: step: 222/464, loss: 0.6527339220046997 2023-01-24 04:37:04.841347: step: 224/464, loss: 0.0012176393065601587 2023-01-24 04:37:05.469650: step: 226/464, loss: 0.0028944960795342922 2023-01-24 04:37:06.095358: step: 228/464, loss: 0.09391969442367554 2023-01-24 04:37:06.733630: step: 230/464, loss: 0.006250654812902212 2023-01-24 04:37:07.321831: step: 232/464, loss: 0.02643810398876667 2023-01-24 04:37:08.010332: step: 234/464, loss: 0.008487485349178314 2023-01-24 04:37:08.563389: step: 236/464, loss: 0.0020647584460675716 2023-01-24 04:37:09.137647: step: 238/464, loss: 0.0007894524023868144 2023-01-24 04:37:09.718008: step: 240/464, loss: 0.017385359853506088 2023-01-24 04:37:10.324243: step: 242/464, loss: 0.001665014191530645 2023-01-24 04:37:10.894474: step: 244/464, loss: 4.0113471186487004e-05 2023-01-24 04:37:11.458762: step: 246/464, loss: 0.012017929926514626 2023-01-24 04:37:12.020901: step: 248/464, loss: 0.04166106879711151 2023-01-24 04:37:12.802325: step: 250/464, loss: 0.00880045723170042 2023-01-24 04:37:13.408801: step: 252/464, loss: 0.09647093713283539 2023-01-24 04:37:13.968786: step: 254/464, loss: 0.031999941915273666 2023-01-24 04:37:14.541483: step: 256/464, loss: 0.017220599576830864 2023-01-24 04:37:15.170409: step: 258/464, loss: 0.005170927383005619 2023-01-24 04:37:15.823349: step: 260/464, loss: 0.051612719893455505 2023-01-24 04:37:16.360916: step: 262/464, loss: 0.019278405234217644 2023-01-24 04:37:17.144320: step: 264/464, loss: 0.007655612658709288 2023-01-24 04:37:17.771593: step: 266/464, loss: 0.002190679544582963 2023-01-24 04:37:18.337765: step: 268/464, loss: 0.0022087539546191692 2023-01-24 04:37:18.914274: step: 270/464, loss: 0.0024045798927545547 2023-01-24 04:37:19.469137: step: 272/464, loss: 0.11145664751529694 2023-01-24 04:37:20.183028: step: 274/464, loss: 0.08271219581365585 2023-01-24 04:37:20.787288: step: 276/464, loss: 0.012441127561032772 2023-01-24 04:37:21.394464: step: 278/464, loss: 0.1865144520998001 2023-01-24 04:37:22.017303: step: 280/464, loss: 0.0013879581820219755 2023-01-24 04:37:22.617602: step: 282/464, loss: 0.028617314994335175 2023-01-24 04:37:23.237197: step: 284/464, loss: 0.004597559105604887 2023-01-24 04:37:23.897378: step: 286/464, loss: 0.03204414248466492 2023-01-24 04:37:24.561184: step: 288/464, loss: 0.16663306951522827 2023-01-24 04:37:25.185749: step: 290/464, loss: 0.006491425447165966 2023-01-24 04:37:25.840214: step: 292/464, loss: 0.05674513056874275 2023-01-24 04:37:26.467496: step: 294/464, loss: 0.019246671348810196 2023-01-24 04:37:27.157844: step: 296/464, loss: 0.0095362588763237 2023-01-24 04:37:27.770859: step: 298/464, loss: 0.49789726734161377 2023-01-24 04:37:28.404491: step: 300/464, loss: 0.0010461852652952075 2023-01-24 04:37:29.063108: step: 302/464, loss: 0.0013769067591056228 2023-01-24 04:37:29.678223: step: 304/464, loss: 0.010894766077399254 2023-01-24 04:37:30.281725: step: 306/464, loss: 0.007526957895606756 2023-01-24 04:37:30.863633: step: 308/464, loss: 0.11928742378950119 2023-01-24 04:37:31.496132: step: 310/464, loss: 0.03182828798890114 2023-01-24 04:37:32.106143: step: 312/464, loss: 0.0009552632691338658 2023-01-24 04:37:32.666925: step: 314/464, loss: 0.007491611409932375 2023-01-24 04:37:33.314609: step: 316/464, loss: 0.0003158711770083755 2023-01-24 04:37:33.924407: step: 318/464, loss: 0.14994694292545319 2023-01-24 04:37:34.550426: step: 320/464, loss: 0.022585401311516762 2023-01-24 04:37:35.165008: step: 322/464, loss: 0.021226288750767708 2023-01-24 04:37:35.924867: step: 324/464, loss: 0.007166236639022827 2023-01-24 04:37:36.486488: step: 326/464, loss: 0.00879592727869749 2023-01-24 04:37:37.133582: step: 328/464, loss: 0.03222249075770378 2023-01-24 04:37:37.662094: step: 330/464, loss: 0.003964867442846298 2023-01-24 04:37:38.303983: step: 332/464, loss: 0.0036193111445754766 2023-01-24 04:37:38.904285: step: 334/464, loss: 0.01711282692849636 2023-01-24 04:37:39.532151: step: 336/464, loss: 0.017109058797359467 2023-01-24 04:37:40.125460: step: 338/464, loss: 0.010447176173329353 2023-01-24 04:37:40.754023: step: 340/464, loss: 0.0787728950381279 2023-01-24 04:37:41.355081: step: 342/464, loss: 0.02096523903310299 2023-01-24 04:37:42.004812: step: 344/464, loss: 0.04040298983454704 2023-01-24 04:37:42.620330: step: 346/464, loss: 0.03919469192624092 2023-01-24 04:37:43.303648: step: 348/464, loss: 0.6271852850914001 2023-01-24 04:37:43.904682: step: 350/464, loss: 0.00023240085283759981 2023-01-24 04:37:44.532523: step: 352/464, loss: 0.03590834513306618 2023-01-24 04:37:45.233694: step: 354/464, loss: 0.011127419769763947 2023-01-24 04:37:45.839798: step: 356/464, loss: 0.004278120584785938 2023-01-24 04:37:46.438400: step: 358/464, loss: 0.015366523526608944 2023-01-24 04:37:46.996179: step: 360/464, loss: 0.012401978485286236 2023-01-24 04:37:47.573027: step: 362/464, loss: 0.08308908343315125 2023-01-24 04:37:48.173967: step: 364/464, loss: 0.06377576291561127 2023-01-24 04:37:48.775821: step: 366/464, loss: 0.002781669842079282 2023-01-24 04:37:49.391443: step: 368/464, loss: 0.0010306322947144508 2023-01-24 04:37:50.025619: step: 370/464, loss: 0.012695304118096828 2023-01-24 04:37:50.732525: step: 372/464, loss: 0.043762415647506714 2023-01-24 04:37:51.382572: step: 374/464, loss: 0.0017013137694448233 2023-01-24 04:37:52.009289: step: 376/464, loss: 0.03337812051177025 2023-01-24 04:37:52.607225: step: 378/464, loss: 0.0051479581743478775 2023-01-24 04:37:53.288747: step: 380/464, loss: 0.032452017068862915 2023-01-24 04:37:53.928042: step: 382/464, loss: 0.010491715744137764 2023-01-24 04:37:54.589105: step: 384/464, loss: 0.03126560151576996 2023-01-24 04:37:55.242026: step: 386/464, loss: 0.0004801799077540636 2023-01-24 04:37:55.867715: step: 388/464, loss: 0.0002512967330403626 2023-01-24 04:37:56.626211: step: 390/464, loss: 0.00766033073887229 2023-01-24 04:37:57.358824: step: 392/464, loss: 0.04300985857844353 2023-01-24 04:37:57.944568: step: 394/464, loss: 0.009559862315654755 2023-01-24 04:37:58.551834: step: 396/464, loss: 0.0011807261034846306 2023-01-24 04:37:59.141387: step: 398/464, loss: 0.009414239786565304 2023-01-24 04:37:59.726643: step: 400/464, loss: 0.0029089900199323893 2023-01-24 04:38:00.333487: step: 402/464, loss: 0.0033333939500153065 2023-01-24 04:38:00.978105: step: 404/464, loss: 0.04904211685061455 2023-01-24 04:38:01.631796: step: 406/464, loss: 0.009782656095921993 2023-01-24 04:38:02.319787: step: 408/464, loss: 0.07015188038349152 2023-01-24 04:38:02.863198: step: 410/464, loss: 0.004249213729053736 2023-01-24 04:38:03.433239: step: 412/464, loss: 0.006609804462641478 2023-01-24 04:38:04.029832: step: 414/464, loss: 0.0011780187487602234 2023-01-24 04:38:04.672314: step: 416/464, loss: 0.18723560869693756 2023-01-24 04:38:05.297125: step: 418/464, loss: 0.00481009716168046 2023-01-24 04:38:05.905919: step: 420/464, loss: 0.01568804867565632 2023-01-24 04:38:06.496400: step: 422/464, loss: 0.06890048086643219 2023-01-24 04:38:07.115547: step: 424/464, loss: 0.01726832427084446 2023-01-24 04:38:07.692493: step: 426/464, loss: 0.0030659439507871866 2023-01-24 04:38:08.325509: step: 428/464, loss: 0.009536556899547577 2023-01-24 04:38:09.054360: step: 430/464, loss: 0.020365754142403603 2023-01-24 04:38:09.680657: step: 432/464, loss: 0.06949149072170258 2023-01-24 04:38:10.334870: step: 434/464, loss: 0.01435832865536213 2023-01-24 04:38:10.995284: step: 436/464, loss: 0.016788696870207787 2023-01-24 04:38:11.598452: step: 438/464, loss: 0.17906318604946136 2023-01-24 04:38:12.183695: step: 440/464, loss: 0.0181912649422884 2023-01-24 04:38:12.792393: step: 442/464, loss: 0.10065846145153046 2023-01-24 04:38:13.499509: step: 444/464, loss: 0.0006792846834287047 2023-01-24 04:38:14.080207: step: 446/464, loss: 0.027612632140517235 2023-01-24 04:38:14.688742: step: 448/464, loss: 0.019827254116535187 2023-01-24 04:38:15.344687: step: 450/464, loss: 0.017565179616212845 2023-01-24 04:38:15.955060: step: 452/464, loss: 0.003037205198779702 2023-01-24 04:38:16.554972: step: 454/464, loss: 0.0018163080094382167 2023-01-24 04:38:17.197039: step: 456/464, loss: 0.01641642488539219 2023-01-24 04:38:17.800050: step: 458/464, loss: 0.0009892077650874853 2023-01-24 04:38:18.458163: step: 460/464, loss: 0.0039025216829031706 2023-01-24 04:38:19.145958: step: 462/464, loss: 0.035910461097955704 2023-01-24 04:38:19.727365: step: 464/464, loss: 0.41302695870399475 2023-01-24 04:38:20.286855: step: 466/464, loss: 0.027785949409008026 2023-01-24 04:38:20.953778: step: 468/464, loss: 0.06098407134413719 2023-01-24 04:38:21.607934: step: 470/464, loss: 0.06805586069822311 2023-01-24 04:38:22.296548: step: 472/464, loss: 0.0003168827679473907 2023-01-24 04:38:22.930853: step: 474/464, loss: 0.017504960298538208 2023-01-24 04:38:23.563230: step: 476/464, loss: 0.004010757897049189 2023-01-24 04:38:24.239165: step: 478/464, loss: 0.0031999624334275723 2023-01-24 04:38:24.871706: step: 480/464, loss: 0.006119747180491686 2023-01-24 04:38:25.509562: step: 482/464, loss: 0.02626779116690159 2023-01-24 04:38:26.148598: step: 484/464, loss: 0.01521299872547388 2023-01-24 04:38:26.796513: step: 486/464, loss: 0.008856347762048244 2023-01-24 04:38:27.413749: step: 488/464, loss: 0.007745738606899977 2023-01-24 04:38:28.076941: step: 490/464, loss: 0.016153812408447266 2023-01-24 04:38:28.682369: step: 492/464, loss: 0.030608683824539185 2023-01-24 04:38:29.326142: step: 494/464, loss: 0.006272049155086279 2023-01-24 04:38:29.988357: step: 496/464, loss: 0.0008258870802819729 2023-01-24 04:38:30.708597: step: 498/464, loss: 0.02096143178641796 2023-01-24 04:38:31.328130: step: 500/464, loss: 0.031219899654388428 2023-01-24 04:38:31.973405: step: 502/464, loss: 0.14262311160564423 2023-01-24 04:38:32.563396: step: 504/464, loss: 0.046533871442079544 2023-01-24 04:38:33.177653: step: 506/464, loss: 0.07897719740867615 2023-01-24 04:38:33.803017: step: 508/464, loss: 0.03483173996210098 2023-01-24 04:38:34.438422: step: 510/464, loss: 0.22256095707416534 2023-01-24 04:38:35.034886: step: 512/464, loss: 0.021794581785798073 2023-01-24 04:38:35.672277: step: 514/464, loss: 0.001026372192427516 2023-01-24 04:38:36.377911: step: 516/464, loss: 0.004475842230021954 2023-01-24 04:38:36.976007: step: 518/464, loss: 0.025761837139725685 2023-01-24 04:38:37.599373: step: 520/464, loss: 0.020630907267332077 2023-01-24 04:38:38.179209: step: 522/464, loss: 0.004385307896882296 2023-01-24 04:38:38.863192: step: 524/464, loss: 0.02225436642765999 2023-01-24 04:38:39.501252: step: 526/464, loss: 0.020955931395292282 2023-01-24 04:38:40.115038: step: 528/464, loss: 0.0035734125413000584 2023-01-24 04:38:40.788795: step: 530/464, loss: 0.19535847008228302 2023-01-24 04:38:41.417402: step: 532/464, loss: 0.00577906658872962 2023-01-24 04:38:42.095965: step: 534/464, loss: 0.1850377768278122 2023-01-24 04:38:42.811028: step: 536/464, loss: 0.0009395240340381861 2023-01-24 04:38:43.447537: step: 538/464, loss: 0.002650128910318017 2023-01-24 04:38:44.072961: step: 540/464, loss: 0.00598529726266861 2023-01-24 04:38:44.725173: step: 542/464, loss: 0.005069917067885399 2023-01-24 04:38:45.430541: step: 544/464, loss: 0.05108673498034477 2023-01-24 04:38:46.059829: step: 546/464, loss: 0.01029582042247057 2023-01-24 04:38:46.710742: step: 548/464, loss: 0.028246359899640083 2023-01-24 04:38:47.279334: step: 550/464, loss: 0.007826417684555054 2023-01-24 04:38:47.923580: step: 552/464, loss: 0.0016339016146957874 2023-01-24 04:38:48.522561: step: 554/464, loss: 0.013940151780843735 2023-01-24 04:38:49.109088: step: 556/464, loss: 0.0003476462443359196 2023-01-24 04:38:49.695767: step: 558/464, loss: 0.002292930381372571 2023-01-24 04:38:50.279560: step: 560/464, loss: 0.0012554709101095796 2023-01-24 04:38:50.930060: step: 562/464, loss: 1.4624099731445312 2023-01-24 04:38:51.531688: step: 564/464, loss: 0.0303787998855114 2023-01-24 04:38:52.131715: step: 566/464, loss: 0.011416810564696789 2023-01-24 04:38:52.744236: step: 568/464, loss: 0.0025077073369175196 2023-01-24 04:38:53.469446: step: 570/464, loss: 0.139719158411026 2023-01-24 04:38:54.016365: step: 572/464, loss: 0.010094721801578999 2023-01-24 04:38:54.592497: step: 574/464, loss: 0.0012248513521626592 2023-01-24 04:38:55.259563: step: 576/464, loss: 0.005344181787222624 2023-01-24 04:38:55.807015: step: 578/464, loss: 0.02629689872264862 2023-01-24 04:38:56.459285: step: 580/464, loss: 0.009889775887131691 2023-01-24 04:38:57.050096: step: 582/464, loss: 1.107112257159315e-05 2023-01-24 04:38:57.645100: step: 584/464, loss: 0.0021250001154839993 2023-01-24 04:38:58.256309: step: 586/464, loss: 0.0021111273672431707 2023-01-24 04:38:58.850261: step: 588/464, loss: 0.0018420673441141844 2023-01-24 04:38:59.459495: step: 590/464, loss: 3.5897333873435855e-05 2023-01-24 04:39:00.069095: step: 592/464, loss: 0.02338157594203949 2023-01-24 04:39:00.707984: step: 594/464, loss: 0.026911022141575813 2023-01-24 04:39:01.334344: step: 596/464, loss: 0.01676015369594097 2023-01-24 04:39:01.954477: step: 598/464, loss: 0.0024795825593173504 2023-01-24 04:39:02.545025: step: 600/464, loss: 0.12006665021181107 2023-01-24 04:39:03.162283: step: 602/464, loss: 0.8700026273727417 2023-01-24 04:39:03.789180: step: 604/464, loss: 0.001991113880649209 2023-01-24 04:39:04.423624: step: 606/464, loss: 0.002249934710562229 2023-01-24 04:39:04.998985: step: 608/464, loss: 0.06895353645086288 2023-01-24 04:39:05.535596: step: 610/464, loss: 0.011820238083600998 2023-01-24 04:39:06.171593: step: 612/464, loss: 0.002337306272238493 2023-01-24 04:39:06.767403: step: 614/464, loss: 0.024844679981470108 2023-01-24 04:39:07.371550: step: 616/464, loss: 0.022155219689011574 2023-01-24 04:39:07.948809: step: 618/464, loss: 0.0002563460438977927 2023-01-24 04:39:08.702751: step: 620/464, loss: 0.03686891868710518 2023-01-24 04:39:09.310831: step: 622/464, loss: 0.0467064343392849 2023-01-24 04:39:09.915576: step: 624/464, loss: 0.014262551441788673 2023-01-24 04:39:10.496575: step: 626/464, loss: 0.00019088482076767832 2023-01-24 04:39:11.100286: step: 628/464, loss: 0.025904875248670578 2023-01-24 04:39:11.735551: step: 630/464, loss: 0.008668944239616394 2023-01-24 04:39:12.353793: step: 632/464, loss: 0.014816109091043472 2023-01-24 04:39:13.011538: step: 634/464, loss: 0.11665499955415726 2023-01-24 04:39:13.631624: step: 636/464, loss: 0.01366348098963499 2023-01-24 04:39:14.185399: step: 638/464, loss: 0.007681042887270451 2023-01-24 04:39:14.838267: step: 640/464, loss: 0.005328443832695484 2023-01-24 04:39:15.438196: step: 642/464, loss: 0.03169764205813408 2023-01-24 04:39:16.033308: step: 644/464, loss: 0.01761089451611042 2023-01-24 04:39:16.705753: step: 646/464, loss: 0.007290259003639221 2023-01-24 04:39:17.383512: step: 648/464, loss: 0.051352642476558685 2023-01-24 04:39:18.007140: step: 650/464, loss: 0.018428362905979156 2023-01-24 04:39:18.654618: step: 652/464, loss: 0.11865729838609695 2023-01-24 04:39:19.275211: step: 654/464, loss: 0.16155581176280975 2023-01-24 04:39:19.908377: step: 656/464, loss: 0.8317537903785706 2023-01-24 04:39:20.505331: step: 658/464, loss: 0.0015119991730898619 2023-01-24 04:39:21.151052: step: 660/464, loss: 0.006626639515161514 2023-01-24 04:39:21.772032: step: 662/464, loss: 0.012667542323470116 2023-01-24 04:39:22.394225: step: 664/464, loss: 0.008510514162480831 2023-01-24 04:39:23.103490: step: 666/464, loss: 0.0040636323392391205 2023-01-24 04:39:23.731217: step: 668/464, loss: 0.11379527300596237 2023-01-24 04:39:24.364528: step: 670/464, loss: 0.011296688579022884 2023-01-24 04:39:24.993577: step: 672/464, loss: 0.0011617145501077175 2023-01-24 04:39:25.596827: step: 674/464, loss: 0.005766835995018482 2023-01-24 04:39:26.338802: step: 676/464, loss: 0.038305796682834625 2023-01-24 04:39:26.917269: step: 678/464, loss: 0.0003724046691786498 2023-01-24 04:39:27.526489: step: 680/464, loss: 0.060129255056381226 2023-01-24 04:39:28.165584: step: 682/464, loss: 0.019781537353992462 2023-01-24 04:39:28.924605: step: 684/464, loss: 0.011479828506708145 2023-01-24 04:39:29.506591: step: 686/464, loss: 0.011218197643756866 2023-01-24 04:39:30.139087: step: 688/464, loss: 0.0019634906202554703 2023-01-24 04:39:30.739645: step: 690/464, loss: 0.017490766942501068 2023-01-24 04:39:31.406484: step: 692/464, loss: 0.01794765330851078 2023-01-24 04:39:32.045705: step: 694/464, loss: 0.01079262513667345 2023-01-24 04:39:32.696159: step: 696/464, loss: 0.007698724512010813 2023-01-24 04:39:33.286920: step: 698/464, loss: 0.001549478736706078 2023-01-24 04:39:33.911596: step: 700/464, loss: 0.0767119973897934 2023-01-24 04:39:34.463400: step: 702/464, loss: 0.037706900388002396 2023-01-24 04:39:35.072939: step: 704/464, loss: 0.08163590729236603 2023-01-24 04:39:35.621909: step: 706/464, loss: 0.015356204472482204 2023-01-24 04:39:36.263191: step: 708/464, loss: 0.1496073603630066 2023-01-24 04:39:36.906375: step: 710/464, loss: 0.024661073461174965 2023-01-24 04:39:37.552481: step: 712/464, loss: 0.048554107546806335 2023-01-24 04:39:38.207318: step: 714/464, loss: 0.010555480606853962 2023-01-24 04:39:38.826520: step: 716/464, loss: 0.1697586625814438 2023-01-24 04:39:39.500779: step: 718/464, loss: 0.008124444633722305 2023-01-24 04:39:40.078919: step: 720/464, loss: 0.014189078472554684 2023-01-24 04:39:40.688274: step: 722/464, loss: 0.013432067818939686 2023-01-24 04:39:41.335996: step: 724/464, loss: 0.002215616637840867 2023-01-24 04:39:41.938856: step: 726/464, loss: 0.00029035957413725555 2023-01-24 04:39:42.583068: step: 728/464, loss: 1.7936588525772095 2023-01-24 04:39:43.226843: step: 730/464, loss: 0.021537071093916893 2023-01-24 04:39:43.816261: step: 732/464, loss: 0.03023180551826954 2023-01-24 04:39:44.384117: step: 734/464, loss: 0.014803903177380562 2023-01-24 04:39:44.927183: step: 736/464, loss: 0.0011444580741226673 2023-01-24 04:39:45.518194: step: 738/464, loss: 0.0055016083642840385 2023-01-24 04:39:46.153623: step: 740/464, loss: 0.02439718320965767 2023-01-24 04:39:46.762641: step: 742/464, loss: 0.0008676930447109044 2023-01-24 04:39:47.330980: step: 744/464, loss: 0.010000188834965229 2023-01-24 04:39:48.041431: step: 746/464, loss: 0.2724458873271942 2023-01-24 04:39:48.655659: step: 748/464, loss: 0.003781168255954981 2023-01-24 04:39:49.240775: step: 750/464, loss: 0.006330311298370361 2023-01-24 04:39:49.832772: step: 752/464, loss: 0.008123427629470825 2023-01-24 04:39:50.450397: step: 754/464, loss: 0.01807366870343685 2023-01-24 04:39:51.033517: step: 756/464, loss: 0.008394982665777206 2023-01-24 04:39:51.625320: step: 758/464, loss: 0.0008706397493369877 2023-01-24 04:39:52.237640: step: 760/464, loss: 0.004383837804198265 2023-01-24 04:39:52.863065: step: 762/464, loss: 0.036036934703588486 2023-01-24 04:39:53.475536: step: 764/464, loss: 0.08744674921035767 2023-01-24 04:39:54.106229: step: 766/464, loss: 0.0012109780218452215 2023-01-24 04:39:54.767535: step: 768/464, loss: 0.003694704966619611 2023-01-24 04:39:55.367386: step: 770/464, loss: 0.03521769866347313 2023-01-24 04:39:55.955147: step: 772/464, loss: 0.013438494876027107 2023-01-24 04:39:56.622649: step: 774/464, loss: 0.018687814474105835 2023-01-24 04:39:57.244285: step: 776/464, loss: 0.01550370454788208 2023-01-24 04:39:57.900919: step: 778/464, loss: 0.04559968411922455 2023-01-24 04:39:58.446502: step: 780/464, loss: 0.06916767358779907 2023-01-24 04:39:59.106849: step: 782/464, loss: 0.0004504562239162624 2023-01-24 04:39:59.745215: step: 784/464, loss: 0.02127774991095066 2023-01-24 04:40:00.343016: step: 786/464, loss: 0.07551337778568268 2023-01-24 04:40:01.025986: step: 788/464, loss: 0.04706054925918579 2023-01-24 04:40:01.675089: step: 790/464, loss: 0.041895508766174316 2023-01-24 04:40:02.311253: step: 792/464, loss: 0.021193142980337143 2023-01-24 04:40:02.862214: step: 794/464, loss: 0.014277678914368153 2023-01-24 04:40:03.510234: step: 796/464, loss: 0.017894389107823372 2023-01-24 04:40:04.205813: step: 798/464, loss: 0.1131686195731163 2023-01-24 04:40:04.799920: step: 800/464, loss: 0.005173950921744108 2023-01-24 04:40:05.486358: step: 802/464, loss: 0.007500513922423124 2023-01-24 04:40:06.069292: step: 804/464, loss: 0.025352556258440018 2023-01-24 04:40:06.748843: step: 806/464, loss: 0.0008665485074743629 2023-01-24 04:40:07.342095: step: 808/464, loss: 0.003123636357486248 2023-01-24 04:40:08.067323: step: 810/464, loss: 0.020570002496242523 2023-01-24 04:40:08.808915: step: 812/464, loss: 0.003088674508035183 2023-01-24 04:40:09.416152: step: 814/464, loss: 0.015568751841783524 2023-01-24 04:40:10.035419: step: 816/464, loss: 0.002063547261059284 2023-01-24 04:40:10.660478: step: 818/464, loss: 0.002489405684173107 2023-01-24 04:40:11.301931: step: 820/464, loss: 0.0054278913885355 2023-01-24 04:40:11.952317: step: 822/464, loss: 0.032391082495450974 2023-01-24 04:40:12.586285: step: 824/464, loss: 0.01377673726528883 2023-01-24 04:40:13.230379: step: 826/464, loss: 0.03902078792452812 2023-01-24 04:40:13.847807: step: 828/464, loss: 0.47893691062927246 2023-01-24 04:40:14.552554: step: 830/464, loss: 0.057942770421504974 2023-01-24 04:40:15.125597: step: 832/464, loss: 0.0043868571519851685 2023-01-24 04:40:15.717144: step: 834/464, loss: 0.031184788793325424 2023-01-24 04:40:16.367295: step: 836/464, loss: 0.013812736608088017 2023-01-24 04:40:17.006837: step: 838/464, loss: 0.036795474588871 2023-01-24 04:40:17.606662: step: 840/464, loss: 0.011091233231127262 2023-01-24 04:40:18.217994: step: 842/464, loss: 0.01760336197912693 2023-01-24 04:40:18.854992: step: 844/464, loss: 0.031159533187747 2023-01-24 04:40:19.588720: step: 846/464, loss: 0.0019408116349950433 2023-01-24 04:40:20.200598: step: 848/464, loss: 0.013441966846585274 2023-01-24 04:40:20.842378: step: 850/464, loss: 0.023758893832564354 2023-01-24 04:40:21.483679: step: 852/464, loss: 0.011388260871171951 2023-01-24 04:40:22.047166: step: 854/464, loss: 0.0026496564969420433 2023-01-24 04:40:22.691160: step: 856/464, loss: 0.0152037488296628 2023-01-24 04:40:23.313926: step: 858/464, loss: 0.05477975308895111 2023-01-24 04:40:23.950117: step: 860/464, loss: 0.007257247343659401 2023-01-24 04:40:24.566828: step: 862/464, loss: 0.6482580900192261 2023-01-24 04:40:25.223643: step: 864/464, loss: 0.13274411857128143 2023-01-24 04:40:25.864179: step: 866/464, loss: 0.07746298611164093 2023-01-24 04:40:26.461416: step: 868/464, loss: 0.003369443118572235 2023-01-24 04:40:27.113150: step: 870/464, loss: 0.040803004056215286 2023-01-24 04:40:27.719901: step: 872/464, loss: 0.02492385357618332 2023-01-24 04:40:28.328474: step: 874/464, loss: 0.005217418074607849 2023-01-24 04:40:28.960054: step: 876/464, loss: 0.009234433062374592 2023-01-24 04:40:29.581777: step: 878/464, loss: 0.027107814326882362 2023-01-24 04:40:30.178612: step: 880/464, loss: 0.04328801855444908 2023-01-24 04:40:30.857759: step: 882/464, loss: 0.00160908920224756 2023-01-24 04:40:31.446585: step: 884/464, loss: 0.019726203754544258 2023-01-24 04:40:32.087647: step: 886/464, loss: 0.010629426687955856 2023-01-24 04:40:32.720497: step: 888/464, loss: 0.0021526608616113663 2023-01-24 04:40:33.333649: step: 890/464, loss: 0.0002275644801557064 2023-01-24 04:40:33.941910: step: 892/464, loss: 0.0007767993956804276 2023-01-24 04:40:34.535946: step: 894/464, loss: 0.07486575841903687 2023-01-24 04:40:35.172026: step: 896/464, loss: 0.029513342306017876 2023-01-24 04:40:35.785457: step: 898/464, loss: 0.19507844746112823 2023-01-24 04:40:36.371269: step: 900/464, loss: 0.07666248828172684 2023-01-24 04:40:37.002514: step: 902/464, loss: 0.0041303797625005245 2023-01-24 04:40:37.646272: step: 904/464, loss: 0.041221149265766144 2023-01-24 04:40:38.242569: step: 906/464, loss: 0.0340176559984684 2023-01-24 04:40:38.939477: step: 908/464, loss: 0.030654940754175186 2023-01-24 04:40:39.492507: step: 910/464, loss: 0.004920803476125002 2023-01-24 04:40:40.076548: step: 912/464, loss: 0.2938683032989502 2023-01-24 04:40:40.677889: step: 914/464, loss: 0.012634389102458954 2023-01-24 04:40:41.323392: step: 916/464, loss: 0.025104986503720284 2023-01-24 04:40:41.952382: step: 918/464, loss: 0.0012669715797528625 2023-01-24 04:40:42.613447: step: 920/464, loss: 0.08574622869491577 2023-01-24 04:40:43.286231: step: 922/464, loss: 0.0421142652630806 2023-01-24 04:40:43.924918: step: 924/464, loss: 0.028157049790024757 2023-01-24 04:40:44.536542: step: 926/464, loss: 0.007767365779727697 2023-01-24 04:40:45.142984: step: 928/464, loss: 0.04685765877366066 2023-01-24 04:40:45.691692: step: 930/464, loss: 0.06659197062253952 ================================================== Loss: 0.054 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3299330766103658, 'r': 0.3149076613567628, 'f1': 0.32224531560196895}, 'combined': 0.23744391675934554, 'epoch': 31} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.336014956293044, 'r': 0.3051312286925804, 'f1': 0.31982926475677026}, 'combined': 0.208800452639135, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32679557855339103, 'r': 0.32741568401554166, 'f1': 0.3271053373956217}, 'combined': 0.24102498544940545, 'epoch': 31} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3499302077022169, 'r': 0.30844032094340623, 'f1': 0.32787793765161305}, 'combined': 0.2140550266533847, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35323116289198603, 'r': 0.329771787747357, 'f1': 0.34109859105565676}, 'combined': 0.25133580393574706, 'epoch': 31} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.35083736655335684, 'r': 0.29703586038769053, 'f1': 0.3217026871826902}, 'combined': 0.2100235159845542, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21367521367521364, 'r': 0.23809523809523805, 'f1': 0.22522522522522517}, 'combined': 0.1501501501501501, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29464285714285715, 'r': 0.358695652173913, 'f1': 0.3235294117647059}, 'combined': 0.16176470588235295, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:43:22.524057: step: 2/464, loss: 0.1374712586402893 2023-01-24 04:43:23.198761: step: 4/464, loss: 0.010180720128118992 2023-01-24 04:43:23.832261: step: 6/464, loss: 0.02252374216914177 2023-01-24 04:43:24.475922: step: 8/464, loss: 0.016324935480952263 2023-01-24 04:43:25.139186: step: 10/464, loss: 0.013695857487618923 2023-01-24 04:43:25.822857: step: 12/464, loss: 0.11942312866449356 2023-01-24 04:43:26.448012: step: 14/464, loss: 0.05008179694414139 2023-01-24 04:43:27.037834: step: 16/464, loss: 0.005809451453387737 2023-01-24 04:43:27.571455: step: 18/464, loss: 1.2273625135421753 2023-01-24 04:43:28.166947: step: 20/464, loss: 0.007264153566211462 2023-01-24 04:43:28.765577: step: 22/464, loss: 0.01582128368318081 2023-01-24 04:43:29.379774: step: 24/464, loss: 0.005514255724847317 2023-01-24 04:43:29.999441: step: 26/464, loss: 0.0349930077791214 2023-01-24 04:43:30.585993: step: 28/464, loss: 0.01226122211664915 2023-01-24 04:43:31.172935: step: 30/464, loss: 0.0031147468835115433 2023-01-24 04:43:31.770695: step: 32/464, loss: 0.010408380068838596 2023-01-24 04:43:32.381841: step: 34/464, loss: 0.0029626174364238977 2023-01-24 04:43:33.042165: step: 36/464, loss: 0.003125266870483756 2023-01-24 04:43:33.629445: step: 38/464, loss: 0.015703922137618065 2023-01-24 04:43:34.322893: step: 40/464, loss: 0.16384877264499664 2023-01-24 04:43:35.014883: step: 42/464, loss: 0.4196951687335968 2023-01-24 04:43:35.675995: step: 44/464, loss: 0.015279560349881649 2023-01-24 04:43:36.283312: step: 46/464, loss: 0.0070780557580292225 2023-01-24 04:43:36.925531: step: 48/464, loss: 0.08592184633016586 2023-01-24 04:43:37.583875: step: 50/464, loss: 0.02106037363409996 2023-01-24 04:43:38.185243: step: 52/464, loss: 0.009690009988844395 2023-01-24 04:43:38.752302: step: 54/464, loss: 0.002293247263878584 2023-01-24 04:43:39.326056: step: 56/464, loss: 0.038529325276613235 2023-01-24 04:43:39.874908: step: 58/464, loss: 0.005708710290491581 2023-01-24 04:43:40.464833: step: 60/464, loss: 0.0013180101523175836 2023-01-24 04:43:41.052052: step: 62/464, loss: 0.024688029661774635 2023-01-24 04:43:41.646159: step: 64/464, loss: 0.0011120281415060163 2023-01-24 04:43:42.193972: step: 66/464, loss: 0.0012752178590744734 2023-01-24 04:43:42.830436: step: 68/464, loss: 0.022770212963223457 2023-01-24 04:43:43.508937: step: 70/464, loss: 0.0012789067113772035 2023-01-24 04:43:44.117350: step: 72/464, loss: 0.14106199145317078 2023-01-24 04:43:44.864823: step: 74/464, loss: 0.0728851780295372 2023-01-24 04:43:45.484026: step: 76/464, loss: 0.0076300278306007385 2023-01-24 04:43:46.075079: step: 78/464, loss: 0.33532583713531494 2023-01-24 04:43:46.693301: step: 80/464, loss: 0.0028517949394881725 2023-01-24 04:43:47.317605: step: 82/464, loss: 0.001625005155801773 2023-01-24 04:43:47.921001: step: 84/464, loss: 0.06250736117362976 2023-01-24 04:43:48.555767: step: 86/464, loss: 0.003237940836697817 2023-01-24 04:43:49.194432: step: 88/464, loss: 0.03381800279021263 2023-01-24 04:43:49.830842: step: 90/464, loss: 0.15457159280776978 2023-01-24 04:43:50.480380: step: 92/464, loss: 0.02301689051091671 2023-01-24 04:43:51.011117: step: 94/464, loss: 0.0011168025666847825 2023-01-24 04:43:51.558625: step: 96/464, loss: 0.02981138974428177 2023-01-24 04:43:52.211241: step: 98/464, loss: 0.0009769659955054522 2023-01-24 04:43:52.810377: step: 100/464, loss: 0.014786754734814167 2023-01-24 04:43:53.442998: step: 102/464, loss: 0.006124967243522406 2023-01-24 04:43:54.057088: step: 104/464, loss: 0.010965868830680847 2023-01-24 04:43:54.571007: step: 106/464, loss: 0.0019391337409615517 2023-01-24 04:43:55.173467: step: 108/464, loss: 0.3080310821533203 2023-01-24 04:43:55.800464: step: 110/464, loss: 0.09916416555643082 2023-01-24 04:43:56.413841: step: 112/464, loss: 0.019109727814793587 2023-01-24 04:43:56.989147: step: 114/464, loss: 0.001565584447234869 2023-01-24 04:43:57.583414: step: 116/464, loss: 0.0002652221010066569 2023-01-24 04:43:58.185369: step: 118/464, loss: 0.1852545142173767 2023-01-24 04:43:58.769198: step: 120/464, loss: 0.038834791630506516 2023-01-24 04:43:59.393239: step: 122/464, loss: 0.09213980287313461 2023-01-24 04:44:00.040730: step: 124/464, loss: 0.00192394875921309 2023-01-24 04:44:00.647018: step: 126/464, loss: 0.002106940373778343 2023-01-24 04:44:01.216976: step: 128/464, loss: 0.0022854506969451904 2023-01-24 04:44:01.861894: step: 130/464, loss: 0.0008612187230028212 2023-01-24 04:44:02.488634: step: 132/464, loss: 0.028198976069688797 2023-01-24 04:44:03.075859: step: 134/464, loss: 0.00705569377169013 2023-01-24 04:44:03.717653: step: 136/464, loss: 0.21060891449451447 2023-01-24 04:44:04.315693: step: 138/464, loss: 0.00014653653488494456 2023-01-24 04:44:04.896137: step: 140/464, loss: 0.006557180546224117 2023-01-24 04:44:05.500413: step: 142/464, loss: 0.004567587282508612 2023-01-24 04:44:06.161258: step: 144/464, loss: 0.05902276560664177 2023-01-24 04:44:06.775191: step: 146/464, loss: 0.06563450396060944 2023-01-24 04:44:07.393930: step: 148/464, loss: 0.0009326763683930039 2023-01-24 04:44:08.007433: step: 150/464, loss: 0.002292963908985257 2023-01-24 04:44:08.651485: step: 152/464, loss: 0.04217243194580078 2023-01-24 04:44:09.308792: step: 154/464, loss: 0.008173865266144276 2023-01-24 04:44:09.942993: step: 156/464, loss: 0.03438876196742058 2023-01-24 04:44:10.572756: step: 158/464, loss: 0.003344327211380005 2023-01-24 04:44:11.162616: step: 160/464, loss: 0.011284800246357918 2023-01-24 04:44:11.703508: step: 162/464, loss: 0.03331875428557396 2023-01-24 04:44:12.339224: step: 164/464, loss: 0.00024161580950021744 2023-01-24 04:44:13.005382: step: 166/464, loss: 0.010135755874216557 2023-01-24 04:44:13.579439: step: 168/464, loss: 8.51072181831114e-05 2023-01-24 04:44:14.208990: step: 170/464, loss: 0.03148839250206947 2023-01-24 04:44:14.878631: step: 172/464, loss: 0.00888749212026596 2023-01-24 04:44:15.427698: step: 174/464, loss: 0.04342638701200485 2023-01-24 04:44:16.026195: step: 176/464, loss: 0.07448451966047287 2023-01-24 04:44:16.683705: step: 178/464, loss: 0.036748576909303665 2023-01-24 04:44:17.267755: step: 180/464, loss: 0.0023196316324174404 2023-01-24 04:44:17.892140: step: 182/464, loss: 0.04282921180129051 2023-01-24 04:44:18.471936: step: 184/464, loss: 0.05800343677401543 2023-01-24 04:44:19.070341: step: 186/464, loss: 0.016823645681142807 2023-01-24 04:44:19.839199: step: 188/464, loss: 0.00039676681626588106 2023-01-24 04:44:20.485442: step: 190/464, loss: 0.024343879893422127 2023-01-24 04:44:21.107512: step: 192/464, loss: 0.008399303071200848 2023-01-24 04:44:21.659449: step: 194/464, loss: 0.007016150280833244 2023-01-24 04:44:22.300331: step: 196/464, loss: 0.01540520042181015 2023-01-24 04:44:22.809070: step: 198/464, loss: 0.01566270925104618 2023-01-24 04:44:23.415407: step: 200/464, loss: 0.037253670394420624 2023-01-24 04:44:24.064527: step: 202/464, loss: 0.003179313614964485 2023-01-24 04:44:24.733562: step: 204/464, loss: 0.03366708382964134 2023-01-24 04:44:25.369465: step: 206/464, loss: 0.028714319691061974 2023-01-24 04:44:26.002645: step: 208/464, loss: 0.2324700802564621 2023-01-24 04:44:26.654778: step: 210/464, loss: 0.07700219005346298 2023-01-24 04:44:27.232771: step: 212/464, loss: 0.0030666450038552284 2023-01-24 04:44:27.812917: step: 214/464, loss: 0.0012264112010598183 2023-01-24 04:44:28.485096: step: 216/464, loss: 0.0726943090558052 2023-01-24 04:44:29.060555: step: 218/464, loss: 0.021450810134410858 2023-01-24 04:44:29.687817: step: 220/464, loss: 0.004526334349066019 2023-01-24 04:44:30.358083: step: 222/464, loss: 0.038611650466918945 2023-01-24 04:44:30.996306: step: 224/464, loss: 0.0003367810568306595 2023-01-24 04:44:31.624238: step: 226/464, loss: 0.008392960764467716 2023-01-24 04:44:32.240871: step: 228/464, loss: 0.0759117379784584 2023-01-24 04:44:32.866202: step: 230/464, loss: 0.0077818832360208035 2023-01-24 04:44:33.591500: step: 232/464, loss: 0.007422207854688168 2023-01-24 04:44:34.232276: step: 234/464, loss: 0.004921207204461098 2023-01-24 04:44:34.845240: step: 236/464, loss: 0.01048721931874752 2023-01-24 04:44:35.515132: step: 238/464, loss: 0.011839354410767555 2023-01-24 04:44:36.148499: step: 240/464, loss: 0.003599931485950947 2023-01-24 04:44:36.752182: step: 242/464, loss: 0.016076581552624702 2023-01-24 04:44:37.384054: step: 244/464, loss: 0.008714699186384678 2023-01-24 04:44:37.986246: step: 246/464, loss: 0.0007286720210686326 2023-01-24 04:44:38.588471: step: 248/464, loss: 0.011836939491331577 2023-01-24 04:44:39.198000: step: 250/464, loss: 0.11396396905183792 2023-01-24 04:44:39.833443: step: 252/464, loss: 0.0001828370150178671 2023-01-24 04:44:40.379560: step: 254/464, loss: 0.0002706579689402133 2023-01-24 04:44:41.087622: step: 256/464, loss: 0.006874525919556618 2023-01-24 04:44:41.751191: step: 258/464, loss: 0.012534061446785927 2023-01-24 04:44:42.364403: step: 260/464, loss: 0.03968286141753197 2023-01-24 04:44:42.964470: step: 262/464, loss: 0.030112750828266144 2023-01-24 04:44:43.555085: step: 264/464, loss: 0.002630100352689624 2023-01-24 04:44:44.142733: step: 266/464, loss: 0.0007398867746815085 2023-01-24 04:44:44.762397: step: 268/464, loss: 0.033645376563072205 2023-01-24 04:44:45.417234: step: 270/464, loss: 0.02185887098312378 2023-01-24 04:44:46.041171: step: 272/464, loss: 0.007533758878707886 2023-01-24 04:44:46.661255: step: 274/464, loss: 0.034230563789606094 2023-01-24 04:44:47.279758: step: 276/464, loss: 0.0003835291718132794 2023-01-24 04:44:47.905045: step: 278/464, loss: 0.006568636279553175 2023-01-24 04:44:48.517183: step: 280/464, loss: 0.01467649545520544 2023-01-24 04:44:49.124089: step: 282/464, loss: 0.00968607421964407 2023-01-24 04:44:49.709178: step: 284/464, loss: 0.008147986605763435 2023-01-24 04:44:50.309474: step: 286/464, loss: 0.0042988816276192665 2023-01-24 04:44:50.976403: step: 288/464, loss: 0.04639403894543648 2023-01-24 04:44:51.559881: step: 290/464, loss: 0.007763709872961044 2023-01-24 04:44:52.132484: step: 292/464, loss: 0.002152147702872753 2023-01-24 04:44:52.732465: step: 294/464, loss: 0.02833113819360733 2023-01-24 04:44:53.331249: step: 296/464, loss: 0.02986162155866623 2023-01-24 04:44:53.963253: step: 298/464, loss: 0.0022444010246545076 2023-01-24 04:44:54.622210: step: 300/464, loss: 0.0035056171473115683 2023-01-24 04:44:55.283707: step: 302/464, loss: 1.122079849243164 2023-01-24 04:44:55.860452: step: 304/464, loss: 0.007843993604183197 2023-01-24 04:44:56.510752: step: 306/464, loss: 0.0015960789751261473 2023-01-24 04:44:57.126516: step: 308/464, loss: 0.003823076607659459 2023-01-24 04:44:57.809826: step: 310/464, loss: 0.07746380567550659 2023-01-24 04:44:58.434470: step: 312/464, loss: 0.002466361504048109 2023-01-24 04:44:59.030155: step: 314/464, loss: 0.0016166861169040203 2023-01-24 04:44:59.605519: step: 316/464, loss: 0.0008088828180916607 2023-01-24 04:45:00.219044: step: 318/464, loss: 0.10763225704431534 2023-01-24 04:45:00.847604: step: 320/464, loss: 0.008803927339613438 2023-01-24 04:45:01.474352: step: 322/464, loss: 0.07278378307819366 2023-01-24 04:45:02.075860: step: 324/464, loss: 0.009004500694572926 2023-01-24 04:45:02.727551: step: 326/464, loss: 0.006256289314478636 2023-01-24 04:45:03.419692: step: 328/464, loss: 0.00622887909412384 2023-01-24 04:45:04.026195: step: 330/464, loss: 0.029455173760652542 2023-01-24 04:45:04.609063: step: 332/464, loss: 0.0009756234940141439 2023-01-24 04:45:05.264173: step: 334/464, loss: 0.002182058524340391 2023-01-24 04:45:05.840097: step: 336/464, loss: 0.0010947698028758168 2023-01-24 04:45:06.411629: step: 338/464, loss: 0.04082098603248596 2023-01-24 04:45:06.980829: step: 340/464, loss: 0.016495386138558388 2023-01-24 04:45:07.612387: step: 342/464, loss: 0.005853863898664713 2023-01-24 04:45:08.299390: step: 344/464, loss: 0.03994187340140343 2023-01-24 04:45:08.991044: step: 346/464, loss: 0.013703061267733574 2023-01-24 04:45:09.626697: step: 348/464, loss: 0.0014292305568233132 2023-01-24 04:45:10.212201: step: 350/464, loss: 0.029019614681601524 2023-01-24 04:45:10.851876: step: 352/464, loss: 0.048232026398181915 2023-01-24 04:45:11.557236: step: 354/464, loss: 0.0015602021012455225 2023-01-24 04:45:12.181541: step: 356/464, loss: 0.011963332071900368 2023-01-24 04:45:12.826688: step: 358/464, loss: 0.004896857775747776 2023-01-24 04:45:13.517299: step: 360/464, loss: 0.007370649836957455 2023-01-24 04:45:14.119743: step: 362/464, loss: 0.00025744843878783286 2023-01-24 04:45:14.710506: step: 364/464, loss: 0.14153197407722473 2023-01-24 04:45:15.335125: step: 366/464, loss: 0.019035013392567635 2023-01-24 04:45:15.930121: step: 368/464, loss: 0.0006584279472008348 2023-01-24 04:45:16.493750: step: 370/464, loss: 0.07147854566574097 2023-01-24 04:45:17.127922: step: 372/464, loss: 0.018193015828728676 2023-01-24 04:45:17.716489: step: 374/464, loss: 0.0103158513084054 2023-01-24 04:45:18.353865: step: 376/464, loss: 0.0066604227758944035 2023-01-24 04:45:18.988171: step: 378/464, loss: 0.006821786984801292 2023-01-24 04:45:19.634028: step: 380/464, loss: 0.0039542485028505325 2023-01-24 04:45:20.261055: step: 382/464, loss: 4.337979044066742e-05 2023-01-24 04:45:20.863814: step: 384/464, loss: 0.07129360735416412 2023-01-24 04:45:21.493354: step: 386/464, loss: 0.002703416394069791 2023-01-24 04:45:22.093420: step: 388/464, loss: 0.018774086609482765 2023-01-24 04:45:22.662424: step: 390/464, loss: 0.023931795731186867 2023-01-24 04:45:23.318281: step: 392/464, loss: 0.009366103447973728 2023-01-24 04:45:23.936304: step: 394/464, loss: 0.1786050796508789 2023-01-24 04:45:24.632513: step: 396/464, loss: 0.013859412632882595 2023-01-24 04:45:25.272355: step: 398/464, loss: 0.00012523426266852766 2023-01-24 04:45:25.913881: step: 400/464, loss: 0.004233901854604483 2023-01-24 04:45:26.572837: step: 402/464, loss: 0.012055453844368458 2023-01-24 04:45:27.207523: step: 404/464, loss: 0.00043928564991801977 2023-01-24 04:45:27.883136: step: 406/464, loss: 0.06493253260850906 2023-01-24 04:45:28.506103: step: 408/464, loss: 0.015318336896598339 2023-01-24 04:45:29.112280: step: 410/464, loss: 0.003939393442124128 2023-01-24 04:45:29.693917: step: 412/464, loss: 0.042990926653146744 2023-01-24 04:45:30.396868: step: 414/464, loss: 0.6873338222503662 2023-01-24 04:45:31.018554: step: 416/464, loss: 0.00958840548992157 2023-01-24 04:45:31.581578: step: 418/464, loss: 0.006285862997174263 2023-01-24 04:45:32.203143: step: 420/464, loss: 0.013670720160007477 2023-01-24 04:45:32.740307: step: 422/464, loss: 0.000234775579883717 2023-01-24 04:45:33.384757: step: 424/464, loss: 0.0011437935754656792 2023-01-24 04:45:34.072237: step: 426/464, loss: 0.01316257193684578 2023-01-24 04:45:34.621972: step: 428/464, loss: 0.0035191448405385017 2023-01-24 04:45:35.240410: step: 430/464, loss: 0.020264482125639915 2023-01-24 04:45:35.864584: step: 432/464, loss: 0.005611537955701351 2023-01-24 04:45:36.474692: step: 434/464, loss: 0.004806075245141983 2023-01-24 04:45:37.103296: step: 436/464, loss: 0.0727372094988823 2023-01-24 04:45:37.658732: step: 438/464, loss: 0.00040400371653959155 2023-01-24 04:45:38.343549: step: 440/464, loss: 0.025443876162171364 2023-01-24 04:45:38.934993: step: 442/464, loss: 0.007278925273567438 2023-01-24 04:45:39.508828: step: 444/464, loss: 0.0007324048201553524 2023-01-24 04:45:40.163669: step: 446/464, loss: 0.9470614790916443 2023-01-24 04:45:40.719695: step: 448/464, loss: 0.005414521787315607 2023-01-24 04:45:41.385063: step: 450/464, loss: 0.039313700050115585 2023-01-24 04:45:42.058394: step: 452/464, loss: 0.02977609820663929 2023-01-24 04:45:42.688875: step: 454/464, loss: 0.0796954482793808 2023-01-24 04:45:43.410383: step: 456/464, loss: 0.0037060920149087906 2023-01-24 04:45:44.106792: step: 458/464, loss: 0.030984675511717796 2023-01-24 04:45:44.760903: step: 460/464, loss: 0.031195633113384247 2023-01-24 04:45:45.409516: step: 462/464, loss: 0.050389666110277176 2023-01-24 04:45:46.023997: step: 464/464, loss: 0.002835672115907073 2023-01-24 04:45:46.668224: step: 466/464, loss: 0.009972220286726952 2023-01-24 04:45:47.285540: step: 468/464, loss: 0.006782420910894871 2023-01-24 04:45:47.924889: step: 470/464, loss: 0.001250862143933773 2023-01-24 04:45:48.478810: step: 472/464, loss: 0.0008862206595949829 2023-01-24 04:45:49.142214: step: 474/464, loss: 0.0006641732179559767 2023-01-24 04:45:49.740907: step: 476/464, loss: 0.0071775889955461025 2023-01-24 04:45:50.300293: step: 478/464, loss: 0.03260122612118721 2023-01-24 04:45:50.904019: step: 480/464, loss: 0.005589763168245554 2023-01-24 04:45:51.530929: step: 482/464, loss: 0.04313836246728897 2023-01-24 04:45:52.096327: step: 484/464, loss: 0.012813220731914043 2023-01-24 04:45:52.691569: step: 486/464, loss: 0.06320730596780777 2023-01-24 04:45:53.333687: step: 488/464, loss: 0.016038797795772552 2023-01-24 04:45:53.941001: step: 490/464, loss: 0.4349803626537323 2023-01-24 04:45:54.560936: step: 492/464, loss: 0.02631193771958351 2023-01-24 04:45:55.154384: step: 494/464, loss: 0.00018490907677914947 2023-01-24 04:45:55.715616: step: 496/464, loss: 0.007844946347177029 2023-01-24 04:45:56.333403: step: 498/464, loss: 0.02207314968109131 2023-01-24 04:45:56.976415: step: 500/464, loss: 0.0034127626568078995 2023-01-24 04:45:57.640285: step: 502/464, loss: 0.0290288794785738 2023-01-24 04:45:58.235580: step: 504/464, loss: 0.027108201757073402 2023-01-24 04:45:58.859535: step: 506/464, loss: 0.004797337576746941 2023-01-24 04:45:59.514125: step: 508/464, loss: 0.0073133353143930435 2023-01-24 04:46:00.196455: step: 510/464, loss: 0.07168885320425034 2023-01-24 04:46:00.797486: step: 512/464, loss: 0.036796074360609055 2023-01-24 04:46:01.443562: step: 514/464, loss: 0.07598067820072174 2023-01-24 04:46:02.047085: step: 516/464, loss: 0.14883510768413544 2023-01-24 04:46:02.631092: step: 518/464, loss: 5.250910180620849e-05 2023-01-24 04:46:03.182704: step: 520/464, loss: 0.017701705917716026 2023-01-24 04:46:03.782056: step: 522/464, loss: 0.08047399669885635 2023-01-24 04:46:04.372500: step: 524/464, loss: 0.006779791321605444 2023-01-24 04:46:04.940929: step: 526/464, loss: 0.0016784468898549676 2023-01-24 04:46:05.574529: step: 528/464, loss: 0.06029986962676048 2023-01-24 04:46:06.195684: step: 530/464, loss: 7.868605462135747e-05 2023-01-24 04:46:06.789021: step: 532/464, loss: 0.0018813211936503649 2023-01-24 04:46:07.399184: step: 534/464, loss: 0.002208078047260642 2023-01-24 04:46:07.977911: step: 536/464, loss: 0.0004298434068914503 2023-01-24 04:46:08.662220: step: 538/464, loss: 0.005829576402902603 2023-01-24 04:46:09.349959: step: 540/464, loss: 0.019377706572413445 2023-01-24 04:46:09.996076: step: 542/464, loss: 0.0014036053325980902 2023-01-24 04:46:10.585015: step: 544/464, loss: 0.00861271470785141 2023-01-24 04:46:11.338915: step: 546/464, loss: 0.009145848453044891 2023-01-24 04:46:11.980637: step: 548/464, loss: 0.016987405717372894 2023-01-24 04:46:12.558895: step: 550/464, loss: 0.00026997787063010037 2023-01-24 04:46:13.202796: step: 552/464, loss: 0.0016153783071786165 2023-01-24 04:46:13.858219: step: 554/464, loss: 0.3739587068557739 2023-01-24 04:46:14.521458: step: 556/464, loss: 0.015637751668691635 2023-01-24 04:46:15.085447: step: 558/464, loss: 0.010172784328460693 2023-01-24 04:46:15.705329: step: 560/464, loss: 0.06389915943145752 2023-01-24 04:46:16.331763: step: 562/464, loss: 0.0174139142036438 2023-01-24 04:46:16.876337: step: 564/464, loss: 0.003111387137323618 2023-01-24 04:46:17.475933: step: 566/464, loss: 0.03332321718335152 2023-01-24 04:46:18.275786: step: 568/464, loss: 0.009706872515380383 2023-01-24 04:46:18.945351: step: 570/464, loss: 0.012194119393825531 2023-01-24 04:46:19.531951: step: 572/464, loss: 0.02395642362535 2023-01-24 04:46:20.139936: step: 574/464, loss: 0.046492986381053925 2023-01-24 04:46:20.691167: step: 576/464, loss: 0.01595861092209816 2023-01-24 04:46:21.415787: step: 578/464, loss: 0.0010339318541809916 2023-01-24 04:46:22.052512: step: 580/464, loss: 0.17204780876636505 2023-01-24 04:46:22.760563: step: 582/464, loss: 0.012429947964847088 2023-01-24 04:46:23.467312: step: 584/464, loss: 0.21090374886989594 2023-01-24 04:46:24.086357: step: 586/464, loss: 0.03177638351917267 2023-01-24 04:46:24.647755: step: 588/464, loss: 0.0053174374625086784 2023-01-24 04:46:25.244049: step: 590/464, loss: 0.014543687924742699 2023-01-24 04:46:25.918866: step: 592/464, loss: 0.020352229475975037 2023-01-24 04:46:26.583833: step: 594/464, loss: 0.03623180836439133 2023-01-24 04:46:27.191989: step: 596/464, loss: 0.0016851243562996387 2023-01-24 04:46:27.789367: step: 598/464, loss: 0.059681087732315063 2023-01-24 04:46:28.405243: step: 600/464, loss: 0.0010504459496587515 2023-01-24 04:46:28.956493: step: 602/464, loss: 0.015765614807605743 2023-01-24 04:46:29.606096: step: 604/464, loss: 0.11763416975736618 2023-01-24 04:46:30.251827: step: 606/464, loss: 0.047599419951438904 2023-01-24 04:46:30.920889: step: 608/464, loss: 0.007170728407800198 2023-01-24 04:46:31.531419: step: 610/464, loss: 0.006606565788388252 2023-01-24 04:46:32.185039: step: 612/464, loss: 0.02955743670463562 2023-01-24 04:46:32.823531: step: 614/464, loss: 0.01352652721107006 2023-01-24 04:46:33.429034: step: 616/464, loss: 0.019116820767521858 2023-01-24 04:46:34.046877: step: 618/464, loss: 0.03399045392870903 2023-01-24 04:46:34.688294: step: 620/464, loss: 0.00732051208615303 2023-01-24 04:46:35.335061: step: 622/464, loss: 0.013931176625192165 2023-01-24 04:46:35.970681: step: 624/464, loss: 0.02485789731144905 2023-01-24 04:46:36.669212: step: 626/464, loss: 0.04422234743833542 2023-01-24 04:46:37.295761: step: 628/464, loss: 0.0006379493279382586 2023-01-24 04:46:37.906894: step: 630/464, loss: 0.04369499161839485 2023-01-24 04:46:38.548585: step: 632/464, loss: 0.12479616701602936 2023-01-24 04:46:39.115440: step: 634/464, loss: 0.22262367606163025 2023-01-24 04:46:39.731706: step: 636/464, loss: 0.05760623514652252 2023-01-24 04:46:40.463917: step: 638/464, loss: 0.10373300313949585 2023-01-24 04:46:41.118819: step: 640/464, loss: 0.10711164027452469 2023-01-24 04:46:41.793346: step: 642/464, loss: 0.005474729463458061 2023-01-24 04:46:42.385952: step: 644/464, loss: 0.03453825041651726 2023-01-24 04:46:43.013971: step: 646/464, loss: 0.007322102319449186 2023-01-24 04:46:43.638119: step: 648/464, loss: 0.0455363430082798 2023-01-24 04:46:44.265685: step: 650/464, loss: 0.021714529022574425 2023-01-24 04:46:44.841158: step: 652/464, loss: 0.007238437887281179 2023-01-24 04:46:45.471853: step: 654/464, loss: 0.006591625977307558 2023-01-24 04:46:46.133801: step: 656/464, loss: 0.047648973762989044 2023-01-24 04:46:46.846080: step: 658/464, loss: 0.018461909145116806 2023-01-24 04:46:47.517223: step: 660/464, loss: 0.002493728417903185 2023-01-24 04:46:48.121745: step: 662/464, loss: 0.015594934113323689 2023-01-24 04:46:48.760841: step: 664/464, loss: 0.016749706119298935 2023-01-24 04:46:49.381185: step: 666/464, loss: 0.02515016496181488 2023-01-24 04:46:49.945776: step: 668/464, loss: 0.0007680103299207985 2023-01-24 04:46:50.496352: step: 670/464, loss: 0.005315244663506746 2023-01-24 04:46:51.134135: step: 672/464, loss: 0.01851995289325714 2023-01-24 04:46:51.716386: step: 674/464, loss: 0.33771756291389465 2023-01-24 04:46:52.357232: step: 676/464, loss: 0.031938109546899796 2023-01-24 04:46:52.956478: step: 678/464, loss: 0.008982275612652302 2023-01-24 04:46:53.650305: step: 680/464, loss: 0.006587052717804909 2023-01-24 04:46:54.288308: step: 682/464, loss: 0.004672519396990538 2023-01-24 04:46:54.945453: step: 684/464, loss: 0.0054403976537287235 2023-01-24 04:46:55.546423: step: 686/464, loss: 0.015013152733445168 2023-01-24 04:46:56.154642: step: 688/464, loss: 0.022577261552214622 2023-01-24 04:46:56.793087: step: 690/464, loss: 0.008266960270702839 2023-01-24 04:46:57.438279: step: 692/464, loss: 0.012359803542494774 2023-01-24 04:46:58.075471: step: 694/464, loss: 0.010465661995112896 2023-01-24 04:46:58.635446: step: 696/464, loss: 0.018571270629763603 2023-01-24 04:46:59.235701: step: 698/464, loss: 0.19208259880542755 2023-01-24 04:46:59.817780: step: 700/464, loss: 0.05983246862888336 2023-01-24 04:47:00.394240: step: 702/464, loss: 0.010622991248965263 2023-01-24 04:47:00.948077: step: 704/464, loss: 0.05283036082983017 2023-01-24 04:47:01.621525: step: 706/464, loss: 0.03840656951069832 2023-01-24 04:47:02.242466: step: 708/464, loss: 0.005075534805655479 2023-01-24 04:47:02.847869: step: 710/464, loss: 0.017260050401091576 2023-01-24 04:47:03.491705: step: 712/464, loss: 0.003130019875243306 2023-01-24 04:47:04.083061: step: 714/464, loss: 0.004431078210473061 2023-01-24 04:47:04.720002: step: 716/464, loss: 0.05127481743693352 2023-01-24 04:47:05.328446: step: 718/464, loss: 0.0026954528875648975 2023-01-24 04:47:05.932262: step: 720/464, loss: 0.03995548561215401 2023-01-24 04:47:06.620452: step: 722/464, loss: 0.11368819326162338 2023-01-24 04:47:07.186390: step: 724/464, loss: 0.0023431519512087107 2023-01-24 04:47:07.822849: step: 726/464, loss: 0.001431989367119968 2023-01-24 04:47:08.508884: step: 728/464, loss: 0.0038282026071101427 2023-01-24 04:47:09.121179: step: 730/464, loss: 0.008284663781523705 2023-01-24 04:47:09.722096: step: 732/464, loss: 0.006050837226212025 2023-01-24 04:47:10.293414: step: 734/464, loss: 0.014839560724794865 2023-01-24 04:47:10.991043: step: 736/464, loss: 0.003803978208452463 2023-01-24 04:47:11.556639: step: 738/464, loss: 0.044560253620147705 2023-01-24 04:47:12.244533: step: 740/464, loss: 0.013758180662989616 2023-01-24 04:47:12.928712: step: 742/464, loss: 0.053472235798835754 2023-01-24 04:47:13.527924: step: 744/464, loss: 0.053535766899585724 2023-01-24 04:47:14.155317: step: 746/464, loss: 0.03132905066013336 2023-01-24 04:47:14.814531: step: 748/464, loss: 0.011415134184062481 2023-01-24 04:47:15.377886: step: 750/464, loss: 0.009031401947140694 2023-01-24 04:47:16.039269: step: 752/464, loss: 0.006600252818316221 2023-01-24 04:47:16.692516: step: 754/464, loss: 8.853591862134635e-05 2023-01-24 04:47:17.347249: step: 756/464, loss: 1.2533529996871948 2023-01-24 04:47:17.939942: step: 758/464, loss: 0.011251486837863922 2023-01-24 04:47:18.572340: step: 760/464, loss: 0.021243376657366753 2023-01-24 04:47:19.236821: step: 762/464, loss: 0.022263169288635254 2023-01-24 04:47:19.844224: step: 764/464, loss: 0.01969255320727825 2023-01-24 04:47:20.468195: step: 766/464, loss: 0.021357337012887 2023-01-24 04:47:21.106770: step: 768/464, loss: 0.006406448315829039 2023-01-24 04:47:21.787407: step: 770/464, loss: 0.013859029859304428 2023-01-24 04:47:22.356598: step: 772/464, loss: 0.008305447176098824 2023-01-24 04:47:23.057353: step: 774/464, loss: 0.0027322552632540464 2023-01-24 04:47:23.710501: step: 776/464, loss: 0.019399166107177734 2023-01-24 04:47:24.325393: step: 778/464, loss: 0.003915107809007168 2023-01-24 04:47:24.897275: step: 780/464, loss: 0.006939701735973358 2023-01-24 04:47:25.544893: step: 782/464, loss: 0.01559397205710411 2023-01-24 04:47:26.132218: step: 784/464, loss: 0.013243515975773335 2023-01-24 04:47:26.708750: step: 786/464, loss: 0.021561570465564728 2023-01-24 04:47:27.295622: step: 788/464, loss: 0.0011126205790787935 2023-01-24 04:47:27.943176: step: 790/464, loss: 0.0189791489392519 2023-01-24 04:47:28.592336: step: 792/464, loss: 0.02173781767487526 2023-01-24 04:47:29.264787: step: 794/464, loss: 0.020091254264116287 2023-01-24 04:47:29.886805: step: 796/464, loss: 0.02431781403720379 2023-01-24 04:47:30.539971: step: 798/464, loss: 0.021222827956080437 2023-01-24 04:47:31.124169: step: 800/464, loss: 0.00465004239231348 2023-01-24 04:47:31.821902: step: 802/464, loss: 0.03723061829805374 2023-01-24 04:47:32.524279: step: 804/464, loss: 0.025808952748775482 2023-01-24 04:47:33.144187: step: 806/464, loss: 0.00010458481847308576 2023-01-24 04:47:33.731359: step: 808/464, loss: 0.05023520067334175 2023-01-24 04:47:34.362776: step: 810/464, loss: 0.0019826339557766914 2023-01-24 04:47:34.957717: step: 812/464, loss: 0.000529944256413728 2023-01-24 04:47:35.602542: step: 814/464, loss: 0.004346379078924656 2023-01-24 04:47:36.168569: step: 816/464, loss: 0.0038693081587553024 2023-01-24 04:47:36.748803: step: 818/464, loss: 0.002243026392534375 2023-01-24 04:47:37.406085: step: 820/464, loss: 0.14987914264202118 2023-01-24 04:47:38.023016: step: 822/464, loss: 0.00967397540807724 2023-01-24 04:47:38.614285: step: 824/464, loss: 0.02487901970744133 2023-01-24 04:47:39.182698: step: 826/464, loss: 0.26290661096572876 2023-01-24 04:47:39.777391: step: 828/464, loss: 0.004128835629671812 2023-01-24 04:47:40.449387: step: 830/464, loss: 0.0026098049711436033 2023-01-24 04:47:41.118587: step: 832/464, loss: 0.004145448096096516 2023-01-24 04:47:41.752831: step: 834/464, loss: 0.291154146194458 2023-01-24 04:47:42.342584: step: 836/464, loss: 0.009006387554109097 2023-01-24 04:47:42.973817: step: 838/464, loss: 0.012768622487783432 2023-01-24 04:47:43.598031: step: 840/464, loss: 0.05619003251194954 2023-01-24 04:47:44.258229: step: 842/464, loss: 0.01126299798488617 2023-01-24 04:47:44.896572: step: 844/464, loss: 0.004039745312184095 2023-01-24 04:47:45.528984: step: 846/464, loss: 0.10724084079265594 2023-01-24 04:47:46.164227: step: 848/464, loss: 0.005021022167056799 2023-01-24 04:47:46.804612: step: 850/464, loss: 0.005245794542133808 2023-01-24 04:47:47.346724: step: 852/464, loss: 0.011281571350991726 2023-01-24 04:47:47.959205: step: 854/464, loss: 0.0015350535977631807 2023-01-24 04:47:48.526730: step: 856/464, loss: 0.014480022713541985 2023-01-24 04:47:49.190770: step: 858/464, loss: 0.0038870610296726227 2023-01-24 04:47:49.861978: step: 860/464, loss: 0.04260418564081192 2023-01-24 04:47:50.530384: step: 862/464, loss: 0.0012214086018502712 2023-01-24 04:47:51.215078: step: 864/464, loss: 0.008705553598701954 2023-01-24 04:47:51.831189: step: 866/464, loss: 2.5658152103424072 2023-01-24 04:47:52.444848: step: 868/464, loss: 0.008928397670388222 2023-01-24 04:47:53.129187: step: 870/464, loss: 0.012816757895052433 2023-01-24 04:47:53.769031: step: 872/464, loss: 0.007330951280891895 2023-01-24 04:47:54.336229: step: 874/464, loss: 0.9620086550712585 2023-01-24 04:47:54.970569: step: 876/464, loss: 0.10326791554689407 2023-01-24 04:47:55.575017: step: 878/464, loss: 0.023309925571084023 2023-01-24 04:47:56.160823: step: 880/464, loss: 0.005438762251287699 2023-01-24 04:47:56.783991: step: 882/464, loss: 0.06091240420937538 2023-01-24 04:47:57.430896: step: 884/464, loss: 0.0758993998169899 2023-01-24 04:47:58.048642: step: 886/464, loss: 0.000790759630035609 2023-01-24 04:47:58.709256: step: 888/464, loss: 0.07822221517562866 2023-01-24 04:47:59.308841: step: 890/464, loss: 0.0005388292483985424 2023-01-24 04:47:59.871312: step: 892/464, loss: 0.001909834798425436 2023-01-24 04:48:00.505131: step: 894/464, loss: 0.005446003284305334 2023-01-24 04:48:01.160382: step: 896/464, loss: 0.006591171491891146 2023-01-24 04:48:01.856155: step: 898/464, loss: 0.026384249329566956 2023-01-24 04:48:02.486050: step: 900/464, loss: 0.043026503175497055 2023-01-24 04:48:03.088890: step: 902/464, loss: 0.034636832773685455 2023-01-24 04:48:03.705074: step: 904/464, loss: 0.013785764575004578 2023-01-24 04:48:04.330026: step: 906/464, loss: 0.0037164506502449512 2023-01-24 04:48:04.911152: step: 908/464, loss: 0.003406970528885722 2023-01-24 04:48:05.518538: step: 910/464, loss: 0.009995612315833569 2023-01-24 04:48:06.134465: step: 912/464, loss: 0.010833344422280788 2023-01-24 04:48:06.750335: step: 914/464, loss: 0.010535611771047115 2023-01-24 04:48:07.382298: step: 916/464, loss: 0.000829737342428416 2023-01-24 04:48:08.002829: step: 918/464, loss: 0.011057580821216106 2023-01-24 04:48:08.657653: step: 920/464, loss: 0.008482350967824459 2023-01-24 04:48:09.322808: step: 922/464, loss: 0.04329945519566536 2023-01-24 04:48:09.932341: step: 924/464, loss: 0.0016789406072348356 2023-01-24 04:48:10.574601: step: 926/464, loss: 0.047825418412685394 2023-01-24 04:48:11.209302: step: 928/464, loss: 0.014367038384079933 2023-01-24 04:48:11.690748: step: 930/464, loss: 0.0022941920906305313 ================================================== Loss: 0.049 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32852916909338, 'r': 0.31481447892249886, 'f1': 0.3215256402948777}, 'combined': 0.2369136296909625, 'epoch': 32} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3319833579179845, 'r': 0.3078557725679194, 'f1': 0.31946465372919175}, 'combined': 0.20856241642423917, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3270093677967055, 'r': 0.33445550140877467, 'f1': 0.3306905239069874}, 'combined': 0.24366670182620123, 'epoch': 32} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33734632668502224, 'r': 0.31161126683680823, 'f1': 0.32396852204342796}, 'combined': 0.21150276568638302, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34866544646065395, 'r': 0.3347717569432465, 'f1': 0.34157737833318663}, 'combined': 0.2516885945612954, 'epoch': 32} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33449053767937065, 'r': 0.3028536915995036, 'f1': 0.31788691098974403}, 'combined': 0.20753238748553238, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24031007751937983, 'r': 0.2952380952380952, 'f1': 0.26495726495726496}, 'combined': 0.17663817663817663, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3106060606060606, 'r': 0.44565217391304346, 'f1': 0.3660714285714286}, 'combined': 0.1830357142857143, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5454545454545454, 'r': 0.20689655172413793, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:50:47.965286: step: 2/464, loss: 0.014374053105711937 2023-01-24 04:50:48.636703: step: 4/464, loss: 0.2830972969532013 2023-01-24 04:50:49.332358: step: 6/464, loss: 0.23990294337272644 2023-01-24 04:50:49.925866: step: 8/464, loss: 0.00041447050170972943 2023-01-24 04:50:50.519528: step: 10/464, loss: 0.0003809631452895701 2023-01-24 04:50:51.193477: step: 12/464, loss: 0.025611156597733498 2023-01-24 04:50:51.820848: step: 14/464, loss: 0.01804334670305252 2023-01-24 04:50:52.354573: step: 16/464, loss: 0.001929254038259387 2023-01-24 04:50:52.975740: step: 18/464, loss: 0.036881223320961 2023-01-24 04:50:53.569787: step: 20/464, loss: 0.012756387703120708 2023-01-24 04:50:54.190831: step: 22/464, loss: 0.008184123784303665 2023-01-24 04:50:54.822032: step: 24/464, loss: 0.0449734702706337 2023-01-24 04:50:55.516660: step: 26/464, loss: 0.00029686116613447666 2023-01-24 04:50:56.129059: step: 28/464, loss: 0.02302325703203678 2023-01-24 04:50:56.789612: step: 30/464, loss: 0.05118812620639801 2023-01-24 04:50:57.362348: step: 32/464, loss: 0.001830504508689046 2023-01-24 04:50:57.950457: step: 34/464, loss: 0.009466273710131645 2023-01-24 04:50:58.549918: step: 36/464, loss: 0.03142885863780975 2023-01-24 04:50:59.233676: step: 38/464, loss: 0.027976183220744133 2023-01-24 04:50:59.859132: step: 40/464, loss: 0.01014450192451477 2023-01-24 04:51:00.483846: step: 42/464, loss: 0.011748143471777439 2023-01-24 04:51:01.087081: step: 44/464, loss: 0.008829900063574314 2023-01-24 04:51:01.717777: step: 46/464, loss: 0.009688672609627247 2023-01-24 04:51:02.320890: step: 48/464, loss: 0.0021476442925632 2023-01-24 04:51:02.972081: step: 50/464, loss: 0.009073460474610329 2023-01-24 04:51:03.598454: step: 52/464, loss: 0.0006571787525899708 2023-01-24 04:51:04.244194: step: 54/464, loss: 0.005301504395902157 2023-01-24 04:51:04.858203: step: 56/464, loss: 0.006080952472984791 2023-01-24 04:51:05.497789: step: 58/464, loss: 0.8253498077392578 2023-01-24 04:51:06.101607: step: 60/464, loss: 0.06126685440540314 2023-01-24 04:51:06.644935: step: 62/464, loss: 0.01091950386762619 2023-01-24 04:51:07.245525: step: 64/464, loss: 0.018319545313715935 2023-01-24 04:51:07.803837: step: 66/464, loss: 0.00018545903731137514 2023-01-24 04:51:08.408311: step: 68/464, loss: 0.0008794991299510002 2023-01-24 04:51:09.044028: step: 70/464, loss: 0.007405332755297422 2023-01-24 04:51:09.706326: step: 72/464, loss: 0.0023415430914610624 2023-01-24 04:51:10.278356: step: 74/464, loss: 0.04160754755139351 2023-01-24 04:51:10.987223: step: 76/464, loss: 0.00813635066151619 2023-01-24 04:51:11.620454: step: 78/464, loss: 0.0026107614394277334 2023-01-24 04:51:12.253024: step: 80/464, loss: 0.004070333205163479 2023-01-24 04:51:12.798116: step: 82/464, loss: 0.0002775926550384611 2023-01-24 04:51:13.425901: step: 84/464, loss: 0.00025254907086491585 2023-01-24 04:51:14.063575: step: 86/464, loss: 0.04424852132797241 2023-01-24 04:51:14.673658: step: 88/464, loss: 0.004443845711648464 2023-01-24 04:51:15.282499: step: 90/464, loss: 0.00035951321478933096 2023-01-24 04:51:15.886599: step: 92/464, loss: 0.017380233854055405 2023-01-24 04:51:16.493710: step: 94/464, loss: 0.00895154569298029 2023-01-24 04:51:17.095982: step: 96/464, loss: 0.009547477588057518 2023-01-24 04:51:17.755018: step: 98/464, loss: 0.0018229386769235134 2023-01-24 04:51:18.394570: step: 100/464, loss: 0.020579863339662552 2023-01-24 04:51:19.082838: step: 102/464, loss: 0.32163500785827637 2023-01-24 04:51:19.655136: step: 104/464, loss: 0.012846940197050571 2023-01-24 04:51:20.409382: step: 106/464, loss: 0.0021412346977740526 2023-01-24 04:51:21.014132: step: 108/464, loss: 0.0016689874464645982 2023-01-24 04:51:21.580306: step: 110/464, loss: 0.04337398707866669 2023-01-24 04:51:22.225393: step: 112/464, loss: 0.011758524924516678 2023-01-24 04:51:22.798274: step: 114/464, loss: 0.001188629656098783 2023-01-24 04:51:23.370538: step: 116/464, loss: 0.0006290088058449328 2023-01-24 04:51:24.004202: step: 118/464, loss: 0.3245905935764313 2023-01-24 04:51:24.609467: step: 120/464, loss: 0.030581016093492508 2023-01-24 04:51:25.238950: step: 122/464, loss: 0.013289973139762878 2023-01-24 04:51:25.827522: step: 124/464, loss: 0.009468546137213707 2023-01-24 04:51:26.411743: step: 126/464, loss: 0.028483226895332336 2023-01-24 04:51:26.993859: step: 128/464, loss: 0.012841271236538887 2023-01-24 04:51:27.568979: step: 130/464, loss: 0.03689542040228844 2023-01-24 04:51:28.277229: step: 132/464, loss: 0.004030969459563494 2023-01-24 04:51:28.875327: step: 134/464, loss: 0.006892730947583914 2023-01-24 04:51:29.526966: step: 136/464, loss: 0.011415884830057621 2023-01-24 04:51:30.091159: step: 138/464, loss: 3.393308725208044e-05 2023-01-24 04:51:30.694758: step: 140/464, loss: 0.01180098857730627 2023-01-24 04:51:31.286781: step: 142/464, loss: 0.042764972895383835 2023-01-24 04:51:31.928302: step: 144/464, loss: 0.005582286510616541 2023-01-24 04:51:32.575743: step: 146/464, loss: 0.33375313878059387 2023-01-24 04:51:33.139109: step: 148/464, loss: 0.0011688163504004478 2023-01-24 04:51:33.774098: step: 150/464, loss: 0.003209100104868412 2023-01-24 04:51:34.390391: step: 152/464, loss: 0.02336471527814865 2023-01-24 04:51:35.023608: step: 154/464, loss: 0.05035460367798805 2023-01-24 04:51:35.652316: step: 156/464, loss: 0.0631193295121193 2023-01-24 04:51:36.262233: step: 158/464, loss: 0.0029422007501125336 2023-01-24 04:51:36.968405: step: 160/464, loss: 0.1768340766429901 2023-01-24 04:51:37.608485: step: 162/464, loss: 3.6499710083007812 2023-01-24 04:51:38.221829: step: 164/464, loss: 0.010985293425619602 2023-01-24 04:51:38.755140: step: 166/464, loss: 0.002972731599584222 2023-01-24 04:51:39.348004: step: 168/464, loss: 0.010138231329619884 2023-01-24 04:51:39.967472: step: 170/464, loss: 0.010469421744346619 2023-01-24 04:51:40.611116: step: 172/464, loss: 0.09447196871042252 2023-01-24 04:51:41.239179: step: 174/464, loss: 0.011761488392949104 2023-01-24 04:51:41.880852: step: 176/464, loss: 0.014840611256659031 2023-01-24 04:51:42.458890: step: 178/464, loss: 0.03037756122648716 2023-01-24 04:51:43.069858: step: 180/464, loss: 0.014970389194786549 2023-01-24 04:51:43.643709: step: 182/464, loss: 0.004324909765273333 2023-01-24 04:51:44.294563: step: 184/464, loss: 0.17347773909568787 2023-01-24 04:51:44.873041: step: 186/464, loss: 0.06512332707643509 2023-01-24 04:51:45.501815: step: 188/464, loss: 0.010195410810410976 2023-01-24 04:51:46.137031: step: 190/464, loss: 0.0038245213218033314 2023-01-24 04:51:46.729301: step: 192/464, loss: 0.006037340499460697 2023-01-24 04:51:47.312077: step: 194/464, loss: 0.0025265971198678017 2023-01-24 04:51:47.920929: step: 196/464, loss: 0.0001403355272486806 2023-01-24 04:51:48.597572: step: 198/464, loss: 0.03302770480513573 2023-01-24 04:51:49.244777: step: 200/464, loss: 0.044800423085689545 2023-01-24 04:51:49.889158: step: 202/464, loss: 0.0025492501445114613 2023-01-24 04:51:50.472651: step: 204/464, loss: 0.00408302852883935 2023-01-24 04:51:51.062524: step: 206/464, loss: 0.0031438700389117002 2023-01-24 04:51:51.683958: step: 208/464, loss: 0.014370287768542767 2023-01-24 04:51:52.291072: step: 210/464, loss: 0.0064779119566082954 2023-01-24 04:51:52.870738: step: 212/464, loss: 0.05422116443514824 2023-01-24 04:51:53.458430: step: 214/464, loss: 0.0004520063812378794 2023-01-24 04:51:54.097165: step: 216/464, loss: 0.10309507697820663 2023-01-24 04:51:54.767701: step: 218/464, loss: 0.12332595884799957 2023-01-24 04:51:55.396128: step: 220/464, loss: 0.006602240726351738 2023-01-24 04:51:55.963680: step: 222/464, loss: 0.014455534517765045 2023-01-24 04:51:56.500754: step: 224/464, loss: 0.004008309915661812 2023-01-24 04:51:57.157084: step: 226/464, loss: 0.0005987048498354852 2023-01-24 04:51:57.827606: step: 228/464, loss: 0.01726825349032879 2023-01-24 04:51:58.445647: step: 230/464, loss: 0.00909911748021841 2023-01-24 04:51:59.055096: step: 232/464, loss: 0.0018394223880022764 2023-01-24 04:51:59.607847: step: 234/464, loss: 0.5204110145568848 2023-01-24 04:52:00.229556: step: 236/464, loss: 9.062099707080051e-05 2023-01-24 04:52:00.760199: step: 238/464, loss: 0.0006134660798124969 2023-01-24 04:52:01.423045: step: 240/464, loss: 0.010994810611009598 2023-01-24 04:52:02.067774: step: 242/464, loss: 1.8539471057010815e-05 2023-01-24 04:52:02.656252: step: 244/464, loss: 0.07794538885354996 2023-01-24 04:52:03.241651: step: 246/464, loss: 0.010720056481659412 2023-01-24 04:52:03.818593: step: 248/464, loss: 0.022959182038903236 2023-01-24 04:52:04.404324: step: 250/464, loss: 0.00048418642836622894 2023-01-24 04:52:05.037090: step: 252/464, loss: 0.015802789479494095 2023-01-24 04:52:05.638915: step: 254/464, loss: 0.001293016248382628 2023-01-24 04:52:06.306041: step: 256/464, loss: 0.006310163531452417 2023-01-24 04:52:06.905966: step: 258/464, loss: 0.011721929535269737 2023-01-24 04:52:07.561119: step: 260/464, loss: 0.00028431994724087417 2023-01-24 04:52:08.157819: step: 262/464, loss: 0.001610343810170889 2023-01-24 04:52:08.774436: step: 264/464, loss: 0.004501787945628166 2023-01-24 04:52:09.513278: step: 266/464, loss: 0.012815630063414574 2023-01-24 04:52:10.103675: step: 268/464, loss: 0.021414536982774734 2023-01-24 04:52:10.690058: step: 270/464, loss: 0.015873683616518974 2023-01-24 04:52:11.275387: step: 272/464, loss: 0.0038522426038980484 2023-01-24 04:52:11.883531: step: 274/464, loss: 0.03324449434876442 2023-01-24 04:52:12.495063: step: 276/464, loss: 0.003817223245278001 2023-01-24 04:52:13.114572: step: 278/464, loss: 0.005478051956743002 2023-01-24 04:52:13.749257: step: 280/464, loss: 0.016270257532596588 2023-01-24 04:52:14.373275: step: 282/464, loss: 0.018138011917471886 2023-01-24 04:52:14.986818: step: 284/464, loss: 0.0062078433111310005 2023-01-24 04:52:15.612570: step: 286/464, loss: 0.007623214274644852 2023-01-24 04:52:16.269857: step: 288/464, loss: 0.0191037654876709 2023-01-24 04:52:16.923167: step: 290/464, loss: 0.03720256686210632 2023-01-24 04:52:17.527798: step: 292/464, loss: 0.07807682454586029 2023-01-24 04:52:18.253210: step: 294/464, loss: 0.002669744659215212 2023-01-24 04:52:18.881276: step: 296/464, loss: 0.018069475889205933 2023-01-24 04:52:19.535554: step: 298/464, loss: 0.05704755336046219 2023-01-24 04:52:20.210135: step: 300/464, loss: 0.018284089863300323 2023-01-24 04:52:20.864488: step: 302/464, loss: 0.052654922008514404 2023-01-24 04:52:21.433780: step: 304/464, loss: 0.9298496842384338 2023-01-24 04:52:22.027634: step: 306/464, loss: 0.0066015636548399925 2023-01-24 04:52:22.668650: step: 308/464, loss: 0.006413571536540985 2023-01-24 04:52:23.319222: step: 310/464, loss: 0.007779460400342941 2023-01-24 04:52:24.005826: step: 312/464, loss: 0.030138764530420303 2023-01-24 04:52:24.640290: step: 314/464, loss: 0.0013628305168822408 2023-01-24 04:52:25.245291: step: 316/464, loss: 0.01542107854038477 2023-01-24 04:52:25.808275: step: 318/464, loss: 0.003668449819087982 2023-01-24 04:52:26.423301: step: 320/464, loss: 0.028360677883028984 2023-01-24 04:52:27.097091: step: 322/464, loss: 0.8447021842002869 2023-01-24 04:52:27.691631: step: 324/464, loss: 0.0032662516459822655 2023-01-24 04:52:28.379367: step: 326/464, loss: 0.23302333056926727 2023-01-24 04:52:29.037673: step: 328/464, loss: 0.012652361765503883 2023-01-24 04:52:29.587933: step: 330/464, loss: 0.004331211093813181 2023-01-24 04:52:30.190839: step: 332/464, loss: 0.004175769165158272 2023-01-24 04:52:30.825260: step: 334/464, loss: 0.002427124185487628 2023-01-24 04:52:31.532571: step: 336/464, loss: 0.0012440033024176955 2023-01-24 04:52:32.150454: step: 338/464, loss: 0.010690954513847828 2023-01-24 04:52:32.742363: step: 340/464, loss: 0.0003702449903357774 2023-01-24 04:52:33.328976: step: 342/464, loss: 9.117002628045157e-05 2023-01-24 04:52:33.931434: step: 344/464, loss: 0.0034490206744521856 2023-01-24 04:52:34.513247: step: 346/464, loss: 0.13205718994140625 2023-01-24 04:52:35.154384: step: 348/464, loss: 0.0798322930932045 2023-01-24 04:52:35.766367: step: 350/464, loss: 0.020838763564825058 2023-01-24 04:52:36.367693: step: 352/464, loss: 0.005004078149795532 2023-01-24 04:52:36.944457: step: 354/464, loss: 0.009846973232924938 2023-01-24 04:52:37.535629: step: 356/464, loss: 0.00175945064984262 2023-01-24 04:52:38.145978: step: 358/464, loss: 0.026817964389920235 2023-01-24 04:52:38.749650: step: 360/464, loss: 0.013861851766705513 2023-01-24 04:52:39.365223: step: 362/464, loss: 0.001218835124745965 2023-01-24 04:52:39.903681: step: 364/464, loss: 0.016749968752264977 2023-01-24 04:52:40.605687: step: 366/464, loss: 0.015844695270061493 2023-01-24 04:52:41.238529: step: 368/464, loss: 0.003026836784556508 2023-01-24 04:52:41.917916: step: 370/464, loss: 0.0302230603992939 2023-01-24 04:52:42.586896: step: 372/464, loss: 0.009233558550477028 2023-01-24 04:52:43.223186: step: 374/464, loss: 0.019739385694265366 2023-01-24 04:52:43.852689: step: 376/464, loss: 0.014525365084409714 2023-01-24 04:52:44.476121: step: 378/464, loss: 0.0338415801525116 2023-01-24 04:52:45.040188: step: 380/464, loss: 0.07181494683027267 2023-01-24 04:52:45.648595: step: 382/464, loss: 0.022824645042419434 2023-01-24 04:52:46.243302: step: 384/464, loss: 0.0061698416247963905 2023-01-24 04:52:46.953971: step: 386/464, loss: 0.040327515453100204 2023-01-24 04:52:47.589828: step: 388/464, loss: 0.0004780854796990752 2023-01-24 04:52:48.203636: step: 390/464, loss: 0.036215294152498245 2023-01-24 04:52:48.841020: step: 392/464, loss: 0.015350079163908958 2023-01-24 04:52:49.441977: step: 394/464, loss: 0.11014533787965775 2023-01-24 04:52:50.025289: step: 396/464, loss: 0.00023063892149366438 2023-01-24 04:52:50.645306: step: 398/464, loss: 0.018782315775752068 2023-01-24 04:52:51.376768: step: 400/464, loss: 0.006294018588960171 2023-01-24 04:52:52.050988: step: 402/464, loss: 0.014814279973506927 2023-01-24 04:52:52.686116: step: 404/464, loss: 0.053902145475149155 2023-01-24 04:52:53.257854: step: 406/464, loss: 0.028109122067689896 2023-01-24 04:52:53.905263: step: 408/464, loss: 0.013292652554810047 2023-01-24 04:52:54.544340: step: 410/464, loss: 0.006991738453507423 2023-01-24 04:52:55.171192: step: 412/464, loss: 0.0037923045456409454 2023-01-24 04:52:55.835264: step: 414/464, loss: 0.00360084674321115 2023-01-24 04:52:56.470337: step: 416/464, loss: 0.0037328642792999744 2023-01-24 04:52:57.121570: step: 418/464, loss: 0.001704493653960526 2023-01-24 04:52:57.741415: step: 420/464, loss: 0.004305397160351276 2023-01-24 04:52:58.356684: step: 422/464, loss: 0.0022913780994713306 2023-01-24 04:52:58.956630: step: 424/464, loss: 0.005939268507063389 2023-01-24 04:52:59.583859: step: 426/464, loss: 0.03526054322719574 2023-01-24 04:53:00.204767: step: 428/464, loss: 0.0073980726301670074 2023-01-24 04:53:00.786773: step: 430/464, loss: 0.0022466492373496294 2023-01-24 04:53:01.373599: step: 432/464, loss: 0.0006893372628837824 2023-01-24 04:53:02.008334: step: 434/464, loss: 0.04242715984582901 2023-01-24 04:53:02.598702: step: 436/464, loss: 0.005191306583583355 2023-01-24 04:53:03.274361: step: 438/464, loss: 0.24681374430656433 2023-01-24 04:53:03.878015: step: 440/464, loss: 0.0015327599830925465 2023-01-24 04:53:04.497676: step: 442/464, loss: 0.0064181857742369175 2023-01-24 04:53:05.126687: step: 444/464, loss: 0.00031510370899923146 2023-01-24 04:53:05.789145: step: 446/464, loss: 0.00802667811512947 2023-01-24 04:53:06.404057: step: 448/464, loss: 1.0939445495605469 2023-01-24 04:53:07.085679: step: 450/464, loss: 0.010794229805469513 2023-01-24 04:53:07.693610: step: 452/464, loss: 0.003955810330808163 2023-01-24 04:53:08.384910: step: 454/464, loss: 0.009406117722392082 2023-01-24 04:53:08.992366: step: 456/464, loss: 0.023797964677214622 2023-01-24 04:53:09.628724: step: 458/464, loss: 0.09177647531032562 2023-01-24 04:53:10.356629: step: 460/464, loss: 0.045457497239112854 2023-01-24 04:53:10.925782: step: 462/464, loss: 0.00666583888232708 2023-01-24 04:53:11.539867: step: 464/464, loss: 0.1608838140964508 2023-01-24 04:53:12.147196: step: 466/464, loss: 0.931765079498291 2023-01-24 04:53:12.699564: step: 468/464, loss: 0.0022043404169380665 2023-01-24 04:53:13.340260: step: 470/464, loss: 0.001080443849787116 2023-01-24 04:53:13.958560: step: 472/464, loss: 0.05975859612226486 2023-01-24 04:53:14.553811: step: 474/464, loss: 0.004577296786010265 2023-01-24 04:53:15.157282: step: 476/464, loss: 0.004997294396162033 2023-01-24 04:53:15.802444: step: 478/464, loss: 0.10205055773258209 2023-01-24 04:53:16.395760: step: 480/464, loss: 0.24596334993839264 2023-01-24 04:53:17.065590: step: 482/464, loss: 0.025164317339658737 2023-01-24 04:53:17.771677: step: 484/464, loss: 0.028020521625876427 2023-01-24 04:53:18.392003: step: 486/464, loss: 0.010484244674444199 2023-01-24 04:53:19.023013: step: 488/464, loss: 0.0005600190488621593 2023-01-24 04:53:19.669175: step: 490/464, loss: 0.0018563539488241076 2023-01-24 04:53:20.312958: step: 492/464, loss: 0.0025135499890893698 2023-01-24 04:53:20.951341: step: 494/464, loss: 0.004712886642664671 2023-01-24 04:53:21.567181: step: 496/464, loss: 0.020760148763656616 2023-01-24 04:53:22.181175: step: 498/464, loss: 0.02509947493672371 2023-01-24 04:53:22.801739: step: 500/464, loss: 0.014062878675758839 2023-01-24 04:53:23.531825: step: 502/464, loss: 0.004606351256370544 2023-01-24 04:53:24.213429: step: 504/464, loss: 0.0003082886105403304 2023-01-24 04:53:24.783033: step: 506/464, loss: 0.005637813825160265 2023-01-24 04:53:25.347994: step: 508/464, loss: 0.0023179217241704464 2023-01-24 04:53:25.968830: step: 510/464, loss: 0.009163595736026764 2023-01-24 04:53:26.590035: step: 512/464, loss: 0.1648324877023697 2023-01-24 04:53:27.198463: step: 514/464, loss: 0.26366400718688965 2023-01-24 04:53:27.871393: step: 516/464, loss: 0.009502682834863663 2023-01-24 04:53:28.464841: step: 518/464, loss: 0.01026153564453125 2023-01-24 04:53:29.082958: step: 520/464, loss: 0.017595071345567703 2023-01-24 04:53:29.754875: step: 522/464, loss: 0.008763373829424381 2023-01-24 04:53:30.332092: step: 524/464, loss: 0.004001053050160408 2023-01-24 04:53:30.908305: step: 526/464, loss: 0.007184656802564859 2023-01-24 04:53:31.592270: step: 528/464, loss: 0.05734050273895264 2023-01-24 04:53:32.152860: step: 530/464, loss: 0.009585640393197536 2023-01-24 04:53:32.745697: step: 532/464, loss: 0.027792764827609062 2023-01-24 04:53:33.392765: step: 534/464, loss: 0.19172044098377228 2023-01-24 04:53:33.960942: step: 536/464, loss: 0.00415234686806798 2023-01-24 04:53:34.592603: step: 538/464, loss: 0.0013831807300448418 2023-01-24 04:53:35.282016: step: 540/464, loss: 0.011251897551119328 2023-01-24 04:53:35.891610: step: 542/464, loss: 0.020211560651659966 2023-01-24 04:53:36.692468: step: 544/464, loss: 0.010223829187452793 2023-01-24 04:53:37.280597: step: 546/464, loss: 0.028799928724765778 2023-01-24 04:53:37.910694: step: 548/464, loss: 0.004515249282121658 2023-01-24 04:53:38.494771: step: 550/464, loss: 0.0034398529678583145 2023-01-24 04:53:39.135213: step: 552/464, loss: 0.00010570652375463396 2023-01-24 04:53:39.732349: step: 554/464, loss: 0.01961778849363327 2023-01-24 04:53:40.360214: step: 556/464, loss: 0.7934921383857727 2023-01-24 04:53:41.010260: step: 558/464, loss: 0.007351873442530632 2023-01-24 04:53:41.672190: step: 560/464, loss: 0.02307305485010147 2023-01-24 04:53:42.352115: step: 562/464, loss: 3.743881461559795e-05 2023-01-24 04:53:42.927675: step: 564/464, loss: 0.0005810451111756265 2023-01-24 04:53:43.549293: step: 566/464, loss: 0.008573451079428196 2023-01-24 04:53:44.177883: step: 568/464, loss: 0.0031232843175530434 2023-01-24 04:53:44.784625: step: 570/464, loss: 0.06860752403736115 2023-01-24 04:53:45.416864: step: 572/464, loss: 0.005422333255410194 2023-01-24 04:53:45.978568: step: 574/464, loss: 0.0013447273522615433 2023-01-24 04:53:46.604516: step: 576/464, loss: 0.01279196422547102 2023-01-24 04:53:47.344700: step: 578/464, loss: 0.1749306619167328 2023-01-24 04:53:47.939650: step: 580/464, loss: 0.018388798460364342 2023-01-24 04:53:48.567682: step: 582/464, loss: 0.0017691449029371142 2023-01-24 04:53:49.136681: step: 584/464, loss: 0.6445823907852173 2023-01-24 04:53:49.737233: step: 586/464, loss: 0.0028402141761034727 2023-01-24 04:53:50.434541: step: 588/464, loss: 0.0070165046490728855 2023-01-24 04:53:51.086452: step: 590/464, loss: 0.016421951353549957 2023-01-24 04:53:51.708151: step: 592/464, loss: 0.06449508666992188 2023-01-24 04:53:52.339421: step: 594/464, loss: 0.022456709295511246 2023-01-24 04:53:52.930526: step: 596/464, loss: 0.024202289059758186 2023-01-24 04:53:53.621731: step: 598/464, loss: 0.0032440153881907463 2023-01-24 04:53:54.235128: step: 600/464, loss: 0.0017527195159345865 2023-01-24 04:53:54.841742: step: 602/464, loss: 0.023841669782996178 2023-01-24 04:53:55.449478: step: 604/464, loss: 0.04659492149949074 2023-01-24 04:53:55.972626: step: 606/464, loss: 6.184981612022966e-05 2023-01-24 04:53:56.583812: step: 608/464, loss: 0.003048345912247896 2023-01-24 04:53:57.241861: step: 610/464, loss: 0.018488649278879166 2023-01-24 04:53:57.771817: step: 612/464, loss: 0.023030301555991173 2023-01-24 04:53:58.351493: step: 614/464, loss: 0.00235570571385324 2023-01-24 04:53:59.011913: step: 616/464, loss: 0.0018681371584534645 2023-01-24 04:53:59.596491: step: 618/464, loss: 0.018395736813545227 2023-01-24 04:54:00.213887: step: 620/464, loss: 0.0008612312958575785 2023-01-24 04:54:00.827182: step: 622/464, loss: 0.030212650075554848 2023-01-24 04:54:01.424429: step: 624/464, loss: 0.00812254473567009 2023-01-24 04:54:02.091097: step: 626/464, loss: 0.03114047646522522 2023-01-24 04:54:02.683333: step: 628/464, loss: 0.000972464622464031 2023-01-24 04:54:03.320697: step: 630/464, loss: 0.0186289194971323 2023-01-24 04:54:03.942038: step: 632/464, loss: 0.017052393406629562 2023-01-24 04:54:04.713859: step: 634/464, loss: 0.009422325529158115 2023-01-24 04:54:05.405028: step: 636/464, loss: 0.002550107426941395 2023-01-24 04:54:06.022322: step: 638/464, loss: 0.001601535128429532 2023-01-24 04:54:06.681699: step: 640/464, loss: 0.03888073191046715 2023-01-24 04:54:07.302375: step: 642/464, loss: 0.0026565822772681713 2023-01-24 04:54:07.895399: step: 644/464, loss: 0.0016079742927104235 2023-01-24 04:54:08.603883: step: 646/464, loss: 0.09242173284292221 2023-01-24 04:54:09.260514: step: 648/464, loss: 0.0007898823241703212 2023-01-24 04:54:09.979588: step: 650/464, loss: 0.0019716075621545315 2023-01-24 04:54:10.606300: step: 652/464, loss: 0.0017185850301757455 2023-01-24 04:54:11.222049: step: 654/464, loss: 0.13222934305667877 2023-01-24 04:54:11.879048: step: 656/464, loss: 0.01925063319504261 2023-01-24 04:54:12.512859: step: 658/464, loss: 0.012142672203481197 2023-01-24 04:54:13.146337: step: 660/464, loss: 0.0601552352309227 2023-01-24 04:54:13.730253: step: 662/464, loss: 0.012354286387562752 2023-01-24 04:54:14.417448: step: 664/464, loss: 0.03817038610577583 2023-01-24 04:54:14.994879: step: 666/464, loss: 0.0015115304850041866 2023-01-24 04:54:15.594169: step: 668/464, loss: 0.042541444301605225 2023-01-24 04:54:16.196535: step: 670/464, loss: 8.507548773195595e-05 2023-01-24 04:54:16.812416: step: 672/464, loss: 0.19663390517234802 2023-01-24 04:54:17.444181: step: 674/464, loss: 0.004419144243001938 2023-01-24 04:54:18.155029: step: 676/464, loss: 0.008868024684488773 2023-01-24 04:54:18.713643: step: 678/464, loss: 0.0003636557958088815 2023-01-24 04:54:19.347429: step: 680/464, loss: 0.03782545030117035 2023-01-24 04:54:19.935405: step: 682/464, loss: 0.008933094330132008 2023-01-24 04:54:20.584384: step: 684/464, loss: 0.07945708185434341 2023-01-24 04:54:21.251374: step: 686/464, loss: 0.006608300376683474 2023-01-24 04:54:21.928220: step: 688/464, loss: 0.05401575192809105 2023-01-24 04:54:22.548403: step: 690/464, loss: 0.0007566389977000654 2023-01-24 04:54:23.114166: step: 692/464, loss: 0.006353262811899185 2023-01-24 04:54:23.725418: step: 694/464, loss: 0.008637607097625732 2023-01-24 04:54:24.379683: step: 696/464, loss: 0.026068687438964844 2023-01-24 04:54:25.019400: step: 698/464, loss: 0.0015986430225893855 2023-01-24 04:54:25.670666: step: 700/464, loss: 0.001187260844744742 2023-01-24 04:54:26.230423: step: 702/464, loss: 0.001432645134627819 2023-01-24 04:54:26.944112: step: 704/464, loss: 0.02928977645933628 2023-01-24 04:54:27.583871: step: 706/464, loss: 0.009631271474063396 2023-01-24 04:54:28.250168: step: 708/464, loss: 0.041873540729284286 2023-01-24 04:54:28.876023: step: 710/464, loss: 0.0003186961112078279 2023-01-24 04:54:29.462916: step: 712/464, loss: 0.000907586480025202 2023-01-24 04:54:30.222262: step: 714/464, loss: 0.0038270133081823587 2023-01-24 04:54:30.866227: step: 716/464, loss: 0.14580540359020233 2023-01-24 04:54:31.488458: step: 718/464, loss: 0.004005967639386654 2023-01-24 04:54:32.055314: step: 720/464, loss: 0.004942305386066437 2023-01-24 04:54:32.628273: step: 722/464, loss: 0.0007397461449727416 2023-01-24 04:54:33.240449: step: 724/464, loss: 0.034463461488485336 2023-01-24 04:54:33.900741: step: 726/464, loss: 0.019920919090509415 2023-01-24 04:54:34.529372: step: 728/464, loss: 0.023921029642224312 2023-01-24 04:54:35.139173: step: 730/464, loss: 0.02836955524981022 2023-01-24 04:54:35.730448: step: 732/464, loss: 0.0049094269052147865 2023-01-24 04:54:36.302676: step: 734/464, loss: 0.023686110973358154 2023-01-24 04:54:36.868071: step: 736/464, loss: 0.22229552268981934 2023-01-24 04:54:37.468875: step: 738/464, loss: 0.038047630339860916 2023-01-24 04:54:38.080345: step: 740/464, loss: 8.502315904479474e-05 2023-01-24 04:54:38.752491: step: 742/464, loss: 0.004070245660841465 2023-01-24 04:54:39.377138: step: 744/464, loss: 0.0008259325986728072 2023-01-24 04:54:40.071251: step: 746/464, loss: 0.03511256352066994 2023-01-24 04:54:40.728148: step: 748/464, loss: 0.02191769890487194 2023-01-24 04:54:41.364761: step: 750/464, loss: 0.01094724703580141 2023-01-24 04:54:42.110124: step: 752/464, loss: 0.04792044311761856 2023-01-24 04:54:42.751897: step: 754/464, loss: 0.1264556348323822 2023-01-24 04:54:43.343058: step: 756/464, loss: 0.0019065093947574496 2023-01-24 04:54:43.929422: step: 758/464, loss: 0.0019241455011069775 2023-01-24 04:54:44.525734: step: 760/464, loss: 0.001199294812977314 2023-01-24 04:54:45.213898: step: 762/464, loss: 0.034956250339746475 2023-01-24 04:54:45.863393: step: 764/464, loss: 0.19993415474891663 2023-01-24 04:54:46.443094: step: 766/464, loss: 0.06722612679004669 2023-01-24 04:54:47.065134: step: 768/464, loss: 0.009393088519573212 2023-01-24 04:54:47.696705: step: 770/464, loss: 0.008216023445129395 2023-01-24 04:54:48.322719: step: 772/464, loss: 0.10442720353603363 2023-01-24 04:54:48.990687: step: 774/464, loss: 0.028010781854391098 2023-01-24 04:54:49.556425: step: 776/464, loss: 0.0024207436945289373 2023-01-24 04:54:50.128312: step: 778/464, loss: 0.05850352719426155 2023-01-24 04:54:50.706651: step: 780/464, loss: 0.01725189760327339 2023-01-24 04:54:51.329999: step: 782/464, loss: 0.028750715777277946 2023-01-24 04:54:51.920322: step: 784/464, loss: 0.0005509581533260643 2023-01-24 04:54:52.481220: step: 786/464, loss: 0.0018569778185337782 2023-01-24 04:54:53.124619: step: 788/464, loss: 0.11752443015575409 2023-01-24 04:54:53.776668: step: 790/464, loss: 0.010551149025559425 2023-01-24 04:54:54.375901: step: 792/464, loss: 0.0026742308400571346 2023-01-24 04:54:54.970447: step: 794/464, loss: 0.005357430782169104 2023-01-24 04:54:55.576236: step: 796/464, loss: 0.3049858808517456 2023-01-24 04:54:56.198497: step: 798/464, loss: 0.020999440923333168 2023-01-24 04:54:56.919472: step: 800/464, loss: 0.01579812727868557 2023-01-24 04:54:57.569749: step: 802/464, loss: 0.010090960189700127 2023-01-24 04:54:58.194000: step: 804/464, loss: 0.00029950885800644755 2023-01-24 04:54:58.832882: step: 806/464, loss: 0.7088912129402161 2023-01-24 04:54:59.409582: step: 808/464, loss: 0.00036309740971773863 2023-01-24 04:55:00.029075: step: 810/464, loss: 0.009281838312745094 2023-01-24 04:55:00.623987: step: 812/464, loss: 0.009021191857755184 2023-01-24 04:55:01.299348: step: 814/464, loss: 0.02642044611275196 2023-01-24 04:55:01.873510: step: 816/464, loss: 0.04272956773638725 2023-01-24 04:55:02.447054: step: 818/464, loss: 0.0760403648018837 2023-01-24 04:55:03.032427: step: 820/464, loss: 0.0499514602124691 2023-01-24 04:55:03.732365: step: 822/464, loss: 6.40268611907959 2023-01-24 04:55:04.328288: step: 824/464, loss: 0.003643059404566884 2023-01-24 04:55:04.953585: step: 826/464, loss: 0.0012509127845987678 2023-01-24 04:55:05.539228: step: 828/464, loss: 0.0003478997678030282 2023-01-24 04:55:06.152183: step: 830/464, loss: 0.03138204663991928 2023-01-24 04:55:06.722007: step: 832/464, loss: 0.012598642148077488 2023-01-24 04:55:07.359979: step: 834/464, loss: 0.4908383786678314 2023-01-24 04:55:07.951412: step: 836/464, loss: 0.0039227623492479324 2023-01-24 04:55:08.598783: step: 838/464, loss: 0.003043395932763815 2023-01-24 04:55:09.244493: step: 840/464, loss: 0.0032116002403199673 2023-01-24 04:55:09.895646: step: 842/464, loss: 0.014169511385262012 2023-01-24 04:55:10.556835: step: 844/464, loss: 0.3224925994873047 2023-01-24 04:55:11.232179: step: 846/464, loss: 0.019328588619828224 2023-01-24 04:55:11.922082: step: 848/464, loss: 0.005719814915210009 2023-01-24 04:55:12.484208: step: 850/464, loss: 0.00022499442275147885 2023-01-24 04:55:13.125805: step: 852/464, loss: 0.02572174370288849 2023-01-24 04:55:13.762232: step: 854/464, loss: 0.00021003466099500656 2023-01-24 04:55:14.422189: step: 856/464, loss: 0.023753268644213676 2023-01-24 04:55:15.118621: step: 858/464, loss: 0.005463962908834219 2023-01-24 04:55:15.767883: step: 860/464, loss: 0.007517958525568247 2023-01-24 04:55:16.357468: step: 862/464, loss: 0.00097902852576226 2023-01-24 04:55:16.959883: step: 864/464, loss: 0.011939832009375095 2023-01-24 04:55:17.589686: step: 866/464, loss: 0.002941383281722665 2023-01-24 04:55:18.253943: step: 868/464, loss: 0.04505275562405586 2023-01-24 04:55:18.898659: step: 870/464, loss: 0.007924825884401798 2023-01-24 04:55:19.466838: step: 872/464, loss: 0.0026531440671533346 2023-01-24 04:55:20.095346: step: 874/464, loss: 0.005883718375116587 2023-01-24 04:55:20.738039: step: 876/464, loss: 0.010706374421715736 2023-01-24 04:55:21.329653: step: 878/464, loss: 0.030511975288391113 2023-01-24 04:55:21.899094: step: 880/464, loss: 0.0035022543743252754 2023-01-24 04:55:22.482929: step: 882/464, loss: 0.015873609110713005 2023-01-24 04:55:23.064027: step: 884/464, loss: 0.02552478015422821 2023-01-24 04:55:23.700243: step: 886/464, loss: 0.0021093592513352633 2023-01-24 04:55:24.296660: step: 888/464, loss: 0.003897372866049409 2023-01-24 04:55:24.881251: step: 890/464, loss: 0.03527354449033737 2023-01-24 04:55:25.516128: step: 892/464, loss: 0.020696407184004784 2023-01-24 04:55:26.031300: step: 894/464, loss: 0.002208180958405137 2023-01-24 04:55:26.630800: step: 896/464, loss: 0.0005641351453959942 2023-01-24 04:55:27.306478: step: 898/464, loss: 0.0031764640007168055 2023-01-24 04:55:27.897481: step: 900/464, loss: 0.01316324807703495 2023-01-24 04:55:28.435677: step: 902/464, loss: 0.0003927868092432618 2023-01-24 04:55:29.043825: step: 904/464, loss: 0.002863897942006588 2023-01-24 04:55:29.667758: step: 906/464, loss: 0.06512665748596191 2023-01-24 04:55:30.346132: step: 908/464, loss: 0.00653707142919302 2023-01-24 04:55:30.950936: step: 910/464, loss: 0.029721427708864212 2023-01-24 04:55:31.580388: step: 912/464, loss: 0.07226139307022095 2023-01-24 04:55:32.273222: step: 914/464, loss: 0.06094507500529289 2023-01-24 04:55:32.907287: step: 916/464, loss: 0.006911165080964565 2023-01-24 04:55:33.486993: step: 918/464, loss: 0.002758361166343093 2023-01-24 04:55:34.169355: step: 920/464, loss: 0.03506612032651901 2023-01-24 04:55:34.747253: step: 922/464, loss: 0.00020291132386773825 2023-01-24 04:55:35.332068: step: 924/464, loss: 0.023218167945742607 2023-01-24 04:55:35.969575: step: 926/464, loss: 0.003805319545790553 2023-01-24 04:55:36.644309: step: 928/464, loss: 0.011131572537124157 2023-01-24 04:55:37.117658: step: 930/464, loss: 0.0022440047468990088 ================================================== Loss: 0.065 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32213064713064715, 'r': 0.3337444655281468, 'f1': 0.32783473128300716}, 'combined': 0.24156243357695265, 'epoch': 33} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3293375244665162, 'r': 0.307563142683606, 'f1': 0.3180781219206524}, 'combined': 0.20765721949223936, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3267730110336818, 'r': 0.35591595509171414, 'f1': 0.34072244928852563}, 'combined': 0.2510586468441768, 'epoch': 33} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.34374611535924166, 'r': 0.31060255051744146, 'f1': 0.3263349517737518}, 'combined': 0.21304768872276025, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33955095978062155, 'r': 0.3524371442125237, 'f1': 0.3458740689013035}, 'combined': 0.254854577085171, 'epoch': 33} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33916202155992653, 'r': 0.2967667688649357, 'f1': 0.31655122012259806}, 'combined': 0.20666038204895001, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24913194444444442, 'r': 0.3416666666666666, 'f1': 0.28815261044176704}, 'combined': 0.19210174029451135, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3014705882352941, 'r': 0.44565217391304346, 'f1': 0.3596491228070175}, 'combined': 0.17982456140350875, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5454545454545454, 'r': 0.20689655172413793, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33601917365478684, 'r': 0.3041387966476913, 'f1': 0.31928515106241695}, 'combined': 0.23526274288809668, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33420530338058735, 'r': 0.2679774634446177, 'f1': 0.2974495266340461}, 'combined': 0.19418984640357415, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.45652173913043476, 'f1': 0.39622641509433965}, 'combined': 0.19811320754716982, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:58:13.921689: step: 2/464, loss: 0.029898690059781075 2023-01-24 04:58:14.496311: step: 4/464, loss: 0.00015233525482472032 2023-01-24 04:58:15.078297: step: 6/464, loss: 0.008884308859705925 2023-01-24 04:58:15.655127: step: 8/464, loss: 0.000584449153393507 2023-01-24 04:58:16.322924: step: 10/464, loss: 0.004273217637091875 2023-01-24 04:58:16.975329: step: 12/464, loss: 0.0005258715245872736 2023-01-24 04:58:17.572568: step: 14/464, loss: 0.001208961708471179 2023-01-24 04:58:18.210542: step: 16/464, loss: 0.004396416246891022 2023-01-24 04:58:18.826199: step: 18/464, loss: 0.026321645826101303 2023-01-24 04:58:19.454193: step: 20/464, loss: 0.0018481980077922344 2023-01-24 04:58:20.081786: step: 22/464, loss: 0.0054297191090881824 2023-01-24 04:58:20.712100: step: 24/464, loss: 0.01054367981851101 2023-01-24 04:58:21.364818: step: 26/464, loss: 0.005763222463428974 2023-01-24 04:58:21.906845: step: 28/464, loss: 0.871414840221405 2023-01-24 04:58:22.548192: step: 30/464, loss: 0.00016669723845552653 2023-01-24 04:58:23.160557: step: 32/464, loss: 0.03802872449159622 2023-01-24 04:58:23.755427: step: 34/464, loss: 0.02912573330104351 2023-01-24 04:58:24.386305: step: 36/464, loss: 0.00691071804612875 2023-01-24 04:58:24.987054: step: 38/464, loss: 0.006158178672194481 2023-01-24 04:58:25.616470: step: 40/464, loss: 0.002746821381151676 2023-01-24 04:58:26.190038: step: 42/464, loss: 0.006122718099504709 2023-01-24 04:58:26.831197: step: 44/464, loss: 0.0004740080039482564 2023-01-24 04:58:27.417486: step: 46/464, loss: 0.005896700546145439 2023-01-24 04:58:28.041383: step: 48/464, loss: 0.0034731540363281965 2023-01-24 04:58:28.695388: step: 50/464, loss: 0.0029471227899193764 2023-01-24 04:58:29.292327: step: 52/464, loss: 0.009528739377856255 2023-01-24 04:58:29.979384: step: 54/464, loss: 0.004815476015210152 2023-01-24 04:58:30.608884: step: 56/464, loss: 0.004503963980823755 2023-01-24 04:58:31.221628: step: 58/464, loss: 0.0020536768715828657 2023-01-24 04:58:31.841453: step: 60/464, loss: 0.03384426608681679 2023-01-24 04:58:32.471140: step: 62/464, loss: 0.002092523267492652 2023-01-24 04:58:33.131039: step: 64/464, loss: 0.04285869002342224 2023-01-24 04:58:33.737944: step: 66/464, loss: 0.014329276978969574 2023-01-24 04:58:34.389912: step: 68/464, loss: 0.02071165293455124 2023-01-24 04:58:34.978264: step: 70/464, loss: 0.00439227931201458 2023-01-24 04:58:35.586577: step: 72/464, loss: 0.006787712220102549 2023-01-24 04:58:36.218310: step: 74/464, loss: 0.009815738536417484 2023-01-24 04:58:36.814104: step: 76/464, loss: 0.010096452198922634 2023-01-24 04:58:37.425361: step: 78/464, loss: 0.010066819377243519 2023-01-24 04:58:38.056491: step: 80/464, loss: 0.0009568737586960196 2023-01-24 04:58:38.770976: step: 82/464, loss: 0.02412015199661255 2023-01-24 04:58:39.291919: step: 84/464, loss: 0.00012044959294144064 2023-01-24 04:58:39.884524: step: 86/464, loss: 0.0015337973600253463 2023-01-24 04:58:40.559249: step: 88/464, loss: 2.098287723129033e-06 2023-01-24 04:58:41.114517: step: 90/464, loss: 0.00764232175424695 2023-01-24 04:58:41.725699: step: 92/464, loss: 0.015353173948824406 2023-01-24 04:58:42.344935: step: 94/464, loss: 0.020151887089014053 2023-01-24 04:58:43.086246: step: 96/464, loss: 0.014923064969480038 2023-01-24 04:58:43.742964: step: 98/464, loss: 0.0006783062126487494 2023-01-24 04:58:44.381111: step: 100/464, loss: 0.0035995738580822945 2023-01-24 04:58:44.970756: step: 102/464, loss: 0.0024014213122427464 2023-01-24 04:58:45.646651: step: 104/464, loss: 0.002425145823508501 2023-01-24 04:58:46.259326: step: 106/464, loss: 0.004979377146810293 2023-01-24 04:58:46.890036: step: 108/464, loss: 0.04373719543218613 2023-01-24 04:58:47.608822: step: 110/464, loss: 0.03467360511422157 2023-01-24 04:58:48.204770: step: 112/464, loss: 0.0004658075049519539 2023-01-24 04:58:48.795557: step: 114/464, loss: 3.7881338357692584e-05 2023-01-24 04:58:49.477535: step: 116/464, loss: 0.5974915623664856 2023-01-24 04:58:50.068703: step: 118/464, loss: 8.322542998939753e-05 2023-01-24 04:58:50.620221: step: 120/464, loss: 0.02062365785241127 2023-01-24 04:58:51.283560: step: 122/464, loss: 0.008859731256961823 2023-01-24 04:58:51.955397: step: 124/464, loss: 0.0002515219384804368 2023-01-24 04:58:52.589656: step: 126/464, loss: 0.028871677815914154 2023-01-24 04:58:53.170887: step: 128/464, loss: 0.0009980584727600217 2023-01-24 04:58:53.833128: step: 130/464, loss: 0.007541639730334282 2023-01-24 04:58:54.451365: step: 132/464, loss: 0.009181492030620575 2023-01-24 04:58:55.099785: step: 134/464, loss: 0.002932182513177395 2023-01-24 04:58:55.743979: step: 136/464, loss: 0.002279211301356554 2023-01-24 04:58:56.337178: step: 138/464, loss: 0.01624910905957222 2023-01-24 04:58:56.981688: step: 140/464, loss: 0.0007104272372089326 2023-01-24 04:58:57.617161: step: 142/464, loss: 0.0013539748033508658 2023-01-24 04:58:58.267715: step: 144/464, loss: 0.4728025794029236 2023-01-24 04:58:58.920684: step: 146/464, loss: 0.0043846312910318375 2023-01-24 04:58:59.608022: step: 148/464, loss: 0.00045613080146722496 2023-01-24 04:59:00.285674: step: 150/464, loss: 0.014981592074036598 2023-01-24 04:59:00.885011: step: 152/464, loss: 0.0012546187499538064 2023-01-24 04:59:01.492435: step: 154/464, loss: 0.015420163981616497 2023-01-24 04:59:02.111482: step: 156/464, loss: 0.006943312007933855 2023-01-24 04:59:02.748137: step: 158/464, loss: 0.039076171815395355 2023-01-24 04:59:03.298900: step: 160/464, loss: 9.373047214467078e-05 2023-01-24 04:59:03.893894: step: 162/464, loss: 0.0021961224265396595 2023-01-24 04:59:04.535075: step: 164/464, loss: 0.13457642495632172 2023-01-24 04:59:05.170591: step: 166/464, loss: 0.06285510957241058 2023-01-24 04:59:05.826694: step: 168/464, loss: 0.05896016210317612 2023-01-24 04:59:06.440905: step: 170/464, loss: 0.20885112881660461 2023-01-24 04:59:07.068506: step: 172/464, loss: 0.0013530661817640066 2023-01-24 04:59:07.737956: step: 174/464, loss: 0.005364841315895319 2023-01-24 04:59:08.382511: step: 176/464, loss: 0.004437359981238842 2023-01-24 04:59:09.011937: step: 178/464, loss: 0.012415021657943726 2023-01-24 04:59:09.657665: step: 180/464, loss: 0.030497750267386436 2023-01-24 04:59:10.270188: step: 182/464, loss: 0.017394132912158966 2023-01-24 04:59:10.854671: step: 184/464, loss: 0.15832246840000153 2023-01-24 04:59:11.565579: step: 186/464, loss: 0.0012430723290890455 2023-01-24 04:59:12.281231: step: 188/464, loss: 0.008626168593764305 2023-01-24 04:59:12.968176: step: 190/464, loss: 0.3963608145713806 2023-01-24 04:59:13.561824: step: 192/464, loss: 0.015922117978334427 2023-01-24 04:59:14.234398: step: 194/464, loss: 0.1013675332069397 2023-01-24 04:59:14.927714: step: 196/464, loss: 0.007717953063547611 2023-01-24 04:59:15.609975: step: 198/464, loss: 0.058719441294670105 2023-01-24 04:59:16.198631: step: 200/464, loss: 0.0013162650866433978 2023-01-24 04:59:16.813562: step: 202/464, loss: 0.005683231167495251 2023-01-24 04:59:17.426669: step: 204/464, loss: 0.0007428164826706052 2023-01-24 04:59:18.085869: step: 206/464, loss: 0.06769642233848572 2023-01-24 04:59:18.871904: step: 208/464, loss: 0.0025914330035448074 2023-01-24 04:59:19.459384: step: 210/464, loss: 0.002708293031901121 2023-01-24 04:59:20.073979: step: 212/464, loss: 0.031754009425640106 2023-01-24 04:59:20.778991: step: 214/464, loss: 0.08462861180305481 2023-01-24 04:59:21.352027: step: 216/464, loss: 0.0007086574914865196 2023-01-24 04:59:22.001633: step: 218/464, loss: 0.02312740683555603 2023-01-24 04:59:22.656616: step: 220/464, loss: 0.003332710824906826 2023-01-24 04:59:23.332884: step: 222/464, loss: 0.05921299010515213 2023-01-24 04:59:23.939760: step: 224/464, loss: 0.008811285719275475 2023-01-24 04:59:24.577929: step: 226/464, loss: 0.1366608589887619 2023-01-24 04:59:25.203541: step: 228/464, loss: 0.08018012344837189 2023-01-24 04:59:25.860711: step: 230/464, loss: 0.003253837348893285 2023-01-24 04:59:26.423819: step: 232/464, loss: 0.0008934770012274384 2023-01-24 04:59:27.026965: step: 234/464, loss: 0.04704802855849266 2023-01-24 04:59:27.664063: step: 236/464, loss: 0.0060298205353319645 2023-01-24 04:59:28.287832: step: 238/464, loss: 0.0531141459941864 2023-01-24 04:59:28.951204: step: 240/464, loss: 8.777321636443958e-05 2023-01-24 04:59:29.563060: step: 242/464, loss: 0.0010361942695453763 2023-01-24 04:59:30.178955: step: 244/464, loss: 0.7005062699317932 2023-01-24 04:59:30.852333: step: 246/464, loss: 0.007535006385296583 2023-01-24 04:59:31.515899: step: 248/464, loss: 0.01733449101448059 2023-01-24 04:59:32.100742: step: 250/464, loss: 0.04061458632349968 2023-01-24 04:59:32.717886: step: 252/464, loss: 0.03368212282657623 2023-01-24 04:59:33.334721: step: 254/464, loss: 0.0033325874246656895 2023-01-24 04:59:33.950928: step: 256/464, loss: 0.010388418100774288 2023-01-24 04:59:34.571213: step: 258/464, loss: 0.03565354645252228 2023-01-24 04:59:35.201930: step: 260/464, loss: 0.03277277201414108 2023-01-24 04:59:35.779238: step: 262/464, loss: 0.004038808401674032 2023-01-24 04:59:36.379884: step: 264/464, loss: 0.0038243993185460567 2023-01-24 04:59:37.001042: step: 266/464, loss: 0.03558727353811264 2023-01-24 04:59:37.557686: step: 268/464, loss: 0.0010459619807079434 2023-01-24 04:59:38.123931: step: 270/464, loss: 0.0026864504907280207 2023-01-24 04:59:38.749170: step: 272/464, loss: 0.00539025804027915 2023-01-24 04:59:39.312214: step: 274/464, loss: 0.11854858696460724 2023-01-24 04:59:39.929845: step: 276/464, loss: 0.02091136947274208 2023-01-24 04:59:40.585629: step: 278/464, loss: 0.43733465671539307 2023-01-24 04:59:41.212598: step: 280/464, loss: 0.07855101674795151 2023-01-24 04:59:41.770935: step: 282/464, loss: 0.000969623273704201 2023-01-24 04:59:42.400942: step: 284/464, loss: 0.0034854025579988956 2023-01-24 04:59:43.087125: step: 286/464, loss: 0.002548051765188575 2023-01-24 04:59:43.688048: step: 288/464, loss: 0.00924122054129839 2023-01-24 04:59:44.307480: step: 290/464, loss: 0.019163407385349274 2023-01-24 04:59:44.943969: step: 292/464, loss: 0.03953166678547859 2023-01-24 04:59:45.548822: step: 294/464, loss: 0.01325959898531437 2023-01-24 04:59:46.178888: step: 296/464, loss: 0.003425328526645899 2023-01-24 04:59:46.843370: step: 298/464, loss: 0.007728288881480694 2023-01-24 04:59:47.505018: step: 300/464, loss: 0.05159435048699379 2023-01-24 04:59:48.175953: step: 302/464, loss: 0.010436614975333214 2023-01-24 04:59:48.787547: step: 304/464, loss: 0.00015244621317833662 2023-01-24 04:59:49.439192: step: 306/464, loss: 0.01785079762339592 2023-01-24 04:59:50.070464: step: 308/464, loss: 0.006395932752639055 2023-01-24 04:59:50.724952: step: 310/464, loss: 0.0019831436220556498 2023-01-24 04:59:51.345018: step: 312/464, loss: 0.002111723180860281 2023-01-24 04:59:51.873787: step: 314/464, loss: 0.015801504254341125 2023-01-24 04:59:52.485472: step: 316/464, loss: 0.0014175876276567578 2023-01-24 04:59:53.132894: step: 318/464, loss: 0.004971159156411886 2023-01-24 04:59:53.785211: step: 320/464, loss: 0.022550541907548904 2023-01-24 04:59:54.431205: step: 322/464, loss: 0.07797756046056747 2023-01-24 04:59:55.017492: step: 324/464, loss: 0.0010559335350990295 2023-01-24 04:59:55.588313: step: 326/464, loss: 0.026913126930594444 2023-01-24 04:59:56.200382: step: 328/464, loss: 0.010977164842188358 2023-01-24 04:59:56.791747: step: 330/464, loss: 0.0040641240775585175 2023-01-24 04:59:57.336845: step: 332/464, loss: 0.013235529884696007 2023-01-24 04:59:57.975273: step: 334/464, loss: 0.03829799219965935 2023-01-24 04:59:58.647245: step: 336/464, loss: 0.001352404011413455 2023-01-24 04:59:59.276847: step: 338/464, loss: 0.002952033653855324 2023-01-24 04:59:59.899450: step: 340/464, loss: 0.0017866799607872963 2023-01-24 05:00:00.526938: step: 342/464, loss: 0.0002513266808819026 2023-01-24 05:00:01.165710: step: 344/464, loss: 0.035913605242967606 2023-01-24 05:00:01.805046: step: 346/464, loss: 0.04931747168302536 2023-01-24 05:00:02.359482: step: 348/464, loss: 0.0003991990233771503 2023-01-24 05:00:02.994798: step: 350/464, loss: 0.02780473604798317 2023-01-24 05:00:03.596430: step: 352/464, loss: 0.0005611433298327029 2023-01-24 05:00:04.231771: step: 354/464, loss: 0.0004125482519157231 2023-01-24 05:00:04.816682: step: 356/464, loss: 0.03075682371854782 2023-01-24 05:00:05.412048: step: 358/464, loss: 0.0006990233669057488 2023-01-24 05:00:06.038554: step: 360/464, loss: 1.3560085790231824e-05 2023-01-24 05:00:06.706882: step: 362/464, loss: 0.0019139517098665237 2023-01-24 05:00:07.313255: step: 364/464, loss: 0.013786913827061653 2023-01-24 05:00:07.875032: step: 366/464, loss: 0.0009525257046334445 2023-01-24 05:00:08.440612: step: 368/464, loss: 0.001372171682305634 2023-01-24 05:00:09.036232: step: 370/464, loss: 0.005218212027102709 2023-01-24 05:00:09.686671: step: 372/464, loss: 0.02613021992146969 2023-01-24 05:00:10.297348: step: 374/464, loss: 0.005402700509876013 2023-01-24 05:00:10.889789: step: 376/464, loss: 0.020430030301213264 2023-01-24 05:00:11.490975: step: 378/464, loss: 0.007723488844931126 2023-01-24 05:00:12.137833: step: 380/464, loss: 0.0007314560352824628 2023-01-24 05:00:12.731440: step: 382/464, loss: 0.008527128025889397 2023-01-24 05:00:13.359838: step: 384/464, loss: 0.008409584872424603 2023-01-24 05:00:14.000490: step: 386/464, loss: 0.17246656119823456 2023-01-24 05:00:14.612465: step: 388/464, loss: 0.0072148642502725124 2023-01-24 05:00:15.252227: step: 390/464, loss: 0.005247201304882765 2023-01-24 05:00:15.809840: step: 392/464, loss: 9.893503010971472e-05 2023-01-24 05:00:16.365928: step: 394/464, loss: 0.0018716860795393586 2023-01-24 05:00:17.027736: step: 396/464, loss: 0.028478436172008514 2023-01-24 05:00:17.618160: step: 398/464, loss: 0.004302097950130701 2023-01-24 05:00:18.253128: step: 400/464, loss: 0.02306191623210907 2023-01-24 05:00:18.873249: step: 402/464, loss: 0.018625525757670403 2023-01-24 05:00:19.474842: step: 404/464, loss: 0.008604537695646286 2023-01-24 05:00:20.063364: step: 406/464, loss: 0.0012301687384024262 2023-01-24 05:00:20.694690: step: 408/464, loss: 0.0857953205704689 2023-01-24 05:00:21.294023: step: 410/464, loss: 0.000577143975533545 2023-01-24 05:00:21.886175: step: 412/464, loss: 0.034847501665353775 2023-01-24 05:00:22.490718: step: 414/464, loss: 0.11727949231863022 2023-01-24 05:00:23.091246: step: 416/464, loss: 0.0004280091670807451 2023-01-24 05:00:23.700968: step: 418/464, loss: 0.03354734182357788 2023-01-24 05:00:24.345001: step: 420/464, loss: 0.006127386353909969 2023-01-24 05:00:24.997211: step: 422/464, loss: 0.04086967185139656 2023-01-24 05:00:25.616499: step: 424/464, loss: 0.00029856053879484534 2023-01-24 05:00:26.299254: step: 426/464, loss: 0.06409810483455658 2023-01-24 05:00:26.934994: step: 428/464, loss: 0.00031498592579737306 2023-01-24 05:00:27.493232: step: 430/464, loss: 0.01463546697050333 2023-01-24 05:00:28.150968: step: 432/464, loss: 0.018200265243649483 2023-01-24 05:00:28.684026: step: 434/464, loss: 0.0031720134429633617 2023-01-24 05:00:29.300464: step: 436/464, loss: 0.0008702730992808938 2023-01-24 05:00:29.980263: step: 438/464, loss: 0.012718631885945797 2023-01-24 05:00:30.602824: step: 440/464, loss: 0.01520999614149332 2023-01-24 05:00:31.203887: step: 442/464, loss: 0.006882749032229185 2023-01-24 05:00:31.779166: step: 444/464, loss: 0.020486094057559967 2023-01-24 05:00:32.446054: step: 446/464, loss: 0.011144982650876045 2023-01-24 05:00:33.055692: step: 448/464, loss: 0.08577293157577515 2023-01-24 05:00:33.648302: step: 450/464, loss: 0.0003010949585586786 2023-01-24 05:00:34.257384: step: 452/464, loss: 0.000813738617580384 2023-01-24 05:00:34.852319: step: 454/464, loss: 0.010779373347759247 2023-01-24 05:00:35.477543: step: 456/464, loss: 0.03919750824570656 2023-01-24 05:00:36.078323: step: 458/464, loss: 0.0032593016512691975 2023-01-24 05:00:36.684411: step: 460/464, loss: 0.015266706235706806 2023-01-24 05:00:37.344152: step: 462/464, loss: 0.008129923604428768 2023-01-24 05:00:37.969268: step: 464/464, loss: 0.002459451789036393 2023-01-24 05:00:38.529970: step: 466/464, loss: 0.008117524906992912 2023-01-24 05:00:39.169532: step: 468/464, loss: 0.014871403574943542 2023-01-24 05:00:39.772815: step: 470/464, loss: 0.03958430886268616 2023-01-24 05:00:40.335552: step: 472/464, loss: 0.018035847693681717 2023-01-24 05:00:40.979313: step: 474/464, loss: 0.02293417975306511 2023-01-24 05:00:41.556920: step: 476/464, loss: 7.562783139292151e-05 2023-01-24 05:00:42.185530: step: 478/464, loss: 0.012890690006315708 2023-01-24 05:00:42.813512: step: 480/464, loss: 0.017653323709964752 2023-01-24 05:00:43.460511: step: 482/464, loss: 0.0276399627327919 2023-01-24 05:00:44.045379: step: 484/464, loss: 0.0012498322175815701 2023-01-24 05:00:44.676981: step: 486/464, loss: 0.024069275707006454 2023-01-24 05:00:45.273240: step: 488/464, loss: 0.0013034387957304716 2023-01-24 05:00:45.911480: step: 490/464, loss: 0.021182270720601082 2023-01-24 05:00:46.489653: step: 492/464, loss: 0.016577089205384254 2023-01-24 05:00:47.130309: step: 494/464, loss: 0.0003162265056744218 2023-01-24 05:00:47.728318: step: 496/464, loss: 0.0013988955179229379 2023-01-24 05:00:48.353633: step: 498/464, loss: 0.0226691085845232 2023-01-24 05:00:48.970037: step: 500/464, loss: 0.013925151899456978 2023-01-24 05:00:49.542265: step: 502/464, loss: 0.005288612563163042 2023-01-24 05:00:50.140907: step: 504/464, loss: 0.013529365882277489 2023-01-24 05:00:50.787998: step: 506/464, loss: 0.007406800985336304 2023-01-24 05:00:51.402420: step: 508/464, loss: 0.006143561564385891 2023-01-24 05:00:52.126643: step: 510/464, loss: 0.017350686714053154 2023-01-24 05:00:52.729857: step: 512/464, loss: 0.08247264474630356 2023-01-24 05:00:53.390647: step: 514/464, loss: 0.028843006119132042 2023-01-24 05:00:53.998050: step: 516/464, loss: 0.00849137268960476 2023-01-24 05:00:54.610328: step: 518/464, loss: 0.0173712857067585 2023-01-24 05:00:55.285449: step: 520/464, loss: 0.005316558293998241 2023-01-24 05:00:55.911773: step: 522/464, loss: 0.00480164960026741 2023-01-24 05:00:56.522939: step: 524/464, loss: 0.007387985475361347 2023-01-24 05:00:57.135552: step: 526/464, loss: 0.6308395862579346 2023-01-24 05:00:57.706176: step: 528/464, loss: 0.007524359505623579 2023-01-24 05:00:58.305724: step: 530/464, loss: 0.0019702170975506306 2023-01-24 05:00:58.923022: step: 532/464, loss: 0.0017888193251565099 2023-01-24 05:00:59.540412: step: 534/464, loss: 0.0034075435250997543 2023-01-24 05:01:00.118639: step: 536/464, loss: 0.0363345593214035 2023-01-24 05:01:00.730388: step: 538/464, loss: 0.0026970033068209887 2023-01-24 05:01:01.364313: step: 540/464, loss: 0.0017541086999699473 2023-01-24 05:01:01.993571: step: 542/464, loss: 0.0018105398630723357 2023-01-24 05:01:02.593038: step: 544/464, loss: 0.0020671342499554157 2023-01-24 05:01:03.192129: step: 546/464, loss: 0.014645855873823166 2023-01-24 05:01:03.794712: step: 548/464, loss: 0.02025480754673481 2023-01-24 05:01:04.426413: step: 550/464, loss: 0.06476173549890518 2023-01-24 05:01:05.036482: step: 552/464, loss: 0.0009601297206245363 2023-01-24 05:01:05.639671: step: 554/464, loss: 0.005175075493752956 2023-01-24 05:01:06.271275: step: 556/464, loss: 0.0003164306690450758 2023-01-24 05:01:06.889274: step: 558/464, loss: 0.19691286981105804 2023-01-24 05:01:07.463489: step: 560/464, loss: 0.002686547813937068 2023-01-24 05:01:08.052099: step: 562/464, loss: 0.007883097976446152 2023-01-24 05:01:08.681383: step: 564/464, loss: 3.246891719754785e-05 2023-01-24 05:01:09.338156: step: 566/464, loss: 0.001068502082489431 2023-01-24 05:01:09.953326: step: 568/464, loss: 0.022433584555983543 2023-01-24 05:01:10.561660: step: 570/464, loss: 0.013481689617037773 2023-01-24 05:01:11.216555: step: 572/464, loss: 0.006244510877877474 2023-01-24 05:01:11.850586: step: 574/464, loss: 0.0014088694006204605 2023-01-24 05:01:12.468432: step: 576/464, loss: 0.0019497391767799854 2023-01-24 05:01:13.198286: step: 578/464, loss: 0.06185242161154747 2023-01-24 05:01:13.858022: step: 580/464, loss: 0.006675357930362225 2023-01-24 05:01:14.597001: step: 582/464, loss: 0.19827669858932495 2023-01-24 05:01:15.309757: step: 584/464, loss: 0.0012004076270386577 2023-01-24 05:01:15.914842: step: 586/464, loss: 0.02289130911231041 2023-01-24 05:01:16.512755: step: 588/464, loss: 0.0054329452104866505 2023-01-24 05:01:17.151077: step: 590/464, loss: 0.0020336357410997152 2023-01-24 05:01:17.679233: step: 592/464, loss: 0.0027439245022833347 2023-01-24 05:01:18.271967: step: 594/464, loss: 0.011579768732190132 2023-01-24 05:01:18.819994: step: 596/464, loss: 0.010270710103213787 2023-01-24 05:01:19.475860: step: 598/464, loss: 0.0049523417837917805 2023-01-24 05:01:20.048535: step: 600/464, loss: 0.003858374198898673 2023-01-24 05:01:20.692969: step: 602/464, loss: 0.06019090116024017 2023-01-24 05:01:21.228703: step: 604/464, loss: 0.010263212956488132 2023-01-24 05:01:21.817641: step: 606/464, loss: 0.0025204953271895647 2023-01-24 05:01:22.422652: step: 608/464, loss: 0.06136419251561165 2023-01-24 05:01:23.025821: step: 610/464, loss: 0.0016090819844976068 2023-01-24 05:01:23.742597: step: 612/464, loss: 0.016572527587413788 2023-01-24 05:01:24.411156: step: 614/464, loss: 0.5664610266685486 2023-01-24 05:01:25.052915: step: 616/464, loss: 0.04934962838888168 2023-01-24 05:01:25.687365: step: 618/464, loss: 0.054494671523571014 2023-01-24 05:01:26.321757: step: 620/464, loss: 0.00043719136738218367 2023-01-24 05:01:26.926561: step: 622/464, loss: 0.007431011646986008 2023-01-24 05:01:27.528006: step: 624/464, loss: 0.03331771120429039 2023-01-24 05:01:28.151497: step: 626/464, loss: 0.01933049038052559 2023-01-24 05:01:28.739388: step: 628/464, loss: 0.006974204443395138 2023-01-24 05:01:29.428005: step: 630/464, loss: 0.004200476221740246 2023-01-24 05:01:30.090464: step: 632/464, loss: 0.03736580163240433 2023-01-24 05:01:30.721138: step: 634/464, loss: 0.040448278188705444 2023-01-24 05:01:31.331057: step: 636/464, loss: 0.009122759103775024 2023-01-24 05:01:31.971336: step: 638/464, loss: 0.046271827071905136 2023-01-24 05:01:32.697692: step: 640/464, loss: 0.09109504520893097 2023-01-24 05:01:33.363770: step: 642/464, loss: 0.0010769476648420095 2023-01-24 05:01:33.955686: step: 644/464, loss: 0.019725728780031204 2023-01-24 05:01:34.528611: step: 646/464, loss: 0.015822215005755424 2023-01-24 05:01:35.137298: step: 648/464, loss: 0.05484939366579056 2023-01-24 05:01:35.824000: step: 650/464, loss: 0.141653910279274 2023-01-24 05:01:36.526897: step: 652/464, loss: 0.00035650088102556765 2023-01-24 05:01:37.137435: step: 654/464, loss: 0.011836215853691101 2023-01-24 05:01:37.715837: step: 656/464, loss: 0.0006070904200896621 2023-01-24 05:01:38.326589: step: 658/464, loss: 0.015699708834290504 2023-01-24 05:01:38.953541: step: 660/464, loss: 0.03450365364551544 2023-01-24 05:01:39.607414: step: 662/464, loss: 0.0009372765780426562 2023-01-24 05:01:40.241132: step: 664/464, loss: 0.0006784518482163548 2023-01-24 05:01:40.939571: step: 666/464, loss: 0.005149946082383394 2023-01-24 05:01:41.557314: step: 668/464, loss: 0.015397715382277966 2023-01-24 05:01:42.131403: step: 670/464, loss: 0.015877025201916695 2023-01-24 05:01:42.809561: step: 672/464, loss: 0.03446386754512787 2023-01-24 05:01:43.445939: step: 674/464, loss: 0.036043643951416016 2023-01-24 05:01:44.022245: step: 676/464, loss: 0.0029709930531680584 2023-01-24 05:01:44.695391: step: 678/464, loss: 0.15701141953468323 2023-01-24 05:01:45.297809: step: 680/464, loss: 0.04647189378738403 2023-01-24 05:01:45.951955: step: 682/464, loss: 0.008912712335586548 2023-01-24 05:01:46.539653: step: 684/464, loss: 0.00948739517480135 2023-01-24 05:01:47.165035: step: 686/464, loss: 0.0025628781877458096 2023-01-24 05:01:47.776496: step: 688/464, loss: 0.58965003490448 2023-01-24 05:01:48.326586: step: 690/464, loss: 0.4068011939525604 2023-01-24 05:01:48.980477: step: 692/464, loss: 0.011587106622755527 2023-01-24 05:01:49.615724: step: 694/464, loss: 0.00972069427371025 2023-01-24 05:01:50.227066: step: 696/464, loss: 0.0010438722092658281 2023-01-24 05:01:50.778676: step: 698/464, loss: 5.963250259810593e-06 2023-01-24 05:01:51.374132: step: 700/464, loss: 0.018201308324933052 2023-01-24 05:01:51.983146: step: 702/464, loss: 0.017866387963294983 2023-01-24 05:01:52.595362: step: 704/464, loss: 0.03244994580745697 2023-01-24 05:01:53.303899: step: 706/464, loss: 0.007438444998115301 2023-01-24 05:01:53.840558: step: 708/464, loss: 0.024666374549269676 2023-01-24 05:01:54.398331: step: 710/464, loss: 0.016314072534441948 2023-01-24 05:01:55.024709: step: 712/464, loss: 0.005182948894798756 2023-01-24 05:01:55.636685: step: 714/464, loss: 0.0046609812416136265 2023-01-24 05:01:56.309710: step: 716/464, loss: 0.008606769144535065 2023-01-24 05:01:56.952328: step: 718/464, loss: 0.003123520640656352 2023-01-24 05:01:57.609084: step: 720/464, loss: 0.005103525705635548 2023-01-24 05:01:58.256007: step: 722/464, loss: 0.010820210911333561 2023-01-24 05:01:58.894724: step: 724/464, loss: 0.014116911217570305 2023-01-24 05:01:59.457237: step: 726/464, loss: 0.0008885234128683805 2023-01-24 05:02:00.069374: step: 728/464, loss: 0.015137244015932083 2023-01-24 05:02:00.764444: step: 730/464, loss: 0.03720799833536148 2023-01-24 05:02:01.340074: step: 732/464, loss: 0.0055466219782829285 2023-01-24 05:02:01.959550: step: 734/464, loss: 0.026318326592445374 2023-01-24 05:02:02.508924: step: 736/464, loss: 0.0023751596454530954 2023-01-24 05:02:03.164646: step: 738/464, loss: 0.04240196943283081 2023-01-24 05:02:03.808513: step: 740/464, loss: 0.02381092496216297 2023-01-24 05:02:04.576522: step: 742/464, loss: 0.03649526461958885 2023-01-24 05:02:05.164975: step: 744/464, loss: 0.0008556586690247059 2023-01-24 05:02:05.813644: step: 746/464, loss: 0.045671314001083374 2023-01-24 05:02:06.449847: step: 748/464, loss: 0.005988705437630415 2023-01-24 05:02:07.040385: step: 750/464, loss: 0.06765050441026688 2023-01-24 05:02:07.636963: step: 752/464, loss: 0.2693063020706177 2023-01-24 05:02:08.220864: step: 754/464, loss: 0.0002042855485342443 2023-01-24 05:02:08.811468: step: 756/464, loss: 0.01689450442790985 2023-01-24 05:02:09.420353: step: 758/464, loss: 0.005171219818294048 2023-01-24 05:02:10.060008: step: 760/464, loss: 0.007994354702532291 2023-01-24 05:02:10.628150: step: 762/464, loss: 0.020884480327367783 2023-01-24 05:02:11.248677: step: 764/464, loss: 0.027513671666383743 2023-01-24 05:02:11.878040: step: 766/464, loss: 0.0021339538507163525 2023-01-24 05:02:12.517351: step: 768/464, loss: 0.03193753585219383 2023-01-24 05:02:13.142546: step: 770/464, loss: 0.049453891813755035 2023-01-24 05:02:13.773563: step: 772/464, loss: 0.0009148464305326343 2023-01-24 05:02:14.369855: step: 774/464, loss: 0.011001508682966232 2023-01-24 05:02:14.974315: step: 776/464, loss: 0.008015145547688007 2023-01-24 05:02:15.602591: step: 778/464, loss: 0.030116822570562363 2023-01-24 05:02:16.239333: step: 780/464, loss: 0.016844095662236214 2023-01-24 05:02:16.850324: step: 782/464, loss: 0.0012863740557804704 2023-01-24 05:02:17.453564: step: 784/464, loss: 0.029196161776781082 2023-01-24 05:02:18.053968: step: 786/464, loss: 0.002277803374454379 2023-01-24 05:02:18.676439: step: 788/464, loss: 0.010096283629536629 2023-01-24 05:02:19.275198: step: 790/464, loss: 0.0548112578690052 2023-01-24 05:02:19.875160: step: 792/464, loss: 0.013359840027987957 2023-01-24 05:02:20.547370: step: 794/464, loss: 0.006645455956459045 2023-01-24 05:02:21.180182: step: 796/464, loss: 0.0192551352083683 2023-01-24 05:02:21.809971: step: 798/464, loss: 0.009141849353909492 2023-01-24 05:02:22.424394: step: 800/464, loss: 0.006403472740203142 2023-01-24 05:02:23.062010: step: 802/464, loss: 0.02189779095351696 2023-01-24 05:02:23.694374: step: 804/464, loss: 0.0024047603365033865 2023-01-24 05:02:24.490669: step: 806/464, loss: 0.06709612160921097 2023-01-24 05:02:25.117986: step: 808/464, loss: 0.02736440673470497 2023-01-24 05:02:25.751432: step: 810/464, loss: 0.006112470757216215 2023-01-24 05:02:26.346923: step: 812/464, loss: 0.0044360077008605 2023-01-24 05:02:26.975906: step: 814/464, loss: 0.009985252283513546 2023-01-24 05:02:27.617521: step: 816/464, loss: 0.01186640840023756 2023-01-24 05:02:28.296889: step: 818/464, loss: 0.09793198108673096 2023-01-24 05:02:28.865631: step: 820/464, loss: 0.003989395219832659 2023-01-24 05:02:29.485367: step: 822/464, loss: 0.01700315810739994 2023-01-24 05:02:30.074212: step: 824/464, loss: 0.01287770178169012 2023-01-24 05:02:30.840240: step: 826/464, loss: 0.07658130675554276 2023-01-24 05:02:31.399744: step: 828/464, loss: 0.0006500289891846478 2023-01-24 05:02:32.083661: step: 830/464, loss: 0.037975821644067764 2023-01-24 05:02:32.711044: step: 832/464, loss: 0.003844544989988208 2023-01-24 05:02:33.244401: step: 834/464, loss: 0.0028473336715251207 2023-01-24 05:02:33.825576: step: 836/464, loss: 0.28509947657585144 2023-01-24 05:02:34.482371: step: 838/464, loss: 0.013126951642334461 2023-01-24 05:02:35.050067: step: 840/464, loss: 0.020548366010189056 2023-01-24 05:02:35.621553: step: 842/464, loss: 0.03744273632764816 2023-01-24 05:02:36.222385: step: 844/464, loss: 0.042428817600011826 2023-01-24 05:02:36.946275: step: 846/464, loss: 0.007418156135827303 2023-01-24 05:02:37.527846: step: 848/464, loss: 0.0024940096773207188 2023-01-24 05:02:38.153801: step: 850/464, loss: 0.01644994504749775 2023-01-24 05:02:38.831819: step: 852/464, loss: 0.1557295024394989 2023-01-24 05:02:39.413723: step: 854/464, loss: 0.004354654811322689 2023-01-24 05:02:40.136654: step: 856/464, loss: 0.0145355723798275 2023-01-24 05:02:40.803057: step: 858/464, loss: 0.08611620962619781 2023-01-24 05:02:41.450273: step: 860/464, loss: 0.023508962243795395 2023-01-24 05:02:42.051458: step: 862/464, loss: 0.0006393216899596155 2023-01-24 05:02:42.684678: step: 864/464, loss: 0.008929580450057983 2023-01-24 05:02:43.338781: step: 866/464, loss: 0.018466824665665627 2023-01-24 05:02:43.958989: step: 868/464, loss: 0.00402337359264493 2023-01-24 05:02:44.639042: step: 870/464, loss: 0.00023938572849147022 2023-01-24 05:02:45.273004: step: 872/464, loss: 0.0013220008695498109 2023-01-24 05:02:45.885201: step: 874/464, loss: 0.09605110436677933 2023-01-24 05:02:46.438965: step: 876/464, loss: 0.03537971153855324 2023-01-24 05:02:47.091073: step: 878/464, loss: 0.01987377181649208 2023-01-24 05:02:47.708510: step: 880/464, loss: 0.025345321744680405 2023-01-24 05:02:48.400640: step: 882/464, loss: 0.011560996063053608 2023-01-24 05:02:49.113050: step: 884/464, loss: 0.008712761104106903 2023-01-24 05:02:49.717516: step: 886/464, loss: 0.0005412409082055092 2023-01-24 05:02:50.306416: step: 888/464, loss: 0.0016988108400255442 2023-01-24 05:02:50.932676: step: 890/464, loss: 0.01637883111834526 2023-01-24 05:02:51.502640: step: 892/464, loss: 0.05341380089521408 2023-01-24 05:02:52.151776: step: 894/464, loss: 0.0030567431822419167 2023-01-24 05:02:52.691539: step: 896/464, loss: 0.005006662104278803 2023-01-24 05:02:53.301972: step: 898/464, loss: 0.014662880450487137 2023-01-24 05:02:53.913086: step: 900/464, loss: 0.0011214803671464324 2023-01-24 05:02:54.516155: step: 902/464, loss: 0.005864960141479969 2023-01-24 05:02:55.115788: step: 904/464, loss: 0.0007619211683049798 2023-01-24 05:02:55.768033: step: 906/464, loss: 0.05297547206282616 2023-01-24 05:02:56.369152: step: 908/464, loss: 0.058883581310510635 2023-01-24 05:02:57.043982: step: 910/464, loss: 0.0002728099934756756 2023-01-24 05:02:57.598058: step: 912/464, loss: 0.0058806040324270725 2023-01-24 05:02:58.157874: step: 914/464, loss: 0.015054954215884209 2023-01-24 05:02:58.830749: step: 916/464, loss: 0.045919161289930344 2023-01-24 05:02:59.461741: step: 918/464, loss: 0.0026584642473608255 2023-01-24 05:03:00.096883: step: 920/464, loss: 0.023800550028681755 2023-01-24 05:03:00.759575: step: 922/464, loss: 0.0045110126957297325 2023-01-24 05:03:01.374514: step: 924/464, loss: 0.003882175777107477 2023-01-24 05:03:02.010582: step: 926/464, loss: 0.022211167961359024 2023-01-24 05:03:02.635568: step: 928/464, loss: 0.028050176799297333 2023-01-24 05:03:03.165489: step: 930/464, loss: 0.0009635902242735028 ================================================== Loss: 0.033 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.333537855999838, 'r': 0.3341707551573329, 'f1': 0.3338540056263781}, 'combined': 0.2459976883562786, 'epoch': 34} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31706022671097234, 'r': 0.3089080813042623, 'f1': 0.31293107027008527}, 'combined': 0.2042969681555997, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3348209212662338, 'r': 0.3557869372089012, 'f1': 0.3449856778456135}, 'combined': 0.2541999731493994, 'epoch': 34} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3240350842156237, 'r': 0.3094415923713539, 'f1': 0.31657024212508983}, 'combined': 0.2066728005583488, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3370782596515837, 'r': 0.34347443156148094, 'f1': 0.34024628840770765}, 'combined': 0.2507077914583109, 'epoch': 34} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3259872724125249, 'r': 0.3014183787196692, 'f1': 0.31322177272874885}, 'combined': 0.2044867531804267, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21240179573512905, 'r': 0.32770562770562767, 'f1': 0.257745999319033}, 'combined': 0.17183066621268867, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33088235294117646, 'r': 0.4891304347826087, 'f1': 0.39473684210526316}, 'combined': 0.19736842105263158, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5384615384615384, 'r': 0.2413793103448276, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 34} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3348209212662338, 'r': 0.3557869372089012, 'f1': 0.3449856778456135}, 'combined': 0.2541999731493994, 'epoch': 34} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3240350842156237, 'r': 0.3094415923713539, 'f1': 0.31657024212508983}, 'combined': 0.2066728005583488, 'epoch': 34} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33088235294117646, 'r': 0.4891304347826087, 'f1': 0.39473684210526316}, 'combined': 0.19736842105263158, 'epoch': 34} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:05:46.123332: step: 2/464, loss: 0.0009644743404351175 2023-01-24 05:05:46.681057: step: 4/464, loss: 0.0003162659995723516 2023-01-24 05:05:47.329993: step: 6/464, loss: 0.0058449688367545605 2023-01-24 05:05:47.913136: step: 8/464, loss: 0.015397859737277031 2023-01-24 05:05:48.497008: step: 10/464, loss: 0.0003571268171072006 2023-01-24 05:05:49.152312: step: 12/464, loss: 0.0007609457243233919 2023-01-24 05:05:49.731577: step: 14/464, loss: 0.003951466176658869 2023-01-24 05:05:50.388533: step: 16/464, loss: 0.01241021417081356 2023-01-24 05:05:50.929886: step: 18/464, loss: 0.01995595172047615 2023-01-24 05:05:51.566831: step: 20/464, loss: 0.013361471705138683 2023-01-24 05:05:52.234860: step: 22/464, loss: 0.00025080880732275546 2023-01-24 05:05:52.842554: step: 24/464, loss: 0.010684192180633545 2023-01-24 05:05:53.481543: step: 26/464, loss: 0.026312733069062233 2023-01-24 05:05:54.096647: step: 28/464, loss: 0.012011365965008736 2023-01-24 05:05:54.700587: step: 30/464, loss: 0.0087062306702137 2023-01-24 05:05:55.237924: step: 32/464, loss: 0.011229324154555798 2023-01-24 05:05:55.812654: step: 34/464, loss: 0.001367847784422338 2023-01-24 05:05:56.476557: step: 36/464, loss: 0.00034738570684567094 2023-01-24 05:05:57.075160: step: 38/464, loss: 0.008835665881633759 2023-01-24 05:05:57.734308: step: 40/464, loss: 0.0559537373483181 2023-01-24 05:05:58.387607: step: 42/464, loss: 0.0006257090135477483 2023-01-24 05:05:59.014426: step: 44/464, loss: 0.054278358817100525 2023-01-24 05:05:59.586687: step: 46/464, loss: 0.00020819462952204049 2023-01-24 05:06:00.158443: step: 48/464, loss: 0.0766400471329689 2023-01-24 05:06:00.759678: step: 50/464, loss: 0.0037050002720206976 2023-01-24 05:06:01.354155: step: 52/464, loss: 0.023854294791817665 2023-01-24 05:06:01.913494: step: 54/464, loss: 0.00019046322267968208 2023-01-24 05:06:02.522476: step: 56/464, loss: 0.013129734434187412 2023-01-24 05:06:03.077255: step: 58/464, loss: 0.01394770760089159 2023-01-24 05:06:03.747138: step: 60/464, loss: 0.0300760380923748 2023-01-24 05:06:04.335100: step: 62/464, loss: 0.006102480459958315 2023-01-24 05:06:04.967405: step: 64/464, loss: 0.0016553985187783837 2023-01-24 05:06:05.593343: step: 66/464, loss: 0.00021854121587239206 2023-01-24 05:06:06.202915: step: 68/464, loss: 0.048340536653995514 2023-01-24 05:06:06.854469: step: 70/464, loss: 0.022228294983506203 2023-01-24 05:06:07.428872: step: 72/464, loss: 0.0033827880397439003 2023-01-24 05:06:08.072033: step: 74/464, loss: 0.3537949025630951 2023-01-24 05:06:08.710137: step: 76/464, loss: 0.0006294180057011545 2023-01-24 05:06:09.402787: step: 78/464, loss: 0.306569904088974 2023-01-24 05:06:09.995774: step: 80/464, loss: 0.006658884231001139 2023-01-24 05:06:10.570783: step: 82/464, loss: 0.008759608492255211 2023-01-24 05:06:11.130746: step: 84/464, loss: 0.0007474158192053437 2023-01-24 05:06:11.695445: step: 86/464, loss: 0.009229986928403378 2023-01-24 05:06:12.323362: step: 88/464, loss: 0.012151729315519333 2023-01-24 05:06:12.927220: step: 90/464, loss: 0.02027531899511814 2023-01-24 05:06:13.568334: step: 92/464, loss: 0.039069026708602905 2023-01-24 05:06:14.190933: step: 94/464, loss: 0.005869823507964611 2023-01-24 05:06:14.754781: step: 96/464, loss: 0.3314107656478882 2023-01-24 05:06:15.341579: step: 98/464, loss: 0.00550828455016017 2023-01-24 05:06:15.937024: step: 100/464, loss: 0.0024665382225066423 2023-01-24 05:06:16.518882: step: 102/464, loss: 0.017504358664155006 2023-01-24 05:06:17.098417: step: 104/464, loss: 4.007333245681366e-06 2023-01-24 05:06:17.733627: step: 106/464, loss: 0.00032739387825131416 2023-01-24 05:06:18.340877: step: 108/464, loss: 0.008687363937497139 2023-01-24 05:06:18.982135: step: 110/464, loss: 0.011980629526078701 2023-01-24 05:06:19.555063: step: 112/464, loss: 0.008489969186484814 2023-01-24 05:06:20.245536: step: 114/464, loss: 0.015836533159017563 2023-01-24 05:06:20.897787: step: 116/464, loss: 0.012812396511435509 2023-01-24 05:06:21.509868: step: 118/464, loss: 0.03620085120201111 2023-01-24 05:06:22.163035: step: 120/464, loss: 0.023637806996703148 2023-01-24 05:06:22.863853: step: 122/464, loss: 0.006331432610750198 2023-01-24 05:06:23.506550: step: 124/464, loss: 0.0021719210781157017 2023-01-24 05:06:24.165867: step: 126/464, loss: 0.014731621369719505 2023-01-24 05:06:24.791591: step: 128/464, loss: 0.0040918574668467045 2023-01-24 05:06:25.406854: step: 130/464, loss: 0.014182882383465767 2023-01-24 05:06:26.075485: step: 132/464, loss: 0.16358043253421783 2023-01-24 05:06:26.652144: step: 134/464, loss: 0.0007946311379782856 2023-01-24 05:06:27.276035: step: 136/464, loss: 0.01686181128025055 2023-01-24 05:06:27.900186: step: 138/464, loss: 0.5621294379234314 2023-01-24 05:06:28.676049: step: 140/464, loss: 0.1334395855665207 2023-01-24 05:06:29.379768: step: 142/464, loss: 1.1283353567123413 2023-01-24 05:06:29.918776: step: 144/464, loss: 0.03229658678174019 2023-01-24 05:06:30.530176: step: 146/464, loss: 0.017992986366152763 2023-01-24 05:06:31.152835: step: 148/464, loss: 0.008097312413156033 2023-01-24 05:06:31.747834: step: 150/464, loss: 0.09446200728416443 2023-01-24 05:06:32.357919: step: 152/464, loss: 0.012921723537147045 2023-01-24 05:06:32.964394: step: 154/464, loss: 0.036877911537885666 2023-01-24 05:06:33.541854: step: 156/464, loss: 0.0023639483842998743 2023-01-24 05:06:34.188633: step: 158/464, loss: 0.1434338539838791 2023-01-24 05:06:34.807068: step: 160/464, loss: 0.07745251059532166 2023-01-24 05:06:35.378590: step: 162/464, loss: 0.002024970017373562 2023-01-24 05:06:35.989520: step: 164/464, loss: 0.004907531198114157 2023-01-24 05:06:36.577095: step: 166/464, loss: 0.027228038758039474 2023-01-24 05:06:37.168736: step: 168/464, loss: 0.017929313704371452 2023-01-24 05:06:37.849812: step: 170/464, loss: 0.0005961810238659382 2023-01-24 05:06:38.450203: step: 172/464, loss: 0.002728164428845048 2023-01-24 05:06:39.080957: step: 174/464, loss: 0.0008025284623727202 2023-01-24 05:06:39.716222: step: 176/464, loss: 0.0021660258062183857 2023-01-24 05:06:40.291324: step: 178/464, loss: 0.01789056695997715 2023-01-24 05:06:40.906681: step: 180/464, loss: 0.00203095655888319 2023-01-24 05:06:41.618313: step: 182/464, loss: 0.008937697857618332 2023-01-24 05:06:42.233258: step: 184/464, loss: 0.008091083727777004 2023-01-24 05:06:42.927906: step: 186/464, loss: 0.0040649184957146645 2023-01-24 05:06:43.563449: step: 188/464, loss: 0.08236940205097198 2023-01-24 05:06:44.199861: step: 190/464, loss: 0.08165348321199417 2023-01-24 05:06:44.838857: step: 192/464, loss: 0.00024181792105082422 2023-01-24 05:06:45.479947: step: 194/464, loss: 0.03893275558948517 2023-01-24 05:06:46.106140: step: 196/464, loss: 0.012685553170740604 2023-01-24 05:06:46.721490: step: 198/464, loss: 0.02778731659054756 2023-01-24 05:06:47.344966: step: 200/464, loss: 0.006302070338279009 2023-01-24 05:06:47.965383: step: 202/464, loss: 0.017433539032936096 2023-01-24 05:06:48.488670: step: 204/464, loss: 0.0018429952906444669 2023-01-24 05:06:49.089412: step: 206/464, loss: 0.0035465233959257603 2023-01-24 05:06:49.766557: step: 208/464, loss: 0.012276760302484035 2023-01-24 05:06:50.443676: step: 210/464, loss: 0.00873927865177393 2023-01-24 05:06:51.053862: step: 212/464, loss: 0.006358860060572624 2023-01-24 05:06:51.636232: step: 214/464, loss: 0.011782050132751465 2023-01-24 05:06:52.270665: step: 216/464, loss: 0.0018012769287452102 2023-01-24 05:06:52.917543: step: 218/464, loss: 0.0972636342048645 2023-01-24 05:06:53.594432: step: 220/464, loss: 0.017716892063617706 2023-01-24 05:06:54.126527: step: 222/464, loss: 0.0028683850541710854 2023-01-24 05:06:54.638712: step: 224/464, loss: 0.0020713862031698227 2023-01-24 05:06:55.248313: step: 226/464, loss: 0.014332927763462067 2023-01-24 05:06:55.897185: step: 228/464, loss: 0.00759549718350172 2023-01-24 05:06:56.533860: step: 230/464, loss: 0.014684975147247314 2023-01-24 05:06:57.152642: step: 232/464, loss: 0.005183230619877577 2023-01-24 05:06:57.810917: step: 234/464, loss: 0.03961469233036041 2023-01-24 05:06:58.409514: step: 236/464, loss: 0.0497465543448925 2023-01-24 05:06:58.951234: step: 238/464, loss: 0.0266302190721035 2023-01-24 05:06:59.541169: step: 240/464, loss: 0.0003741618129424751 2023-01-24 05:07:00.111784: step: 242/464, loss: 0.026745961979031563 2023-01-24 05:07:00.713517: step: 244/464, loss: 0.0012106267968192697 2023-01-24 05:07:01.348902: step: 246/464, loss: 0.026130206882953644 2023-01-24 05:07:01.891058: step: 248/464, loss: 0.006671784445643425 2023-01-24 05:07:02.494822: step: 250/464, loss: 0.005533790681511164 2023-01-24 05:07:03.039491: step: 252/464, loss: 0.007999777793884277 2023-01-24 05:07:03.679736: step: 254/464, loss: 4.036992686451413e-05 2023-01-24 05:07:04.327842: step: 256/464, loss: 0.0013090185821056366 2023-01-24 05:07:04.967429: step: 258/464, loss: 0.004869567696005106 2023-01-24 05:07:05.636227: step: 260/464, loss: 0.07180750370025635 2023-01-24 05:07:06.251564: step: 262/464, loss: 0.0008018343360163271 2023-01-24 05:07:06.770026: step: 264/464, loss: 0.0023172374349087477 2023-01-24 05:07:07.466525: step: 266/464, loss: 0.10949315875768661 2023-01-24 05:07:08.052749: step: 268/464, loss: 0.007839790545403957 2023-01-24 05:07:08.651496: step: 270/464, loss: 0.0038458656053990126 2023-01-24 05:07:09.269779: step: 272/464, loss: 0.001103203627280891 2023-01-24 05:07:09.894451: step: 274/464, loss: 0.014842512086033821 2023-01-24 05:07:10.578992: step: 276/464, loss: 0.0005287323729135096 2023-01-24 05:07:11.180501: step: 278/464, loss: 0.0026160588022321463 2023-01-24 05:07:11.854750: step: 280/464, loss: 0.7641627192497253 2023-01-24 05:07:12.612693: step: 282/464, loss: 0.007436547894030809 2023-01-24 05:07:13.248040: step: 284/464, loss: 0.018948597833514214 2023-01-24 05:07:13.881719: step: 286/464, loss: 0.004016129299998283 2023-01-24 05:07:14.506306: step: 288/464, loss: 0.21743503212928772 2023-01-24 05:07:15.140053: step: 290/464, loss: 0.012820246629416943 2023-01-24 05:07:15.732690: step: 292/464, loss: 0.00928540900349617 2023-01-24 05:07:16.339535: step: 294/464, loss: 0.006636911071836948 2023-01-24 05:07:16.895514: step: 296/464, loss: 0.00810930784791708 2023-01-24 05:07:17.518367: step: 298/464, loss: 0.006895044352859259 2023-01-24 05:07:18.152256: step: 300/464, loss: 0.0021101750899106264 2023-01-24 05:07:18.736116: step: 302/464, loss: 0.004637501202523708 2023-01-24 05:07:19.320452: step: 304/464, loss: 0.0017597886035218835 2023-01-24 05:07:19.981734: step: 306/464, loss: 0.060324400663375854 2023-01-24 05:07:20.659222: step: 308/464, loss: 0.02072199061512947 2023-01-24 05:07:21.227517: step: 310/464, loss: 0.00800775270909071 2023-01-24 05:07:21.855644: step: 312/464, loss: 0.00040108468965627253 2023-01-24 05:07:22.438493: step: 314/464, loss: 0.021276621147990227 2023-01-24 05:07:23.036693: step: 316/464, loss: 0.006905107758939266 2023-01-24 05:07:23.644513: step: 318/464, loss: 0.032484881579875946 2023-01-24 05:07:24.206008: step: 320/464, loss: 0.008478713221848011 2023-01-24 05:07:24.803560: step: 322/464, loss: 0.00045442843111231923 2023-01-24 05:07:25.379094: step: 324/464, loss: 0.11216661334037781 2023-01-24 05:07:26.031922: step: 326/464, loss: 0.5668357014656067 2023-01-24 05:07:26.631539: step: 328/464, loss: 0.006192249245941639 2023-01-24 05:07:27.218929: step: 330/464, loss: 0.03319069743156433 2023-01-24 05:07:27.846905: step: 332/464, loss: 0.0035295835696160793 2023-01-24 05:07:28.481690: step: 334/464, loss: 0.00975918211042881 2023-01-24 05:07:29.098993: step: 336/464, loss: 0.0027129671070724726 2023-01-24 05:07:29.798470: step: 338/464, loss: 0.005572467111051083 2023-01-24 05:07:30.411002: step: 340/464, loss: 0.001601192052476108 2023-01-24 05:07:31.081468: step: 342/464, loss: 0.013441353105008602 2023-01-24 05:07:31.755909: step: 344/464, loss: 0.013300052843987942 2023-01-24 05:07:32.290991: step: 346/464, loss: 0.006340089254081249 2023-01-24 05:07:32.846418: step: 348/464, loss: 0.015834132209420204 2023-01-24 05:07:33.450039: step: 350/464, loss: 0.008800752460956573 2023-01-24 05:07:34.038303: step: 352/464, loss: 0.0007862219936214387 2023-01-24 05:07:34.752672: step: 354/464, loss: 0.0015308655565604568 2023-01-24 05:07:35.368664: step: 356/464, loss: 0.02814129739999771 2023-01-24 05:07:36.014687: step: 358/464, loss: 0.0009849341586232185 2023-01-24 05:07:36.610289: step: 360/464, loss: 0.008692414499819279 2023-01-24 05:07:37.247496: step: 362/464, loss: 0.0036799232475459576 2023-01-24 05:07:37.914671: step: 364/464, loss: 0.11109455674886703 2023-01-24 05:07:38.547969: step: 366/464, loss: 0.003390450030565262 2023-01-24 05:07:39.195803: step: 368/464, loss: 0.01457061618566513 2023-01-24 05:07:39.757050: step: 370/464, loss: 0.002210398204624653 2023-01-24 05:07:40.346489: step: 372/464, loss: 0.007588067092001438 2023-01-24 05:07:41.018073: step: 374/464, loss: 0.00048443939886055887 2023-01-24 05:07:41.646711: step: 376/464, loss: 0.0052320328541100025 2023-01-24 05:07:42.262825: step: 378/464, loss: 0.002138762501999736 2023-01-24 05:07:42.933542: step: 380/464, loss: 0.010138359852135181 2023-01-24 05:07:43.516147: step: 382/464, loss: 0.014441246166825294 2023-01-24 05:07:44.126746: step: 384/464, loss: 0.008367008529603481 2023-01-24 05:07:44.819696: step: 386/464, loss: 0.0007067288970574737 2023-01-24 05:07:45.503390: step: 388/464, loss: 0.0012309409212321043 2023-01-24 05:07:46.090999: step: 390/464, loss: 0.004554093815386295 2023-01-24 05:07:46.690874: step: 392/464, loss: 0.07715360075235367 2023-01-24 05:07:47.277235: step: 394/464, loss: 0.013104358687996864 2023-01-24 05:07:47.883224: step: 396/464, loss: 0.003941704053431749 2023-01-24 05:07:48.465369: step: 398/464, loss: 0.002259533852338791 2023-01-24 05:07:49.037662: step: 400/464, loss: 0.0028181481175124645 2023-01-24 05:07:49.661691: step: 402/464, loss: 0.0019231357146054506 2023-01-24 05:07:50.377512: step: 404/464, loss: 0.013126783072948456 2023-01-24 05:07:51.003548: step: 406/464, loss: 0.0007588164880871773 2023-01-24 05:07:51.624250: step: 408/464, loss: 0.0044506611302495 2023-01-24 05:07:52.306574: step: 410/464, loss: 0.011158186011016369 2023-01-24 05:07:52.950611: step: 412/464, loss: 0.017263837158679962 2023-01-24 05:07:53.621351: step: 414/464, loss: 0.026343664154410362 2023-01-24 05:07:54.251259: step: 416/464, loss: 0.022229742258787155 2023-01-24 05:07:54.883891: step: 418/464, loss: 0.012956931255757809 2023-01-24 05:07:55.557602: step: 420/464, loss: 0.027147667482495308 2023-01-24 05:07:56.115316: step: 422/464, loss: 0.0002858864900190383 2023-01-24 05:07:56.653954: step: 424/464, loss: 0.009432642720639706 2023-01-24 05:07:57.213644: step: 426/464, loss: 0.0012652672594413161 2023-01-24 05:07:57.819239: step: 428/464, loss: 0.0009018271812237799 2023-01-24 05:07:58.495228: step: 430/464, loss: 0.06326231360435486 2023-01-24 05:07:59.095673: step: 432/464, loss: 0.0007856090669520199 2023-01-24 05:07:59.783566: step: 434/464, loss: 0.019646979868412018 2023-01-24 05:08:00.426704: step: 436/464, loss: 0.01722937636077404 2023-01-24 05:08:00.972338: step: 438/464, loss: 0.01092112623155117 2023-01-24 05:08:01.560305: step: 440/464, loss: 2.2829706722404808e-05 2023-01-24 05:08:02.200303: step: 442/464, loss: 0.000356467702658847 2023-01-24 05:08:02.912304: step: 444/464, loss: 0.005447516683489084 2023-01-24 05:08:03.542781: step: 446/464, loss: 0.0006174911395646632 2023-01-24 05:08:04.132103: step: 448/464, loss: 0.13536754250526428 2023-01-24 05:08:04.737289: step: 450/464, loss: 0.015333844348788261 2023-01-24 05:08:05.364913: step: 452/464, loss: 0.011071569286286831 2023-01-24 05:08:05.939269: step: 454/464, loss: 0.0035984970163553953 2023-01-24 05:08:06.528412: step: 456/464, loss: 0.006359519436955452 2023-01-24 05:08:07.166234: step: 458/464, loss: 0.07651659846305847 2023-01-24 05:08:07.792141: step: 460/464, loss: 0.25249356031417847 2023-01-24 05:08:08.399475: step: 462/464, loss: 0.006903337314724922 2023-01-24 05:08:09.005652: step: 464/464, loss: 0.020166227594017982 2023-01-24 05:08:09.665981: step: 466/464, loss: 0.01649313047528267 2023-01-24 05:08:10.335723: step: 468/464, loss: 0.06049361452460289 2023-01-24 05:08:10.910629: step: 470/464, loss: 0.00118854152970016 2023-01-24 05:08:11.534163: step: 472/464, loss: 0.005978343542665243 2023-01-24 05:08:12.145315: step: 474/464, loss: 0.0140788983553648 2023-01-24 05:08:12.714322: step: 476/464, loss: 0.037724100053310394 2023-01-24 05:08:13.335806: step: 478/464, loss: 0.00423229718580842 2023-01-24 05:08:13.966887: step: 480/464, loss: 0.013618550263345242 2023-01-24 05:08:14.763052: step: 482/464, loss: 0.004492191597819328 2023-01-24 05:08:15.390551: step: 484/464, loss: 0.06540249288082123 2023-01-24 05:08:16.034220: step: 486/464, loss: 7.475945312762633e-05 2023-01-24 05:08:16.703811: step: 488/464, loss: 0.0009398137335665524 2023-01-24 05:08:17.289594: step: 490/464, loss: 0.0005184581386856735 2023-01-24 05:08:17.884920: step: 492/464, loss: 0.011175619438290596 2023-01-24 05:08:18.542911: step: 494/464, loss: 0.2823795676231384 2023-01-24 05:08:19.143211: step: 496/464, loss: 0.0016089630080386996 2023-01-24 05:08:19.721789: step: 498/464, loss: 0.0025382675230503082 2023-01-24 05:08:20.336549: step: 500/464, loss: 4.747584342956543 2023-01-24 05:08:21.021656: step: 502/464, loss: 0.02053333818912506 2023-01-24 05:08:21.734516: step: 504/464, loss: 0.0034306731540709734 2023-01-24 05:08:22.381595: step: 506/464, loss: 0.0025277994573116302 2023-01-24 05:08:23.046807: step: 508/464, loss: 0.0006845752359367907 2023-01-24 05:08:23.651632: step: 510/464, loss: 0.0007348746294155717 2023-01-24 05:08:24.206917: step: 512/464, loss: 0.0009130208636634052 2023-01-24 05:08:24.822341: step: 514/464, loss: 0.012286031618714333 2023-01-24 05:08:25.389345: step: 516/464, loss: 0.06218738853931427 2023-01-24 05:08:25.969457: step: 518/464, loss: 0.005067503545433283 2023-01-24 05:08:26.584964: step: 520/464, loss: 0.05404721945524216 2023-01-24 05:08:27.192845: step: 522/464, loss: 0.012997281737625599 2023-01-24 05:08:27.866654: step: 524/464, loss: 0.0011278531746938825 2023-01-24 05:08:28.479302: step: 526/464, loss: 0.015108383260667324 2023-01-24 05:08:29.115719: step: 528/464, loss: 0.01597750186920166 2023-01-24 05:08:29.748966: step: 530/464, loss: 0.003203788073733449 2023-01-24 05:08:30.368388: step: 532/464, loss: 0.26965925097465515 2023-01-24 05:08:30.964336: step: 534/464, loss: 0.004773963242769241 2023-01-24 05:08:31.566862: step: 536/464, loss: 0.00966687873005867 2023-01-24 05:08:32.171684: step: 538/464, loss: 0.005131350364536047 2023-01-24 05:08:32.821443: step: 540/464, loss: 0.15959526598453522 2023-01-24 05:08:33.426485: step: 542/464, loss: 0.014910740777850151 2023-01-24 05:08:34.089137: step: 544/464, loss: 0.00035401046625338495 2023-01-24 05:08:34.739730: step: 546/464, loss: 0.024254245683550835 2023-01-24 05:08:35.303310: step: 548/464, loss: 0.00013935545575805008 2023-01-24 05:08:35.885132: step: 550/464, loss: 0.0026739283930510283 2023-01-24 05:08:36.491167: step: 552/464, loss: 0.011179208755493164 2023-01-24 05:08:37.111332: step: 554/464, loss: 0.005806444212794304 2023-01-24 05:08:37.742783: step: 556/464, loss: 0.20071232318878174 2023-01-24 05:08:38.386420: step: 558/464, loss: 0.00031759965349920094 2023-01-24 05:08:39.010816: step: 560/464, loss: 0.015619128942489624 2023-01-24 05:08:39.707153: step: 562/464, loss: 0.006494753994047642 2023-01-24 05:08:40.288913: step: 564/464, loss: 0.07191771268844604 2023-01-24 05:08:40.941034: step: 566/464, loss: 0.039309412240982056 2023-01-24 05:08:41.611160: step: 568/464, loss: 0.009192845784127712 2023-01-24 05:08:42.204688: step: 570/464, loss: 0.14159759879112244 2023-01-24 05:08:42.814110: step: 572/464, loss: 0.0028477036394178867 2023-01-24 05:08:43.421007: step: 574/464, loss: 0.0030880693811923265 2023-01-24 05:08:44.052499: step: 576/464, loss: 0.0008346071117557585 2023-01-24 05:08:44.606470: step: 578/464, loss: 0.0027719526551663876 2023-01-24 05:08:45.245912: step: 580/464, loss: 0.002395773306488991 2023-01-24 05:08:45.870748: step: 582/464, loss: 0.005813604686409235 2023-01-24 05:08:46.447790: step: 584/464, loss: 0.005093331448733807 2023-01-24 05:08:47.003843: step: 586/464, loss: 0.002429689047858119 2023-01-24 05:08:47.638548: step: 588/464, loss: 0.01826808974146843 2023-01-24 05:08:48.238159: step: 590/464, loss: 0.01065827440470457 2023-01-24 05:08:48.869566: step: 592/464, loss: 0.014795523136854172 2023-01-24 05:08:49.447538: step: 594/464, loss: 0.01895059272646904 2023-01-24 05:08:50.051927: step: 596/464, loss: 0.0066263931803405285 2023-01-24 05:08:50.626867: step: 598/464, loss: 0.027680065482854843 2023-01-24 05:08:51.229476: step: 600/464, loss: 0.041019223630428314 2023-01-24 05:08:51.828735: step: 602/464, loss: 0.0005921527626924217 2023-01-24 05:08:52.436308: step: 604/464, loss: 0.00405742134898901 2023-01-24 05:08:53.043546: step: 606/464, loss: 0.0395742766559124 2023-01-24 05:08:53.684006: step: 608/464, loss: 0.0009591281996108592 2023-01-24 05:08:54.285856: step: 610/464, loss: 0.1311628818511963 2023-01-24 05:08:54.908670: step: 612/464, loss: 0.020746229216456413 2023-01-24 05:08:55.492275: step: 614/464, loss: 0.022783661261200905 2023-01-24 05:08:56.144462: step: 616/464, loss: 9.927034261636436e-05 2023-01-24 05:08:56.721163: step: 618/464, loss: 0.008369174785912037 2023-01-24 05:08:57.419990: step: 620/464, loss: 0.09095170348882675 2023-01-24 05:08:58.010157: step: 622/464, loss: 0.005317453760653734 2023-01-24 05:08:58.641605: step: 624/464, loss: 0.002785000717267394 2023-01-24 05:08:59.329403: step: 626/464, loss: 0.013620770536363125 2023-01-24 05:08:59.945541: step: 628/464, loss: 0.03168050944805145 2023-01-24 05:09:00.583075: step: 630/464, loss: 0.013114920817315578 2023-01-24 05:09:01.228396: step: 632/464, loss: 0.0720565915107727 2023-01-24 05:09:01.931035: step: 634/464, loss: 0.016401885077357292 2023-01-24 05:09:02.561383: step: 636/464, loss: 0.0019064360531046987 2023-01-24 05:09:03.178656: step: 638/464, loss: 0.05868987366557121 2023-01-24 05:09:03.801748: step: 640/464, loss: 0.04122765362262726 2023-01-24 05:09:04.353754: step: 642/464, loss: 0.0013818825827911496 2023-01-24 05:09:05.004990: step: 644/464, loss: 0.029495844617486 2023-01-24 05:09:05.639822: step: 646/464, loss: 0.006111129652708769 2023-01-24 05:09:06.339371: step: 648/464, loss: 0.0003144819347653538 2023-01-24 05:09:06.900828: step: 650/464, loss: 0.004753305576741695 2023-01-24 05:09:07.479648: step: 652/464, loss: 0.007937440648674965 2023-01-24 05:09:08.104914: step: 654/464, loss: 0.010105142369866371 2023-01-24 05:09:08.735344: step: 656/464, loss: 0.01663767173886299 2023-01-24 05:09:09.364445: step: 658/464, loss: 0.04917242377996445 2023-01-24 05:09:09.967083: step: 660/464, loss: 0.0013400838943198323 2023-01-24 05:09:10.592661: step: 662/464, loss: 0.012249905616044998 2023-01-24 05:09:11.223816: step: 664/464, loss: 0.013516448438167572 2023-01-24 05:09:11.884940: step: 666/464, loss: 0.023560237139463425 2023-01-24 05:09:12.510282: step: 668/464, loss: 0.03818031772971153 2023-01-24 05:09:13.093231: step: 670/464, loss: 0.01443792600184679 2023-01-24 05:09:13.697104: step: 672/464, loss: 0.01864960603415966 2023-01-24 05:09:14.296827: step: 674/464, loss: 0.022748306393623352 2023-01-24 05:09:14.923872: step: 676/464, loss: 0.008343067020177841 2023-01-24 05:09:15.517775: step: 678/464, loss: 9.029127977555618e-05 2023-01-24 05:09:16.120249: step: 680/464, loss: 0.0007436299347318709 2023-01-24 05:09:16.699922: step: 682/464, loss: 0.0011212803656235337 2023-01-24 05:09:17.383252: step: 684/464, loss: 0.0007027724641375244 2023-01-24 05:09:17.956122: step: 686/464, loss: 0.00898168608546257 2023-01-24 05:09:18.558737: step: 688/464, loss: 0.017184296622872353 2023-01-24 05:09:19.151260: step: 690/464, loss: 0.004175418987870216 2023-01-24 05:09:19.819829: step: 692/464, loss: 0.03173663839697838 2023-01-24 05:09:20.438240: step: 694/464, loss: 0.03442293033003807 2023-01-24 05:09:21.087577: step: 696/464, loss: 0.0225666593760252 2023-01-24 05:09:21.667354: step: 698/464, loss: 0.009640194475650787 2023-01-24 05:09:22.310710: step: 700/464, loss: 0.011529440991580486 2023-01-24 05:09:22.929797: step: 702/464, loss: 0.0011612694943323731 2023-01-24 05:09:23.554152: step: 704/464, loss: 0.01782440021634102 2023-01-24 05:09:24.186943: step: 706/464, loss: 0.010416793636977673 2023-01-24 05:09:24.790203: step: 708/464, loss: 0.00046717614168301225 2023-01-24 05:09:25.385766: step: 710/464, loss: 0.31452393531799316 2023-01-24 05:09:26.065871: step: 712/464, loss: 0.03398028016090393 2023-01-24 05:09:26.663648: step: 714/464, loss: 0.02827555313706398 2023-01-24 05:09:27.359847: step: 716/464, loss: 0.0073892888613045216 2023-01-24 05:09:28.073179: step: 718/464, loss: 0.015934636816382408 2023-01-24 05:09:28.727490: step: 720/464, loss: 0.00607600063085556 2023-01-24 05:09:29.363806: step: 722/464, loss: 0.005109555087983608 2023-01-24 05:09:30.024939: step: 724/464, loss: 0.00027257108013145626 2023-01-24 05:09:30.710242: step: 726/464, loss: 0.00013226554438006133 2023-01-24 05:09:31.336741: step: 728/464, loss: 0.042293351143598557 2023-01-24 05:09:31.919696: step: 730/464, loss: 0.006536096800118685 2023-01-24 05:09:32.540427: step: 732/464, loss: 0.023406287655234337 2023-01-24 05:09:33.111194: step: 734/464, loss: 0.2510569095611572 2023-01-24 05:09:33.734002: step: 736/464, loss: 0.17887428402900696 2023-01-24 05:09:34.366744: step: 738/464, loss: 0.0026564225554466248 2023-01-24 05:09:34.962088: step: 740/464, loss: 0.0019115894101560116 2023-01-24 05:09:35.524683: step: 742/464, loss: 0.008141586557030678 2023-01-24 05:09:36.137469: step: 744/464, loss: 0.022172318771481514 2023-01-24 05:09:36.760384: step: 746/464, loss: 0.002620603423565626 2023-01-24 05:09:37.338665: step: 748/464, loss: 0.029885120689868927 2023-01-24 05:09:38.005891: step: 750/464, loss: 0.00040005400660447776 2023-01-24 05:09:38.592497: step: 752/464, loss: 0.00014553121582139283 2023-01-24 05:09:39.222831: step: 754/464, loss: 0.014732057228684425 2023-01-24 05:09:39.894554: step: 756/464, loss: 0.002545611932873726 2023-01-24 05:09:40.490894: step: 758/464, loss: 0.013696306385099888 2023-01-24 05:09:41.132874: step: 760/464, loss: 0.02129439264535904 2023-01-24 05:09:41.721014: step: 762/464, loss: 0.0017211624654009938 2023-01-24 05:09:42.293833: step: 764/464, loss: 0.012150133959949017 2023-01-24 05:09:43.001454: step: 766/464, loss: 0.010667501017451286 2023-01-24 05:09:43.612487: step: 768/464, loss: 0.026343297213315964 2023-01-24 05:09:44.231256: step: 770/464, loss: 0.05519673600792885 2023-01-24 05:09:44.825466: step: 772/464, loss: 4.755964255309664e-05 2023-01-24 05:09:45.535627: step: 774/464, loss: 0.011045753955841064 2023-01-24 05:09:46.124247: step: 776/464, loss: 0.0020963868591934443 2023-01-24 05:09:46.778542: step: 778/464, loss: 7.541560649871826 2023-01-24 05:09:47.433269: step: 780/464, loss: 0.006616545375436544 2023-01-24 05:09:48.031272: step: 782/464, loss: 0.0030220819171518087 2023-01-24 05:09:48.633060: step: 784/464, loss: 0.0029843649826943874 2023-01-24 05:09:49.227085: step: 786/464, loss: 0.0005856929928995669 2023-01-24 05:09:49.873094: step: 788/464, loss: 2.3059473037719727 2023-01-24 05:09:50.495297: step: 790/464, loss: 0.0026003012899309397 2023-01-24 05:09:51.091309: step: 792/464, loss: 0.050763748586177826 2023-01-24 05:09:51.743445: step: 794/464, loss: 0.06708303093910217 2023-01-24 05:09:52.365457: step: 796/464, loss: 0.03334089741110802 2023-01-24 05:09:52.987326: step: 798/464, loss: 0.013586047105491161 2023-01-24 05:09:53.631724: step: 800/464, loss: 0.013940623961389065 2023-01-24 05:09:54.364723: step: 802/464, loss: 0.08756569772958755 2023-01-24 05:09:54.947934: step: 804/464, loss: 0.007653203327208757 2023-01-24 05:09:55.584698: step: 806/464, loss: 0.005163044203072786 2023-01-24 05:09:56.239366: step: 808/464, loss: 0.014750438742339611 2023-01-24 05:09:56.934213: step: 810/464, loss: 0.002899068407714367 2023-01-24 05:09:57.535682: step: 812/464, loss: 0.00044651940697804093 2023-01-24 05:09:58.138642: step: 814/464, loss: 0.0063481805846095085 2023-01-24 05:09:58.830525: step: 816/464, loss: 0.027266254648566246 2023-01-24 05:09:59.503174: step: 818/464, loss: 0.0041908444836735725 2023-01-24 05:10:00.164096: step: 820/464, loss: 0.00024215153825934976 2023-01-24 05:10:00.794426: step: 822/464, loss: 0.0004711594374384731 2023-01-24 05:10:01.396955: step: 824/464, loss: 0.042581092566251755 2023-01-24 05:10:02.094626: step: 826/464, loss: 0.012439103797078133 2023-01-24 05:10:02.790704: step: 828/464, loss: 0.029706567525863647 2023-01-24 05:10:03.470341: step: 830/464, loss: 0.019360028207302094 2023-01-24 05:10:04.080189: step: 832/464, loss: 0.014573116786777973 2023-01-24 05:10:04.672557: step: 834/464, loss: 0.010647988878190517 2023-01-24 05:10:05.277891: step: 836/464, loss: 0.0393616147339344 2023-01-24 05:10:05.881963: step: 838/464, loss: 0.003897633170709014 2023-01-24 05:10:06.458951: step: 840/464, loss: 0.0009832321666181087 2023-01-24 05:10:07.096285: step: 842/464, loss: 0.008502716198563576 2023-01-24 05:10:07.746311: step: 844/464, loss: 0.0011305073276162148 2023-01-24 05:10:08.384466: step: 846/464, loss: 0.02720501646399498 2023-01-24 05:10:08.933669: step: 848/464, loss: 0.02629738114774227 2023-01-24 05:10:09.492326: step: 850/464, loss: 0.012617183849215508 2023-01-24 05:10:10.116229: step: 852/464, loss: 0.03479094058275223 2023-01-24 05:10:10.704535: step: 854/464, loss: 0.030419880524277687 2023-01-24 05:10:11.293587: step: 856/464, loss: 0.0031641533132642508 2023-01-24 05:10:11.935765: step: 858/464, loss: 0.007085779681801796 2023-01-24 05:10:12.559792: step: 860/464, loss: 0.0025283698923885822 2023-01-24 05:10:13.241425: step: 862/464, loss: 0.07772478461265564 2023-01-24 05:10:13.898035: step: 864/464, loss: 0.03700843080878258 2023-01-24 05:10:14.497724: step: 866/464, loss: 0.0037723893765360117 2023-01-24 05:10:15.102764: step: 868/464, loss: 0.0020687098149210215 2023-01-24 05:10:15.703712: step: 870/464, loss: 0.00994616188108921 2023-01-24 05:10:16.283688: step: 872/464, loss: 0.00024091260274872184 2023-01-24 05:10:16.947509: step: 874/464, loss: 0.00023713010887149721 2023-01-24 05:10:17.590958: step: 876/464, loss: 0.0046511306427419186 2023-01-24 05:10:18.153777: step: 878/464, loss: 9.003627383208368e-06 2023-01-24 05:10:18.740115: step: 880/464, loss: 0.32005923986434937 2023-01-24 05:10:19.380118: step: 882/464, loss: 0.07506310939788818 2023-01-24 05:10:19.957648: step: 884/464, loss: 0.017441291362047195 2023-01-24 05:10:20.563573: step: 886/464, loss: 0.03993603214621544 2023-01-24 05:10:21.152517: step: 888/464, loss: 0.0002836991334334016 2023-01-24 05:10:21.853925: step: 890/464, loss: 0.0018918365240097046 2023-01-24 05:10:22.532716: step: 892/464, loss: 0.03895119950175285 2023-01-24 05:10:23.253130: step: 894/464, loss: 0.10691957920789719 2023-01-24 05:10:23.838659: step: 896/464, loss: 0.0010565068805590272 2023-01-24 05:10:24.469731: step: 898/464, loss: 0.0009570553665980697 2023-01-24 05:10:25.114445: step: 900/464, loss: 0.026494259014725685 2023-01-24 05:10:25.779528: step: 902/464, loss: 0.00028691813349723816 2023-01-24 05:10:26.393077: step: 904/464, loss: 0.084366574883461 2023-01-24 05:10:27.044918: step: 906/464, loss: 0.007411717902868986 2023-01-24 05:10:27.702915: step: 908/464, loss: 0.010169975459575653 2023-01-24 05:10:28.320115: step: 910/464, loss: 0.007501612417399883 2023-01-24 05:10:28.944427: step: 912/464, loss: 0.03972383588552475 2023-01-24 05:10:29.551814: step: 914/464, loss: 0.0008084288565441966 2023-01-24 05:10:30.236370: step: 916/464, loss: 0.07327800244092941 2023-01-24 05:10:30.910253: step: 918/464, loss: 0.030206453055143356 2023-01-24 05:10:31.514735: step: 920/464, loss: 0.0031599088106304407 2023-01-24 05:10:32.107898: step: 922/464, loss: 0.005930094514042139 2023-01-24 05:10:32.748405: step: 924/464, loss: 0.0011120139388367534 2023-01-24 05:10:33.418675: step: 926/464, loss: 0.036906830966472626 2023-01-24 05:10:34.084909: step: 928/464, loss: 0.002705650869756937 2023-01-24 05:10:34.566772: step: 930/464, loss: 0.000839819957036525 ================================================== Loss: 0.062 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33291292868498756, 'r': 0.3272275086505191, 'f1': 0.33004573599774845}, 'combined': 0.24319159494570936, 'epoch': 35} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3247065959255648, 'r': 0.3142610712404593, 'f1': 0.3193984544695187}, 'combined': 0.20851919825471169, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3223283815811749, 'r': 0.328444669656719, 'f1': 0.3253577836637047}, 'combined': 0.23973731427851924, 'epoch': 35} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3393044500048481, 'r': 0.32119988873504024, 'f1': 0.33000404447541465}, 'combined': 0.21544305494249869, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34228174603174605, 'r': 0.3409827640733713, 'f1': 0.341631020278834}, 'combined': 0.25172812020545665, 'epoch': 35} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33008216942195984, 'r': 0.3076317277517162, 'f1': 0.3184617695469717}, 'combined': 0.20790768374569135, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24113475177304963, 'r': 0.32380952380952377, 'f1': 0.2764227642276422}, 'combined': 0.18428184281842813, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2642857142857143, 'r': 0.40217391304347827, 'f1': 0.31896551724137934}, 'combined': 0.15948275862068967, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3348209212662338, 'r': 0.3557869372089012, 'f1': 0.3449856778456135}, 'combined': 0.2541999731493994, 'epoch': 34} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3240350842156237, 'r': 0.3094415923713539, 'f1': 0.31657024212508983}, 'combined': 0.2066728005583488, 'epoch': 34} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33088235294117646, 'r': 0.4891304347826087, 'f1': 0.39473684210526316}, 'combined': 0.19736842105263158, 'epoch': 34} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:13:10.992909: step: 2/464, loss: 0.0036940753925591707 2023-01-24 05:13:11.573779: step: 4/464, loss: 0.04975832626223564 2023-01-24 05:13:12.164106: step: 6/464, loss: 0.0008109904592856765 2023-01-24 05:13:12.790072: step: 8/464, loss: 0.0008208010112866759 2023-01-24 05:13:13.405877: step: 10/464, loss: 0.0042820945382118225 2023-01-24 05:13:14.058758: step: 12/464, loss: 0.0039556315168738365 2023-01-24 05:13:14.694974: step: 14/464, loss: 0.02546033449470997 2023-01-24 05:13:15.339806: step: 16/464, loss: 0.00010900833876803517 2023-01-24 05:13:15.897108: step: 18/464, loss: 0.002483731135725975 2023-01-24 05:13:16.480082: step: 20/464, loss: 0.00024638380273245275 2023-01-24 05:13:17.218020: step: 22/464, loss: 0.0012883706949651241 2023-01-24 05:13:17.806197: step: 24/464, loss: 0.0003679947112686932 2023-01-24 05:13:18.417604: step: 26/464, loss: 0.08963681757450104 2023-01-24 05:13:19.017376: step: 28/464, loss: 0.04453660547733307 2023-01-24 05:13:19.588893: step: 30/464, loss: 0.0038836635649204254 2023-01-24 05:13:20.178193: step: 32/464, loss: 0.005359238479286432 2023-01-24 05:13:20.785736: step: 34/464, loss: 0.05803626403212547 2023-01-24 05:13:21.436708: step: 36/464, loss: 0.003457083133980632 2023-01-24 05:13:22.077556: step: 38/464, loss: 0.20208704471588135 2023-01-24 05:13:22.685654: step: 40/464, loss: 0.04237399250268936 2023-01-24 05:13:23.310065: step: 42/464, loss: 0.0036644453648477793 2023-01-24 05:13:23.973998: step: 44/464, loss: 0.002522712107747793 2023-01-24 05:13:24.558315: step: 46/464, loss: 0.000686106679495424 2023-01-24 05:13:25.189238: step: 48/464, loss: 0.021788040176033974 2023-01-24 05:13:25.839938: step: 50/464, loss: 0.0007058007759042084 2023-01-24 05:13:26.454605: step: 52/464, loss: 5.509668335434981e-05 2023-01-24 05:13:27.105604: step: 54/464, loss: 0.02686609886586666 2023-01-24 05:13:27.724851: step: 56/464, loss: 0.0005029297899454832 2023-01-24 05:13:28.437659: step: 58/464, loss: 0.054092515259981155 2023-01-24 05:13:29.013161: step: 60/464, loss: 0.0016269775805994868 2023-01-24 05:13:29.642642: step: 62/464, loss: 0.03045477904379368 2023-01-24 05:13:30.278658: step: 64/464, loss: 0.0004281103319954127 2023-01-24 05:13:30.940568: step: 66/464, loss: 0.018816521391272545 2023-01-24 05:13:31.524863: step: 68/464, loss: 0.0003489804803393781 2023-01-24 05:13:32.139540: step: 70/464, loss: 0.001230527414008975 2023-01-24 05:13:32.745431: step: 72/464, loss: 0.0004906057729385793 2023-01-24 05:13:33.394456: step: 74/464, loss: 0.0012830095365643501 2023-01-24 05:13:33.963886: step: 76/464, loss: 0.03462563455104828 2023-01-24 05:13:34.608037: step: 78/464, loss: 0.0034266517031937838 2023-01-24 05:13:35.215003: step: 80/464, loss: 0.004816330503672361 2023-01-24 05:13:35.847037: step: 82/464, loss: 0.004266914911568165 2023-01-24 05:13:36.463520: step: 84/464, loss: 0.017530955374240875 2023-01-24 05:13:37.032038: step: 86/464, loss: 0.025057358667254448 2023-01-24 05:13:37.632521: step: 88/464, loss: 0.05076591297984123 2023-01-24 05:13:38.257748: step: 90/464, loss: 0.00010926132381428033 2023-01-24 05:13:38.874869: step: 92/464, loss: 0.0014297146117314696 2023-01-24 05:13:39.417300: step: 94/464, loss: 0.001774539239704609 2023-01-24 05:13:40.065157: step: 96/464, loss: 0.013956896960735321 2023-01-24 05:13:40.664158: step: 98/464, loss: 0.02583293244242668 2023-01-24 05:13:41.269470: step: 100/464, loss: 0.0030617262236773968 2023-01-24 05:13:41.817436: step: 102/464, loss: 0.003205106593668461 2023-01-24 05:13:42.427176: step: 104/464, loss: 0.0025927547831088305 2023-01-24 05:13:43.071738: step: 106/464, loss: 0.01715610921382904 2023-01-24 05:13:43.720501: step: 108/464, loss: 0.0008542435825802386 2023-01-24 05:13:44.323401: step: 110/464, loss: 2.9173745588195743e-06 2023-01-24 05:13:44.987652: step: 112/464, loss: 0.03331207111477852 2023-01-24 05:13:45.606335: step: 114/464, loss: 0.04286523163318634 2023-01-24 05:13:46.236294: step: 116/464, loss: 0.04366404190659523 2023-01-24 05:13:46.902138: step: 118/464, loss: 0.04296250268816948 2023-01-24 05:13:47.499806: step: 120/464, loss: 0.5016422867774963 2023-01-24 05:13:48.119582: step: 122/464, loss: 0.006844162475317717 2023-01-24 05:13:48.698214: step: 124/464, loss: 0.008258841931819916 2023-01-24 05:13:49.406017: step: 126/464, loss: 0.005316090304404497 2023-01-24 05:13:49.951561: step: 128/464, loss: 0.00038320745807141066 2023-01-24 05:13:50.544767: step: 130/464, loss: 0.009655006229877472 2023-01-24 05:13:51.233455: step: 132/464, loss: 0.0001869757688837126 2023-01-24 05:13:51.826643: step: 134/464, loss: 0.008668635971844196 2023-01-24 05:13:52.427843: step: 136/464, loss: 0.001098167267628014 2023-01-24 05:13:53.052462: step: 138/464, loss: 0.003017690032720566 2023-01-24 05:13:53.663820: step: 140/464, loss: 0.0026568910107016563 2023-01-24 05:13:54.316384: step: 142/464, loss: 0.002854824997484684 2023-01-24 05:13:54.944241: step: 144/464, loss: 3.517479126458056e-05 2023-01-24 05:13:55.543214: step: 146/464, loss: 0.00023735500872135162 2023-01-24 05:13:56.170958: step: 148/464, loss: 0.0016523152589797974 2023-01-24 05:13:56.788791: step: 150/464, loss: 0.02731013298034668 2023-01-24 05:13:57.438885: step: 152/464, loss: 0.02933180145919323 2023-01-24 05:13:58.033623: step: 154/464, loss: 0.00342610664665699 2023-01-24 05:13:58.613694: step: 156/464, loss: 0.5142154693603516 2023-01-24 05:13:59.212272: step: 158/464, loss: 0.020955195650458336 2023-01-24 05:13:59.767886: step: 160/464, loss: 0.0027112129610031843 2023-01-24 05:14:00.428032: step: 162/464, loss: 0.034296292811632156 2023-01-24 05:14:01.073463: step: 164/464, loss: 0.0006291309255175292 2023-01-24 05:14:01.670231: step: 166/464, loss: 0.003323235781863332 2023-01-24 05:14:02.277355: step: 168/464, loss: 0.0009546473156660795 2023-01-24 05:14:02.882258: step: 170/464, loss: 0.0013110835570842028 2023-01-24 05:14:03.472710: step: 172/464, loss: 0.0026694394182413816 2023-01-24 05:14:04.086575: step: 174/464, loss: 0.010749605484306812 2023-01-24 05:14:04.734256: step: 176/464, loss: 0.2029525488615036 2023-01-24 05:14:05.436106: step: 178/464, loss: 0.01791108027100563 2023-01-24 05:14:06.161578: step: 180/464, loss: 0.0067817047238349915 2023-01-24 05:14:06.722997: step: 182/464, loss: 0.014205916784703732 2023-01-24 05:14:07.391759: step: 184/464, loss: 0.017666643485426903 2023-01-24 05:14:07.959461: step: 186/464, loss: 2.091298119921703e-05 2023-01-24 05:14:08.583577: step: 188/464, loss: 0.011905474588274956 2023-01-24 05:14:09.161255: step: 190/464, loss: 0.07707890123128891 2023-01-24 05:14:09.767830: step: 192/464, loss: 0.004543210845440626 2023-01-24 05:14:10.373279: step: 194/464, loss: 0.012926164083182812 2023-01-24 05:14:10.960412: step: 196/464, loss: 0.01449363213032484 2023-01-24 05:14:11.528259: step: 198/464, loss: 0.00012760139361489564 2023-01-24 05:14:12.126986: step: 200/464, loss: 0.005846938583999872 2023-01-24 05:14:12.768103: step: 202/464, loss: 0.0015401836717501283 2023-01-24 05:14:13.424771: step: 204/464, loss: 0.0009976581204682589 2023-01-24 05:14:13.967813: step: 206/464, loss: 0.004876827355474234 2023-01-24 05:14:14.583215: step: 208/464, loss: 0.010364729911088943 2023-01-24 05:14:15.298946: step: 210/464, loss: 0.3535803258419037 2023-01-24 05:14:15.947009: step: 212/464, loss: 0.09206904470920563 2023-01-24 05:14:16.645317: step: 214/464, loss: 0.011944558471441269 2023-01-24 05:14:17.239715: step: 216/464, loss: 0.008668388240039349 2023-01-24 05:14:17.877004: step: 218/464, loss: 6.387718167388812e-05 2023-01-24 05:14:18.538641: step: 220/464, loss: 0.005396423861384392 2023-01-24 05:14:19.316552: step: 222/464, loss: 0.0007495448808185756 2023-01-24 05:14:19.999665: step: 224/464, loss: 0.01975761540234089 2023-01-24 05:14:20.545061: step: 226/464, loss: 0.00019024180073756725 2023-01-24 05:14:21.198612: step: 228/464, loss: 0.0291658453643322 2023-01-24 05:14:21.903011: step: 230/464, loss: 0.0006478412542492151 2023-01-24 05:14:22.584519: step: 232/464, loss: 0.03055237978696823 2023-01-24 05:14:23.219255: step: 234/464, loss: 0.0791534036397934 2023-01-24 05:14:23.879599: step: 236/464, loss: 0.013012934476137161 2023-01-24 05:14:24.491508: step: 238/464, loss: 0.0001017287650029175 2023-01-24 05:14:25.152517: step: 240/464, loss: 0.0075666275806725025 2023-01-24 05:14:25.749438: step: 242/464, loss: 0.008738645352423191 2023-01-24 05:14:26.441517: step: 244/464, loss: 7.892473513493314e-05 2023-01-24 05:14:27.081313: step: 246/464, loss: 0.0020362145733088255 2023-01-24 05:14:27.777768: step: 248/464, loss: 0.0003480454906821251 2023-01-24 05:14:28.358348: step: 250/464, loss: 0.01788203977048397 2023-01-24 05:14:29.055523: step: 252/464, loss: 0.0037269634194672108 2023-01-24 05:14:29.593799: step: 254/464, loss: 0.00046149862464517355 2023-01-24 05:14:30.218058: step: 256/464, loss: 0.018515929579734802 2023-01-24 05:14:30.808411: step: 258/464, loss: 0.024124393239617348 2023-01-24 05:14:31.408560: step: 260/464, loss: 0.006267345976084471 2023-01-24 05:14:32.019288: step: 262/464, loss: 0.1672067642211914 2023-01-24 05:14:32.666063: step: 264/464, loss: 0.04586471989750862 2023-01-24 05:14:33.219186: step: 266/464, loss: 0.0006315509090200067 2023-01-24 05:14:33.792653: step: 268/464, loss: 0.019157512113451958 2023-01-24 05:14:34.509564: step: 270/464, loss: 0.025100653991103172 2023-01-24 05:14:35.142913: step: 272/464, loss: 0.011103455908596516 2023-01-24 05:14:35.778983: step: 274/464, loss: 0.007256446406245232 2023-01-24 05:14:36.459823: step: 276/464, loss: 0.00605237390846014 2023-01-24 05:14:37.050304: step: 278/464, loss: 0.004968108143657446 2023-01-24 05:14:37.725168: step: 280/464, loss: 0.19366006553173065 2023-01-24 05:14:38.371208: step: 282/464, loss: 0.025342747569084167 2023-01-24 05:14:39.007958: step: 284/464, loss: 0.09072583168745041 2023-01-24 05:14:39.610223: step: 286/464, loss: 0.009893305599689484 2023-01-24 05:14:40.201252: step: 288/464, loss: 9.708423749543726e-05 2023-01-24 05:14:40.807782: step: 290/464, loss: 0.004454748705029488 2023-01-24 05:14:41.405185: step: 292/464, loss: 0.0009248661808669567 2023-01-24 05:14:42.041611: step: 294/464, loss: 0.01104151550680399 2023-01-24 05:14:42.599284: step: 296/464, loss: 0.020130300894379616 2023-01-24 05:14:43.228613: step: 298/464, loss: 0.08098264038562775 2023-01-24 05:14:43.836282: step: 300/464, loss: 0.00018765582353807986 2023-01-24 05:14:44.478550: step: 302/464, loss: 0.00038301621680147946 2023-01-24 05:14:45.096255: step: 304/464, loss: 0.03141546621918678 2023-01-24 05:14:45.731597: step: 306/464, loss: 0.016768259927630424 2023-01-24 05:14:46.413300: step: 308/464, loss: 0.01261827815324068 2023-01-24 05:14:47.055466: step: 310/464, loss: 0.016717633232474327 2023-01-24 05:14:47.705137: step: 312/464, loss: 0.035504184663295746 2023-01-24 05:14:48.290099: step: 314/464, loss: 0.006222328171133995 2023-01-24 05:14:48.916204: step: 316/464, loss: 0.018137024715542793 2023-01-24 05:14:49.478424: step: 318/464, loss: 0.020232651382684708 2023-01-24 05:14:50.034905: step: 320/464, loss: 0.0003459895960986614 2023-01-24 05:14:50.619272: step: 322/464, loss: 0.0039661722257733345 2023-01-24 05:14:51.238377: step: 324/464, loss: 0.01878402568399906 2023-01-24 05:14:51.904591: step: 326/464, loss: 0.3380680978298187 2023-01-24 05:14:52.502383: step: 328/464, loss: 0.015422080643475056 2023-01-24 05:14:53.091969: step: 330/464, loss: 0.0011237740982323885 2023-01-24 05:14:53.776125: step: 332/464, loss: 0.006381748244166374 2023-01-24 05:14:54.353149: step: 334/464, loss: 0.0002370486909057945 2023-01-24 05:14:54.972118: step: 336/464, loss: 0.009715653955936432 2023-01-24 05:14:55.601213: step: 338/464, loss: 0.017769871279597282 2023-01-24 05:14:56.185408: step: 340/464, loss: 0.0009704561671242118 2023-01-24 05:14:56.761861: step: 342/464, loss: 6.408966146409512e-05 2023-01-24 05:14:57.394523: step: 344/464, loss: 0.00027266753022558987 2023-01-24 05:14:58.081787: step: 346/464, loss: 0.0036946088075637817 2023-01-24 05:14:58.701721: step: 348/464, loss: 0.02732905186712742 2023-01-24 05:14:59.344349: step: 350/464, loss: 0.004943967796862125 2023-01-24 05:15:00.014422: step: 352/464, loss: 0.341863214969635 2023-01-24 05:15:00.633978: step: 354/464, loss: 0.007155687548220158 2023-01-24 05:15:01.284934: step: 356/464, loss: 0.025890706107020378 2023-01-24 05:15:02.029882: step: 358/464, loss: 0.002095653209835291 2023-01-24 05:15:02.647374: step: 360/464, loss: 0.02985740266740322 2023-01-24 05:15:03.251439: step: 362/464, loss: 3.325389843666926e-05 2023-01-24 05:15:03.878972: step: 364/464, loss: 0.006313004996627569 2023-01-24 05:15:04.741492: step: 366/464, loss: 1.2024905681610107 2023-01-24 05:15:05.356595: step: 368/464, loss: 0.0041957120411098 2023-01-24 05:15:06.003458: step: 370/464, loss: 0.00036565324990078807 2023-01-24 05:15:06.578714: step: 372/464, loss: 8.443810656899586e-05 2023-01-24 05:15:07.217326: step: 374/464, loss: 0.016036560758948326 2023-01-24 05:15:07.894597: step: 376/464, loss: 0.003646174678578973 2023-01-24 05:15:08.525192: step: 378/464, loss: 0.006669667083770037 2023-01-24 05:15:09.092214: step: 380/464, loss: 0.005503546912223101 2023-01-24 05:15:09.697356: step: 382/464, loss: 0.006535328924655914 2023-01-24 05:15:10.374928: step: 384/464, loss: 0.027504896745085716 2023-01-24 05:15:11.005340: step: 386/464, loss: 0.001106930780224502 2023-01-24 05:15:11.673454: step: 388/464, loss: 0.02494746819138527 2023-01-24 05:15:12.295722: step: 390/464, loss: 0.00857547391206026 2023-01-24 05:15:12.926230: step: 392/464, loss: 0.0018851346103474498 2023-01-24 05:15:13.502125: step: 394/464, loss: 2.1371150069171563e-05 2023-01-24 05:15:14.088099: step: 396/464, loss: 0.2606925964355469 2023-01-24 05:15:14.757905: step: 398/464, loss: 0.03222309798002243 2023-01-24 05:15:15.345445: step: 400/464, loss: 0.00016328068159054965 2023-01-24 05:15:15.958840: step: 402/464, loss: 0.01708224229514599 2023-01-24 05:15:16.571260: step: 404/464, loss: 0.010649572126567364 2023-01-24 05:15:17.191643: step: 406/464, loss: 0.011210165917873383 2023-01-24 05:15:17.845118: step: 408/464, loss: 0.02789386548101902 2023-01-24 05:15:18.393432: step: 410/464, loss: 0.001550469663925469 2023-01-24 05:15:19.027434: step: 412/464, loss: 0.002343697240576148 2023-01-24 05:15:19.635944: step: 414/464, loss: 0.0039685931988060474 2023-01-24 05:15:20.227350: step: 416/464, loss: 0.002458785893395543 2023-01-24 05:15:20.874603: step: 418/464, loss: 0.004095163196325302 2023-01-24 05:15:21.470360: step: 420/464, loss: 0.00020929214952047914 2023-01-24 05:15:22.135907: step: 422/464, loss: 0.11573031544685364 2023-01-24 05:15:22.762707: step: 424/464, loss: 0.015518685802817345 2023-01-24 05:15:23.381806: step: 426/464, loss: 0.010964400134980679 2023-01-24 05:15:23.920345: step: 428/464, loss: 5.291239722282626e-05 2023-01-24 05:15:24.545692: step: 430/464, loss: 0.033461663872003555 2023-01-24 05:15:25.192501: step: 432/464, loss: 0.05376812815666199 2023-01-24 05:15:25.835180: step: 434/464, loss: 0.0001575053174747154 2023-01-24 05:15:26.425686: step: 436/464, loss: 0.015117555856704712 2023-01-24 05:15:27.042929: step: 438/464, loss: 0.01067406591027975 2023-01-24 05:15:27.655977: step: 440/464, loss: 0.006663429085165262 2023-01-24 05:15:28.318383: step: 442/464, loss: 0.00010586978169158101 2023-01-24 05:15:28.939768: step: 444/464, loss: 0.016213309019804 2023-01-24 05:15:29.572680: step: 446/464, loss: 0.02754809707403183 2023-01-24 05:15:30.113225: step: 448/464, loss: 0.0036638882011175156 2023-01-24 05:15:30.742242: step: 450/464, loss: 0.013981817290186882 2023-01-24 05:15:31.425311: step: 452/464, loss: 0.0026171025820076466 2023-01-24 05:15:32.035168: step: 454/464, loss: 0.051369551569223404 2023-01-24 05:15:32.613312: step: 456/464, loss: 0.0019740727730095387 2023-01-24 05:15:33.270354: step: 458/464, loss: 0.4339074492454529 2023-01-24 05:15:33.894699: step: 460/464, loss: 0.001750551862642169 2023-01-24 05:15:34.530376: step: 462/464, loss: 0.0016651484183967113 2023-01-24 05:15:35.189612: step: 464/464, loss: 0.0006342732231132686 2023-01-24 05:15:35.848980: step: 466/464, loss: 3.723198413848877 2023-01-24 05:15:36.440475: step: 468/464, loss: 0.018522344529628754 2023-01-24 05:15:37.016822: step: 470/464, loss: 0.03673872724175453 2023-01-24 05:15:37.649271: step: 472/464, loss: 0.0006409911438822746 2023-01-24 05:15:38.263426: step: 474/464, loss: 0.00557336863130331 2023-01-24 05:15:38.942324: step: 476/464, loss: 0.010654748417437077 2023-01-24 05:15:39.555515: step: 478/464, loss: 0.02702816016972065 2023-01-24 05:15:40.181770: step: 480/464, loss: 0.0016854925779625773 2023-01-24 05:15:40.765024: step: 482/464, loss: 0.014287048950791359 2023-01-24 05:15:41.387351: step: 484/464, loss: 0.003183132503181696 2023-01-24 05:15:42.069379: step: 486/464, loss: 0.019391268491744995 2023-01-24 05:15:42.666527: step: 488/464, loss: 0.0006723207188770175 2023-01-24 05:15:43.331244: step: 490/464, loss: 0.0011718154419213533 2023-01-24 05:15:43.917678: step: 492/464, loss: 0.014938319101929665 2023-01-24 05:15:44.586894: step: 494/464, loss: 0.09425222128629684 2023-01-24 05:15:45.214824: step: 496/464, loss: 0.03835434466600418 2023-01-24 05:15:45.890164: step: 498/464, loss: 0.09424697607755661 2023-01-24 05:15:46.589600: step: 500/464, loss: 0.0001944851828739047 2023-01-24 05:15:47.156299: step: 502/464, loss: 0.12420111894607544 2023-01-24 05:15:47.741438: step: 504/464, loss: 0.0020259881857782602 2023-01-24 05:15:48.321249: step: 506/464, loss: 0.08971814811229706 2023-01-24 05:15:48.919946: step: 508/464, loss: 0.015696369111537933 2023-01-24 05:15:49.513191: step: 510/464, loss: 0.02257942035794258 2023-01-24 05:15:50.111128: step: 512/464, loss: 0.004465331323444843 2023-01-24 05:15:50.731642: step: 514/464, loss: 0.0001645921729505062 2023-01-24 05:15:51.311515: step: 516/464, loss: 0.00840042158961296 2023-01-24 05:15:52.019423: step: 518/464, loss: 0.0004073931195307523 2023-01-24 05:15:52.688883: step: 520/464, loss: 0.0217901561409235 2023-01-24 05:15:53.333512: step: 522/464, loss: 0.12779176235198975 2023-01-24 05:15:53.997611: step: 524/464, loss: 0.001443426706828177 2023-01-24 05:15:54.686998: step: 526/464, loss: 0.020604323595762253 2023-01-24 05:15:55.328351: step: 528/464, loss: 0.006679283920675516 2023-01-24 05:15:55.974087: step: 530/464, loss: 1.2766868167091161e-05 2023-01-24 05:15:56.710633: step: 532/464, loss: 0.001376515720039606 2023-01-24 05:15:57.337883: step: 534/464, loss: 0.050487276166677475 2023-01-24 05:15:57.918099: step: 536/464, loss: 0.0036363143008202314 2023-01-24 05:15:58.526783: step: 538/464, loss: 0.010122239589691162 2023-01-24 05:15:59.198634: step: 540/464, loss: 0.0013998758513480425 2023-01-24 05:15:59.907635: step: 542/464, loss: 0.0008087092428468168 2023-01-24 05:16:00.484409: step: 544/464, loss: 0.004818467888981104 2023-01-24 05:16:01.111846: step: 546/464, loss: 0.015545015223324299 2023-01-24 05:16:01.790711: step: 548/464, loss: 0.014555670320987701 2023-01-24 05:16:02.409742: step: 550/464, loss: 0.003583703190088272 2023-01-24 05:16:03.064268: step: 552/464, loss: 0.005766857415437698 2023-01-24 05:16:03.658780: step: 554/464, loss: 0.02280147559940815 2023-01-24 05:16:04.261609: step: 556/464, loss: 0.0035246696788817644 2023-01-24 05:16:04.931509: step: 558/464, loss: 0.02302168682217598 2023-01-24 05:16:05.528706: step: 560/464, loss: 0.004515378270298243 2023-01-24 05:16:06.101660: step: 562/464, loss: 0.008165445178747177 2023-01-24 05:16:06.705209: step: 564/464, loss: 0.00960410013794899 2023-01-24 05:16:07.293092: step: 566/464, loss: 0.00022214172349777073 2023-01-24 05:16:07.902519: step: 568/464, loss: 0.012886843644082546 2023-01-24 05:16:08.575255: step: 570/464, loss: 0.0118499044328928 2023-01-24 05:16:09.176928: step: 572/464, loss: 0.014691988937556744 2023-01-24 05:16:09.797592: step: 574/464, loss: 0.009075475856661797 2023-01-24 05:16:10.484958: step: 576/464, loss: 0.062365129590034485 2023-01-24 05:16:11.097856: step: 578/464, loss: 0.010842734016478062 2023-01-24 05:16:11.830013: step: 580/464, loss: 0.007636451628059149 2023-01-24 05:16:12.492939: step: 582/464, loss: 0.033298321068286896 2023-01-24 05:16:13.117967: step: 584/464, loss: 0.02356777898967266 2023-01-24 05:16:13.732574: step: 586/464, loss: 0.0002865640271920711 2023-01-24 05:16:14.339195: step: 588/464, loss: 0.13140997290611267 2023-01-24 05:16:14.938985: step: 590/464, loss: 0.003056851914152503 2023-01-24 05:16:15.572635: step: 592/464, loss: 0.0005417139618657529 2023-01-24 05:16:16.227793: step: 594/464, loss: 0.002889784285798669 2023-01-24 05:16:16.871609: step: 596/464, loss: 0.012304414063692093 2023-01-24 05:16:17.496972: step: 598/464, loss: 0.0003184415982104838 2023-01-24 05:16:18.127292: step: 600/464, loss: 0.005693783052265644 2023-01-24 05:16:18.786804: step: 602/464, loss: 0.0038007793482393026 2023-01-24 05:16:19.388001: step: 604/464, loss: 0.04014163836836815 2023-01-24 05:16:19.963288: step: 606/464, loss: 0.0021637456957250834 2023-01-24 05:16:20.588872: step: 608/464, loss: 0.049259163439273834 2023-01-24 05:16:21.217769: step: 610/464, loss: 0.0007301790756173432 2023-01-24 05:16:21.802531: step: 612/464, loss: 0.0011699952883645892 2023-01-24 05:16:22.395005: step: 614/464, loss: 0.011053789407014847 2023-01-24 05:16:23.018900: step: 616/464, loss: 0.010029182769358158 2023-01-24 05:16:23.586360: step: 618/464, loss: 0.0029339087195694447 2023-01-24 05:16:24.206787: step: 620/464, loss: 0.03637511283159256 2023-01-24 05:16:24.817641: step: 622/464, loss: 0.00411232328042388 2023-01-24 05:16:25.429623: step: 624/464, loss: 0.001893992186523974 2023-01-24 05:16:26.033648: step: 626/464, loss: 0.005734010133892298 2023-01-24 05:16:26.648161: step: 628/464, loss: 0.0007987542194314301 2023-01-24 05:16:27.302593: step: 630/464, loss: 0.0362277552485466 2023-01-24 05:16:27.880792: step: 632/464, loss: 0.0009814549703150988 2023-01-24 05:16:28.488390: step: 634/464, loss: 0.01007351465523243 2023-01-24 05:16:29.044448: step: 636/464, loss: 0.00042207157821394503 2023-01-24 05:16:29.637199: step: 638/464, loss: 0.028407089412212372 2023-01-24 05:16:30.317686: step: 640/464, loss: 0.005203355569392443 2023-01-24 05:16:31.073488: step: 642/464, loss: 0.0018461854197084904 2023-01-24 05:16:31.730635: step: 644/464, loss: 0.004764284007251263 2023-01-24 05:16:32.412581: step: 646/464, loss: 0.039781831204891205 2023-01-24 05:16:32.987714: step: 648/464, loss: 0.2181847244501114 2023-01-24 05:16:33.594939: step: 650/464, loss: 0.044329460710287094 2023-01-24 05:16:34.178871: step: 652/464, loss: 0.018158644437789917 2023-01-24 05:16:34.791355: step: 654/464, loss: 0.3767031729221344 2023-01-24 05:16:35.440675: step: 656/464, loss: 0.0006728554726578295 2023-01-24 05:16:36.051325: step: 658/464, loss: 0.00984268169850111 2023-01-24 05:16:36.635792: step: 660/464, loss: 0.004165737424045801 2023-01-24 05:16:37.300955: step: 662/464, loss: 0.005901847034692764 2023-01-24 05:16:37.905778: step: 664/464, loss: 0.005652338732033968 2023-01-24 05:16:38.500916: step: 666/464, loss: 0.01282537542283535 2023-01-24 05:16:39.166956: step: 668/464, loss: 0.024484090507030487 2023-01-24 05:16:39.764618: step: 670/464, loss: 0.06346960365772247 2023-01-24 05:16:40.356611: step: 672/464, loss: 0.0004375589778646827 2023-01-24 05:16:40.993842: step: 674/464, loss: 0.010980060324072838 2023-01-24 05:16:41.601402: step: 676/464, loss: 0.0042953877709805965 2023-01-24 05:16:42.209553: step: 678/464, loss: 0.40047112107276917 2023-01-24 05:16:42.825475: step: 680/464, loss: 0.013633492402732372 2023-01-24 05:16:43.465859: step: 682/464, loss: 0.0179133340716362 2023-01-24 05:16:44.107104: step: 684/464, loss: 0.0058382549323141575 2023-01-24 05:16:44.733698: step: 686/464, loss: 0.012440420687198639 2023-01-24 05:16:45.336308: step: 688/464, loss: 0.002699640579521656 2023-01-24 05:16:46.099489: step: 690/464, loss: 0.07426819205284119 2023-01-24 05:16:46.785009: step: 692/464, loss: 2.1392199993133545 2023-01-24 05:16:47.395919: step: 694/464, loss: 0.0050012702122330666 2023-01-24 05:16:48.107960: step: 696/464, loss: 0.020631911233067513 2023-01-24 05:16:48.711754: step: 698/464, loss: 0.0006646870751865208 2023-01-24 05:16:49.382202: step: 700/464, loss: 0.0987074002623558 2023-01-24 05:16:49.993616: step: 702/464, loss: 0.008953984826803207 2023-01-24 05:16:50.595478: step: 704/464, loss: 0.007717052940279245 2023-01-24 05:16:51.338322: step: 706/464, loss: 0.007053047884255648 2023-01-24 05:16:51.962011: step: 708/464, loss: 0.004249035846441984 2023-01-24 05:16:52.592907: step: 710/464, loss: 0.002700645476579666 2023-01-24 05:16:53.202769: step: 712/464, loss: 0.11764708906412125 2023-01-24 05:16:53.820087: step: 714/464, loss: 0.002235703868791461 2023-01-24 05:16:54.458524: step: 716/464, loss: 0.0013340015430003405 2023-01-24 05:16:55.090916: step: 718/464, loss: 0.031899593770504 2023-01-24 05:16:55.711132: step: 720/464, loss: 0.021519597619771957 2023-01-24 05:16:56.346576: step: 722/464, loss: 0.05550776794552803 2023-01-24 05:16:56.979940: step: 724/464, loss: 0.014864136464893818 2023-01-24 05:16:57.598321: step: 726/464, loss: 0.015005495399236679 2023-01-24 05:16:58.218978: step: 728/464, loss: 0.043122515082359314 2023-01-24 05:16:58.847829: step: 730/464, loss: 0.0057805743999779224 2023-01-24 05:16:59.494658: step: 732/464, loss: 0.013059835880994797 2023-01-24 05:17:00.051267: step: 734/464, loss: 0.22827355563640594 2023-01-24 05:17:00.653459: step: 736/464, loss: 0.001594355795532465 2023-01-24 05:17:01.231565: step: 738/464, loss: 0.002286111004650593 2023-01-24 05:17:01.926766: step: 740/464, loss: 0.040832217782735825 2023-01-24 05:17:02.533773: step: 742/464, loss: 0.014757783152163029 2023-01-24 05:17:03.208489: step: 744/464, loss: 0.00119683553930372 2023-01-24 05:17:03.764221: step: 746/464, loss: 0.009748230688273907 2023-01-24 05:17:04.388359: step: 748/464, loss: 0.009451358579099178 2023-01-24 05:17:05.018397: step: 750/464, loss: 0.002357813995331526 2023-01-24 05:17:05.593526: step: 752/464, loss: 0.00039199861930683255 2023-01-24 05:17:06.266436: step: 754/464, loss: 0.0008302823989652097 2023-01-24 05:17:06.923879: step: 756/464, loss: 0.04160595312714577 2023-01-24 05:17:07.520257: step: 758/464, loss: 0.0005140166613273323 2023-01-24 05:17:08.220670: step: 760/464, loss: 0.09402786940336227 2023-01-24 05:17:08.858458: step: 762/464, loss: 0.015791242942214012 2023-01-24 05:17:09.457358: step: 764/464, loss: 0.008117442019283772 2023-01-24 05:17:10.093461: step: 766/464, loss: 0.020854402333498 2023-01-24 05:17:10.730254: step: 768/464, loss: 0.003169047413393855 2023-01-24 05:17:11.341932: step: 770/464, loss: 0.009248015470802784 2023-01-24 05:17:11.965573: step: 772/464, loss: 0.041055675595998764 2023-01-24 05:17:12.570372: step: 774/464, loss: 0.0022171782329678535 2023-01-24 05:17:13.203510: step: 776/464, loss: 0.022929474711418152 2023-01-24 05:17:13.819778: step: 778/464, loss: 0.011255311779677868 2023-01-24 05:17:14.422779: step: 780/464, loss: 0.01118182111531496 2023-01-24 05:17:14.956846: step: 782/464, loss: 4.8654284910298884e-05 2023-01-24 05:17:15.561875: step: 784/464, loss: 0.004450024571269751 2023-01-24 05:17:16.137183: step: 786/464, loss: 0.01964416168630123 2023-01-24 05:17:16.734715: step: 788/464, loss: 0.006778170820325613 2023-01-24 05:17:17.273719: step: 790/464, loss: 0.004299049731343985 2023-01-24 05:17:17.910393: step: 792/464, loss: 0.0021851633209735155 2023-01-24 05:17:18.554436: step: 794/464, loss: 0.020739721134305 2023-01-24 05:17:19.149314: step: 796/464, loss: 0.008872399106621742 2023-01-24 05:17:19.784898: step: 798/464, loss: 0.00020837679039686918 2023-01-24 05:17:20.356259: step: 800/464, loss: 1.2180484533309937 2023-01-24 05:17:21.023779: step: 802/464, loss: 0.0006499432493001223 2023-01-24 05:17:21.571421: step: 804/464, loss: 0.014890742488205433 2023-01-24 05:17:22.198087: step: 806/464, loss: 0.00425117090344429 2023-01-24 05:17:22.813634: step: 808/464, loss: 0.1405792236328125 2023-01-24 05:17:23.445357: step: 810/464, loss: 0.004117715172469616 2023-01-24 05:17:24.041193: step: 812/464, loss: 0.001980512635782361 2023-01-24 05:17:24.666062: step: 814/464, loss: 0.013748962432146072 2023-01-24 05:17:25.253487: step: 816/464, loss: 0.0009049120708368719 2023-01-24 05:17:25.956323: step: 818/464, loss: 0.05674424394965172 2023-01-24 05:17:26.601756: step: 820/464, loss: 0.08134204149246216 2023-01-24 05:17:27.143234: step: 822/464, loss: 0.13224709033966064 2023-01-24 05:17:27.737903: step: 824/464, loss: 0.030569393187761307 2023-01-24 05:17:28.364370: step: 826/464, loss: 0.00033594819251447916 2023-01-24 05:17:28.967651: step: 828/464, loss: 0.024964628741145134 2023-01-24 05:17:29.589253: step: 830/464, loss: 0.0020226610358804464 2023-01-24 05:17:30.216460: step: 832/464, loss: 0.00052282476099208 2023-01-24 05:17:30.853402: step: 834/464, loss: 0.0018711028387770057 2023-01-24 05:17:31.518885: step: 836/464, loss: 0.004274678882211447 2023-01-24 05:17:32.169610: step: 838/464, loss: 0.0010742597514763474 2023-01-24 05:17:32.781980: step: 840/464, loss: 0.008641785010695457 2023-01-24 05:17:33.371995: step: 842/464, loss: 0.0010377444559708238 2023-01-24 05:17:34.048735: step: 844/464, loss: 0.0007217152742668986 2023-01-24 05:17:34.721606: step: 846/464, loss: 0.00023196318943519145 2023-01-24 05:17:35.335813: step: 848/464, loss: 0.012066647410392761 2023-01-24 05:17:35.926451: step: 850/464, loss: 0.0038928319700062275 2023-01-24 05:17:36.595206: step: 852/464, loss: 0.005979029927402735 2023-01-24 05:17:37.216368: step: 854/464, loss: 0.02144569158554077 2023-01-24 05:17:37.881907: step: 856/464, loss: 0.02421189285814762 2023-01-24 05:17:38.499102: step: 858/464, loss: 0.07788847386837006 2023-01-24 05:17:39.131726: step: 860/464, loss: 0.059540193527936935 2023-01-24 05:17:39.723313: step: 862/464, loss: 0.0012361510889604688 2023-01-24 05:17:40.370643: step: 864/464, loss: 0.020145397633314133 2023-01-24 05:17:41.084535: step: 866/464, loss: 0.007799180690199137 2023-01-24 05:17:41.694216: step: 868/464, loss: 0.0013386164791882038 2023-01-24 05:17:42.308172: step: 870/464, loss: 0.014053912833333015 2023-01-24 05:17:42.879110: step: 872/464, loss: 0.01760275289416313 2023-01-24 05:17:43.530845: step: 874/464, loss: 0.0513724610209465 2023-01-24 05:17:44.101748: step: 876/464, loss: 0.0005567611078731716 2023-01-24 05:17:44.689185: step: 878/464, loss: 0.01748538762331009 2023-01-24 05:17:45.292036: step: 880/464, loss: 0.047362834215164185 2023-01-24 05:17:45.849404: step: 882/464, loss: 0.00245377654209733 2023-01-24 05:17:46.415214: step: 884/464, loss: 0.005447067320346832 2023-01-24 05:17:46.984136: step: 886/464, loss: 0.0004946636036038399 2023-01-24 05:17:47.638465: step: 888/464, loss: 0.0263107530772686 2023-01-24 05:17:48.247751: step: 890/464, loss: 0.007242008112370968 2023-01-24 05:17:48.892072: step: 892/464, loss: 0.02358938939869404 2023-01-24 05:17:49.558831: step: 894/464, loss: 0.019537916406989098 2023-01-24 05:17:50.170389: step: 896/464, loss: 0.0031970730051398277 2023-01-24 05:17:50.789295: step: 898/464, loss: 0.014965091831982136 2023-01-24 05:17:51.508259: step: 900/464, loss: 0.008327051997184753 2023-01-24 05:17:52.174354: step: 902/464, loss: 0.000980615266598761 2023-01-24 05:17:52.816384: step: 904/464, loss: 0.0018039607675746083 2023-01-24 05:17:53.477900: step: 906/464, loss: 0.020369213074445724 2023-01-24 05:17:54.107469: step: 908/464, loss: 0.0035111133474856615 2023-01-24 05:17:54.825445: step: 910/464, loss: 0.14214608073234558 2023-01-24 05:17:55.449115: step: 912/464, loss: 0.0030803342815488577 2023-01-24 05:17:56.042868: step: 914/464, loss: 0.011297043412923813 2023-01-24 05:17:56.669441: step: 916/464, loss: 0.005871086847037077 2023-01-24 05:17:57.339663: step: 918/464, loss: 0.0007762617897242308 2023-01-24 05:17:58.014221: step: 920/464, loss: 0.0025786582846194506 2023-01-24 05:17:58.593441: step: 922/464, loss: 0.038762062788009644 2023-01-24 05:17:59.159553: step: 924/464, loss: 0.004394220653921366 2023-01-24 05:17:59.793120: step: 926/464, loss: 0.02181841805577278 2023-01-24 05:18:00.408665: step: 928/464, loss: 0.005161995533853769 2023-01-24 05:18:00.910887: step: 930/464, loss: 0.0001706589391687885 ================================================== Loss: 0.043 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3241981132075472, 'r': 0.3260436432637571, 'f1': 0.3251182592242195}, 'combined': 0.23956082258626699, 'epoch': 36} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3294396274423794, 'r': 0.31308876358034954, 'f1': 0.321056149646956}, 'combined': 0.20960142412184693, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3196470517606231, 'r': 0.33177786966425205, 'f1': 0.3255995108250667}, 'combined': 0.2399154290289965, 'epoch': 36} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33602413480144155, 'r': 0.31347839781568304, 'f1': 0.3243599589381485}, 'combined': 0.2117583151616928, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3365039473423968, 'r': 0.32884161837065345, 'f1': 0.3326286619603347}, 'combined': 0.2450948035497203, 'epoch': 36} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3403638639584428, 'r': 0.3037285630539922, 'f1': 0.32100432478336366}, 'combined': 0.20956759027307678, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28518518518518515, 'r': 0.36666666666666664, 'f1': 0.3208333333333333}, 'combined': 0.21388888888888885, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.40217391304347827, 'f1': 0.30833333333333335}, 'combined': 0.15416666666666667, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3348209212662338, 'r': 0.3557869372089012, 'f1': 0.3449856778456135}, 'combined': 0.2541999731493994, 'epoch': 34} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3240350842156237, 'r': 0.3094415923713539, 'f1': 0.31657024212508983}, 'combined': 0.2066728005583488, 'epoch': 34} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33088235294117646, 'r': 0.4891304347826087, 'f1': 0.39473684210526316}, 'combined': 0.19736842105263158, 'epoch': 34} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:20:39.982259: step: 2/464, loss: 0.09267619997262955 2023-01-24 05:20:40.609862: step: 4/464, loss: 0.026597000658512115 2023-01-24 05:20:41.307746: step: 6/464, loss: 0.005423111375421286 2023-01-24 05:20:41.924226: step: 8/464, loss: 0.0020108595490455627 2023-01-24 05:20:42.464089: step: 10/464, loss: 0.10110600292682648 2023-01-24 05:20:43.054247: step: 12/464, loss: 0.013702924363315105 2023-01-24 05:20:43.676938: step: 14/464, loss: 0.03764618560671806 2023-01-24 05:20:44.295573: step: 16/464, loss: 0.01584353670477867 2023-01-24 05:20:44.919459: step: 18/464, loss: 0.0006389468908309937 2023-01-24 05:20:45.571460: step: 20/464, loss: 0.00021083604951854795 2023-01-24 05:20:46.164378: step: 22/464, loss: 0.003850563894957304 2023-01-24 05:20:46.806915: step: 24/464, loss: 0.015138974413275719 2023-01-24 05:20:47.407226: step: 26/464, loss: 0.003434586338698864 2023-01-24 05:20:48.103853: step: 28/464, loss: 0.2523220479488373 2023-01-24 05:20:48.751131: step: 30/464, loss: 0.0033589720260351896 2023-01-24 05:20:49.419921: step: 32/464, loss: 0.022264551371335983 2023-01-24 05:20:50.009898: step: 34/464, loss: 0.009132283739745617 2023-01-24 05:20:50.676679: step: 36/464, loss: 0.0029106466099619865 2023-01-24 05:20:51.320633: step: 38/464, loss: 0.0030777426436543465 2023-01-24 05:20:51.895906: step: 40/464, loss: 0.0011019902303814888 2023-01-24 05:20:52.595605: step: 42/464, loss: 0.004676553886383772 2023-01-24 05:20:53.162546: step: 44/464, loss: 0.0007982408860698342 2023-01-24 05:20:53.819333: step: 46/464, loss: 0.011835400015115738 2023-01-24 05:20:54.432889: step: 48/464, loss: 0.06304922699928284 2023-01-24 05:20:55.016102: step: 50/464, loss: 5.2777814865112305 2023-01-24 05:20:55.689346: step: 52/464, loss: 0.003288473468273878 2023-01-24 05:20:56.388062: step: 54/464, loss: 0.0034744839649647474 2023-01-24 05:20:57.010576: step: 56/464, loss: 0.014087451621890068 2023-01-24 05:20:57.603222: step: 58/464, loss: 0.06869802623987198 2023-01-24 05:20:58.242957: step: 60/464, loss: 0.09902717173099518 2023-01-24 05:20:58.936239: step: 62/464, loss: 0.2621070444583893 2023-01-24 05:20:59.540622: step: 64/464, loss: 0.006215594243258238 2023-01-24 05:21:00.168721: step: 66/464, loss: 0.0017401105724275112 2023-01-24 05:21:00.806721: step: 68/464, loss: 0.0027333374600857496 2023-01-24 05:21:01.353058: step: 70/464, loss: 0.006508005317300558 2023-01-24 05:21:01.926074: step: 72/464, loss: 0.0001003606230369769 2023-01-24 05:21:02.552774: step: 74/464, loss: 0.015184964053332806 2023-01-24 05:21:03.168292: step: 76/464, loss: 0.006664635613560677 2023-01-24 05:21:03.810349: step: 78/464, loss: 0.013055585324764252 2023-01-24 05:21:04.435784: step: 80/464, loss: 0.010733279399573803 2023-01-24 05:21:04.993876: step: 82/464, loss: 0.00017453398322686553 2023-01-24 05:21:05.591413: step: 84/464, loss: 0.012425980530679226 2023-01-24 05:21:06.182327: step: 86/464, loss: 0.008501471020281315 2023-01-24 05:21:06.845511: step: 88/464, loss: 0.0169044379144907 2023-01-24 05:21:07.508947: step: 90/464, loss: 0.002584279514849186 2023-01-24 05:21:08.074042: step: 92/464, loss: 0.006378215737640858 2023-01-24 05:21:08.769896: step: 94/464, loss: 0.0023710145615041256 2023-01-24 05:21:09.441307: step: 96/464, loss: 0.020976275205612183 2023-01-24 05:21:10.097820: step: 98/464, loss: 0.01542168203741312 2023-01-24 05:21:10.691919: step: 100/464, loss: 0.02872079610824585 2023-01-24 05:21:11.364103: step: 102/464, loss: 0.0529462993144989 2023-01-24 05:21:11.955963: step: 104/464, loss: 0.0027728017885237932 2023-01-24 05:21:12.564712: step: 106/464, loss: 0.04583406448364258 2023-01-24 05:21:13.240139: step: 108/464, loss: 0.04017186909914017 2023-01-24 05:21:13.900989: step: 110/464, loss: 0.004438281990587711 2023-01-24 05:21:14.512075: step: 112/464, loss: 0.5971619486808777 2023-01-24 05:21:15.099542: step: 114/464, loss: 0.018442340195178986 2023-01-24 05:21:15.735973: step: 116/464, loss: 0.002727809129282832 2023-01-24 05:21:16.381405: step: 118/464, loss: 0.013315348885953426 2023-01-24 05:21:16.981630: step: 120/464, loss: 7.021363671810832e-06 2023-01-24 05:21:17.551403: step: 122/464, loss: 0.007461446337401867 2023-01-24 05:21:18.192170: step: 124/464, loss: 0.004144869279116392 2023-01-24 05:21:18.771060: step: 126/464, loss: 0.0007806203211657703 2023-01-24 05:21:19.427712: step: 128/464, loss: 0.008138487115502357 2023-01-24 05:21:20.002114: step: 130/464, loss: 0.026130348443984985 2023-01-24 05:21:20.701319: step: 132/464, loss: 0.013566428795456886 2023-01-24 05:21:21.301004: step: 134/464, loss: 0.01589006558060646 2023-01-24 05:21:21.940670: step: 136/464, loss: 0.02959158644080162 2023-01-24 05:21:22.573626: step: 138/464, loss: 0.0009720510570332408 2023-01-24 05:21:23.145263: step: 140/464, loss: 0.031041495501995087 2023-01-24 05:21:23.787321: step: 142/464, loss: 0.12288791686296463 2023-01-24 05:21:24.492275: step: 144/464, loss: 0.020230216905474663 2023-01-24 05:21:25.106077: step: 146/464, loss: 0.013478816486895084 2023-01-24 05:21:25.694381: step: 148/464, loss: 0.004989316686987877 2023-01-24 05:21:26.286897: step: 150/464, loss: 0.007645792793482542 2023-01-24 05:21:26.850554: step: 152/464, loss: 0.0022924228105694056 2023-01-24 05:21:27.482348: step: 154/464, loss: 0.0008439055527560413 2023-01-24 05:21:28.201766: step: 156/464, loss: 0.0036724477540701628 2023-01-24 05:21:28.914237: step: 158/464, loss: 0.03467431664466858 2023-01-24 05:21:29.522378: step: 160/464, loss: 0.002105517778545618 2023-01-24 05:21:30.069221: step: 162/464, loss: 0.0003491532406769693 2023-01-24 05:21:30.708902: step: 164/464, loss: 0.0011833166936412454 2023-01-24 05:21:31.249379: step: 166/464, loss: 0.004322324879467487 2023-01-24 05:21:31.865086: step: 168/464, loss: 0.0022278232499957085 2023-01-24 05:21:32.532655: step: 170/464, loss: 0.02636023610830307 2023-01-24 05:21:33.122351: step: 172/464, loss: 0.0285699050873518 2023-01-24 05:21:33.747776: step: 174/464, loss: 0.0008239694871008396 2023-01-24 05:21:34.355005: step: 176/464, loss: 0.00035955157363787293 2023-01-24 05:21:34.989777: step: 178/464, loss: 0.05594809353351593 2023-01-24 05:21:35.622660: step: 180/464, loss: 0.01072358526289463 2023-01-24 05:21:36.219777: step: 182/464, loss: 0.0035648152697831392 2023-01-24 05:21:36.829482: step: 184/464, loss: 0.015191210433840752 2023-01-24 05:21:37.459317: step: 186/464, loss: 0.0001803378399927169 2023-01-24 05:21:38.071122: step: 188/464, loss: 0.0023557611275464296 2023-01-24 05:21:38.693094: step: 190/464, loss: 0.009699953719973564 2023-01-24 05:21:39.231112: step: 192/464, loss: 0.0130624333396554 2023-01-24 05:21:39.801993: step: 194/464, loss: 0.010105582885444164 2023-01-24 05:21:40.405145: step: 196/464, loss: 0.00039616189314983785 2023-01-24 05:21:41.015274: step: 198/464, loss: 0.008077583275735378 2023-01-24 05:21:41.657689: step: 200/464, loss: 0.014863918535411358 2023-01-24 05:21:42.263012: step: 202/464, loss: 0.025830352678894997 2023-01-24 05:21:42.904831: step: 204/464, loss: 0.008622733876109123 2023-01-24 05:21:43.546060: step: 206/464, loss: 0.006310919299721718 2023-01-24 05:21:44.175874: step: 208/464, loss: 0.004576168488711119 2023-01-24 05:21:44.840208: step: 210/464, loss: 0.004799055866897106 2023-01-24 05:21:45.479385: step: 212/464, loss: 0.010103247128427029 2023-01-24 05:21:46.177167: step: 214/464, loss: 0.028001364320516586 2023-01-24 05:21:46.841940: step: 216/464, loss: 0.02767540141940117 2023-01-24 05:21:47.416736: step: 218/464, loss: 0.03786350414156914 2023-01-24 05:21:48.008365: step: 220/464, loss: 0.0211105365306139 2023-01-24 05:21:48.647354: step: 222/464, loss: 0.024852849543094635 2023-01-24 05:21:49.273360: step: 224/464, loss: 0.0018844869919121265 2023-01-24 05:21:49.941154: step: 226/464, loss: 0.0008984083542600274 2023-01-24 05:21:50.533566: step: 228/464, loss: 0.003061442170292139 2023-01-24 05:21:51.112694: step: 230/464, loss: 0.005059152841567993 2023-01-24 05:21:51.771034: step: 232/464, loss: 0.010660984553396702 2023-01-24 05:21:52.435016: step: 234/464, loss: 0.04760711267590523 2023-01-24 05:21:53.156391: step: 236/464, loss: 0.0017878114013001323 2023-01-24 05:21:53.841690: step: 238/464, loss: 0.000544917129445821 2023-01-24 05:21:54.419772: step: 240/464, loss: 0.002339319558814168 2023-01-24 05:21:55.129123: step: 242/464, loss: 0.0008954803925007582 2023-01-24 05:21:55.703673: step: 244/464, loss: 0.01935073919594288 2023-01-24 05:21:56.267166: step: 246/464, loss: 0.0002384855761192739 2023-01-24 05:21:56.852568: step: 248/464, loss: 0.009137220680713654 2023-01-24 05:21:57.452692: step: 250/464, loss: 0.00015304001863114536 2023-01-24 05:21:58.110190: step: 252/464, loss: 0.12504711747169495 2023-01-24 05:21:58.712223: step: 254/464, loss: 0.002635303186252713 2023-01-24 05:21:59.281514: step: 256/464, loss: 0.020652201026678085 2023-01-24 05:21:59.899936: step: 258/464, loss: 0.0019404953345656395 2023-01-24 05:22:00.531297: step: 260/464, loss: 0.006000785622745752 2023-01-24 05:22:01.207058: step: 262/464, loss: 0.028538094833493233 2023-01-24 05:22:01.815943: step: 264/464, loss: 0.01821240969002247 2023-01-24 05:22:02.422794: step: 266/464, loss: 0.008518668822944164 2023-01-24 05:22:03.034615: step: 268/464, loss: 0.4000067114830017 2023-01-24 05:22:03.655950: step: 270/464, loss: 0.0006602701032534242 2023-01-24 05:22:04.439476: step: 272/464, loss: 0.03606516122817993 2023-01-24 05:22:05.050865: step: 274/464, loss: 0.00956253707408905 2023-01-24 05:22:05.622215: step: 276/464, loss: 0.0006510717212222517 2023-01-24 05:22:06.230098: step: 278/464, loss: 0.0020940338727086782 2023-01-24 05:22:06.815055: step: 280/464, loss: 0.005512750707566738 2023-01-24 05:22:07.360380: step: 282/464, loss: 0.00011423335672589019 2023-01-24 05:22:07.938278: step: 284/464, loss: 0.00028186841518618166 2023-01-24 05:22:08.498682: step: 286/464, loss: 0.0005944963777437806 2023-01-24 05:22:09.062450: step: 288/464, loss: 0.003952810075134039 2023-01-24 05:22:09.727976: step: 290/464, loss: 0.009363141842186451 2023-01-24 05:22:10.401291: step: 292/464, loss: 0.014664217829704285 2023-01-24 05:22:10.947384: step: 294/464, loss: 0.0002833757607731968 2023-01-24 05:22:11.570703: step: 296/464, loss: 0.021436164155602455 2023-01-24 05:22:12.233510: step: 298/464, loss: 0.004286719486117363 2023-01-24 05:22:12.929827: step: 300/464, loss: 0.19252799451351166 2023-01-24 05:22:13.533621: step: 302/464, loss: 0.005125043913722038 2023-01-24 05:22:14.146285: step: 304/464, loss: 0.00039617405855096877 2023-01-24 05:22:14.760742: step: 306/464, loss: 0.0016010634135454893 2023-01-24 05:22:15.366342: step: 308/464, loss: 0.012710998766124249 2023-01-24 05:22:16.006911: step: 310/464, loss: 0.010261507704854012 2023-01-24 05:22:16.622784: step: 312/464, loss: 1.8873350200010464e-05 2023-01-24 05:22:17.176664: step: 314/464, loss: 0.0005399395013228059 2023-01-24 05:22:17.752802: step: 316/464, loss: 0.06412962824106216 2023-01-24 05:22:18.393097: step: 318/464, loss: 0.0034419228322803974 2023-01-24 05:22:18.994107: step: 320/464, loss: 0.013939480297267437 2023-01-24 05:22:19.566970: step: 322/464, loss: 0.008041913621127605 2023-01-24 05:22:20.175004: step: 324/464, loss: 0.2070617526769638 2023-01-24 05:22:20.878075: step: 326/464, loss: 0.016397660598158836 2023-01-24 05:22:21.519606: step: 328/464, loss: 0.0075540849938988686 2023-01-24 05:22:22.139769: step: 330/464, loss: 0.027589106932282448 2023-01-24 05:22:22.751074: step: 332/464, loss: 0.0036028767935931683 2023-01-24 05:22:23.402252: step: 334/464, loss: 0.023024383932352066 2023-01-24 05:22:24.037200: step: 336/464, loss: 0.13009671866893768 2023-01-24 05:22:24.697277: step: 338/464, loss: 0.03645065799355507 2023-01-24 05:22:25.307696: step: 340/464, loss: 0.024611737579107285 2023-01-24 05:22:25.961436: step: 342/464, loss: 0.0030728767160326242 2023-01-24 05:22:26.506103: step: 344/464, loss: 0.0013950022403150797 2023-01-24 05:22:27.175630: step: 346/464, loss: 0.020612915977835655 2023-01-24 05:22:27.781485: step: 348/464, loss: 0.0010369179071858525 2023-01-24 05:22:28.423911: step: 350/464, loss: 0.08027210831642151 2023-01-24 05:22:29.025144: step: 352/464, loss: 0.5803027749061584 2023-01-24 05:22:29.662601: step: 354/464, loss: 0.00014347555406857282 2023-01-24 05:22:30.249346: step: 356/464, loss: 0.00094506551977247 2023-01-24 05:22:30.799653: step: 358/464, loss: 0.0030374499037861824 2023-01-24 05:22:31.477961: step: 360/464, loss: 0.004331836476922035 2023-01-24 05:22:32.167051: step: 362/464, loss: 0.0029369716066867113 2023-01-24 05:22:32.792719: step: 364/464, loss: 0.003449542447924614 2023-01-24 05:22:33.380425: step: 366/464, loss: 0.13225261867046356 2023-01-24 05:22:33.970761: step: 368/464, loss: 0.0019141642842441797 2023-01-24 05:22:34.619283: step: 370/464, loss: 0.04574725404381752 2023-01-24 05:22:35.217206: step: 372/464, loss: 0.0033610507380217314 2023-01-24 05:22:35.820653: step: 374/464, loss: 0.21356722712516785 2023-01-24 05:22:36.410755: step: 376/464, loss: 0.0360436886548996 2023-01-24 05:22:37.143807: step: 378/464, loss: 0.038373976945877075 2023-01-24 05:22:37.852718: step: 380/464, loss: 0.005195892881602049 2023-01-24 05:22:38.436400: step: 382/464, loss: 0.004966085311025381 2023-01-24 05:22:38.970594: step: 384/464, loss: 0.0003348653845023364 2023-01-24 05:22:39.608852: step: 386/464, loss: 0.00865214318037033 2023-01-24 05:22:40.264541: step: 388/464, loss: 0.0016444892389699817 2023-01-24 05:22:40.898976: step: 390/464, loss: 0.0025462752673774958 2023-01-24 05:22:41.572633: step: 392/464, loss: 0.005985606927424669 2023-01-24 05:22:42.211261: step: 394/464, loss: 0.01848895289003849 2023-01-24 05:22:42.832304: step: 396/464, loss: 0.014712951146066189 2023-01-24 05:22:43.461784: step: 398/464, loss: 0.01997094601392746 2023-01-24 05:22:44.113764: step: 400/464, loss: 0.0033737528137862682 2023-01-24 05:22:44.819019: step: 402/464, loss: 0.08923365920782089 2023-01-24 05:22:45.485562: step: 404/464, loss: 0.007990365847945213 2023-01-24 05:22:46.103809: step: 406/464, loss: 0.0056911977007985115 2023-01-24 05:22:46.703677: step: 408/464, loss: 0.006825309246778488 2023-01-24 05:22:47.314408: step: 410/464, loss: 0.00802522711455822 2023-01-24 05:22:47.922946: step: 412/464, loss: 0.00013568451686296612 2023-01-24 05:22:48.480025: step: 414/464, loss: 0.04462343081831932 2023-01-24 05:22:49.092497: step: 416/464, loss: 0.0414571575820446 2023-01-24 05:22:49.715276: step: 418/464, loss: 0.045396171510219574 2023-01-24 05:22:50.346485: step: 420/464, loss: 0.0010843529598787427 2023-01-24 05:22:50.966883: step: 422/464, loss: 0.0035425713285803795 2023-01-24 05:22:51.560028: step: 424/464, loss: 0.01614678092300892 2023-01-24 05:22:52.195461: step: 426/464, loss: 0.0030302645172923803 2023-01-24 05:22:52.780051: step: 428/464, loss: 0.0004536340420600027 2023-01-24 05:22:53.410704: step: 430/464, loss: 9.008437336888164e-05 2023-01-24 05:22:54.018344: step: 432/464, loss: 0.008234814740717411 2023-01-24 05:22:54.675781: step: 434/464, loss: 0.008559430949389935 2023-01-24 05:22:55.264473: step: 436/464, loss: 0.0018182012718170881 2023-01-24 05:22:55.957883: step: 438/464, loss: 6.693278464808827e-06 2023-01-24 05:22:56.627947: step: 440/464, loss: 0.0007661496638320386 2023-01-24 05:22:57.227094: step: 442/464, loss: 0.00024162699992302805 2023-01-24 05:22:57.789525: step: 444/464, loss: 0.001283713267184794 2023-01-24 05:22:58.447152: step: 446/464, loss: 0.01634528674185276 2023-01-24 05:22:59.030276: step: 448/464, loss: 0.019098889082670212 2023-01-24 05:22:59.676143: step: 450/464, loss: 0.0008116748067550361 2023-01-24 05:23:00.255520: step: 452/464, loss: 0.29386022686958313 2023-01-24 05:23:00.948426: step: 454/464, loss: 0.042279280722141266 2023-01-24 05:23:01.581572: step: 456/464, loss: 0.0006630786811001599 2023-01-24 05:23:02.184938: step: 458/464, loss: 0.0030012684874236584 2023-01-24 05:23:02.782299: step: 460/464, loss: 0.06653464585542679 2023-01-24 05:23:03.423405: step: 462/464, loss: 0.018292022868990898 2023-01-24 05:23:04.104768: step: 464/464, loss: 0.014761765487492085 2023-01-24 05:23:04.717090: step: 466/464, loss: 0.009232250042259693 2023-01-24 05:23:05.347493: step: 468/464, loss: 0.0020965656731277704 2023-01-24 05:23:06.071732: step: 470/464, loss: 0.0011324052466079593 2023-01-24 05:23:06.698446: step: 472/464, loss: 0.08012159913778305 2023-01-24 05:23:07.369650: step: 474/464, loss: 0.008564132265746593 2023-01-24 05:23:08.003127: step: 476/464, loss: 0.0025925911031663418 2023-01-24 05:23:08.682310: step: 478/464, loss: 0.009631761349737644 2023-01-24 05:23:09.308410: step: 480/464, loss: 0.013792794197797775 2023-01-24 05:23:09.888973: step: 482/464, loss: 0.8465481996536255 2023-01-24 05:23:10.458655: step: 484/464, loss: 0.025237884372472763 2023-01-24 05:23:11.095041: step: 486/464, loss: 0.06432559341192245 2023-01-24 05:23:11.778110: step: 488/464, loss: 0.004095649812370539 2023-01-24 05:23:12.386001: step: 490/464, loss: 0.00688563659787178 2023-01-24 05:23:12.988697: step: 492/464, loss: 0.0009884837782010436 2023-01-24 05:23:13.592400: step: 494/464, loss: 0.0033845400903373957 2023-01-24 05:23:14.245355: step: 496/464, loss: 0.06000358238816261 2023-01-24 05:23:14.879839: step: 498/464, loss: 0.059953149408102036 2023-01-24 05:23:15.520070: step: 500/464, loss: 0.0382724367082119 2023-01-24 05:23:16.170261: step: 502/464, loss: 0.010647490620613098 2023-01-24 05:23:16.910328: step: 504/464, loss: 0.014845588244497776 2023-01-24 05:23:17.480158: step: 506/464, loss: 0.15880149602890015 2023-01-24 05:23:18.138382: step: 508/464, loss: 0.04840749502182007 2023-01-24 05:23:18.763550: step: 510/464, loss: 0.01429628673940897 2023-01-24 05:23:19.367205: step: 512/464, loss: 0.00044565758435055614 2023-01-24 05:23:19.983516: step: 514/464, loss: 0.007999812252819538 2023-01-24 05:23:20.544525: step: 516/464, loss: 0.06677156686782837 2023-01-24 05:23:21.168253: step: 518/464, loss: 0.00020486098947003484 2023-01-24 05:23:21.808089: step: 520/464, loss: 0.0037851138040423393 2023-01-24 05:23:22.457342: step: 522/464, loss: 0.002987146843224764 2023-01-24 05:23:23.026476: step: 524/464, loss: 0.00014971356722526252 2023-01-24 05:23:23.676470: step: 526/464, loss: 0.00678643211722374 2023-01-24 05:23:24.327779: step: 528/464, loss: 0.04216880723834038 2023-01-24 05:23:25.010190: step: 530/464, loss: 0.00484335795044899 2023-01-24 05:23:25.668809: step: 532/464, loss: 0.12765325605869293 2023-01-24 05:23:26.253443: step: 534/464, loss: 0.001677449676208198 2023-01-24 05:23:26.881534: step: 536/464, loss: 0.0025106756947934628 2023-01-24 05:23:27.484575: step: 538/464, loss: 0.009580564685165882 2023-01-24 05:23:28.108680: step: 540/464, loss: 0.10899461805820465 2023-01-24 05:23:28.731709: step: 542/464, loss: 0.00041793863056227565 2023-01-24 05:23:29.449970: step: 544/464, loss: 0.008387402631342411 2023-01-24 05:23:30.061133: step: 546/464, loss: 0.010032331570982933 2023-01-24 05:23:30.681041: step: 548/464, loss: 0.016491416841745377 2023-01-24 05:23:31.301803: step: 550/464, loss: 0.001153131597675383 2023-01-24 05:23:31.923122: step: 552/464, loss: 0.1899278461933136 2023-01-24 05:23:32.556521: step: 554/464, loss: 0.0023474283516407013 2023-01-24 05:23:33.139223: step: 556/464, loss: 0.00033688393887132406 2023-01-24 05:23:33.702495: step: 558/464, loss: 0.12733258306980133 2023-01-24 05:23:34.345479: step: 560/464, loss: 0.0013179974630475044 2023-01-24 05:23:34.932574: step: 562/464, loss: 0.002366940723732114 2023-01-24 05:23:35.559377: step: 564/464, loss: 0.0010238544782623649 2023-01-24 05:23:36.203646: step: 566/464, loss: 0.11703494191169739 2023-01-24 05:23:36.761840: step: 568/464, loss: 0.003410003613680601 2023-01-24 05:23:37.301167: step: 570/464, loss: 0.0008537861285731196 2023-01-24 05:23:37.877436: step: 572/464, loss: 0.0039034802466630936 2023-01-24 05:23:38.527115: step: 574/464, loss: 0.010530706495046616 2023-01-24 05:23:39.127725: step: 576/464, loss: 0.009586167521774769 2023-01-24 05:23:39.776237: step: 578/464, loss: 0.007921237498521805 2023-01-24 05:23:40.371336: step: 580/464, loss: 0.013114217668771744 2023-01-24 05:23:40.988501: step: 582/464, loss: 0.0015510583762079477 2023-01-24 05:23:41.599282: step: 584/464, loss: 0.0006341558764688671 2023-01-24 05:23:42.195890: step: 586/464, loss: 0.007430794648826122 2023-01-24 05:23:42.775430: step: 588/464, loss: 0.034198254346847534 2023-01-24 05:23:43.441256: step: 590/464, loss: 0.036947060376405716 2023-01-24 05:23:44.080415: step: 592/464, loss: 5.735832382924855e-05 2023-01-24 05:23:44.736659: step: 594/464, loss: 0.013463602401316166 2023-01-24 05:23:45.437709: step: 596/464, loss: 0.06106730177998543 2023-01-24 05:23:46.014138: step: 598/464, loss: 0.0011482078116387129 2023-01-24 05:23:46.607659: step: 600/464, loss: 0.002552524907514453 2023-01-24 05:23:47.245941: step: 602/464, loss: 0.016755448654294014 2023-01-24 05:23:47.867766: step: 604/464, loss: 0.03485510125756264 2023-01-24 05:23:48.564991: step: 606/464, loss: 1.016755223274231 2023-01-24 05:23:49.226662: step: 608/464, loss: 0.0009947152575477958 2023-01-24 05:23:49.809028: step: 610/464, loss: 0.008130676113069057 2023-01-24 05:23:50.464995: step: 612/464, loss: 0.009686904959380627 2023-01-24 05:23:51.056312: step: 614/464, loss: 0.02364383079111576 2023-01-24 05:23:51.687788: step: 616/464, loss: 0.0007474590674974024 2023-01-24 05:23:52.292519: step: 618/464, loss: 0.0047137551009655 2023-01-24 05:23:52.926760: step: 620/464, loss: 0.000658941688016057 2023-01-24 05:23:53.529744: step: 622/464, loss: 0.0004643475986085832 2023-01-24 05:23:54.118988: step: 624/464, loss: 0.2423620969057083 2023-01-24 05:23:54.710313: step: 626/464, loss: 0.001640212256461382 2023-01-24 05:23:55.309308: step: 628/464, loss: 0.025120731443166733 2023-01-24 05:23:55.943314: step: 630/464, loss: 0.012573404237627983 2023-01-24 05:23:56.564493: step: 632/464, loss: 0.007707800250500441 2023-01-24 05:23:57.162784: step: 634/464, loss: 0.039470117539167404 2023-01-24 05:23:57.800589: step: 636/464, loss: 0.03454526141285896 2023-01-24 05:23:58.388286: step: 638/464, loss: 0.0012105669593438506 2023-01-24 05:23:59.028533: step: 640/464, loss: 0.02095707319676876 2023-01-24 05:23:59.611210: step: 642/464, loss: 0.009889095090329647 2023-01-24 05:24:00.185970: step: 644/464, loss: 0.014576292596757412 2023-01-24 05:24:00.790800: step: 646/464, loss: 0.00138474116101861 2023-01-24 05:24:01.461270: step: 648/464, loss: 0.00338058196939528 2023-01-24 05:24:02.081701: step: 650/464, loss: 0.0009252754971385002 2023-01-24 05:24:02.734798: step: 652/464, loss: 0.00416414812207222 2023-01-24 05:24:03.264461: step: 654/464, loss: 0.00028824794571846724 2023-01-24 05:24:03.885378: step: 656/464, loss: 0.0022176536731421947 2023-01-24 05:24:04.542979: step: 658/464, loss: 0.03203499689698219 2023-01-24 05:24:05.168153: step: 660/464, loss: 0.0019834109116345644 2023-01-24 05:24:05.822433: step: 662/464, loss: 0.001044319011271 2023-01-24 05:24:06.555672: step: 664/464, loss: 0.030004270374774933 2023-01-24 05:24:07.253769: step: 666/464, loss: 0.06255772709846497 2023-01-24 05:24:07.852811: step: 668/464, loss: 7.002799975452945e-06 2023-01-24 05:24:08.494053: step: 670/464, loss: 0.01589263416826725 2023-01-24 05:24:09.137305: step: 672/464, loss: 0.03454243019223213 2023-01-24 05:24:09.826461: step: 674/464, loss: 0.08794011175632477 2023-01-24 05:24:10.490175: step: 676/464, loss: 0.005668723955750465 2023-01-24 05:24:11.105164: step: 678/464, loss: 0.017416013404726982 2023-01-24 05:24:11.714769: step: 680/464, loss: 0.15912096202373505 2023-01-24 05:24:12.329432: step: 682/464, loss: 0.0011662282049655914 2023-01-24 05:24:12.990709: step: 684/464, loss: 0.007506620604544878 2023-01-24 05:24:13.628940: step: 686/464, loss: 1.0766023397445679 2023-01-24 05:24:14.256702: step: 688/464, loss: 7.692611688980833e-05 2023-01-24 05:24:14.932945: step: 690/464, loss: 0.00015255837934091687 2023-01-24 05:24:15.551499: step: 692/464, loss: 0.003920267801731825 2023-01-24 05:24:16.102949: step: 694/464, loss: 0.008264812640845776 2023-01-24 05:24:16.728986: step: 696/464, loss: 0.02437479980289936 2023-01-24 05:24:17.293320: step: 698/464, loss: 0.00026347560924477875 2023-01-24 05:24:17.916576: step: 700/464, loss: 0.0008082684362307191 2023-01-24 05:24:18.519008: step: 702/464, loss: 0.05713580548763275 2023-01-24 05:24:19.098577: step: 704/464, loss: 0.0007922378135845065 2023-01-24 05:24:19.716034: step: 706/464, loss: 0.10754930973052979 2023-01-24 05:24:20.320411: step: 708/464, loss: 2.2035123038222082e-05 2023-01-24 05:24:20.903223: step: 710/464, loss: 0.012474643997848034 2023-01-24 05:24:21.471424: step: 712/464, loss: 6.510557432193309e-05 2023-01-24 05:24:22.110451: step: 714/464, loss: 0.009142348542809486 2023-01-24 05:24:22.726027: step: 716/464, loss: 0.01799674890935421 2023-01-24 05:24:23.475197: step: 718/464, loss: 0.4685097634792328 2023-01-24 05:24:24.085655: step: 720/464, loss: 0.00978299044072628 2023-01-24 05:24:24.653531: step: 722/464, loss: 0.012385339476168156 2023-01-24 05:24:25.209765: step: 724/464, loss: 0.004721686244010925 2023-01-24 05:24:25.805997: step: 726/464, loss: 0.005557945929467678 2023-01-24 05:24:26.388546: step: 728/464, loss: 0.003821711055934429 2023-01-24 05:24:26.994594: step: 730/464, loss: 0.0028337466064840555 2023-01-24 05:24:27.608602: step: 732/464, loss: 0.016650592908263206 2023-01-24 05:24:28.236985: step: 734/464, loss: 0.0019164554541930556 2023-01-24 05:24:28.864794: step: 736/464, loss: 0.00016860793402884156 2023-01-24 05:24:29.474816: step: 738/464, loss: 0.005961798131465912 2023-01-24 05:24:30.024801: step: 740/464, loss: 0.0008520600385963917 2023-01-24 05:24:30.645914: step: 742/464, loss: 0.041630685329437256 2023-01-24 05:24:31.247254: step: 744/464, loss: 0.017452936619520187 2023-01-24 05:24:31.817608: step: 746/464, loss: 0.0014399095671251416 2023-01-24 05:24:32.379237: step: 748/464, loss: 0.003561159363016486 2023-01-24 05:24:32.978506: step: 750/464, loss: 0.009633861482143402 2023-01-24 05:24:33.547504: step: 752/464, loss: 0.0030405675061047077 2023-01-24 05:24:34.130931: step: 754/464, loss: 0.009415525943040848 2023-01-24 05:24:34.737290: step: 756/464, loss: 0.09051462262868881 2023-01-24 05:24:35.385681: step: 758/464, loss: 0.02376624383032322 2023-01-24 05:24:35.984455: step: 760/464, loss: 0.03737789765000343 2023-01-24 05:24:36.608207: step: 762/464, loss: 0.0007035199669189751 2023-01-24 05:24:37.187636: step: 764/464, loss: 0.010482733137905598 2023-01-24 05:24:37.803332: step: 766/464, loss: 0.0009111549006775022 2023-01-24 05:24:38.456593: step: 768/464, loss: 0.06286580115556717 2023-01-24 05:24:39.074289: step: 770/464, loss: 0.0038697372656315565 2023-01-24 05:24:39.735832: step: 772/464, loss: 0.016095953062176704 2023-01-24 05:24:40.309060: step: 774/464, loss: 0.0009386722231283784 2023-01-24 05:24:40.904105: step: 776/464, loss: 0.002113762078806758 2023-01-24 05:24:41.455077: step: 778/464, loss: 0.004145435523241758 2023-01-24 05:24:42.069506: step: 780/464, loss: 0.004087845329195261 2023-01-24 05:24:42.710224: step: 782/464, loss: 0.46168965101242065 2023-01-24 05:24:43.291895: step: 784/464, loss: 0.0006339970277622342 2023-01-24 05:24:44.023215: step: 786/464, loss: 0.0003590055275708437 2023-01-24 05:24:44.673525: step: 788/464, loss: 0.04442901164293289 2023-01-24 05:24:45.282006: step: 790/464, loss: 0.011943993158638477 2023-01-24 05:24:45.858041: step: 792/464, loss: 0.005789447575807571 2023-01-24 05:24:46.462110: step: 794/464, loss: 0.00178483163472265 2023-01-24 05:24:47.111186: step: 796/464, loss: 0.025112107396125793 2023-01-24 05:24:47.751238: step: 798/464, loss: 0.0019318273989483714 2023-01-24 05:24:48.379437: step: 800/464, loss: 0.004134173039346933 2023-01-24 05:24:48.995411: step: 802/464, loss: 0.01001482829451561 2023-01-24 05:24:49.650590: step: 804/464, loss: 0.006891184486448765 2023-01-24 05:24:50.254623: step: 806/464, loss: 0.016399085521697998 2023-01-24 05:24:50.865130: step: 808/464, loss: 0.08795420080423355 2023-01-24 05:24:51.510393: step: 810/464, loss: 0.006551109254360199 2023-01-24 05:24:52.085210: step: 812/464, loss: 0.037414442747831345 2023-01-24 05:24:52.642072: step: 814/464, loss: 0.00023505538410972804 2023-01-24 05:24:53.287496: step: 816/464, loss: 0.005884307436645031 2023-01-24 05:24:53.906132: step: 818/464, loss: 0.5972034931182861 2023-01-24 05:24:54.552002: step: 820/464, loss: 0.033281926065683365 2023-01-24 05:24:55.202644: step: 822/464, loss: 0.002844218397513032 2023-01-24 05:24:55.827949: step: 824/464, loss: 0.0022140166256576777 2023-01-24 05:24:56.470574: step: 826/464, loss: 0.06122094765305519 2023-01-24 05:24:57.085785: step: 828/464, loss: 0.02330681122839451 2023-01-24 05:24:57.680011: step: 830/464, loss: 0.00125783565454185 2023-01-24 05:24:58.377473: step: 832/464, loss: 0.0038115172646939754 2023-01-24 05:24:59.030779: step: 834/464, loss: 0.03195692598819733 2023-01-24 05:24:59.675166: step: 836/464, loss: 0.009772353805601597 2023-01-24 05:25:00.279234: step: 838/464, loss: 0.008238757960498333 2023-01-24 05:25:00.893361: step: 840/464, loss: 0.008015172556042671 2023-01-24 05:25:01.451990: step: 842/464, loss: 0.014947726391255856 2023-01-24 05:25:02.039313: step: 844/464, loss: 0.00028287232271395624 2023-01-24 05:25:02.620915: step: 846/464, loss: 0.0006196981994435191 2023-01-24 05:25:03.108272: step: 848/464, loss: 0.005727910902351141 2023-01-24 05:25:03.747248: step: 850/464, loss: 1.8394459402770735e-05 2023-01-24 05:25:04.338462: step: 852/464, loss: 0.08610616624355316 2023-01-24 05:25:04.963268: step: 854/464, loss: 0.010558906942605972 2023-01-24 05:25:05.541483: step: 856/464, loss: 0.009178843349218369 2023-01-24 05:25:06.142765: step: 858/464, loss: 0.0027270710561424494 2023-01-24 05:25:06.707573: step: 860/464, loss: 0.00411981763318181 2023-01-24 05:25:07.266683: step: 862/464, loss: 0.00033645035000517964 2023-01-24 05:25:07.875569: step: 864/464, loss: 0.0021524657495319843 2023-01-24 05:25:08.483301: step: 866/464, loss: 0.0221529770642519 2023-01-24 05:25:09.107623: step: 868/464, loss: 0.0135272815823555 2023-01-24 05:25:09.712607: step: 870/464, loss: 0.017445018514990807 2023-01-24 05:25:10.332083: step: 872/464, loss: 0.0009814107324928045 2023-01-24 05:25:11.059101: step: 874/464, loss: 0.12281250953674316 2023-01-24 05:25:11.776251: step: 876/464, loss: 0.0002310747659066692 2023-01-24 05:25:12.405888: step: 878/464, loss: 0.008918222039937973 2023-01-24 05:25:13.049050: step: 880/464, loss: 0.03904656320810318 2023-01-24 05:25:13.652311: step: 882/464, loss: 0.008514349348843098 2023-01-24 05:25:14.248511: step: 884/464, loss: 0.027735181152820587 2023-01-24 05:25:14.873839: step: 886/464, loss: 0.019990824162960052 2023-01-24 05:25:15.463414: step: 888/464, loss: 0.002079431666061282 2023-01-24 05:25:16.043124: step: 890/464, loss: 0.0047247945331037045 2023-01-24 05:25:16.642158: step: 892/464, loss: 0.10039626806974411 2023-01-24 05:25:17.255544: step: 894/464, loss: 0.032289791852235794 2023-01-24 05:25:17.911185: step: 896/464, loss: 0.001553431968204677 2023-01-24 05:25:18.569402: step: 898/464, loss: 0.06520793586969376 2023-01-24 05:25:19.196542: step: 900/464, loss: 0.006170877255499363 2023-01-24 05:25:19.836043: step: 902/464, loss: 0.015883471816778183 2023-01-24 05:25:20.451571: step: 904/464, loss: 0.0009544222266413271 2023-01-24 05:25:21.192294: step: 906/464, loss: 0.00172118388582021 2023-01-24 05:25:21.859182: step: 908/464, loss: 0.0004212943895254284 2023-01-24 05:25:22.456920: step: 910/464, loss: 9.579287143424153e-05 2023-01-24 05:25:23.115836: step: 912/464, loss: 0.0012574447318911552 2023-01-24 05:25:23.815982: step: 914/464, loss: 0.010224048979580402 2023-01-24 05:25:24.468303: step: 916/464, loss: 0.03736108914017677 2023-01-24 05:25:25.091923: step: 918/464, loss: 0.00016129278810694814 2023-01-24 05:25:25.761719: step: 920/464, loss: 0.05936411768198013 2023-01-24 05:25:26.370287: step: 922/464, loss: 0.00186917616520077 2023-01-24 05:25:27.060178: step: 924/464, loss: 0.008587202057242393 2023-01-24 05:25:27.646732: step: 926/464, loss: 0.0009364963043481112 2023-01-24 05:25:28.273047: step: 928/464, loss: 0.01637764275074005 2023-01-24 05:25:28.761623: step: 930/464, loss: 8.549598715035245e-05 ================================================== Loss: 0.045 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3272055799287085, 'r': 0.32348027920845757, 'f1': 0.3253322655398037}, 'combined': 0.23971851145038167, 'epoch': 37} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3293864766360918, 'r': 0.31154092831512814, 'f1': 0.3202152627986546}, 'combined': 0.20905245136077968, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3295272903594533, 'r': 0.3339043131915523, 'f1': 0.33170136296314434}, 'combined': 0.24441153060442214, 'epoch': 37} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3351491171980056, 'r': 0.3077082051183161, 'f1': 0.3208429918116159}, 'combined': 0.20946226408426738, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34171540466961975, 'r': 0.33458282506551007, 'f1': 0.3381115029904579}, 'combined': 0.24913479167717947, 'epoch': 37} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3345265355837896, 'r': 0.3006738042919617, 'f1': 0.3166980864974442}, 'combined': 0.20675626372372002, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31150793650793646, 'r': 0.37380952380952376, 'f1': 0.33982683982683975}, 'combined': 0.2265512265512265, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.275, 'r': 0.358695652173913, 'f1': 0.3113207547169812}, 'combined': 0.1556603773584906, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5227272727272727, 'r': 0.19827586206896552, 'f1': 0.28750000000000003}, 'combined': 0.19166666666666668, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3348209212662338, 'r': 0.3557869372089012, 'f1': 0.3449856778456135}, 'combined': 0.2541999731493994, 'epoch': 34} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3240350842156237, 'r': 0.3094415923713539, 'f1': 0.31657024212508983}, 'combined': 0.2066728005583488, 'epoch': 34} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33088235294117646, 'r': 0.4891304347826087, 'f1': 0.39473684210526316}, 'combined': 0.19736842105263158, 'epoch': 34} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:28:05.350040: step: 2/464, loss: 0.07666371762752533 2023-01-24 05:28:05.903588: step: 4/464, loss: 0.0041556404903531075 2023-01-24 05:28:06.525269: step: 6/464, loss: 0.016601957380771637 2023-01-24 05:28:07.147361: step: 8/464, loss: 0.008504652418196201 2023-01-24 05:28:07.803487: step: 10/464, loss: 0.1495015174150467 2023-01-24 05:28:08.407329: step: 12/464, loss: 0.000984443468041718 2023-01-24 05:28:09.011575: step: 14/464, loss: 0.01359118428081274 2023-01-24 05:28:09.627587: step: 16/464, loss: 0.0004940666840411723 2023-01-24 05:28:10.238657: step: 18/464, loss: 0.004424249287694693 2023-01-24 05:28:10.925996: step: 20/464, loss: 0.016282692551612854 2023-01-24 05:28:11.559292: step: 22/464, loss: 0.03299436718225479 2023-01-24 05:28:12.172272: step: 24/464, loss: 2.0109622710151598e-05 2023-01-24 05:28:12.833037: step: 26/464, loss: 0.0018645920790731907 2023-01-24 05:28:13.471315: step: 28/464, loss: 0.02996472455561161 2023-01-24 05:28:14.085644: step: 30/464, loss: 0.00014173431554809213 2023-01-24 05:28:14.694037: step: 32/464, loss: 0.004367295186966658 2023-01-24 05:28:15.331192: step: 34/464, loss: 0.05591445416212082 2023-01-24 05:28:16.001485: step: 36/464, loss: 0.0004241672868374735 2023-01-24 05:28:16.682417: step: 38/464, loss: 0.015961840748786926 2023-01-24 05:28:17.306608: step: 40/464, loss: 0.004817094653844833 2023-01-24 05:28:17.895097: step: 42/464, loss: 0.0018298542127013206 2023-01-24 05:28:18.510643: step: 44/464, loss: 0.0121889878064394 2023-01-24 05:28:19.095085: step: 46/464, loss: 0.0021618735045194626 2023-01-24 05:28:19.642201: step: 48/464, loss: 0.0003386117168702185 2023-01-24 05:28:20.196757: step: 50/464, loss: 0.00016865790530573577 2023-01-24 05:28:20.761571: step: 52/464, loss: 0.008086579851806164 2023-01-24 05:28:21.374888: step: 54/464, loss: 0.001665792427957058 2023-01-24 05:28:22.001889: step: 56/464, loss: 0.02543218433856964 2023-01-24 05:28:22.584246: step: 58/464, loss: 0.8957485556602478 2023-01-24 05:28:23.159943: step: 60/464, loss: 0.0036911722272634506 2023-01-24 05:28:23.747272: step: 62/464, loss: 0.0007915475289337337 2023-01-24 05:28:24.376558: step: 64/464, loss: 0.005884534679353237 2023-01-24 05:28:24.995777: step: 66/464, loss: 0.06674639135599136 2023-01-24 05:28:25.576355: step: 68/464, loss: 0.004447010345757008 2023-01-24 05:28:26.142261: step: 70/464, loss: 0.0007373158005066216 2023-01-24 05:28:26.776301: step: 72/464, loss: 0.07409743219614029 2023-01-24 05:28:27.431478: step: 74/464, loss: 0.00021967102657072246 2023-01-24 05:28:28.007420: step: 76/464, loss: 0.006402883678674698 2023-01-24 05:28:28.672864: step: 78/464, loss: 0.1285560429096222 2023-01-24 05:28:29.266280: step: 80/464, loss: 0.0064278459176421165 2023-01-24 05:28:29.846737: step: 82/464, loss: 0.0027200295589864254 2023-01-24 05:28:30.476283: step: 84/464, loss: 0.002880350686609745 2023-01-24 05:28:31.084456: step: 86/464, loss: 0.006422638893127441 2023-01-24 05:28:31.636644: step: 88/464, loss: 5.416858402895741e-05 2023-01-24 05:28:32.345570: step: 90/464, loss: 0.05255880579352379 2023-01-24 05:28:32.903669: step: 92/464, loss: 0.0015383812133222818 2023-01-24 05:28:33.483083: step: 94/464, loss: 0.028287207707762718 2023-01-24 05:28:34.131102: step: 96/464, loss: 0.0023062839172780514 2023-01-24 05:28:34.749133: step: 98/464, loss: 0.01701662316918373 2023-01-24 05:28:35.346129: step: 100/464, loss: 0.0005828766734339297 2023-01-24 05:28:36.007693: step: 102/464, loss: 0.051405563950538635 2023-01-24 05:28:36.685359: step: 104/464, loss: 0.00017306103836745024 2023-01-24 05:28:37.335954: step: 106/464, loss: 0.015117556788027287 2023-01-24 05:28:37.907413: step: 108/464, loss: 0.0004980931989848614 2023-01-24 05:28:38.470495: step: 110/464, loss: 0.0075327204540371895 2023-01-24 05:28:39.060601: step: 112/464, loss: 0.006094371899962425 2023-01-24 05:28:39.696746: step: 114/464, loss: 0.04257412627339363 2023-01-24 05:28:40.354317: step: 116/464, loss: 0.01640826277434826 2023-01-24 05:28:41.009669: step: 118/464, loss: 0.01762639544904232 2023-01-24 05:28:41.521419: step: 120/464, loss: 0.005846071057021618 2023-01-24 05:28:42.192260: step: 122/464, loss: 0.5272281169891357 2023-01-24 05:28:42.877828: step: 124/464, loss: 0.00011272434494458139 2023-01-24 05:28:43.567551: step: 126/464, loss: 0.15199708938598633 2023-01-24 05:28:44.272057: step: 128/464, loss: 0.04380028694868088 2023-01-24 05:28:44.896917: step: 130/464, loss: 0.010487067513167858 2023-01-24 05:28:45.544409: step: 132/464, loss: 0.0066137295216321945 2023-01-24 05:28:46.140181: step: 134/464, loss: 0.0038525154814124107 2023-01-24 05:28:46.767602: step: 136/464, loss: 0.0010183482663705945 2023-01-24 05:28:47.414369: step: 138/464, loss: 0.003813040442764759 2023-01-24 05:28:48.115142: step: 140/464, loss: 0.0023907809518277645 2023-01-24 05:28:48.713113: step: 142/464, loss: 5.111067002872005e-05 2023-01-24 05:28:49.333610: step: 144/464, loss: 0.004993663635104895 2023-01-24 05:28:49.945822: step: 146/464, loss: 0.13828477263450623 2023-01-24 05:28:50.589981: step: 148/464, loss: 0.0017009323928505182 2023-01-24 05:28:51.269196: step: 150/464, loss: 0.005286176223307848 2023-01-24 05:28:51.887572: step: 152/464, loss: 0.001723647816106677 2023-01-24 05:28:52.493757: step: 154/464, loss: 0.001361824688501656 2023-01-24 05:28:53.092442: step: 156/464, loss: 0.0003422506561037153 2023-01-24 05:28:53.678165: step: 158/464, loss: 0.0026143400464206934 2023-01-24 05:28:54.304475: step: 160/464, loss: 0.0035480279475450516 2023-01-24 05:28:55.025754: step: 162/464, loss: 0.06149844825267792 2023-01-24 05:28:55.627658: step: 164/464, loss: 0.012968159280717373 2023-01-24 05:28:56.280253: step: 166/464, loss: 0.008539444766938686 2023-01-24 05:28:56.929966: step: 168/464, loss: 0.006774135399609804 2023-01-24 05:28:57.554887: step: 170/464, loss: 0.0024416123051196337 2023-01-24 05:28:58.167322: step: 172/464, loss: 0.0045210495591163635 2023-01-24 05:28:58.749123: step: 174/464, loss: 0.0001654277293710038 2023-01-24 05:28:59.345693: step: 176/464, loss: 0.1539025604724884 2023-01-24 05:28:59.971147: step: 178/464, loss: 0.016555478796362877 2023-01-24 05:29:00.592734: step: 180/464, loss: 0.0009038833668455482 2023-01-24 05:29:01.230623: step: 182/464, loss: 0.04463731870055199 2023-01-24 05:29:01.814636: step: 184/464, loss: 0.0007485342212021351 2023-01-24 05:29:02.424389: step: 186/464, loss: 0.09173037111759186 2023-01-24 05:29:02.967888: step: 188/464, loss: 0.22339533269405365 2023-01-24 05:29:03.589837: step: 190/464, loss: 0.4433741271495819 2023-01-24 05:29:04.208653: step: 192/464, loss: 0.00010236509115202352 2023-01-24 05:29:04.805657: step: 194/464, loss: 8.915683429222554e-05 2023-01-24 05:29:05.434937: step: 196/464, loss: 0.0865844190120697 2023-01-24 05:29:06.067601: step: 198/464, loss: 0.09011106193065643 2023-01-24 05:29:06.668795: step: 200/464, loss: 0.0009091346873901784 2023-01-24 05:29:07.292631: step: 202/464, loss: 0.0038832188583910465 2023-01-24 05:29:07.909215: step: 204/464, loss: 0.0029701266903430223 2023-01-24 05:29:08.482510: step: 206/464, loss: 0.0030103796161711216 2023-01-24 05:29:09.103190: step: 208/464, loss: 0.00433703837916255 2023-01-24 05:29:09.760842: step: 210/464, loss: 0.0011400451185181737 2023-01-24 05:29:10.339252: step: 212/464, loss: 0.005956655368208885 2023-01-24 05:29:10.988372: step: 214/464, loss: 0.014067273586988449 2023-01-24 05:29:11.576483: step: 216/464, loss: 0.004998539574444294 2023-01-24 05:29:12.184161: step: 218/464, loss: 0.05108063295483589 2023-01-24 05:29:12.869565: step: 220/464, loss: 0.15355080366134644 2023-01-24 05:29:13.628176: step: 222/464, loss: 0.00015312989125959575 2023-01-24 05:29:14.239879: step: 224/464, loss: 0.0011173977982252836 2023-01-24 05:29:14.892240: step: 226/464, loss: 0.06421932578086853 2023-01-24 05:29:15.559270: step: 228/464, loss: 2.7660456908051856e-05 2023-01-24 05:29:16.183478: step: 230/464, loss: 0.0002391198358964175 2023-01-24 05:29:16.852274: step: 232/464, loss: 0.0005655947607010603 2023-01-24 05:29:17.481669: step: 234/464, loss: 0.01645440049469471 2023-01-24 05:29:18.044133: step: 236/464, loss: 0.005959731992334127 2023-01-24 05:29:18.687627: step: 238/464, loss: 0.008238804526627064 2023-01-24 05:29:19.230370: step: 240/464, loss: 0.0012029623612761497 2023-01-24 05:29:19.910042: step: 242/464, loss: 0.000807002536021173 2023-01-24 05:29:20.636193: step: 244/464, loss: 2.4607074010418728e-05 2023-01-24 05:29:21.240096: step: 246/464, loss: 0.003851450514048338 2023-01-24 05:29:21.840405: step: 248/464, loss: 0.01687595620751381 2023-01-24 05:29:22.430254: step: 250/464, loss: 0.0029494829941540956 2023-01-24 05:29:23.045317: step: 252/464, loss: 1.2497486750362441e-05 2023-01-24 05:29:23.619199: step: 254/464, loss: 0.000681983889080584 2023-01-24 05:29:24.231552: step: 256/464, loss: 0.007440278772264719 2023-01-24 05:29:24.869790: step: 258/464, loss: 0.0018620517803356051 2023-01-24 05:29:25.536308: step: 260/464, loss: 0.008712049573659897 2023-01-24 05:29:26.134773: step: 262/464, loss: 0.0009259435464628041 2023-01-24 05:29:26.854325: step: 264/464, loss: 0.010966053232550621 2023-01-24 05:29:27.463967: step: 266/464, loss: 2.162556666007731e-05 2023-01-24 05:29:28.189075: step: 268/464, loss: 0.07548662275075912 2023-01-24 05:29:28.766348: step: 270/464, loss: 0.0036649368703365326 2023-01-24 05:29:29.393711: step: 272/464, loss: 0.09332875162363052 2023-01-24 05:29:30.085174: step: 274/464, loss: 0.0022640973329544067 2023-01-24 05:29:30.687615: step: 276/464, loss: 0.0028570217546075583 2023-01-24 05:29:31.364231: step: 278/464, loss: 0.029439497739076614 2023-01-24 05:29:32.044402: step: 280/464, loss: 0.00011209556396352127 2023-01-24 05:29:32.731150: step: 282/464, loss: 0.0016224890714511275 2023-01-24 05:29:33.302952: step: 284/464, loss: 0.10801452398300171 2023-01-24 05:29:33.879335: step: 286/464, loss: 0.002695675939321518 2023-01-24 05:29:34.578332: step: 288/464, loss: 0.0012047748314216733 2023-01-24 05:29:35.153977: step: 290/464, loss: 0.0006728891166858375 2023-01-24 05:29:35.783427: step: 292/464, loss: 0.00027445441810414195 2023-01-24 05:29:36.434075: step: 294/464, loss: 0.0254234429448843 2023-01-24 05:29:37.032625: step: 296/464, loss: 0.0009080729796551168 2023-01-24 05:29:37.617208: step: 298/464, loss: 8.996039105113596e-05 2023-01-24 05:29:38.192159: step: 300/464, loss: 0.014653047546744347 2023-01-24 05:29:38.824908: step: 302/464, loss: 0.12983016669750214 2023-01-24 05:29:39.428400: step: 304/464, loss: 0.03050478920340538 2023-01-24 05:29:40.069447: step: 306/464, loss: 0.009392405860126019 2023-01-24 05:29:40.709437: step: 308/464, loss: 0.0025869968812912703 2023-01-24 05:29:41.221032: step: 310/464, loss: 0.000151861910126172 2023-01-24 05:29:41.865882: step: 312/464, loss: 0.0013560017105191946 2023-01-24 05:29:42.527706: step: 314/464, loss: 0.0028233586344867945 2023-01-24 05:29:43.158732: step: 316/464, loss: 0.020580098032951355 2023-01-24 05:29:43.823961: step: 318/464, loss: 1.2516134977340698 2023-01-24 05:29:44.488511: step: 320/464, loss: 0.16352632641792297 2023-01-24 05:29:45.064926: step: 322/464, loss: 0.001475161057896912 2023-01-24 05:29:45.700729: step: 324/464, loss: 0.0037892444524914026 2023-01-24 05:29:46.341634: step: 326/464, loss: 0.014337840490043163 2023-01-24 05:29:46.940707: step: 328/464, loss: 0.00012592771963682026 2023-01-24 05:29:47.535315: step: 330/464, loss: 0.010473833419382572 2023-01-24 05:29:48.100107: step: 332/464, loss: 0.05411313846707344 2023-01-24 05:29:48.634576: step: 334/464, loss: 0.0004032576980534941 2023-01-24 05:29:49.223088: step: 336/464, loss: 0.37042418122291565 2023-01-24 05:29:49.793360: step: 338/464, loss: 8.61345324665308e-05 2023-01-24 05:29:50.328878: step: 340/464, loss: 0.0011708943638950586 2023-01-24 05:29:51.027315: step: 342/464, loss: 4.38054895401001 2023-01-24 05:29:51.616480: step: 344/464, loss: 0.004698981065303087 2023-01-24 05:29:52.233793: step: 346/464, loss: 0.00021359566017054021 2023-01-24 05:29:52.906025: step: 348/464, loss: 0.011838965117931366 2023-01-24 05:29:53.482304: step: 350/464, loss: 0.02740328386425972 2023-01-24 05:29:54.098804: step: 352/464, loss: 0.006860440131276846 2023-01-24 05:29:54.760054: step: 354/464, loss: 0.24267810583114624 2023-01-24 05:29:55.379592: step: 356/464, loss: 1.5543577319476753e-05 2023-01-24 05:29:55.986893: step: 358/464, loss: 0.0018179682083427906 2023-01-24 05:29:56.646851: step: 360/464, loss: 0.0008576444233767688 2023-01-24 05:29:57.368734: step: 362/464, loss: 0.10859065502882004 2023-01-24 05:29:57.994755: step: 364/464, loss: 0.018232515081763268 2023-01-24 05:29:58.688775: step: 366/464, loss: 0.018040932714939117 2023-01-24 05:29:59.362771: step: 368/464, loss: 0.020858148112893105 2023-01-24 05:29:59.960567: step: 370/464, loss: 0.007685009855777025 2023-01-24 05:30:00.616637: step: 372/464, loss: 0.02899814024567604 2023-01-24 05:30:01.274999: step: 374/464, loss: 0.021727699786424637 2023-01-24 05:30:01.841543: step: 376/464, loss: 0.06514622271060944 2023-01-24 05:30:02.459107: step: 378/464, loss: 5.6364660849794745e-06 2023-01-24 05:30:03.044139: step: 380/464, loss: 0.0019106330582872033 2023-01-24 05:30:03.657270: step: 382/464, loss: 0.0014959658728912473 2023-01-24 05:30:04.201043: step: 384/464, loss: 0.001561407814733684 2023-01-24 05:30:04.806196: step: 386/464, loss: 0.00809608306735754 2023-01-24 05:30:05.341197: step: 388/464, loss: 0.00039438524981960654 2023-01-24 05:30:06.035723: step: 390/464, loss: 0.013873212039470673 2023-01-24 05:30:06.647720: step: 392/464, loss: 0.002111123176291585 2023-01-24 05:30:07.268144: step: 394/464, loss: 0.15582974255084991 2023-01-24 05:30:07.847210: step: 396/464, loss: 0.03531589359045029 2023-01-24 05:30:08.497255: step: 398/464, loss: 0.09494128823280334 2023-01-24 05:30:09.147843: step: 400/464, loss: 0.028393391519784927 2023-01-24 05:30:09.716518: step: 402/464, loss: 0.002526383614167571 2023-01-24 05:30:10.360166: step: 404/464, loss: 0.006093881092965603 2023-01-24 05:30:10.992269: step: 406/464, loss: 0.02048211172223091 2023-01-24 05:30:11.570436: step: 408/464, loss: 0.0005945987650193274 2023-01-24 05:30:12.193650: step: 410/464, loss: 1.6490535870161693e-07 2023-01-24 05:30:12.881604: step: 412/464, loss: 0.01898992620408535 2023-01-24 05:30:13.475547: step: 414/464, loss: 0.030549850314855576 2023-01-24 05:30:14.092476: step: 416/464, loss: 0.0011503227287903428 2023-01-24 05:30:14.744597: step: 418/464, loss: 0.06716148555278778 2023-01-24 05:30:15.419971: step: 420/464, loss: 0.002034626202657819 2023-01-24 05:30:16.062586: step: 422/464, loss: 0.006199575029313564 2023-01-24 05:30:16.668540: step: 424/464, loss: 0.006802136544138193 2023-01-24 05:30:17.389512: step: 426/464, loss: 0.04740991070866585 2023-01-24 05:30:17.992516: step: 428/464, loss: 0.02002396434545517 2023-01-24 05:30:18.546081: step: 430/464, loss: 0.009074503555893898 2023-01-24 05:30:19.171909: step: 432/464, loss: 0.026804577559232712 2023-01-24 05:30:19.755586: step: 434/464, loss: 0.002207412151619792 2023-01-24 05:30:20.313482: step: 436/464, loss: 0.000302294734865427 2023-01-24 05:30:20.950447: step: 438/464, loss: 0.0211816243827343 2023-01-24 05:30:21.615703: step: 440/464, loss: 0.006016803439706564 2023-01-24 05:30:22.298404: step: 442/464, loss: 0.0037967958487570286 2023-01-24 05:30:22.960994: step: 444/464, loss: 0.003919376991689205 2023-01-24 05:30:23.535681: step: 446/464, loss: 0.0012541390024125576 2023-01-24 05:30:24.161510: step: 448/464, loss: 0.007256507407873869 2023-01-24 05:30:24.793483: step: 450/464, loss: 0.01981954276561737 2023-01-24 05:30:25.498896: step: 452/464, loss: 0.005772142205387354 2023-01-24 05:30:26.108288: step: 454/464, loss: 0.0013192944461479783 2023-01-24 05:30:26.746162: step: 456/464, loss: 0.03334563598036766 2023-01-24 05:30:27.325311: step: 458/464, loss: 0.0009300485835410655 2023-01-24 05:30:27.864350: step: 460/464, loss: 8.524296572431922e-05 2023-01-24 05:30:28.549389: step: 462/464, loss: 0.0048855082131922245 2023-01-24 05:30:29.186338: step: 464/464, loss: 0.00821410957723856 2023-01-24 05:30:29.835713: step: 466/464, loss: 0.018185364082455635 2023-01-24 05:30:30.433459: step: 468/464, loss: 0.008915431797504425 2023-01-24 05:30:31.054793: step: 470/464, loss: 0.3980819284915924 2023-01-24 05:30:31.696670: step: 472/464, loss: 0.015454445965588093 2023-01-24 05:30:32.441002: step: 474/464, loss: 0.0009482467430643737 2023-01-24 05:30:33.003703: step: 476/464, loss: 0.0003818488912656903 2023-01-24 05:30:33.583067: step: 478/464, loss: 0.01932300068438053 2023-01-24 05:30:34.101926: step: 480/464, loss: 0.02119056135416031 2023-01-24 05:30:34.693241: step: 482/464, loss: 0.010176424868404865 2023-01-24 05:30:35.348346: step: 484/464, loss: 0.002057582139968872 2023-01-24 05:30:35.968582: step: 486/464, loss: 0.038074057549238205 2023-01-24 05:30:36.671607: step: 488/464, loss: 0.0008602479356341064 2023-01-24 05:30:37.278700: step: 490/464, loss: 0.0010032361606135964 2023-01-24 05:30:37.862027: step: 492/464, loss: 0.02331576868891716 2023-01-24 05:30:38.458697: step: 494/464, loss: 0.0032299442682415247 2023-01-24 05:30:39.027796: step: 496/464, loss: 0.007703948765993118 2023-01-24 05:30:39.709977: step: 498/464, loss: 0.07672279328107834 2023-01-24 05:30:40.392089: step: 500/464, loss: 0.013739488087594509 2023-01-24 05:30:41.146742: step: 502/464, loss: 0.004015688318759203 2023-01-24 05:30:41.825202: step: 504/464, loss: 0.0002704980142880231 2023-01-24 05:30:42.372930: step: 506/464, loss: 0.04848206043243408 2023-01-24 05:30:43.005825: step: 508/464, loss: 0.022236688062548637 2023-01-24 05:30:43.661234: step: 510/464, loss: 0.042567480355501175 2023-01-24 05:30:44.305335: step: 512/464, loss: 4.5881301957706455e-06 2023-01-24 05:30:44.911850: step: 514/464, loss: 0.00047643258585594594 2023-01-24 05:30:45.544255: step: 516/464, loss: 0.0048703039065003395 2023-01-24 05:30:46.195012: step: 518/464, loss: 0.03520174324512482 2023-01-24 05:30:46.785247: step: 520/464, loss: 0.000222586008021608 2023-01-24 05:30:47.448653: step: 522/464, loss: 0.006125647574663162 2023-01-24 05:30:48.106862: step: 524/464, loss: 0.009394499473273754 2023-01-24 05:30:48.688530: step: 526/464, loss: 0.0008229393279179931 2023-01-24 05:30:49.335022: step: 528/464, loss: 0.007002570666372776 2023-01-24 05:30:49.886417: step: 530/464, loss: 0.005204002372920513 2023-01-24 05:30:50.501332: step: 532/464, loss: 0.004415723029524088 2023-01-24 05:30:51.123279: step: 534/464, loss: 0.007514380384236574 2023-01-24 05:30:51.725430: step: 536/464, loss: 0.0004743316094391048 2023-01-24 05:30:52.313169: step: 538/464, loss: 0.002290072152391076 2023-01-24 05:30:52.981518: step: 540/464, loss: 0.005570805165916681 2023-01-24 05:30:53.588623: step: 542/464, loss: 5.998162305331789e-05 2023-01-24 05:30:54.190611: step: 544/464, loss: 0.0 2023-01-24 05:30:54.891517: step: 546/464, loss: 0.00013383693294599652 2023-01-24 05:30:55.517378: step: 548/464, loss: 0.023291975259780884 2023-01-24 05:30:56.146178: step: 550/464, loss: 0.00010557601490290835 2023-01-24 05:30:56.674676: step: 552/464, loss: 0.001020438619889319 2023-01-24 05:30:57.340701: step: 554/464, loss: 0.002947790315374732 2023-01-24 05:30:57.960575: step: 556/464, loss: 0.0017539083492010832 2023-01-24 05:30:58.585394: step: 558/464, loss: 0.0005117803229950368 2023-01-24 05:30:59.233432: step: 560/464, loss: 0.00025536149041727185 2023-01-24 05:30:59.841305: step: 562/464, loss: 0.24654802680015564 2023-01-24 05:31:00.596412: step: 564/464, loss: 0.0009103059419430792 2023-01-24 05:31:01.261609: step: 566/464, loss: 0.004263360984623432 2023-01-24 05:31:01.918448: step: 568/464, loss: 0.0005020697717554867 2023-01-24 05:31:02.572216: step: 570/464, loss: 0.03391928970813751 2023-01-24 05:31:03.163747: step: 572/464, loss: 0.0711340457201004 2023-01-24 05:31:03.787681: step: 574/464, loss: 0.00024413218488916755 2023-01-24 05:31:04.389220: step: 576/464, loss: 0.0878123864531517 2023-01-24 05:31:04.949554: step: 578/464, loss: 0.0047101471573114395 2023-01-24 05:31:05.665054: step: 580/464, loss: 0.001482927706092596 2023-01-24 05:31:06.345112: step: 582/464, loss: 0.0059550609439611435 2023-01-24 05:31:06.937374: step: 584/464, loss: 0.006339132785797119 2023-01-24 05:31:07.602550: step: 586/464, loss: 0.007386078126728535 2023-01-24 05:31:08.199528: step: 588/464, loss: 0.008458703756332397 2023-01-24 05:31:08.854685: step: 590/464, loss: 0.005758496467024088 2023-01-24 05:31:09.473871: step: 592/464, loss: 0.0013132354943081737 2023-01-24 05:31:10.101816: step: 594/464, loss: 0.0005673606647178531 2023-01-24 05:31:10.798017: step: 596/464, loss: 0.018045693635940552 2023-01-24 05:31:11.409953: step: 598/464, loss: 0.4945283830165863 2023-01-24 05:31:12.068995: step: 600/464, loss: 0.0024164745118469 2023-01-24 05:31:12.723637: step: 602/464, loss: 0.004685352556407452 2023-01-24 05:31:13.364590: step: 604/464, loss: 0.004564021248370409 2023-01-24 05:31:14.045428: step: 606/464, loss: 0.050456490367650986 2023-01-24 05:31:14.715849: step: 608/464, loss: 0.04056352376937866 2023-01-24 05:31:15.360858: step: 610/464, loss: 0.0005739732296206057 2023-01-24 05:31:15.977268: step: 612/464, loss: 0.007927405647933483 2023-01-24 05:31:16.596954: step: 614/464, loss: 0.0051388186402618885 2023-01-24 05:31:17.226631: step: 616/464, loss: 0.0002850943128578365 2023-01-24 05:31:17.789879: step: 618/464, loss: 0.005885418504476547 2023-01-24 05:31:18.385578: step: 620/464, loss: 0.005376122426241636 2023-01-24 05:31:18.953819: step: 622/464, loss: 0.00022895917936693877 2023-01-24 05:31:19.530458: step: 624/464, loss: 1.3114871978759766 2023-01-24 05:31:20.198540: step: 626/464, loss: 0.0682322308421135 2023-01-24 05:31:20.814502: step: 628/464, loss: 2.1218525944277644e-05 2023-01-24 05:31:21.383002: step: 630/464, loss: 0.0011831369483843446 2023-01-24 05:31:21.934802: step: 632/464, loss: 0.0001217541066580452 2023-01-24 05:31:22.536047: step: 634/464, loss: 0.000267831957899034 2023-01-24 05:31:23.176982: step: 636/464, loss: 0.01607227884232998 2023-01-24 05:31:23.772725: step: 638/464, loss: 0.01205810159444809 2023-01-24 05:31:24.341664: step: 640/464, loss: 0.009893765673041344 2023-01-24 05:31:24.946247: step: 642/464, loss: 0.002671779366210103 2023-01-24 05:31:25.554453: step: 644/464, loss: 0.00041898165363818407 2023-01-24 05:31:26.147354: step: 646/464, loss: 0.00014477742661256343 2023-01-24 05:31:26.679150: step: 648/464, loss: 0.008365098387002945 2023-01-24 05:31:27.317568: step: 650/464, loss: 0.026274938136339188 2023-01-24 05:31:27.904919: step: 652/464, loss: 0.00337810511700809 2023-01-24 05:31:28.499055: step: 654/464, loss: 0.006393271032720804 2023-01-24 05:31:29.064400: step: 656/464, loss: 0.00383376725949347 2023-01-24 05:31:29.722469: step: 658/464, loss: 0.06319655478000641 2023-01-24 05:31:30.298682: step: 660/464, loss: 0.0021081226877868176 2023-01-24 05:31:30.921499: step: 662/464, loss: 0.006011603865772486 2023-01-24 05:31:31.540618: step: 664/464, loss: 0.023292817175388336 2023-01-24 05:31:32.351934: step: 666/464, loss: 0.0008032124023884535 2023-01-24 05:31:33.004745: step: 668/464, loss: 0.0015976447612047195 2023-01-24 05:31:33.646909: step: 670/464, loss: 0.006079916842281818 2023-01-24 05:31:34.273931: step: 672/464, loss: 0.0005906568258069456 2023-01-24 05:31:34.887665: step: 674/464, loss: 0.037261757999658585 2023-01-24 05:31:35.561068: step: 676/464, loss: 0.00014715935685671866 2023-01-24 05:31:36.259995: step: 678/464, loss: 0.04640545696020126 2023-01-24 05:31:36.847576: step: 680/464, loss: 0.00031977047910913825 2023-01-24 05:31:37.394267: step: 682/464, loss: 0.004776813089847565 2023-01-24 05:31:37.977863: step: 684/464, loss: 0.00037920771865174174 2023-01-24 05:31:38.593964: step: 686/464, loss: 0.016807299107313156 2023-01-24 05:31:39.202343: step: 688/464, loss: 0.0012932555982843041 2023-01-24 05:31:39.738782: step: 690/464, loss: 0.014235904440283775 2023-01-24 05:31:40.394391: step: 692/464, loss: 0.0753798708319664 2023-01-24 05:31:41.082555: step: 694/464, loss: 0.012994196265935898 2023-01-24 05:31:41.686854: step: 696/464, loss: 0.00023477750073652714 2023-01-24 05:31:42.295984: step: 698/464, loss: 0.012356506660580635 2023-01-24 05:31:42.918525: step: 700/464, loss: 0.00019970822904724628 2023-01-24 05:31:43.472923: step: 702/464, loss: 0.03570036590099335 2023-01-24 05:31:44.023022: step: 704/464, loss: 0.1842038929462433 2023-01-24 05:31:44.669717: step: 706/464, loss: 0.0002647065557539463 2023-01-24 05:31:45.279982: step: 708/464, loss: 1.2724791765213013 2023-01-24 05:31:45.975965: step: 710/464, loss: 0.0004814733983948827 2023-01-24 05:31:46.558745: step: 712/464, loss: 1.2289744972804328e-06 2023-01-24 05:31:47.171457: step: 714/464, loss: 0.007565478794276714 2023-01-24 05:31:47.712391: step: 716/464, loss: 0.007476923055946827 2023-01-24 05:31:48.293871: step: 718/464, loss: 0.0003839374694507569 2023-01-24 05:31:49.013026: step: 720/464, loss: 0.0027928303461521864 2023-01-24 05:31:49.682272: step: 722/464, loss: 0.004604507237672806 2023-01-24 05:31:50.269221: step: 724/464, loss: 0.003077725414186716 2023-01-24 05:31:50.871507: step: 726/464, loss: 0.0004951234441250563 2023-01-24 05:31:51.467832: step: 728/464, loss: 0.026010407134890556 2023-01-24 05:31:52.061934: step: 730/464, loss: 0.0009832883952185512 2023-01-24 05:31:52.723492: step: 732/464, loss: 0.0680810809135437 2023-01-24 05:31:53.374670: step: 734/464, loss: 0.08909051865339279 2023-01-24 05:31:53.983823: step: 736/464, loss: 0.0001485932880314067 2023-01-24 05:31:54.592464: step: 738/464, loss: 0.16136470437049866 2023-01-24 05:31:55.198461: step: 740/464, loss: 0.0013271862408146262 2023-01-24 05:31:55.828218: step: 742/464, loss: 0.0001475284807384014 2023-01-24 05:31:56.459190: step: 744/464, loss: 0.004155992995947599 2023-01-24 05:31:57.087467: step: 746/464, loss: 0.0003829763736575842 2023-01-24 05:31:57.657984: step: 748/464, loss: 0.006544989533722401 2023-01-24 05:31:58.260632: step: 750/464, loss: 0.0001559268857818097 2023-01-24 05:31:58.835401: step: 752/464, loss: 0.00019917615281883627 2023-01-24 05:31:59.456452: step: 754/464, loss: 0.0016130805015563965 2023-01-24 05:32:00.052142: step: 756/464, loss: 0.0039898729883134365 2023-01-24 05:32:00.789205: step: 758/464, loss: 0.04093284532427788 2023-01-24 05:32:01.400209: step: 760/464, loss: 0.001979441847652197 2023-01-24 05:32:02.003964: step: 762/464, loss: 0.3502618968486786 2023-01-24 05:32:02.627281: step: 764/464, loss: 0.0003438458370510489 2023-01-24 05:32:03.298414: step: 766/464, loss: 0.0072568198665976524 2023-01-24 05:32:03.855561: step: 768/464, loss: 0.017397085204720497 2023-01-24 05:32:04.441344: step: 770/464, loss: 0.03964915871620178 2023-01-24 05:32:05.032857: step: 772/464, loss: 0.0007863616337999701 2023-01-24 05:32:05.643077: step: 774/464, loss: 0.0190030075609684 2023-01-24 05:32:06.289121: step: 776/464, loss: 0.004721463192254305 2023-01-24 05:32:06.912434: step: 778/464, loss: 7.103903772076592e-05 2023-01-24 05:32:07.533634: step: 780/464, loss: 0.0064157661981880665 2023-01-24 05:32:08.185079: step: 782/464, loss: 0.0024287928827106953 2023-01-24 05:32:08.800540: step: 784/464, loss: 0.011441824026405811 2023-01-24 05:32:09.420629: step: 786/464, loss: 0.013380954042077065 2023-01-24 05:32:10.000401: step: 788/464, loss: 0.0010275207459926605 2023-01-24 05:32:10.653031: step: 790/464, loss: 0.0008638154831714928 2023-01-24 05:32:11.222109: step: 792/464, loss: 0.0020451450254768133 2023-01-24 05:32:11.843046: step: 794/464, loss: 0.00572402635589242 2023-01-24 05:32:12.489047: step: 796/464, loss: 0.006697699893265963 2023-01-24 05:32:13.077323: step: 798/464, loss: 0.003393400926142931 2023-01-24 05:32:13.684186: step: 800/464, loss: 0.004453219939023256 2023-01-24 05:32:14.302188: step: 802/464, loss: 0.02158048003911972 2023-01-24 05:32:14.852906: step: 804/464, loss: 0.0020164845045655966 2023-01-24 05:32:15.498653: step: 806/464, loss: 0.02241668850183487 2023-01-24 05:32:16.152498: step: 808/464, loss: 0.06384516507387161 2023-01-24 05:32:16.807087: step: 810/464, loss: 0.011012006551027298 2023-01-24 05:32:17.414433: step: 812/464, loss: 0.0005875803180970252 2023-01-24 05:32:18.083431: step: 814/464, loss: 0.00029726020875386894 2023-01-24 05:32:18.728623: step: 816/464, loss: 0.02253718674182892 2023-01-24 05:32:19.317120: step: 818/464, loss: 0.002219903515651822 2023-01-24 05:32:19.927080: step: 820/464, loss: 0.000503991381265223 2023-01-24 05:32:20.519369: step: 822/464, loss: 0.0007541986415162683 2023-01-24 05:32:21.155654: step: 824/464, loss: 0.003817042801529169 2023-01-24 05:32:21.853174: step: 826/464, loss: 2.5243652999051847e-05 2023-01-24 05:32:22.490691: step: 828/464, loss: 0.028099194169044495 2023-01-24 05:32:23.071275: step: 830/464, loss: 0.009699639864265919 2023-01-24 05:32:23.710766: step: 832/464, loss: 0.025268562138080597 2023-01-24 05:32:24.349556: step: 834/464, loss: 0.013387763872742653 2023-01-24 05:32:24.936742: step: 836/464, loss: 0.00692420918494463 2023-01-24 05:32:25.535568: step: 838/464, loss: 0.00038922022213228047 2023-01-24 05:32:26.136013: step: 840/464, loss: 0.0006864046445116401 2023-01-24 05:32:26.749505: step: 842/464, loss: 0.013146799989044666 2023-01-24 05:32:27.333625: step: 844/464, loss: 4.239610916556558e-06 2023-01-24 05:32:27.921181: step: 846/464, loss: 0.0018868102924898267 2023-01-24 05:32:28.519544: step: 848/464, loss: 0.0007481158245354891 2023-01-24 05:32:29.113321: step: 850/464, loss: 0.0018886085599660873 2023-01-24 05:32:29.745560: step: 852/464, loss: 0.005018434952944517 2023-01-24 05:32:30.460348: step: 854/464, loss: 0.05035892874002457 2023-01-24 05:32:31.128169: step: 856/464, loss: 0.03840731829404831 2023-01-24 05:32:31.704717: step: 858/464, loss: 0.003952166996896267 2023-01-24 05:32:32.346911: step: 860/464, loss: 0.007025779690593481 2023-01-24 05:32:32.944619: step: 862/464, loss: 0.007006064988672733 2023-01-24 05:32:33.530022: step: 864/464, loss: 4.2151474190177396e-05 2023-01-24 05:32:34.209386: step: 866/464, loss: 0.012056934647262096 2023-01-24 05:32:34.814530: step: 868/464, loss: 0.2325555384159088 2023-01-24 05:32:35.400965: step: 870/464, loss: 3.44830584526062 2023-01-24 05:32:35.994520: step: 872/464, loss: 0.02701178938150406 2023-01-24 05:32:36.561290: step: 874/464, loss: 0.002139961114153266 2023-01-24 05:32:37.173562: step: 876/464, loss: 0.0028142149094492197 2023-01-24 05:32:37.811102: step: 878/464, loss: 0.005788634996861219 2023-01-24 05:32:38.414280: step: 880/464, loss: 4.943248495692387e-05 2023-01-24 05:32:39.002200: step: 882/464, loss: 0.0016170486342161894 2023-01-24 05:32:39.568481: step: 884/464, loss: 0.0027413603384047747 2023-01-24 05:32:40.138617: step: 886/464, loss: 0.003258903743699193 2023-01-24 05:32:40.773074: step: 888/464, loss: 2.534052327973768e-06 2023-01-24 05:32:41.425290: step: 890/464, loss: 0.000867818424012512 2023-01-24 05:32:42.044959: step: 892/464, loss: 0.0012389046605676413 2023-01-24 05:32:42.687851: step: 894/464, loss: 8.449164306512102e-05 2023-01-24 05:32:43.387642: step: 896/464, loss: 0.012688858434557915 2023-01-24 05:32:43.992734: step: 898/464, loss: 7.011165871517733e-05 2023-01-24 05:32:44.591236: step: 900/464, loss: 0.0015451086219400167 2023-01-24 05:32:45.212819: step: 902/464, loss: 0.0895097628235817 2023-01-24 05:32:45.859106: step: 904/464, loss: 0.02035101316869259 2023-01-24 05:32:46.462069: step: 906/464, loss: 0.019604161381721497 2023-01-24 05:32:47.043723: step: 908/464, loss: 0.0012368483003228903 2023-01-24 05:32:47.769682: step: 910/464, loss: 0.015060057863593102 2023-01-24 05:32:48.442122: step: 912/464, loss: 0.02042451687157154 2023-01-24 05:32:49.156201: step: 914/464, loss: 0.002836141036823392 2023-01-24 05:32:49.767094: step: 916/464, loss: 0.0005727356183342636 2023-01-24 05:32:50.396595: step: 918/464, loss: 0.014801361598074436 2023-01-24 05:32:50.987892: step: 920/464, loss: 0.00039233363349922 2023-01-24 05:32:51.619958: step: 922/464, loss: 0.008480165153741837 2023-01-24 05:32:52.235995: step: 924/464, loss: 0.003106620628386736 2023-01-24 05:32:52.818674: step: 926/464, loss: 0.01438205223530531 2023-01-24 05:32:53.460467: step: 928/464, loss: 0.019052069634199142 2023-01-24 05:32:53.949788: step: 930/464, loss: 0.0014227000065147877 ================================================== Loss: 0.050 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32827658582089553, 'r': 0.3338828273244782, 'f1': 0.33105597365945444}, 'combined': 0.24393598059117694, 'epoch': 38} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3153785473099367, 'r': 0.30667444756817214, 'f1': 0.31096560121884614}, 'combined': 0.20301381219468712, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32953151835551736, 'r': 0.3426627553298359, 'f1': 0.33596887824897403}, 'combined': 0.2475560155518756, 'epoch': 38} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33501269320167815, 'r': 0.3189863270135574, 'f1': 0.3268031455831639}, 'combined': 0.2133533489299412, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3424951946069235, 'r': 0.34639457063660384, 'f1': 0.3444338466518684}, 'combined': 0.25379336069085034, 'epoch': 38} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.32878712914791036, 'r': 0.3006572275740781, 'f1': 0.31409361496686483}, 'combined': 0.20505593515971485, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23611111111111108, 'r': 0.32380952380952377, 'f1': 0.2730923694779116}, 'combined': 0.18206157965194106, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2803030303030303, 'r': 0.40217391304347827, 'f1': 0.33035714285714285}, 'combined': 0.16517857142857142, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5384615384615384, 'r': 0.2413793103448276, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3348209212662338, 'r': 0.3557869372089012, 'f1': 0.3449856778456135}, 'combined': 0.2541999731493994, 'epoch': 34} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3240350842156237, 'r': 0.3094415923713539, 'f1': 0.31657024212508983}, 'combined': 0.2066728005583488, 'epoch': 34} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33088235294117646, 'r': 0.4891304347826087, 'f1': 0.39473684210526316}, 'combined': 0.19736842105263158, 'epoch': 34} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:35:31.668402: step: 2/464, loss: 0.005334419198334217 2023-01-24 05:35:32.264108: step: 4/464, loss: 7.04170306562446e-05 2023-01-24 05:35:32.905449: step: 6/464, loss: 0.06576389819383621 2023-01-24 05:35:33.547864: step: 8/464, loss: 0.0003311616601422429 2023-01-24 05:35:34.144446: step: 10/464, loss: 0.0016958436463028193 2023-01-24 05:35:34.710526: step: 12/464, loss: 0.005034781061112881 2023-01-24 05:35:35.371753: step: 14/464, loss: 0.8912667036056519 2023-01-24 05:35:36.033359: step: 16/464, loss: 0.004827701486647129 2023-01-24 05:35:36.648678: step: 18/464, loss: 0.0013554502511397004 2023-01-24 05:35:37.266763: step: 20/464, loss: 0.0004365647619124502 2023-01-24 05:35:37.874594: step: 22/464, loss: 0.001043907948769629 2023-01-24 05:35:38.512371: step: 24/464, loss: 0.0014231398236006498 2023-01-24 05:35:39.130275: step: 26/464, loss: 0.0029816306196153164 2023-01-24 05:35:39.756343: step: 28/464, loss: 0.003940364811569452 2023-01-24 05:35:40.378670: step: 30/464, loss: 0.0007442350615747273 2023-01-24 05:35:40.965068: step: 32/464, loss: 0.0001322044263361022 2023-01-24 05:35:41.660590: step: 34/464, loss: 0.0019910086411982775 2023-01-24 05:35:42.326948: step: 36/464, loss: 0.010590208694338799 2023-01-24 05:35:42.967665: step: 38/464, loss: 0.0035319929011166096 2023-01-24 05:35:43.623474: step: 40/464, loss: 0.007291679736226797 2023-01-24 05:35:44.231350: step: 42/464, loss: 0.0005593971582129598 2023-01-24 05:35:44.820930: step: 44/464, loss: 0.0059903026558458805 2023-01-24 05:35:45.423113: step: 46/464, loss: 0.00047973901382647455 2023-01-24 05:35:45.989006: step: 48/464, loss: 0.002392848953604698 2023-01-24 05:35:46.580577: step: 50/464, loss: 0.0029812839347869158 2023-01-24 05:35:47.202590: step: 52/464, loss: 0.016058560460805893 2023-01-24 05:35:47.878990: step: 54/464, loss: 0.00029119051760062575 2023-01-24 05:35:48.541609: step: 56/464, loss: 0.004253122489899397 2023-01-24 05:35:49.343849: step: 58/464, loss: 0.016664346680045128 2023-01-24 05:35:49.984734: step: 60/464, loss: 0.006586653180420399 2023-01-24 05:35:50.626614: step: 62/464, loss: 0.0007175652426667511 2023-01-24 05:35:51.249495: step: 64/464, loss: 0.018905365839600563 2023-01-24 05:35:51.863073: step: 66/464, loss: 1.7158316040877253e-05 2023-01-24 05:35:52.476683: step: 68/464, loss: 0.0022365667391568422 2023-01-24 05:35:53.120342: step: 70/464, loss: 0.0004463378863874823 2023-01-24 05:35:53.736967: step: 72/464, loss: 0.0002532684011384845 2023-01-24 05:35:54.421704: step: 74/464, loss: 0.012067864648997784 2023-01-24 05:35:55.042867: step: 76/464, loss: 0.02911846898496151 2023-01-24 05:35:55.681661: step: 78/464, loss: 0.008488481864333153 2023-01-24 05:35:56.281849: step: 80/464, loss: 0.06633666157722473 2023-01-24 05:35:56.896098: step: 82/464, loss: 0.0010267571778967977 2023-01-24 05:35:57.501965: step: 84/464, loss: 0.12181129306554794 2023-01-24 05:35:58.135447: step: 86/464, loss: 0.0045325239188969135 2023-01-24 05:35:58.804083: step: 88/464, loss: 0.0010455892188474536 2023-01-24 05:35:59.416295: step: 90/464, loss: 0.0067656678147614 2023-01-24 05:36:00.058283: step: 92/464, loss: 0.20134253799915314 2023-01-24 05:36:00.619552: step: 94/464, loss: 0.06584080308675766 2023-01-24 05:36:01.225339: step: 96/464, loss: 0.00019669999892357737 2023-01-24 05:36:01.788372: step: 98/464, loss: 0.022210508584976196 2023-01-24 05:36:02.422363: step: 100/464, loss: 0.05593627318739891 2023-01-24 05:36:03.063604: step: 102/464, loss: 0.04347601160407066 2023-01-24 05:36:03.746486: step: 104/464, loss: 0.0016960520297288895 2023-01-24 05:36:04.366484: step: 106/464, loss: 0.005098738707602024 2023-01-24 05:36:04.988757: step: 108/464, loss: 0.013563141226768494 2023-01-24 05:36:05.575117: step: 110/464, loss: 0.00041637677350081503 2023-01-24 05:36:06.168046: step: 112/464, loss: 0.00041668335325084627 2023-01-24 05:36:06.818770: step: 114/464, loss: 0.00100265396758914 2023-01-24 05:36:07.415239: step: 116/464, loss: 0.0170939639210701 2023-01-24 05:36:08.021338: step: 118/464, loss: 8.722050552023575e-06 2023-01-24 05:36:08.632493: step: 120/464, loss: 0.0005133725935593247 2023-01-24 05:36:09.248155: step: 122/464, loss: 0.00265505351126194 2023-01-24 05:36:09.853854: step: 124/464, loss: 0.006810938473790884 2023-01-24 05:36:10.552423: step: 126/464, loss: 0.011815540492534637 2023-01-24 05:36:11.209982: step: 128/464, loss: 0.00028624487458728254 2023-01-24 05:36:11.757634: step: 130/464, loss: 0.0007571761962026358 2023-01-24 05:36:12.389591: step: 132/464, loss: 5.493920616572723e-05 2023-01-24 05:36:13.077779: step: 134/464, loss: 0.08115773648023605 2023-01-24 05:36:13.649199: step: 136/464, loss: 0.003416879801079631 2023-01-24 05:36:14.260770: step: 138/464, loss: 0.1467238962650299 2023-01-24 05:36:14.935538: step: 140/464, loss: 0.13560757040977478 2023-01-24 05:36:15.534235: step: 142/464, loss: 9.385305929754395e-06 2023-01-24 05:36:16.162548: step: 144/464, loss: 0.006654942408204079 2023-01-24 05:36:16.816569: step: 146/464, loss: 0.06987083703279495 2023-01-24 05:36:17.510470: step: 148/464, loss: 0.015678076073527336 2023-01-24 05:36:18.088076: step: 150/464, loss: 0.007049160078167915 2023-01-24 05:36:18.697711: step: 152/464, loss: 0.002859183819964528 2023-01-24 05:36:19.270987: step: 154/464, loss: 0.0075204456225037575 2023-01-24 05:36:19.879257: step: 156/464, loss: 0.004340674262493849 2023-01-24 05:36:20.443700: step: 158/464, loss: 0.01086458284407854 2023-01-24 05:36:21.007534: step: 160/464, loss: 0.00022155193437356502 2023-01-24 05:36:21.589549: step: 162/464, loss: 0.01219822559505701 2023-01-24 05:36:22.220698: step: 164/464, loss: 0.007934968918561935 2023-01-24 05:36:22.864105: step: 166/464, loss: 0.010010930709540844 2023-01-24 05:36:23.434802: step: 168/464, loss: 0.00616831099614501 2023-01-24 05:36:24.062149: step: 170/464, loss: 0.010001836344599724 2023-01-24 05:36:24.672347: step: 172/464, loss: 0.0013399292947724462 2023-01-24 05:36:25.290806: step: 174/464, loss: 0.03098447248339653 2023-01-24 05:36:25.915132: step: 176/464, loss: 0.03986944630742073 2023-01-24 05:36:26.570895: step: 178/464, loss: 0.1782996505498886 2023-01-24 05:36:27.199673: step: 180/464, loss: 0.0041822222992777824 2023-01-24 05:36:27.830707: step: 182/464, loss: 0.1593576818704605 2023-01-24 05:36:28.449331: step: 184/464, loss: 0.014084400609135628 2023-01-24 05:36:29.017758: step: 186/464, loss: 0.008832301013171673 2023-01-24 05:36:29.643067: step: 188/464, loss: 0.01603720150887966 2023-01-24 05:36:30.288700: step: 190/464, loss: 0.0220944806933403 2023-01-24 05:36:30.951657: step: 192/464, loss: 0.003413004567846656 2023-01-24 05:36:31.592228: step: 194/464, loss: 0.0013775130501016974 2023-01-24 05:36:32.189358: step: 196/464, loss: 0.00013820311869494617 2023-01-24 05:36:32.786321: step: 198/464, loss: 0.4320983290672302 2023-01-24 05:36:33.382653: step: 200/464, loss: 0.0289418064057827 2023-01-24 05:36:34.098082: step: 202/464, loss: 0.0008436589851044118 2023-01-24 05:36:34.659233: step: 204/464, loss: 0.00023445415718015283 2023-01-24 05:36:35.297026: step: 206/464, loss: 0.013457918539643288 2023-01-24 05:36:35.932837: step: 208/464, loss: 0.00038137033698149025 2023-01-24 05:36:36.536061: step: 210/464, loss: 0.00128361361566931 2023-01-24 05:36:37.136413: step: 212/464, loss: 0.000436326110502705 2023-01-24 05:36:37.664200: step: 214/464, loss: 0.00017113862850237638 2023-01-24 05:36:38.266418: step: 216/464, loss: 0.013265226036310196 2023-01-24 05:36:38.919771: step: 218/464, loss: 0.0004172701155766845 2023-01-24 05:36:39.573524: step: 220/464, loss: 0.004786666948348284 2023-01-24 05:36:40.142084: step: 222/464, loss: 0.007202350068837404 2023-01-24 05:36:40.814530: step: 224/464, loss: 0.00547541119158268 2023-01-24 05:36:41.492296: step: 226/464, loss: 0.01085236668586731 2023-01-24 05:36:42.091017: step: 228/464, loss: 0.0012195601593703032 2023-01-24 05:36:42.731900: step: 230/464, loss: 0.0015526131028309464 2023-01-24 05:36:43.360426: step: 232/464, loss: 0.0009176727035082877 2023-01-24 05:36:43.992569: step: 234/464, loss: 0.00871170312166214 2023-01-24 05:36:44.631965: step: 236/464, loss: 0.0004042471118737012 2023-01-24 05:36:45.231052: step: 238/464, loss: 0.0050428323447704315 2023-01-24 05:36:45.824712: step: 240/464, loss: 0.0018469096394255757 2023-01-24 05:36:46.438308: step: 242/464, loss: 0.0012668923009186983 2023-01-24 05:36:47.043736: step: 244/464, loss: 0.009330748580396175 2023-01-24 05:36:47.638200: step: 246/464, loss: 0.020866425707936287 2023-01-24 05:36:48.290969: step: 248/464, loss: 0.010160157456994057 2023-01-24 05:36:48.895570: step: 250/464, loss: 0.0020632955711334944 2023-01-24 05:36:49.487983: step: 252/464, loss: 0.04051090404391289 2023-01-24 05:36:50.074219: step: 254/464, loss: 0.02441595308482647 2023-01-24 05:36:50.748812: step: 256/464, loss: 0.00020131834025960416 2023-01-24 05:36:51.357220: step: 258/464, loss: 0.0008216965361498296 2023-01-24 05:36:51.966531: step: 260/464, loss: 0.0014969498151913285 2023-01-24 05:36:52.626576: step: 262/464, loss: 0.015299106016755104 2023-01-24 05:36:53.298308: step: 264/464, loss: 0.008774522691965103 2023-01-24 05:36:53.937340: step: 266/464, loss: 0.006688565015792847 2023-01-24 05:36:54.510506: step: 268/464, loss: 0.008134461008012295 2023-01-24 05:36:55.140445: step: 270/464, loss: 0.0023492120672017336 2023-01-24 05:36:55.777683: step: 272/464, loss: 0.0002234416751889512 2023-01-24 05:36:56.344379: step: 274/464, loss: 0.11775083839893341 2023-01-24 05:36:56.993908: step: 276/464, loss: 0.005091332830488682 2023-01-24 05:36:57.661407: step: 278/464, loss: 0.16419044137001038 2023-01-24 05:36:58.294485: step: 280/464, loss: 0.3668004274368286 2023-01-24 05:36:58.968082: step: 282/464, loss: 0.007253877818584442 2023-01-24 05:36:59.603977: step: 284/464, loss: 0.04187341406941414 2023-01-24 05:37:00.218380: step: 286/464, loss: 0.11548540741205215 2023-01-24 05:37:00.808297: step: 288/464, loss: 0.005352118983864784 2023-01-24 05:37:01.384672: step: 290/464, loss: 0.006731715518981218 2023-01-24 05:37:02.014761: step: 292/464, loss: 0.028945349156856537 2023-01-24 05:37:02.638376: step: 294/464, loss: 0.0008053245837800205 2023-01-24 05:37:03.229009: step: 296/464, loss: 0.00040874775731936097 2023-01-24 05:37:03.877365: step: 298/464, loss: 0.002919417340308428 2023-01-24 05:37:04.396089: step: 300/464, loss: 0.00022127020929474384 2023-01-24 05:37:04.970114: step: 302/464, loss: 0.0016775665571913123 2023-01-24 05:37:05.555886: step: 304/464, loss: 0.007574569899588823 2023-01-24 05:37:06.237478: step: 306/464, loss: 0.002090072724968195 2023-01-24 05:37:06.837352: step: 308/464, loss: 0.03746446967124939 2023-01-24 05:37:07.444428: step: 310/464, loss: 0.020062992349267006 2023-01-24 05:37:08.047556: step: 312/464, loss: 4.8538749979343265e-05 2023-01-24 05:37:08.713652: step: 314/464, loss: 0.0010255994275212288 2023-01-24 05:37:09.377817: step: 316/464, loss: 0.006342700682580471 2023-01-24 05:37:10.010016: step: 318/464, loss: 0.023778468370437622 2023-01-24 05:37:10.663048: step: 320/464, loss: 0.010570963844656944 2023-01-24 05:37:11.344155: step: 322/464, loss: 0.00012406407040543854 2023-01-24 05:37:11.949242: step: 324/464, loss: 0.0011799990897998214 2023-01-24 05:37:12.598217: step: 326/464, loss: 0.23131757974624634 2023-01-24 05:37:13.216466: step: 328/464, loss: 0.0009250523871742189 2023-01-24 05:37:13.870805: step: 330/464, loss: 0.0004726681509055197 2023-01-24 05:37:14.512022: step: 332/464, loss: 0.0024111729580909014 2023-01-24 05:37:15.128571: step: 334/464, loss: 0.00034986893297173083 2023-01-24 05:37:15.701521: step: 336/464, loss: 0.00017758288595359772 2023-01-24 05:37:16.301164: step: 338/464, loss: 0.00551891652867198 2023-01-24 05:37:16.971690: step: 340/464, loss: 0.006714930757880211 2023-01-24 05:37:17.583666: step: 342/464, loss: 0.010364735499024391 2023-01-24 05:37:18.199724: step: 344/464, loss: 0.03158561512827873 2023-01-24 05:37:18.909228: step: 346/464, loss: 0.0007301444420590997 2023-01-24 05:37:19.613745: step: 348/464, loss: 0.0017152574146166444 2023-01-24 05:37:20.210613: step: 350/464, loss: 0.010084620676934719 2023-01-24 05:37:20.863179: step: 352/464, loss: 0.007742516230791807 2023-01-24 05:37:21.493609: step: 354/464, loss: 0.0004419960896484554 2023-01-24 05:37:22.072402: step: 356/464, loss: 1.7414729882148094e-05 2023-01-24 05:37:22.678938: step: 358/464, loss: 0.004977700766175985 2023-01-24 05:37:23.283730: step: 360/464, loss: 0.009078857488930225 2023-01-24 05:37:23.955377: step: 362/464, loss: 0.012264758348464966 2023-01-24 05:37:24.548121: step: 364/464, loss: 0.0050095547921955585 2023-01-24 05:37:25.143767: step: 366/464, loss: 0.011527528055012226 2023-01-24 05:37:25.741695: step: 368/464, loss: 0.001211122376844287 2023-01-24 05:37:26.351740: step: 370/464, loss: 0.005661012604832649 2023-01-24 05:37:27.037926: step: 372/464, loss: 0.02688767947256565 2023-01-24 05:37:27.634724: step: 374/464, loss: 0.00032966237631626427 2023-01-24 05:37:28.261595: step: 376/464, loss: 0.040558621287345886 2023-01-24 05:37:28.826793: step: 378/464, loss: 0.0012790873879566789 2023-01-24 05:37:29.473417: step: 380/464, loss: 1.3759578905592207e-05 2023-01-24 05:37:30.056484: step: 382/464, loss: 0.030656851828098297 2023-01-24 05:37:30.654479: step: 384/464, loss: 2.6676945708459243e-05 2023-01-24 05:37:31.295203: step: 386/464, loss: 0.016037778928875923 2023-01-24 05:37:31.892508: step: 388/464, loss: 8.989993511931971e-05 2023-01-24 05:37:32.506825: step: 390/464, loss: 0.014434353448450565 2023-01-24 05:37:33.165181: step: 392/464, loss: 8.683041960466653e-05 2023-01-24 05:37:33.736723: step: 394/464, loss: 0.007126760669052601 2023-01-24 05:37:34.370922: step: 396/464, loss: 0.0069049145095050335 2023-01-24 05:37:35.005347: step: 398/464, loss: 0.006755793001502752 2023-01-24 05:37:35.651498: step: 400/464, loss: 0.13241246342658997 2023-01-24 05:37:36.233561: step: 402/464, loss: 0.018767550587654114 2023-01-24 05:37:36.850431: step: 404/464, loss: 0.0015962908510118723 2023-01-24 05:37:37.411978: step: 406/464, loss: 0.014316637068986893 2023-01-24 05:37:38.004071: step: 408/464, loss: 0.009027567692101002 2023-01-24 05:37:38.643290: step: 410/464, loss: 0.00208667921833694 2023-01-24 05:37:39.245633: step: 412/464, loss: 6.704343104502186e-05 2023-01-24 05:37:39.944356: step: 414/464, loss: 0.0064193690195679665 2023-01-24 05:37:40.543522: step: 416/464, loss: 0.0024382879491895437 2023-01-24 05:37:41.292458: step: 418/464, loss: 0.05062605068087578 2023-01-24 05:37:41.888163: step: 420/464, loss: 0.0002532451180741191 2023-01-24 05:37:42.447541: step: 422/464, loss: 0.002652210183441639 2023-01-24 05:37:43.174881: step: 424/464, loss: 0.0003879011783283204 2023-01-24 05:37:43.906396: step: 426/464, loss: 0.0086573651060462 2023-01-24 05:37:44.573017: step: 428/464, loss: 0.002407163381576538 2023-01-24 05:37:45.133204: step: 430/464, loss: 0.0006860074354335666 2023-01-24 05:37:45.730798: step: 432/464, loss: 0.0076747131533920765 2023-01-24 05:37:46.409301: step: 434/464, loss: 0.003394588129594922 2023-01-24 05:37:47.020325: step: 436/464, loss: 0.030170533806085587 2023-01-24 05:37:47.622872: step: 438/464, loss: 0.00032836163882166147 2023-01-24 05:37:48.189251: step: 440/464, loss: 0.00248022866435349 2023-01-24 05:37:48.860658: step: 442/464, loss: 0.0016721858410164714 2023-01-24 05:37:49.494292: step: 444/464, loss: 0.003967962693423033 2023-01-24 05:37:50.182127: step: 446/464, loss: 0.05698193982243538 2023-01-24 05:37:50.732604: step: 448/464, loss: 5.1779232308035716e-05 2023-01-24 05:37:51.416892: step: 450/464, loss: 0.0011327448301017284 2023-01-24 05:37:52.075307: step: 452/464, loss: 0.0063192518427968025 2023-01-24 05:37:52.651909: step: 454/464, loss: 0.011665189638733864 2023-01-24 05:37:53.199285: step: 456/464, loss: 0.0004848650423809886 2023-01-24 05:37:53.852637: step: 458/464, loss: 2.0286659491830505e-05 2023-01-24 05:37:54.445239: step: 460/464, loss: 0.028747156262397766 2023-01-24 05:37:55.046489: step: 462/464, loss: 0.0013898280449211597 2023-01-24 05:37:55.634941: step: 464/464, loss: 0.02706788294017315 2023-01-24 05:37:56.255369: step: 466/464, loss: 0.010710745118558407 2023-01-24 05:37:56.821564: step: 468/464, loss: 0.00042844729614444077 2023-01-24 05:37:57.384237: step: 470/464, loss: 0.013478504493832588 2023-01-24 05:37:58.022123: step: 472/464, loss: 0.07601866871118546 2023-01-24 05:37:58.627856: step: 474/464, loss: 9.289790614275262e-05 2023-01-24 05:37:59.182724: step: 476/464, loss: 0.005359324160963297 2023-01-24 05:37:59.759108: step: 478/464, loss: 0.0006429508794099092 2023-01-24 05:38:00.342953: step: 480/464, loss: 0.010076269507408142 2023-01-24 05:38:00.976693: step: 482/464, loss: 0.03780921921133995 2023-01-24 05:38:01.545743: step: 484/464, loss: 0.006643231958150864 2023-01-24 05:38:02.152635: step: 486/464, loss: 0.0005986772594042122 2023-01-24 05:38:02.722309: step: 488/464, loss: 1.6072845028247684e-05 2023-01-24 05:38:03.349695: step: 490/464, loss: 0.001547905383631587 2023-01-24 05:38:03.975715: step: 492/464, loss: 0.01453624852001667 2023-01-24 05:38:04.561120: step: 494/464, loss: 0.00017860019579529762 2023-01-24 05:38:05.171354: step: 496/464, loss: 0.0028860324528068304 2023-01-24 05:38:05.797778: step: 498/464, loss: 0.022191910073161125 2023-01-24 05:38:06.370566: step: 500/464, loss: 0.0008213834371417761 2023-01-24 05:38:06.937964: step: 502/464, loss: 0.0013431920669972897 2023-01-24 05:38:07.521472: step: 504/464, loss: 0.20636188983917236 2023-01-24 05:38:08.130290: step: 506/464, loss: 0.0002305272064404562 2023-01-24 05:38:08.762917: step: 508/464, loss: 0.008078988641500473 2023-01-24 05:38:09.391377: step: 510/464, loss: 0.0010731914080679417 2023-01-24 05:38:10.004976: step: 512/464, loss: 0.03191307187080383 2023-01-24 05:38:10.622933: step: 514/464, loss: 0.001679626409895718 2023-01-24 05:38:11.169566: step: 516/464, loss: 0.011458156630396843 2023-01-24 05:38:11.739849: step: 518/464, loss: 0.0052367281168699265 2023-01-24 05:38:12.379989: step: 520/464, loss: 0.0033925592433661222 2023-01-24 05:38:13.057206: step: 522/464, loss: 0.03281421214342117 2023-01-24 05:38:13.725703: step: 524/464, loss: 0.005167921539396048 2023-01-24 05:38:14.324476: step: 526/464, loss: 0.00027469813358038664 2023-01-24 05:38:14.999815: step: 528/464, loss: 0.020571140572428703 2023-01-24 05:38:15.537879: step: 530/464, loss: 0.16448889672756195 2023-01-24 05:38:16.148054: step: 532/464, loss: 0.0003321019175928086 2023-01-24 05:38:16.711455: step: 534/464, loss: 0.0021385548170655966 2023-01-24 05:38:17.316960: step: 536/464, loss: 0.00895167887210846 2023-01-24 05:38:17.941923: step: 538/464, loss: 0.003001745790243149 2023-01-24 05:38:18.592025: step: 540/464, loss: 0.14123773574829102 2023-01-24 05:38:19.212886: step: 542/464, loss: 0.04274594783782959 2023-01-24 05:38:19.785517: step: 544/464, loss: 0.00013322725135367364 2023-01-24 05:38:20.343826: step: 546/464, loss: 1.2787042578565888e-05 2023-01-24 05:38:20.990581: step: 548/464, loss: 0.002002798020839691 2023-01-24 05:38:21.655472: step: 550/464, loss: 0.00823196116834879 2023-01-24 05:38:22.177764: step: 552/464, loss: 0.0031336620450019836 2023-01-24 05:38:22.803651: step: 554/464, loss: 0.001206969260238111 2023-01-24 05:38:23.445135: step: 556/464, loss: 0.00016807409701868892 2023-01-24 05:38:24.034470: step: 558/464, loss: 0.0024363440461456776 2023-01-24 05:38:24.590902: step: 560/464, loss: 0.023397495970129967 2023-01-24 05:38:25.205467: step: 562/464, loss: 0.014201251789927483 2023-01-24 05:38:25.827108: step: 564/464, loss: 0.008838113397359848 2023-01-24 05:38:26.424745: step: 566/464, loss: 0.00206809607334435 2023-01-24 05:38:26.987404: step: 568/464, loss: 3.33831922034733e-05 2023-01-24 05:38:27.605346: step: 570/464, loss: 0.00028463671333156526 2023-01-24 05:38:28.165848: step: 572/464, loss: 0.004011175595223904 2023-01-24 05:38:28.783270: step: 574/464, loss: 0.03371892869472504 2023-01-24 05:38:29.356063: step: 576/464, loss: 0.02161114476621151 2023-01-24 05:38:30.045308: step: 578/464, loss: 0.05861933156847954 2023-01-24 05:38:30.634951: step: 580/464, loss: 0.00013423307973425835 2023-01-24 05:38:31.330177: step: 582/464, loss: 0.08435353636741638 2023-01-24 05:38:31.923631: step: 584/464, loss: 0.015690000727772713 2023-01-24 05:38:32.555566: step: 586/464, loss: 0.010642273351550102 2023-01-24 05:38:33.073398: step: 588/464, loss: 0.0003432645171415061 2023-01-24 05:38:33.702207: step: 590/464, loss: 0.003194859717041254 2023-01-24 05:38:34.270235: step: 592/464, loss: 0.02521173097193241 2023-01-24 05:38:34.963358: step: 594/464, loss: 0.0001475270837545395 2023-01-24 05:38:35.564274: step: 596/464, loss: 0.001698898384347558 2023-01-24 05:38:36.156939: step: 598/464, loss: 0.00022349257778842002 2023-01-24 05:38:36.772458: step: 600/464, loss: 0.000832016346976161 2023-01-24 05:38:37.426830: step: 602/464, loss: 0.004213997162878513 2023-01-24 05:38:38.080111: step: 604/464, loss: 0.004446571692824364 2023-01-24 05:38:38.718616: step: 606/464, loss: 0.009489525109529495 2023-01-24 05:38:39.405340: step: 608/464, loss: 0.005802988074719906 2023-01-24 05:38:40.026131: step: 610/464, loss: 0.005094396416097879 2023-01-24 05:38:40.676110: step: 612/464, loss: 0.008050832897424698 2023-01-24 05:38:41.289309: step: 614/464, loss: 0.013616934418678284 2023-01-24 05:38:42.003990: step: 616/464, loss: 0.0034955181181430817 2023-01-24 05:38:42.679546: step: 618/464, loss: 0.0064336154609918594 2023-01-24 05:38:43.351158: step: 620/464, loss: 0.0051127406768500805 2023-01-24 05:38:44.015528: step: 622/464, loss: 0.019992150366306305 2023-01-24 05:38:44.559194: step: 624/464, loss: 0.000994180329144001 2023-01-24 05:38:45.121714: step: 626/464, loss: 0.03615922853350639 2023-01-24 05:38:45.720391: step: 628/464, loss: 0.039472438395023346 2023-01-24 05:38:46.296855: step: 630/464, loss: 0.0010863380739465356 2023-01-24 05:38:46.922363: step: 632/464, loss: 0.012934243306517601 2023-01-24 05:38:47.541462: step: 634/464, loss: 0.0006051979144103825 2023-01-24 05:38:48.138604: step: 636/464, loss: 0.01047486998140812 2023-01-24 05:38:48.748384: step: 638/464, loss: 0.0045492686331272125 2023-01-24 05:38:49.411958: step: 640/464, loss: 0.00013454600411932915 2023-01-24 05:38:50.077495: step: 642/464, loss: 0.00737126637250185 2023-01-24 05:38:50.728833: step: 644/464, loss: 0.035146042704582214 2023-01-24 05:38:51.284586: step: 646/464, loss: 0.019110510125756264 2023-01-24 05:38:51.849038: step: 648/464, loss: 0.006026388145983219 2023-01-24 05:38:52.503311: step: 650/464, loss: 0.007406257558614016 2023-01-24 05:38:53.169526: step: 652/464, loss: 0.07835182547569275 2023-01-24 05:38:53.791175: step: 654/464, loss: 4.0928618545876816e-05 2023-01-24 05:38:54.387210: step: 656/464, loss: 0.0014599744463339448 2023-01-24 05:38:54.998369: step: 658/464, loss: 0.011592582799494267 2023-01-24 05:38:55.574230: step: 660/464, loss: 0.00022379629081115127 2023-01-24 05:38:56.218160: step: 662/464, loss: 0.07811188697814941 2023-01-24 05:38:56.845004: step: 664/464, loss: 0.008354385383427143 2023-01-24 05:38:57.467463: step: 666/464, loss: 0.0011065627913922071 2023-01-24 05:38:58.072923: step: 668/464, loss: 0.026546290144324303 2023-01-24 05:38:58.697723: step: 670/464, loss: 0.000566209782846272 2023-01-24 05:38:59.309201: step: 672/464, loss: 0.001381652895361185 2023-01-24 05:38:59.938979: step: 674/464, loss: 0.030852001160383224 2023-01-24 05:39:00.554127: step: 676/464, loss: 0.012157046236097813 2023-01-24 05:39:01.118803: step: 678/464, loss: 0.002967291511595249 2023-01-24 05:39:01.727658: step: 680/464, loss: 0.01703762449324131 2023-01-24 05:39:02.357143: step: 682/464, loss: 0.005857993848621845 2023-01-24 05:39:02.936847: step: 684/464, loss: 0.011864163912832737 2023-01-24 05:39:03.597722: step: 686/464, loss: 0.018091727048158646 2023-01-24 05:39:04.171116: step: 688/464, loss: 0.00010335772094549611 2023-01-24 05:39:04.761301: step: 690/464, loss: 0.03895269334316254 2023-01-24 05:39:05.308493: step: 692/464, loss: 0.0002217577857663855 2023-01-24 05:39:05.849501: step: 694/464, loss: 4.977637217962183e-05 2023-01-24 05:39:06.481719: step: 696/464, loss: 0.0008721100748516619 2023-01-24 05:39:07.081460: step: 698/464, loss: 0.0021709641441702843 2023-01-24 05:39:07.744920: step: 700/464, loss: 0.038126856088638306 2023-01-24 05:39:08.574384: step: 702/464, loss: 0.003595249028876424 2023-01-24 05:39:09.224330: step: 704/464, loss: 0.003833782859146595 2023-01-24 05:39:09.837126: step: 706/464, loss: 0.0010085896356031299 2023-01-24 05:39:10.423896: step: 708/464, loss: 0.001090651610866189 2023-01-24 05:39:11.055732: step: 710/464, loss: 0.005513668060302734 2023-01-24 05:39:11.629793: step: 712/464, loss: 0.011009197682142258 2023-01-24 05:39:12.208620: step: 714/464, loss: 0.0013777940766885877 2023-01-24 05:39:12.905425: step: 716/464, loss: 0.008790001273155212 2023-01-24 05:39:13.481763: step: 718/464, loss: 0.001057420508004725 2023-01-24 05:39:14.078323: step: 720/464, loss: 0.07679551094770432 2023-01-24 05:39:14.701969: step: 722/464, loss: 0.009837577119469643 2023-01-24 05:39:15.325322: step: 724/464, loss: 0.018549852073192596 2023-01-24 05:39:15.908702: step: 726/464, loss: 0.00033019756665453315 2023-01-24 05:39:16.542796: step: 728/464, loss: 0.006539863999933004 2023-01-24 05:39:17.154642: step: 730/464, loss: 1.3240577573014889e-05 2023-01-24 05:39:17.799139: step: 732/464, loss: 0.024357983842492104 2023-01-24 05:39:18.433749: step: 734/464, loss: 0.0012778789969161153 2023-01-24 05:39:19.078541: step: 736/464, loss: 0.0013503863010555506 2023-01-24 05:39:19.678811: step: 738/464, loss: 0.0009660544455982745 2023-01-24 05:39:20.251621: step: 740/464, loss: 0.008409286849200726 2023-01-24 05:39:20.843561: step: 742/464, loss: 0.0019777226261794567 2023-01-24 05:39:21.463096: step: 744/464, loss: 0.01464751921594143 2023-01-24 05:39:22.109723: step: 746/464, loss: 0.0016079582273960114 2023-01-24 05:39:22.761730: step: 748/464, loss: 0.003580467775464058 2023-01-24 05:39:23.352717: step: 750/464, loss: 0.004533705301582813 2023-01-24 05:39:24.059522: step: 752/464, loss: 0.03962564468383789 2023-01-24 05:39:24.686212: step: 754/464, loss: 0.019578954204916954 2023-01-24 05:39:25.291770: step: 756/464, loss: 7.120955706341192e-05 2023-01-24 05:39:25.942696: step: 758/464, loss: 0.01011847797781229 2023-01-24 05:39:26.519043: step: 760/464, loss: 0.08529553562402725 2023-01-24 05:39:27.133242: step: 762/464, loss: 0.002987058600410819 2023-01-24 05:39:27.741207: step: 764/464, loss: 0.04267577826976776 2023-01-24 05:39:28.365069: step: 766/464, loss: 0.12578915059566498 2023-01-24 05:39:29.001560: step: 768/464, loss: 0.015906495973467827 2023-01-24 05:39:29.653480: step: 770/464, loss: 0.010736898519098759 2023-01-24 05:39:30.281875: step: 772/464, loss: 0.00668590422719717 2023-01-24 05:39:30.904220: step: 774/464, loss: 0.0006817388930357993 2023-01-24 05:39:31.641966: step: 776/464, loss: 0.0001948197023011744 2023-01-24 05:39:32.301740: step: 778/464, loss: 2.019037310674321e-05 2023-01-24 05:39:32.929027: step: 780/464, loss: 0.020074861124157906 2023-01-24 05:39:33.548252: step: 782/464, loss: 0.05165455490350723 2023-01-24 05:39:34.138802: step: 784/464, loss: 0.004251818172633648 2023-01-24 05:39:34.891340: step: 786/464, loss: 5.3936899348627776e-05 2023-01-24 05:39:35.495235: step: 788/464, loss: 0.00522686867043376 2023-01-24 05:39:36.111543: step: 790/464, loss: 5.8365999393572565e-06 2023-01-24 05:39:36.838233: step: 792/464, loss: 0.004626940470188856 2023-01-24 05:39:37.501279: step: 794/464, loss: 0.02032882533967495 2023-01-24 05:39:38.076043: step: 796/464, loss: 0.0004746417107526213 2023-01-24 05:39:38.728887: step: 798/464, loss: 0.0975630059838295 2023-01-24 05:39:39.333073: step: 800/464, loss: 0.008815807290375233 2023-01-24 05:39:39.999128: step: 802/464, loss: 0.01239369623363018 2023-01-24 05:39:40.651981: step: 804/464, loss: 0.0012308191508054733 2023-01-24 05:39:41.359483: step: 806/464, loss: 0.008979709818959236 2023-01-24 05:39:41.950013: step: 808/464, loss: 0.5861056447029114 2023-01-24 05:39:42.594975: step: 810/464, loss: 0.0027114953845739365 2023-01-24 05:39:43.214622: step: 812/464, loss: 0.01977531611919403 2023-01-24 05:39:43.862261: step: 814/464, loss: 0.04423713684082031 2023-01-24 05:39:44.492980: step: 816/464, loss: 0.2734917104244232 2023-01-24 05:39:45.084881: step: 818/464, loss: 0.0002222139446530491 2023-01-24 05:39:45.710233: step: 820/464, loss: 0.0006343009881675243 2023-01-24 05:39:46.335593: step: 822/464, loss: 0.003511299379169941 2023-01-24 05:39:46.940485: step: 824/464, loss: 0.04748666286468506 2023-01-24 05:39:47.560827: step: 826/464, loss: 0.002951279981061816 2023-01-24 05:39:48.242315: step: 828/464, loss: 7.458165782736614e-05 2023-01-24 05:39:48.818976: step: 830/464, loss: 0.003993411548435688 2023-01-24 05:39:49.451075: step: 832/464, loss: 2.084258794784546 2023-01-24 05:39:50.100066: step: 834/464, loss: 0.0006001291912980378 2023-01-24 05:39:50.766271: step: 836/464, loss: 1.1620572877291124e-05 2023-01-24 05:39:51.336656: step: 838/464, loss: 0.0013079376658424735 2023-01-24 05:39:51.912860: step: 840/464, loss: 0.02507406286895275 2023-01-24 05:39:52.508990: step: 842/464, loss: 0.00011102350254077464 2023-01-24 05:39:53.136634: step: 844/464, loss: 0.020627956837415695 2023-01-24 05:39:53.748606: step: 846/464, loss: 0.006506875157356262 2023-01-24 05:39:54.363539: step: 848/464, loss: 0.0049045030027627945 2023-01-24 05:39:55.083697: step: 850/464, loss: 0.02644026279449463 2023-01-24 05:39:55.653816: step: 852/464, loss: 0.010783646255731583 2023-01-24 05:39:56.303525: step: 854/464, loss: 0.07383334636688232 2023-01-24 05:39:56.936467: step: 856/464, loss: 0.003980218432843685 2023-01-24 05:39:57.472446: step: 858/464, loss: 6.878763815620914e-05 2023-01-24 05:39:58.053898: step: 860/464, loss: 0.0005876432987861335 2023-01-24 05:39:58.623960: step: 862/464, loss: 0.0001143648914876394 2023-01-24 05:39:59.260561: step: 864/464, loss: 0.0001652220234973356 2023-01-24 05:39:59.997025: step: 866/464, loss: 0.00033615445136092603 2023-01-24 05:40:00.629194: step: 868/464, loss: 0.1940748542547226 2023-01-24 05:40:01.192048: step: 870/464, loss: 0.0013214467326179147 2023-01-24 05:40:01.799412: step: 872/464, loss: 0.721562385559082 2023-01-24 05:40:02.450822: step: 874/464, loss: 0.003268428845331073 2023-01-24 05:40:03.064710: step: 876/464, loss: 0.40066006779670715 2023-01-24 05:40:03.723946: step: 878/464, loss: 0.0004343487962614745 2023-01-24 05:40:04.320814: step: 880/464, loss: 0.020197506994009018 2023-01-24 05:40:04.954847: step: 882/464, loss: 0.037329163402318954 2023-01-24 05:40:05.585383: step: 884/464, loss: 0.027084853500127792 2023-01-24 05:40:06.244717: step: 886/464, loss: 0.020723722875118256 2023-01-24 05:40:06.834082: step: 888/464, loss: 0.001170316361822188 2023-01-24 05:40:07.405268: step: 890/464, loss: 0.004882392939180136 2023-01-24 05:40:08.092525: step: 892/464, loss: 0.02754181995987892 2023-01-24 05:40:08.673620: step: 894/464, loss: 0.0029659119900316 2023-01-24 05:40:09.282014: step: 896/464, loss: 0.012844149954617023 2023-01-24 05:40:09.936005: step: 898/464, loss: 0.0032909393776208162 2023-01-24 05:40:10.512640: step: 900/464, loss: 0.004768472630530596 2023-01-24 05:40:11.166712: step: 902/464, loss: 0.011873023584485054 2023-01-24 05:40:11.810077: step: 904/464, loss: 0.01567987911403179 2023-01-24 05:40:12.403642: step: 906/464, loss: 0.017684731632471085 2023-01-24 05:40:13.049015: step: 908/464, loss: 0.020612113177776337 2023-01-24 05:40:13.682395: step: 910/464, loss: 0.0011249807430431247 2023-01-24 05:40:14.355614: step: 912/464, loss: 0.010123012587428093 2023-01-24 05:40:14.949086: step: 914/464, loss: 0.0662151426076889 2023-01-24 05:40:15.668879: step: 916/464, loss: 0.14610925316810608 2023-01-24 05:40:16.240876: step: 918/464, loss: 0.06939881294965744 2023-01-24 05:40:16.993484: step: 920/464, loss: 0.012881439179182053 2023-01-24 05:40:17.608097: step: 922/464, loss: 0.03754853457212448 2023-01-24 05:40:18.261385: step: 924/464, loss: 0.02277289517223835 2023-01-24 05:40:18.859589: step: 926/464, loss: 0.6945991516113281 2023-01-24 05:40:19.479345: step: 928/464, loss: 0.010687149129807949 2023-01-24 05:40:19.999784: step: 930/464, loss: 0.049016449600458145 ================================================== Loss: 0.030 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3367761948529412, 'r': 0.3476399430740038, 'f1': 0.34212184873949586}, 'combined': 0.25208978328173376, 'epoch': 39} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.31724831127497327, 'r': 0.3026554726698687, 'f1': 0.3097801306894042}, 'combined': 0.20223987806665766, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32828683035714284, 'r': 0.34884369070208726, 'f1': 0.33825321987120516}, 'combined': 0.24923921464194063, 'epoch': 39} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33377496118721256, 'r': 0.3085959852742858, 'f1': 0.32069200381754165}, 'combined': 0.20936369161145205, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3395890990159902, 'r': 0.34925482289690074, 'f1': 0.344354147178048}, 'combined': 0.2537346347627722, 'epoch': 39} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3265629019474874, 'r': 0.29477624666150604, 'f1': 0.309856498662529}, 'combined': 0.20228973487812774, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2789855072463768, 'r': 0.36666666666666664, 'f1': 0.31687242798353904}, 'combined': 0.21124828532235934, 'epoch': 39} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2847222222222222, 'r': 0.44565217391304346, 'f1': 0.3474576271186441}, 'combined': 0.17372881355932204, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6136363636363636, 'r': 0.23275862068965517, 'f1': 0.33749999999999997}, 'combined': 0.22499999999999998, 'epoch': 39} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31258528609072095, 'r': 0.300129325923918, 'f1': 0.30623069653805385}, 'combined': 0.22564367113330283, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.30202346860614343, 'r': 0.2859229528164777, 'f1': 0.293752759834115}, 'combined': 0.1917764131559507, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3675213675213675, 'r': 0.4095238095238095, 'f1': 0.38738738738738737}, 'combined': 0.2582582582582582, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3348209212662338, 'r': 0.3557869372089012, 'f1': 0.3449856778456135}, 'combined': 0.2541999731493994, 'epoch': 34} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.3240350842156237, 'r': 0.3094415923713539, 'f1': 0.31657024212508983}, 'combined': 0.2066728005583488, 'epoch': 34} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33088235294117646, 'r': 0.4891304347826087, 'f1': 0.39473684210526316}, 'combined': 0.19736842105263158, 'epoch': 34} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517857929376825, 'r': 0.3444430154759851, 'f1': 0.3480756839038239}, 'combined': 0.25647681971860703, 'epoch': 14} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.33847845395625953, 'r': 0.29745076256762204, 'f1': 0.3166411343461783}, 'combined': 0.20671908252652055, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.2413793103448276, 'f1': 0.34146341463414637}, 'combined': 0.22764227642276424, 'epoch': 14}