Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1128]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 614103147, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:31:54.669382: step: 2/464, loss: 9.374310493469238 2023-01-24 01:31:55.486275: step: 4/464, loss: 18.727828979492188 2023-01-24 01:31:56.207914: step: 6/464, loss: 12.132365226745605 2023-01-24 01:31:56.964072: step: 8/464, loss: 19.571609497070312 2023-01-24 01:31:57.861578: step: 10/464, loss: 5.619302749633789 2023-01-24 01:31:58.625438: step: 12/464, loss: 8.781749725341797 2023-01-24 01:31:59.311995: step: 14/464, loss: 12.500725746154785 2023-01-24 01:32:00.033134: step: 16/464, loss: 16.95241928100586 2023-01-24 01:32:00.791007: step: 18/464, loss: 10.717041015625 2023-01-24 01:32:01.642069: step: 20/464, loss: 6.4841814041137695 2023-01-24 01:32:02.349589: step: 22/464, loss: 27.81028938293457 2023-01-24 01:32:03.102929: step: 24/464, loss: 11.50721549987793 2023-01-24 01:32:03.937042: step: 26/464, loss: 17.522432327270508 2023-01-24 01:32:04.722880: step: 28/464, loss: 16.023082733154297 2023-01-24 01:32:05.471502: step: 30/464, loss: 6.1217217445373535 2023-01-24 01:32:06.256223: step: 32/464, loss: 15.088384628295898 2023-01-24 01:32:07.008992: step: 34/464, loss: 18.26673698425293 2023-01-24 01:32:07.762565: step: 36/464, loss: 16.102869033813477 2023-01-24 01:32:08.577685: step: 38/464, loss: 19.091053009033203 2023-01-24 01:32:09.285047: step: 40/464, loss: 16.98545265197754 2023-01-24 01:32:10.042191: step: 42/464, loss: 5.916228294372559 2023-01-24 01:32:10.777450: step: 44/464, loss: 19.067594528198242 2023-01-24 01:32:11.531646: step: 46/464, loss: 14.185728073120117 2023-01-24 01:32:12.330298: step: 48/464, loss: 10.070418357849121 2023-01-24 01:32:13.091664: step: 50/464, loss: 9.842507362365723 2023-01-24 01:32:13.882505: step: 52/464, loss: 29.716541290283203 2023-01-24 01:32:14.653681: step: 54/464, loss: 20.947689056396484 2023-01-24 01:32:15.453581: step: 56/464, loss: 25.807109832763672 2023-01-24 01:32:16.283003: step: 58/464, loss: 6.3881378173828125 2023-01-24 01:32:17.055100: step: 60/464, loss: 10.062652587890625 2023-01-24 01:32:17.845783: step: 62/464, loss: 9.788475036621094 2023-01-24 01:32:18.555143: step: 64/464, loss: 15.149709701538086 2023-01-24 01:32:19.228671: step: 66/464, loss: 18.39317512512207 2023-01-24 01:32:19.984364: step: 68/464, loss: 19.415142059326172 2023-01-24 01:32:20.720939: step: 70/464, loss: 9.763272285461426 2023-01-24 01:32:21.553547: step: 72/464, loss: 12.71608829498291 2023-01-24 01:32:22.271476: step: 74/464, loss: 19.501523971557617 2023-01-24 01:32:23.023527: step: 76/464, loss: 8.746461868286133 2023-01-24 01:32:23.778455: step: 78/464, loss: 9.88006591796875 2023-01-24 01:32:24.575334: step: 80/464, loss: 11.285353660583496 2023-01-24 01:32:25.378374: step: 82/464, loss: 5.65945291519165 2023-01-24 01:32:26.138650: step: 84/464, loss: 13.332258224487305 2023-01-24 01:32:26.930647: step: 86/464, loss: 10.611173629760742 2023-01-24 01:32:27.628407: step: 88/464, loss: 10.113225936889648 2023-01-24 01:32:28.413471: step: 90/464, loss: 12.189413070678711 2023-01-24 01:32:29.114737: step: 92/464, loss: 10.418415069580078 2023-01-24 01:32:29.809396: step: 94/464, loss: 7.840319633483887 2023-01-24 01:32:30.594376: step: 96/464, loss: 16.116275787353516 2023-01-24 01:32:31.287211: step: 98/464, loss: 4.881072521209717 2023-01-24 01:32:32.049608: step: 100/464, loss: 12.32833194732666 2023-01-24 01:32:32.920979: step: 102/464, loss: 14.402412414550781 2023-01-24 01:32:33.651293: step: 104/464, loss: 7.665492534637451 2023-01-24 01:32:34.385528: step: 106/464, loss: 4.3873772621154785 2023-01-24 01:32:35.162099: step: 108/464, loss: 5.572765350341797 2023-01-24 01:32:35.952727: step: 110/464, loss: 14.701895713806152 2023-01-24 01:32:36.711843: step: 112/464, loss: 9.786662101745605 2023-01-24 01:32:37.450827: step: 114/464, loss: 5.392247200012207 2023-01-24 01:32:38.271620: step: 116/464, loss: 8.178632736206055 2023-01-24 01:32:39.076410: step: 118/464, loss: 9.314098358154297 2023-01-24 01:32:39.844083: step: 120/464, loss: 20.993085861206055 2023-01-24 01:32:40.668143: step: 122/464, loss: 4.979355335235596 2023-01-24 01:32:41.430902: step: 124/464, loss: 20.287872314453125 2023-01-24 01:32:42.208148: step: 126/464, loss: 13.280475616455078 2023-01-24 01:32:42.970798: step: 128/464, loss: 5.503267765045166 2023-01-24 01:32:43.830060: step: 130/464, loss: 14.450859069824219 2023-01-24 01:32:44.643176: step: 132/464, loss: 16.351163864135742 2023-01-24 01:32:45.401472: step: 134/464, loss: 9.963763236999512 2023-01-24 01:32:46.121206: step: 136/464, loss: 13.730572700500488 2023-01-24 01:32:46.760464: step: 138/464, loss: 11.675607681274414 2023-01-24 01:32:47.531436: step: 140/464, loss: 11.64958667755127 2023-01-24 01:32:48.275962: step: 142/464, loss: 7.854397296905518 2023-01-24 01:32:49.074228: step: 144/464, loss: 12.849346160888672 2023-01-24 01:32:49.908190: step: 146/464, loss: 21.994861602783203 2023-01-24 01:32:50.605136: step: 148/464, loss: 6.948868274688721 2023-01-24 01:32:51.415526: step: 150/464, loss: 8.413993835449219 2023-01-24 01:32:52.157232: step: 152/464, loss: 2.9484715461730957 2023-01-24 01:32:52.936946: step: 154/464, loss: 13.926671028137207 2023-01-24 01:32:53.755879: step: 156/464, loss: 2.777003049850464 2023-01-24 01:32:54.485910: step: 158/464, loss: 10.172282218933105 2023-01-24 01:32:55.318051: step: 160/464, loss: 5.547769546508789 2023-01-24 01:32:56.082237: step: 162/464, loss: 4.884523868560791 2023-01-24 01:32:56.835305: step: 164/464, loss: 6.705595016479492 2023-01-24 01:32:57.668053: step: 166/464, loss: 8.995575904846191 2023-01-24 01:32:58.435220: step: 168/464, loss: 11.319543838500977 2023-01-24 01:32:59.255626: step: 170/464, loss: 15.827716827392578 2023-01-24 01:33:00.047898: step: 172/464, loss: 10.849842071533203 2023-01-24 01:33:00.787422: step: 174/464, loss: 3.5209128856658936 2023-01-24 01:33:01.539758: step: 176/464, loss: 6.0415544509887695 2023-01-24 01:33:02.285775: step: 178/464, loss: 4.605469703674316 2023-01-24 01:33:03.056931: step: 180/464, loss: 6.940061569213867 2023-01-24 01:33:03.918756: step: 182/464, loss: 11.00831413269043 2023-01-24 01:33:04.605977: step: 184/464, loss: 4.500629901885986 2023-01-24 01:33:05.303809: step: 186/464, loss: 11.850529670715332 2023-01-24 01:33:06.087264: step: 188/464, loss: 9.06021785736084 2023-01-24 01:33:06.738565: step: 190/464, loss: 6.195465087890625 2023-01-24 01:33:07.538832: step: 192/464, loss: 3.9920973777770996 2023-01-24 01:33:08.271973: step: 194/464, loss: 10.8495512008667 2023-01-24 01:33:09.041540: step: 196/464, loss: 5.404269695281982 2023-01-24 01:33:09.758195: step: 198/464, loss: 12.221254348754883 2023-01-24 01:33:10.572722: step: 200/464, loss: 7.055595397949219 2023-01-24 01:33:11.293224: step: 202/464, loss: 15.114753723144531 2023-01-24 01:33:12.081768: step: 204/464, loss: 4.877196311950684 2023-01-24 01:33:12.803541: step: 206/464, loss: 6.17531681060791 2023-01-24 01:33:13.566213: step: 208/464, loss: 6.529362201690674 2023-01-24 01:33:14.240034: step: 210/464, loss: 8.570602416992188 2023-01-24 01:33:14.955113: step: 212/464, loss: 17.8670711517334 2023-01-24 01:33:15.664505: step: 214/464, loss: 1.9516692161560059 2023-01-24 01:33:16.387473: step: 216/464, loss: 6.888588905334473 2023-01-24 01:33:17.099107: step: 218/464, loss: 19.382837295532227 2023-01-24 01:33:17.972865: step: 220/464, loss: 4.47511100769043 2023-01-24 01:33:18.729722: step: 222/464, loss: 3.614959239959717 2023-01-24 01:33:19.539013: step: 224/464, loss: 6.809412002563477 2023-01-24 01:33:20.317793: step: 226/464, loss: 5.449559688568115 2023-01-24 01:33:21.034355: step: 228/464, loss: 11.705848693847656 2023-01-24 01:33:21.811667: step: 230/464, loss: 16.532360076904297 2023-01-24 01:33:22.582155: step: 232/464, loss: 3.4342377185821533 2023-01-24 01:33:23.345421: step: 234/464, loss: 4.141678333282471 2023-01-24 01:33:24.066983: step: 236/464, loss: 5.295482158660889 2023-01-24 01:33:24.722976: step: 238/464, loss: 9.353677749633789 2023-01-24 01:33:25.504898: step: 240/464, loss: 3.7013063430786133 2023-01-24 01:33:26.258134: step: 242/464, loss: 3.4981331825256348 2023-01-24 01:33:27.038843: step: 244/464, loss: 6.408645153045654 2023-01-24 01:33:27.821943: step: 246/464, loss: 9.78841781616211 2023-01-24 01:33:28.531670: step: 248/464, loss: 9.539456367492676 2023-01-24 01:33:29.397713: step: 250/464, loss: 9.358221054077148 2023-01-24 01:33:30.159578: step: 252/464, loss: 9.760936737060547 2023-01-24 01:33:30.962182: step: 254/464, loss: 5.140605449676514 2023-01-24 01:33:31.676890: step: 256/464, loss: 13.301159858703613 2023-01-24 01:33:32.412470: step: 258/464, loss: 12.485919952392578 2023-01-24 01:33:33.141547: step: 260/464, loss: 15.078859329223633 2023-01-24 01:33:33.883846: step: 262/464, loss: 6.826125144958496 2023-01-24 01:33:34.635883: step: 264/464, loss: 3.3939123153686523 2023-01-24 01:33:35.391153: step: 266/464, loss: 4.8039350509643555 2023-01-24 01:33:36.114110: step: 268/464, loss: 7.7959089279174805 2023-01-24 01:33:36.831086: step: 270/464, loss: 4.709813117980957 2023-01-24 01:33:37.611008: step: 272/464, loss: 2.368288516998291 2023-01-24 01:33:38.393578: step: 274/464, loss: 9.393926620483398 2023-01-24 01:33:39.138066: step: 276/464, loss: 7.802385330200195 2023-01-24 01:33:39.789258: step: 278/464, loss: 7.686842918395996 2023-01-24 01:33:40.559696: step: 280/464, loss: 9.50601577758789 2023-01-24 01:33:41.316176: step: 282/464, loss: 5.707566738128662 2023-01-24 01:33:42.058294: step: 284/464, loss: 5.4928107261657715 2023-01-24 01:33:42.821134: step: 286/464, loss: 10.356019973754883 2023-01-24 01:33:43.605369: step: 288/464, loss: 2.413654327392578 2023-01-24 01:33:44.503024: step: 290/464, loss: 8.67782211303711 2023-01-24 01:33:45.203141: step: 292/464, loss: 5.6798858642578125 2023-01-24 01:33:45.974393: step: 294/464, loss: 14.590892791748047 2023-01-24 01:33:46.700236: step: 296/464, loss: 5.307000160217285 2023-01-24 01:33:47.545519: step: 298/464, loss: 16.15806007385254 2023-01-24 01:33:48.303794: step: 300/464, loss: 5.119200706481934 2023-01-24 01:33:48.998544: step: 302/464, loss: 2.9111478328704834 2023-01-24 01:33:49.808572: step: 304/464, loss: 6.235689163208008 2023-01-24 01:33:50.679149: step: 306/464, loss: 5.8132147789001465 2023-01-24 01:33:51.456077: step: 308/464, loss: 2.717848300933838 2023-01-24 01:33:52.242630: step: 310/464, loss: 6.308784484863281 2023-01-24 01:33:53.017410: step: 312/464, loss: 4.407107353210449 2023-01-24 01:33:53.754771: step: 314/464, loss: 3.238675117492676 2023-01-24 01:33:54.537478: step: 316/464, loss: 11.632369995117188 2023-01-24 01:33:55.274107: step: 318/464, loss: 6.453070163726807 2023-01-24 01:33:56.023412: step: 320/464, loss: 2.863231897354126 2023-01-24 01:33:56.826078: step: 322/464, loss: 2.2388625144958496 2023-01-24 01:33:57.648053: step: 324/464, loss: 19.541534423828125 2023-01-24 01:33:58.479991: step: 326/464, loss: 9.022233009338379 2023-01-24 01:33:59.336768: step: 328/464, loss: 4.538577556610107 2023-01-24 01:34:00.143012: step: 330/464, loss: 1.4825515747070312 2023-01-24 01:34:00.845891: step: 332/464, loss: 3.8235790729522705 2023-01-24 01:34:01.676960: step: 334/464, loss: 6.108069896697998 2023-01-24 01:34:02.453716: step: 336/464, loss: 7.865338325500488 2023-01-24 01:34:03.168673: step: 338/464, loss: 1.6649757623672485 2023-01-24 01:34:03.853705: step: 340/464, loss: 15.700176239013672 2023-01-24 01:34:04.579914: step: 342/464, loss: 1.5405945777893066 2023-01-24 01:34:05.350200: step: 344/464, loss: 6.349061965942383 2023-01-24 01:34:06.152407: step: 346/464, loss: 3.196582317352295 2023-01-24 01:34:06.825978: step: 348/464, loss: 8.026315689086914 2023-01-24 01:34:07.571450: step: 350/464, loss: 1.8204209804534912 2023-01-24 01:34:08.288871: step: 352/464, loss: 4.419278144836426 2023-01-24 01:34:09.107193: step: 354/464, loss: 6.484158039093018 2023-01-24 01:34:09.823731: step: 356/464, loss: 3.7357139587402344 2023-01-24 01:34:10.638225: step: 358/464, loss: 8.426981925964355 2023-01-24 01:34:11.352320: step: 360/464, loss: 5.3540544509887695 2023-01-24 01:34:12.133877: step: 362/464, loss: 10.684189796447754 2023-01-24 01:34:12.989259: step: 364/464, loss: 4.385252952575684 2023-01-24 01:34:13.732714: step: 366/464, loss: 1.5382404327392578 2023-01-24 01:34:14.453673: step: 368/464, loss: 2.7515692710876465 2023-01-24 01:34:15.254284: step: 370/464, loss: 2.2363736629486084 2023-01-24 01:34:15.996689: step: 372/464, loss: 3.405794620513916 2023-01-24 01:34:16.899181: step: 374/464, loss: 3.746248245239258 2023-01-24 01:34:17.579501: step: 376/464, loss: 6.643892288208008 2023-01-24 01:34:18.384231: step: 378/464, loss: 9.15991497039795 2023-01-24 01:34:19.085788: step: 380/464, loss: 1.7974623441696167 2023-01-24 01:34:19.860720: step: 382/464, loss: 2.5357003211975098 2023-01-24 01:34:20.673280: step: 384/464, loss: 6.739011764526367 2023-01-24 01:34:21.363854: step: 386/464, loss: 7.696263790130615 2023-01-24 01:34:22.113017: step: 388/464, loss: 3.198539972305298 2023-01-24 01:34:23.002858: step: 390/464, loss: 4.967364311218262 2023-01-24 01:34:23.830158: step: 392/464, loss: 8.086828231811523 2023-01-24 01:34:24.569178: step: 394/464, loss: 2.0360107421875 2023-01-24 01:34:25.306308: step: 396/464, loss: 1.9013662338256836 2023-01-24 01:34:26.096139: step: 398/464, loss: 5.694022178649902 2023-01-24 01:34:26.862689: step: 400/464, loss: 5.332306861877441 2023-01-24 01:34:27.634711: step: 402/464, loss: 7.137255668640137 2023-01-24 01:34:28.361951: step: 404/464, loss: 1.5250176191329956 2023-01-24 01:34:29.046474: step: 406/464, loss: 6.838038921356201 2023-01-24 01:34:29.799927: step: 408/464, loss: 6.499872207641602 2023-01-24 01:34:30.594645: step: 410/464, loss: 2.3436036109924316 2023-01-24 01:34:31.372224: step: 412/464, loss: 4.896717071533203 2023-01-24 01:34:32.148594: step: 414/464, loss: 2.5820794105529785 2023-01-24 01:34:32.977498: step: 416/464, loss: 1.179425597190857 2023-01-24 01:34:33.750665: step: 418/464, loss: 4.6026611328125 2023-01-24 01:34:34.501262: step: 420/464, loss: 2.9256577491760254 2023-01-24 01:34:35.250374: step: 422/464, loss: 3.4536118507385254 2023-01-24 01:34:36.007655: step: 424/464, loss: 4.34628963470459 2023-01-24 01:34:36.863270: step: 426/464, loss: 1.0080816745758057 2023-01-24 01:34:37.637226: step: 428/464, loss: 6.766529083251953 2023-01-24 01:34:38.340537: step: 430/464, loss: 1.8124334812164307 2023-01-24 01:34:39.170896: step: 432/464, loss: 3.1404690742492676 2023-01-24 01:34:39.931827: step: 434/464, loss: 3.186899185180664 2023-01-24 01:34:40.733584: step: 436/464, loss: 1.518646478652954 2023-01-24 01:34:41.445971: step: 438/464, loss: 2.915464162826538 2023-01-24 01:34:42.224943: step: 440/464, loss: 3.0261709690093994 2023-01-24 01:34:42.999383: step: 442/464, loss: 8.494826316833496 2023-01-24 01:34:43.765745: step: 444/464, loss: 1.1129310131072998 2023-01-24 01:34:44.611864: step: 446/464, loss: 1.0951164960861206 2023-01-24 01:34:45.357794: step: 448/464, loss: 3.7174148559570312 2023-01-24 01:34:46.251846: step: 450/464, loss: 1.5301337242126465 2023-01-24 01:34:47.044856: step: 452/464, loss: 1.1110576391220093 2023-01-24 01:34:47.834812: step: 454/464, loss: 4.443788528442383 2023-01-24 01:34:48.601748: step: 456/464, loss: 1.8520171642303467 2023-01-24 01:34:49.423773: step: 458/464, loss: 2.3815908432006836 2023-01-24 01:34:50.218951: step: 460/464, loss: 1.3823764324188232 2023-01-24 01:34:50.892523: step: 462/464, loss: 1.3576384782791138 2023-01-24 01:34:51.646649: step: 464/464, loss: 4.694050312042236 2023-01-24 01:34:52.492421: step: 466/464, loss: 1.1287555694580078 2023-01-24 01:34:53.222826: step: 468/464, loss: 2.7844748497009277 2023-01-24 01:34:54.009930: step: 470/464, loss: 1.916163682937622 2023-01-24 01:34:54.765499: step: 472/464, loss: 3.91558837890625 2023-01-24 01:34:55.509807: step: 474/464, loss: 10.167104721069336 2023-01-24 01:34:56.299358: step: 476/464, loss: 4.300200939178467 2023-01-24 01:34:57.006131: step: 478/464, loss: 1.098439335823059 2023-01-24 01:34:57.677470: step: 480/464, loss: 4.241396903991699 2023-01-24 01:34:58.429184: step: 482/464, loss: 2.5566744804382324 2023-01-24 01:34:59.176017: step: 484/464, loss: 0.6062437891960144 2023-01-24 01:34:59.950402: step: 486/464, loss: 3.4063737392425537 2023-01-24 01:35:00.749216: step: 488/464, loss: 1.0972111225128174 2023-01-24 01:35:01.565271: step: 490/464, loss: 2.4079630374908447 2023-01-24 01:35:02.292465: step: 492/464, loss: 1.7030748128890991 2023-01-24 01:35:03.079110: step: 494/464, loss: 0.6144923567771912 2023-01-24 01:35:03.828788: step: 496/464, loss: 4.25278902053833 2023-01-24 01:35:04.673016: step: 498/464, loss: 11.448986053466797 2023-01-24 01:35:05.432665: step: 500/464, loss: 1.90968656539917 2023-01-24 01:35:06.188240: step: 502/464, loss: 3.7317092418670654 2023-01-24 01:35:07.035594: step: 504/464, loss: 5.710849761962891 2023-01-24 01:35:07.777323: step: 506/464, loss: 3.7273013591766357 2023-01-24 01:35:08.545458: step: 508/464, loss: 1.74265718460083 2023-01-24 01:35:09.296043: step: 510/464, loss: 4.305446624755859 2023-01-24 01:35:10.044300: step: 512/464, loss: 3.4846932888031006 2023-01-24 01:35:10.757295: step: 514/464, loss: 0.8061954379081726 2023-01-24 01:35:11.444631: step: 516/464, loss: 1.9426944255828857 2023-01-24 01:35:12.234532: step: 518/464, loss: 3.0346713066101074 2023-01-24 01:35:13.009069: step: 520/464, loss: 5.749312877655029 2023-01-24 01:35:13.817018: step: 522/464, loss: 8.312419891357422 2023-01-24 01:35:14.512945: step: 524/464, loss: 0.8057400584220886 2023-01-24 01:35:15.264584: step: 526/464, loss: 0.5294106602668762 2023-01-24 01:35:16.090965: step: 528/464, loss: 2.6814074516296387 2023-01-24 01:35:16.729870: step: 530/464, loss: 2.8819162845611572 2023-01-24 01:35:17.533018: step: 532/464, loss: 5.775310039520264 2023-01-24 01:35:18.335277: step: 534/464, loss: 1.208691954612732 2023-01-24 01:35:19.123422: step: 536/464, loss: 0.7920262217521667 2023-01-24 01:35:19.881755: step: 538/464, loss: 0.8998887538909912 2023-01-24 01:35:20.611627: step: 540/464, loss: 5.381350517272949 2023-01-24 01:35:21.364619: step: 542/464, loss: 0.716932475566864 2023-01-24 01:35:22.178081: step: 544/464, loss: 7.021783828735352 2023-01-24 01:35:22.877297: step: 546/464, loss: 1.2206028699874878 2023-01-24 01:35:23.660854: step: 548/464, loss: 1.2179982662200928 2023-01-24 01:35:24.375050: step: 550/464, loss: 0.6256527900695801 2023-01-24 01:35:25.179324: step: 552/464, loss: 2.34977388381958 2023-01-24 01:35:25.929758: step: 554/464, loss: 2.508776903152466 2023-01-24 01:35:26.670924: step: 556/464, loss: 0.8136802911758423 2023-01-24 01:35:27.445696: step: 558/464, loss: 5.513727188110352 2023-01-24 01:35:28.138355: step: 560/464, loss: 1.3077001571655273 2023-01-24 01:35:28.871019: step: 562/464, loss: 2.8942441940307617 2023-01-24 01:35:29.617409: step: 564/464, loss: 1.109363317489624 2023-01-24 01:35:30.452255: step: 566/464, loss: 1.4600365161895752 2023-01-24 01:35:31.321876: step: 568/464, loss: 2.8010220527648926 2023-01-24 01:35:32.072807: step: 570/464, loss: 3.948335647583008 2023-01-24 01:35:32.813279: step: 572/464, loss: 2.7902843952178955 2023-01-24 01:35:33.518531: step: 574/464, loss: 0.5588739514350891 2023-01-24 01:35:34.270712: step: 576/464, loss: 2.748721122741699 2023-01-24 01:35:35.225180: step: 578/464, loss: 0.30092504620552063 2023-01-24 01:35:36.063156: step: 580/464, loss: 0.7211551666259766 2023-01-24 01:35:36.856965: step: 582/464, loss: 3.9377760887145996 2023-01-24 01:35:37.575292: step: 584/464, loss: 2.0166115760803223 2023-01-24 01:35:38.314638: step: 586/464, loss: 6.889471054077148 2023-01-24 01:35:39.000426: step: 588/464, loss: 2.986854076385498 2023-01-24 01:35:39.793428: step: 590/464, loss: 9.561071395874023 2023-01-24 01:35:40.584256: step: 592/464, loss: 1.638632893562317 2023-01-24 01:35:41.389526: step: 594/464, loss: 2.9973866939544678 2023-01-24 01:35:42.205322: step: 596/464, loss: 1.0983021259307861 2023-01-24 01:35:42.991398: step: 598/464, loss: 1.7044750452041626 2023-01-24 01:35:43.795880: step: 600/464, loss: 2.925450325012207 2023-01-24 01:35:44.540446: step: 602/464, loss: 8.897144317626953 2023-01-24 01:35:45.296349: step: 604/464, loss: 1.5220918655395508 2023-01-24 01:35:46.053726: step: 606/464, loss: 2.9862239360809326 2023-01-24 01:35:46.804106: step: 608/464, loss: 5.759862899780273 2023-01-24 01:35:47.670915: step: 610/464, loss: 1.089566946029663 2023-01-24 01:35:48.464036: step: 612/464, loss: 3.5152370929718018 2023-01-24 01:35:49.208652: step: 614/464, loss: 1.3656071424484253 2023-01-24 01:35:50.020546: step: 616/464, loss: 6.461595058441162 2023-01-24 01:35:50.802970: step: 618/464, loss: 0.7708498239517212 2023-01-24 01:35:51.544382: step: 620/464, loss: 2.1966259479522705 2023-01-24 01:35:52.295571: step: 622/464, loss: 1.6778101921081543 2023-01-24 01:35:53.039800: step: 624/464, loss: 1.459666132926941 2023-01-24 01:35:53.720190: step: 626/464, loss: 0.9618430733680725 2023-01-24 01:35:54.480877: step: 628/464, loss: 2.3581111431121826 2023-01-24 01:35:55.228751: step: 630/464, loss: 2.8875811100006104 2023-01-24 01:35:56.042580: step: 632/464, loss: 2.020002603530884 2023-01-24 01:35:56.832629: step: 634/464, loss: 1.6766326427459717 2023-01-24 01:35:57.524311: step: 636/464, loss: 2.358569383621216 2023-01-24 01:35:58.247915: step: 638/464, loss: 1.4719403982162476 2023-01-24 01:35:58.923968: step: 640/464, loss: 3.1487109661102295 2023-01-24 01:35:59.635010: step: 642/464, loss: 1.3823643922805786 2023-01-24 01:36:00.535902: step: 644/464, loss: 6.858518600463867 2023-01-24 01:36:01.282670: step: 646/464, loss: 2.9186878204345703 2023-01-24 01:36:02.050476: step: 648/464, loss: 0.6788272857666016 2023-01-24 01:36:02.779460: step: 650/464, loss: 0.8581159114837646 2023-01-24 01:36:03.705782: step: 652/464, loss: 3.3742990493774414 2023-01-24 01:36:04.482358: step: 654/464, loss: 0.6534217596054077 2023-01-24 01:36:05.244095: step: 656/464, loss: 0.4227631688117981 2023-01-24 01:36:05.965057: step: 658/464, loss: 0.6241885423660278 2023-01-24 01:36:06.764222: step: 660/464, loss: 2.2499334812164307 2023-01-24 01:36:07.433951: step: 662/464, loss: 2.9914143085479736 2023-01-24 01:36:08.192079: step: 664/464, loss: 5.7285356521606445 2023-01-24 01:36:08.993914: step: 666/464, loss: 4.63377046585083 2023-01-24 01:36:09.712235: step: 668/464, loss: 0.9508398175239563 2023-01-24 01:36:10.554521: step: 670/464, loss: 1.86622154712677 2023-01-24 01:36:11.348718: step: 672/464, loss: 18.502479553222656 2023-01-24 01:36:12.135341: step: 674/464, loss: 2.061119556427002 2023-01-24 01:36:12.807634: step: 676/464, loss: 1.0714634656906128 2023-01-24 01:36:13.525675: step: 678/464, loss: 2.290773868560791 2023-01-24 01:36:14.269095: step: 680/464, loss: 1.4382787942886353 2023-01-24 01:36:14.965572: step: 682/464, loss: 1.2964938879013062 2023-01-24 01:36:15.768160: step: 684/464, loss: 1.352104902267456 2023-01-24 01:36:16.677577: step: 686/464, loss: 2.0508487224578857 2023-01-24 01:36:17.508226: step: 688/464, loss: 4.085446357727051 2023-01-24 01:36:18.405176: step: 690/464, loss: 2.051877737045288 2023-01-24 01:36:19.153589: step: 692/464, loss: 1.0510663986206055 2023-01-24 01:36:19.933883: step: 694/464, loss: 1.0191794633865356 2023-01-24 01:36:20.706356: step: 696/464, loss: 2.4271769523620605 2023-01-24 01:36:21.455429: step: 698/464, loss: 5.522613048553467 2023-01-24 01:36:22.152263: step: 700/464, loss: 2.2515859603881836 2023-01-24 01:36:22.972882: step: 702/464, loss: 5.858063220977783 2023-01-24 01:36:23.731661: step: 704/464, loss: 1.3351037502288818 2023-01-24 01:36:24.495058: step: 706/464, loss: 3.453185558319092 2023-01-24 01:36:25.284949: step: 708/464, loss: 0.923098087310791 2023-01-24 01:36:26.064325: step: 710/464, loss: 1.162076473236084 2023-01-24 01:36:26.884022: step: 712/464, loss: 6.112480640411377 2023-01-24 01:36:27.622205: step: 714/464, loss: 0.42647066712379456 2023-01-24 01:36:28.417001: step: 716/464, loss: 1.2001023292541504 2023-01-24 01:36:29.149607: step: 718/464, loss: 2.029315233230591 2023-01-24 01:36:29.961083: step: 720/464, loss: 1.3218841552734375 2023-01-24 01:36:30.719494: step: 722/464, loss: 4.933912754058838 2023-01-24 01:36:31.480810: step: 724/464, loss: 3.8572864532470703 2023-01-24 01:36:32.185480: step: 726/464, loss: 3.1059410572052 2023-01-24 01:36:32.954689: step: 728/464, loss: 2.590851306915283 2023-01-24 01:36:33.726303: step: 730/464, loss: 1.0027847290039062 2023-01-24 01:36:34.439194: step: 732/464, loss: 1.3879283666610718 2023-01-24 01:36:35.195904: step: 734/464, loss: 0.9771751761436462 2023-01-24 01:36:36.018280: step: 736/464, loss: 1.621541142463684 2023-01-24 01:36:36.781184: step: 738/464, loss: 1.1764367818832397 2023-01-24 01:36:37.527913: step: 740/464, loss: 1.2249219417572021 2023-01-24 01:36:38.285087: step: 742/464, loss: 1.8417370319366455 2023-01-24 01:36:38.965972: step: 744/464, loss: 6.823138236999512 2023-01-24 01:36:39.741540: step: 746/464, loss: 1.8373222351074219 2023-01-24 01:36:40.400061: step: 748/464, loss: 5.270013809204102 2023-01-24 01:36:41.174704: step: 750/464, loss: 4.491097450256348 2023-01-24 01:36:41.977824: step: 752/464, loss: 0.7358742356300354 2023-01-24 01:36:42.810190: step: 754/464, loss: 3.433688163757324 2023-01-24 01:36:43.571299: step: 756/464, loss: 2.0288889408111572 2023-01-24 01:36:44.334581: step: 758/464, loss: 3.583008050918579 2023-01-24 01:36:45.094996: step: 760/464, loss: 1.6618074178695679 2023-01-24 01:36:45.843118: step: 762/464, loss: 14.27196216583252 2023-01-24 01:36:46.649282: step: 764/464, loss: 2.677417039871216 2023-01-24 01:36:47.385181: step: 766/464, loss: 4.172243118286133 2023-01-24 01:36:48.199333: step: 768/464, loss: 1.7907335758209229 2023-01-24 01:36:48.887172: step: 770/464, loss: 5.166937828063965 2023-01-24 01:36:49.628055: step: 772/464, loss: 4.351705551147461 2023-01-24 01:36:50.380841: step: 774/464, loss: 3.2508463859558105 2023-01-24 01:36:51.130942: step: 776/464, loss: 1.9359157085418701 2023-01-24 01:36:51.873831: step: 778/464, loss: 3.473889112472534 2023-01-24 01:36:52.614229: step: 780/464, loss: 2.1327149868011475 2023-01-24 01:36:53.486868: step: 782/464, loss: 2.69435715675354 2023-01-24 01:36:54.220486: step: 784/464, loss: 1.1987321376800537 2023-01-24 01:36:55.029711: step: 786/464, loss: 1.4761255979537964 2023-01-24 01:36:55.820674: step: 788/464, loss: 7.58142614364624 2023-01-24 01:36:56.524428: step: 790/464, loss: 1.345672845840454 2023-01-24 01:36:57.216991: step: 792/464, loss: 1.5891671180725098 2023-01-24 01:36:57.932641: step: 794/464, loss: 1.8026704788208008 2023-01-24 01:36:58.710945: step: 796/464, loss: 1.2802784442901611 2023-01-24 01:36:59.458262: step: 798/464, loss: 0.4238661825656891 2023-01-24 01:37:00.284431: step: 800/464, loss: 1.4901365041732788 2023-01-24 01:37:01.061311: step: 802/464, loss: 1.8327875137329102 2023-01-24 01:37:01.909440: step: 804/464, loss: 4.032846927642822 2023-01-24 01:37:02.660926: step: 806/464, loss: 1.8030288219451904 2023-01-24 01:37:03.368700: step: 808/464, loss: 6.166379928588867 2023-01-24 01:37:04.159356: step: 810/464, loss: 2.339250326156616 2023-01-24 01:37:04.904507: step: 812/464, loss: 0.9494217038154602 2023-01-24 01:37:05.681077: step: 814/464, loss: 1.4521145820617676 2023-01-24 01:37:06.404303: step: 816/464, loss: 2.0722744464874268 2023-01-24 01:37:07.118442: step: 818/464, loss: 9.778717994689941 2023-01-24 01:37:07.946586: step: 820/464, loss: 5.905690670013428 2023-01-24 01:37:08.733804: step: 822/464, loss: 3.5487167835235596 2023-01-24 01:37:09.579880: step: 824/464, loss: 1.1876356601715088 2023-01-24 01:37:10.311556: step: 826/464, loss: 4.4284491539001465 2023-01-24 01:37:11.038297: step: 828/464, loss: 4.3333821296691895 2023-01-24 01:37:11.780157: step: 830/464, loss: 8.989230155944824 2023-01-24 01:37:12.504356: step: 832/464, loss: 3.92063045501709 2023-01-24 01:37:13.365014: step: 834/464, loss: 2.353074312210083 2023-01-24 01:37:14.168617: step: 836/464, loss: 1.9473124742507935 2023-01-24 01:37:14.965685: step: 838/464, loss: 1.1626951694488525 2023-01-24 01:37:15.697795: step: 840/464, loss: 2.613588809967041 2023-01-24 01:37:16.477624: step: 842/464, loss: 1.9653387069702148 2023-01-24 01:37:17.199443: step: 844/464, loss: 0.7982001304626465 2023-01-24 01:37:17.932338: step: 846/464, loss: 2.37483286857605 2023-01-24 01:37:18.723076: step: 848/464, loss: 0.9634339809417725 2023-01-24 01:37:19.423906: step: 850/464, loss: 0.6170372366905212 2023-01-24 01:37:20.131437: step: 852/464, loss: 0.7587774991989136 2023-01-24 01:37:20.828308: step: 854/464, loss: 3.129608392715454 2023-01-24 01:37:21.633052: step: 856/464, loss: 0.570770263671875 2023-01-24 01:37:22.408355: step: 858/464, loss: 2.8230128288269043 2023-01-24 01:37:23.087020: step: 860/464, loss: 0.7748469114303589 2023-01-24 01:37:23.826696: step: 862/464, loss: 1.5421247482299805 2023-01-24 01:37:24.612552: step: 864/464, loss: 1.4275951385498047 2023-01-24 01:37:25.391105: step: 866/464, loss: 1.0909823179244995 2023-01-24 01:37:26.126124: step: 868/464, loss: 7.108874797821045 2023-01-24 01:37:26.854108: step: 870/464, loss: 1.9868006706237793 2023-01-24 01:37:27.609614: step: 872/464, loss: 11.043020248413086 2023-01-24 01:37:28.291404: step: 874/464, loss: 0.9537006616592407 2023-01-24 01:37:29.097718: step: 876/464, loss: 5.960934638977051 2023-01-24 01:37:29.870137: step: 878/464, loss: 2.2544214725494385 2023-01-24 01:37:30.633825: step: 880/464, loss: 0.5682533979415894 2023-01-24 01:37:31.355829: step: 882/464, loss: 0.8755449056625366 2023-01-24 01:37:32.156492: step: 884/464, loss: 1.6833244562149048 2023-01-24 01:37:32.855148: step: 886/464, loss: 2.0925192832946777 2023-01-24 01:37:33.582834: step: 888/464, loss: 5.369467735290527 2023-01-24 01:37:34.286971: step: 890/464, loss: 5.678498268127441 2023-01-24 01:37:35.068490: step: 892/464, loss: 2.654069423675537 2023-01-24 01:37:35.849479: step: 894/464, loss: 3.5575973987579346 2023-01-24 01:37:36.568779: step: 896/464, loss: 0.8754044771194458 2023-01-24 01:37:37.309767: step: 898/464, loss: 1.8832297325134277 2023-01-24 01:37:38.066456: step: 900/464, loss: 1.5971415042877197 2023-01-24 01:37:38.873158: step: 902/464, loss: 4.067296028137207 2023-01-24 01:37:39.563276: step: 904/464, loss: 1.232337236404419 2023-01-24 01:37:40.388589: step: 906/464, loss: 1.807895302772522 2023-01-24 01:37:41.175396: step: 908/464, loss: 1.2402400970458984 2023-01-24 01:37:41.927144: step: 910/464, loss: 7.4919891357421875 2023-01-24 01:37:42.700267: step: 912/464, loss: 0.6001932621002197 2023-01-24 01:37:43.461609: step: 914/464, loss: 2.4097423553466797 2023-01-24 01:37:44.272206: step: 916/464, loss: 2.4119486808776855 2023-01-24 01:37:45.049830: step: 918/464, loss: 5.363138198852539 2023-01-24 01:37:45.872856: step: 920/464, loss: 1.7590237855911255 2023-01-24 01:37:46.634822: step: 922/464, loss: 1.0465037822723389 2023-01-24 01:37:47.408439: step: 924/464, loss: 0.8016859889030457 2023-01-24 01:37:48.159543: step: 926/464, loss: 5.46735954284668 2023-01-24 01:37:48.889373: step: 928/464, loss: 1.4254200458526611 2023-01-24 01:37:49.556696: step: 930/464, loss: 1.331081509590149 ================================================== Loss: 5.576 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5103754940711462, 'r': 0.044380477745317067, 'f1': 0.08166007905138341}, 'combined': 0.060170584564177246, 'epoch': 0} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3944456674885523, 'r': 0.06464418691912487, 'f1': 0.11108334986608472}, 'combined': 0.06898860675893682, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4726443768996961, 'r': 0.041992978665946534, 'f1': 0.07713293650793651}, 'combined': 0.056834795321637425, 'epoch': 0} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.40642942583732056, 'r': 0.06820356410027656, 'f1': 0.11680576776165011}, 'combined': 0.07254252945197218, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5066445182724252, 'r': 0.04118282473669997, 'f1': 0.07617382617382616}, 'combined': 0.05612808244387191, 'epoch': 0} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3806010928961749, 'r': 0.068351324828263, 'f1': 0.1158901830282862}, 'combined': 0.07197390314388301, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.02857142857142857, 'f1': 0.05263157894736842}, 'combined': 0.03508771929824561, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5103754940711462, 'r': 0.044380477745317067, 'f1': 0.08166007905138341}, 'combined': 0.060170584564177246, 'epoch': 0} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3944456674885523, 'r': 0.06464418691912487, 'f1': 0.11108334986608472}, 'combined': 0.06898860675893682, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.02857142857142857, 'f1': 0.05263157894736842}, 'combined': 0.03508771929824561, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4726443768996961, 'r': 0.041992978665946534, 'f1': 0.07713293650793651}, 'combined': 0.056834795321637425, 'epoch': 0} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.40642942583732056, 'r': 0.06820356410027656, 'f1': 0.11680576776165011}, 'combined': 0.07254252945197218, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5066445182724252, 'r': 0.04118282473669997, 'f1': 0.07617382617382616}, 'combined': 0.05612808244387191, 'epoch': 0} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3806010928961749, 'r': 0.068351324828263, 'f1': 0.1158901830282862}, 'combined': 0.07197390314388301, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:40:56.742431: step: 2/464, loss: 3.567324638366699 2023-01-24 01:40:57.532841: step: 4/464, loss: 2.116065263748169 2023-01-24 01:40:58.227301: step: 6/464, loss: 1.166826844215393 2023-01-24 01:40:59.031328: step: 8/464, loss: 4.697350025177002 2023-01-24 01:40:59.767751: step: 10/464, loss: 5.934710502624512 2023-01-24 01:41:00.554188: step: 12/464, loss: 1.6735713481903076 2023-01-24 01:41:01.289588: step: 14/464, loss: 1.6156316995620728 2023-01-24 01:41:02.025060: step: 16/464, loss: 2.347018003463745 2023-01-24 01:41:02.754558: step: 18/464, loss: 1.8784767389297485 2023-01-24 01:41:03.572284: step: 20/464, loss: 1.6167913675308228 2023-01-24 01:41:04.318328: step: 22/464, loss: 1.060924768447876 2023-01-24 01:41:05.082908: step: 24/464, loss: 2.0685951709747314 2023-01-24 01:41:05.847545: step: 26/464, loss: 1.5308924913406372 2023-01-24 01:41:06.583360: step: 28/464, loss: 2.8823797702789307 2023-01-24 01:41:07.273705: step: 30/464, loss: 3.9805989265441895 2023-01-24 01:41:08.002406: step: 32/464, loss: 4.019295692443848 2023-01-24 01:41:08.743738: step: 34/464, loss: 0.39726945757865906 2023-01-24 01:41:09.508610: step: 36/464, loss: 1.1988452672958374 2023-01-24 01:41:10.309128: step: 38/464, loss: 2.4698407649993896 2023-01-24 01:41:11.095797: step: 40/464, loss: 0.7756727933883667 2023-01-24 01:41:11.937514: step: 42/464, loss: 0.9783002734184265 2023-01-24 01:41:12.696211: step: 44/464, loss: 0.5537374019622803 2023-01-24 01:41:13.432179: step: 46/464, loss: 0.5232036709785461 2023-01-24 01:41:14.176183: step: 48/464, loss: 2.6981263160705566 2023-01-24 01:41:14.922534: step: 50/464, loss: 1.9383834600448608 2023-01-24 01:41:15.716509: step: 52/464, loss: 2.12388277053833 2023-01-24 01:41:16.491075: step: 54/464, loss: 2.682612895965576 2023-01-24 01:41:17.258287: step: 56/464, loss: 2.9795823097229004 2023-01-24 01:41:18.059674: step: 58/464, loss: 15.888510704040527 2023-01-24 01:41:18.793679: step: 60/464, loss: 0.7368502020835876 2023-01-24 01:41:19.547677: step: 62/464, loss: 2.890212297439575 2023-01-24 01:41:20.205500: step: 64/464, loss: 2.0080223083496094 2023-01-24 01:41:21.051535: step: 66/464, loss: 0.7404903769493103 2023-01-24 01:41:21.845011: step: 68/464, loss: 0.9457007050514221 2023-01-24 01:41:22.613906: step: 70/464, loss: 0.7126715183258057 2023-01-24 01:41:23.446198: step: 72/464, loss: 0.5138718485832214 2023-01-24 01:41:24.199842: step: 74/464, loss: 2.5417838096618652 2023-01-24 01:41:24.881645: step: 76/464, loss: 1.963165283203125 2023-01-24 01:41:25.727508: step: 78/464, loss: 3.571277618408203 2023-01-24 01:41:26.460907: step: 80/464, loss: 1.5786569118499756 2023-01-24 01:41:27.230245: step: 82/464, loss: 0.7398642301559448 2023-01-24 01:41:27.985590: step: 84/464, loss: 2.779233694076538 2023-01-24 01:41:28.790282: step: 86/464, loss: 1.2509294748306274 2023-01-24 01:41:29.494767: step: 88/464, loss: 1.2668819427490234 2023-01-24 01:41:30.263378: step: 90/464, loss: 1.0955171585083008 2023-01-24 01:41:31.008640: step: 92/464, loss: 2.2722392082214355 2023-01-24 01:41:31.768477: step: 94/464, loss: 5.017848014831543 2023-01-24 01:41:32.501305: step: 96/464, loss: 1.3737874031066895 2023-01-24 01:41:33.288674: step: 98/464, loss: 1.84757661819458 2023-01-24 01:41:34.032074: step: 100/464, loss: 0.489383727312088 2023-01-24 01:41:34.820370: step: 102/464, loss: 1.7194496393203735 2023-01-24 01:41:35.556078: step: 104/464, loss: 2.4874281883239746 2023-01-24 01:41:36.292856: step: 106/464, loss: 4.907480239868164 2023-01-24 01:41:37.059964: step: 108/464, loss: 0.7825038433074951 2023-01-24 01:41:37.827678: step: 110/464, loss: 1.3089637756347656 2023-01-24 01:41:38.510357: step: 112/464, loss: 0.7727731466293335 2023-01-24 01:41:39.243083: step: 114/464, loss: 4.281703948974609 2023-01-24 01:41:40.030143: step: 116/464, loss: 0.6136764883995056 2023-01-24 01:41:40.791709: step: 118/464, loss: 1.5267757177352905 2023-01-24 01:41:41.453319: step: 120/464, loss: 0.6549609899520874 2023-01-24 01:41:42.185500: step: 122/464, loss: 0.6695728302001953 2023-01-24 01:41:42.892791: step: 124/464, loss: 1.9565430879592896 2023-01-24 01:41:43.690934: step: 126/464, loss: 0.830868124961853 2023-01-24 01:41:44.414866: step: 128/464, loss: 2.3312859535217285 2023-01-24 01:41:45.164666: step: 130/464, loss: 1.7556043863296509 2023-01-24 01:41:45.899885: step: 132/464, loss: 0.5519129037857056 2023-01-24 01:41:46.676594: step: 134/464, loss: 1.4241178035736084 2023-01-24 01:41:47.392542: step: 136/464, loss: 4.3382134437561035 2023-01-24 01:41:48.293948: step: 138/464, loss: 0.9232587814331055 2023-01-24 01:41:49.115159: step: 140/464, loss: 4.314373970031738 2023-01-24 01:41:49.872655: step: 142/464, loss: 1.8007235527038574 2023-01-24 01:41:50.562954: step: 144/464, loss: 1.3967361450195312 2023-01-24 01:41:51.370557: step: 146/464, loss: 2.1668901443481445 2023-01-24 01:41:52.192472: step: 148/464, loss: 2.6193809509277344 2023-01-24 01:41:52.944127: step: 150/464, loss: 0.4625154137611389 2023-01-24 01:41:53.782760: step: 152/464, loss: 1.2553250789642334 2023-01-24 01:41:54.589264: step: 154/464, loss: 2.662593364715576 2023-01-24 01:41:55.354557: step: 156/464, loss: 1.529561161994934 2023-01-24 01:41:56.221209: step: 158/464, loss: 5.751806259155273 2023-01-24 01:41:56.980206: step: 160/464, loss: 3.984288215637207 2023-01-24 01:41:57.730257: step: 162/464, loss: 1.781134843826294 2023-01-24 01:41:58.442264: step: 164/464, loss: 8.015787124633789 2023-01-24 01:41:59.179257: step: 166/464, loss: 2.554163932800293 2023-01-24 01:41:59.914467: step: 168/464, loss: 4.118683815002441 2023-01-24 01:42:00.644955: step: 170/464, loss: 2.1517744064331055 2023-01-24 01:42:01.483029: step: 172/464, loss: 1.5270841121673584 2023-01-24 01:42:02.200833: step: 174/464, loss: 1.0206589698791504 2023-01-24 01:42:02.982814: step: 176/464, loss: 0.5664258599281311 2023-01-24 01:42:03.678788: step: 178/464, loss: 0.5027251243591309 2023-01-24 01:42:04.434024: step: 180/464, loss: 1.4795325994491577 2023-01-24 01:42:05.112565: step: 182/464, loss: 0.8331640362739563 2023-01-24 01:42:05.840957: step: 184/464, loss: 1.1108388900756836 2023-01-24 01:42:06.653673: step: 186/464, loss: 1.221932291984558 2023-01-24 01:42:07.414238: step: 188/464, loss: 1.2539130449295044 2023-01-24 01:42:08.201508: step: 190/464, loss: 1.0047433376312256 2023-01-24 01:42:09.028996: step: 192/464, loss: 1.9757084846496582 2023-01-24 01:42:09.833307: step: 194/464, loss: 4.2894744873046875 2023-01-24 01:42:10.581351: step: 196/464, loss: 1.083547830581665 2023-01-24 01:42:11.371408: step: 198/464, loss: 1.2944920063018799 2023-01-24 01:42:12.116399: step: 200/464, loss: 0.8665717840194702 2023-01-24 01:42:12.901366: step: 202/464, loss: 2.007253646850586 2023-01-24 01:42:13.686245: step: 204/464, loss: 1.715686559677124 2023-01-24 01:42:14.615979: step: 206/464, loss: 0.9834060072898865 2023-01-24 01:42:15.288240: step: 208/464, loss: 1.235771894454956 2023-01-24 01:42:16.038723: step: 210/464, loss: 2.0683391094207764 2023-01-24 01:42:16.725630: step: 212/464, loss: 0.8923385143280029 2023-01-24 01:42:17.558667: step: 214/464, loss: 4.624021053314209 2023-01-24 01:42:18.300439: step: 216/464, loss: 0.5816634297370911 2023-01-24 01:42:19.002666: step: 218/464, loss: 2.2024126052856445 2023-01-24 01:42:19.740035: step: 220/464, loss: 0.6494408845901489 2023-01-24 01:42:20.387319: step: 222/464, loss: 0.8478401899337769 2023-01-24 01:42:21.140135: step: 224/464, loss: 2.6478207111358643 2023-01-24 01:42:21.939698: step: 226/464, loss: 0.974275529384613 2023-01-24 01:42:22.729130: step: 228/464, loss: 0.543366551399231 2023-01-24 01:42:23.553417: step: 230/464, loss: 3.2722392082214355 2023-01-24 01:42:24.272077: step: 232/464, loss: 13.146383285522461 2023-01-24 01:42:24.997855: step: 234/464, loss: 1.7761332988739014 2023-01-24 01:42:25.803279: step: 236/464, loss: 4.8828349113464355 2023-01-24 01:42:26.530726: step: 238/464, loss: 6.12351131439209 2023-01-24 01:42:27.189308: step: 240/464, loss: 2.6610379219055176 2023-01-24 01:42:27.935770: step: 242/464, loss: 2.2539026737213135 2023-01-24 01:42:28.669188: step: 244/464, loss: 1.0267163515090942 2023-01-24 01:42:29.473997: step: 246/464, loss: 2.6492509841918945 2023-01-24 01:42:30.301003: step: 248/464, loss: 4.662771701812744 2023-01-24 01:42:31.069440: step: 250/464, loss: 2.4732513427734375 2023-01-24 01:42:31.885607: step: 252/464, loss: 4.1236186027526855 2023-01-24 01:42:32.678554: step: 254/464, loss: 2.082460880279541 2023-01-24 01:42:33.404333: step: 256/464, loss: 1.0133386850357056 2023-01-24 01:42:34.083217: step: 258/464, loss: 0.5736017823219299 2023-01-24 01:42:34.976768: step: 260/464, loss: 1.344170331954956 2023-01-24 01:42:35.742662: step: 262/464, loss: 0.6307258605957031 2023-01-24 01:42:36.562468: step: 264/464, loss: 0.8127894997596741 2023-01-24 01:42:37.309615: step: 266/464, loss: 3.181589126586914 2023-01-24 01:42:38.022931: step: 268/464, loss: 5.5859174728393555 2023-01-24 01:42:38.763643: step: 270/464, loss: 2.046428918838501 2023-01-24 01:42:39.624001: step: 272/464, loss: 5.400157451629639 2023-01-24 01:42:40.412225: step: 274/464, loss: 0.8204623460769653 2023-01-24 01:42:41.118034: step: 276/464, loss: 4.42466402053833 2023-01-24 01:42:41.922530: step: 278/464, loss: 2.860304117202759 2023-01-24 01:42:42.700917: step: 280/464, loss: 1.755745530128479 2023-01-24 01:42:43.428512: step: 282/464, loss: 0.9788022041320801 2023-01-24 01:42:44.109918: step: 284/464, loss: 0.9861152768135071 2023-01-24 01:42:44.926686: step: 286/464, loss: 1.0290707349777222 2023-01-24 01:42:45.663306: step: 288/464, loss: 2.2862699031829834 2023-01-24 01:42:46.383748: step: 290/464, loss: 2.5819599628448486 2023-01-24 01:42:47.184981: step: 292/464, loss: 0.915147602558136 2023-01-24 01:42:47.916990: step: 294/464, loss: 1.1519476175308228 2023-01-24 01:42:48.687085: step: 296/464, loss: 0.3368190824985504 2023-01-24 01:42:49.400194: step: 298/464, loss: 0.5094312429428101 2023-01-24 01:42:50.145539: step: 300/464, loss: 1.7346651554107666 2023-01-24 01:42:50.854308: step: 302/464, loss: 1.1590203046798706 2023-01-24 01:42:51.646975: step: 304/464, loss: 1.1996859312057495 2023-01-24 01:42:52.413284: step: 306/464, loss: 2.341116189956665 2023-01-24 01:42:53.151562: step: 308/464, loss: 1.8273674249649048 2023-01-24 01:42:54.026996: step: 310/464, loss: 0.4473875164985657 2023-01-24 01:42:54.837735: step: 312/464, loss: 3.9436159133911133 2023-01-24 01:42:55.620579: step: 314/464, loss: 3.0034258365631104 2023-01-24 01:42:56.339979: step: 316/464, loss: 2.238938331604004 2023-01-24 01:42:57.110644: step: 318/464, loss: 2.2146379947662354 2023-01-24 01:42:57.883458: step: 320/464, loss: 0.7863084673881531 2023-01-24 01:42:58.644252: step: 322/464, loss: 0.6234296560287476 2023-01-24 01:42:59.442947: step: 324/464, loss: 0.4865874648094177 2023-01-24 01:43:00.250412: step: 326/464, loss: 0.5788937211036682 2023-01-24 01:43:01.043568: step: 328/464, loss: 2.890346050262451 2023-01-24 01:43:01.847387: step: 330/464, loss: 7.377608299255371 2023-01-24 01:43:02.553629: step: 332/464, loss: 2.9715189933776855 2023-01-24 01:43:03.309198: step: 334/464, loss: 0.6012032628059387 2023-01-24 01:43:04.072120: step: 336/464, loss: 1.0220718383789062 2023-01-24 01:43:04.806996: step: 338/464, loss: 1.4404304027557373 2023-01-24 01:43:05.585539: step: 340/464, loss: 0.9285256862640381 2023-01-24 01:43:06.284169: step: 342/464, loss: 0.7093393802642822 2023-01-24 01:43:07.102713: step: 344/464, loss: 0.5021221041679382 2023-01-24 01:43:07.965837: step: 346/464, loss: 1.502563714981079 2023-01-24 01:43:08.693499: step: 348/464, loss: 0.9983633756637573 2023-01-24 01:43:09.534020: step: 350/464, loss: 0.690024197101593 2023-01-24 01:43:10.302371: step: 352/464, loss: 0.8608592748641968 2023-01-24 01:43:11.077467: step: 354/464, loss: 2.9874119758605957 2023-01-24 01:43:11.870201: step: 356/464, loss: 1.582247257232666 2023-01-24 01:43:12.587584: step: 358/464, loss: 1.9923336505889893 2023-01-24 01:43:13.322765: step: 360/464, loss: 0.45969799160957336 2023-01-24 01:43:14.072509: step: 362/464, loss: 0.37600523233413696 2023-01-24 01:43:14.776485: step: 364/464, loss: 1.5279144048690796 2023-01-24 01:43:15.539653: step: 366/464, loss: 7.2152509689331055 2023-01-24 01:43:16.320929: step: 368/464, loss: 1.3439027070999146 2023-01-24 01:43:17.105258: step: 370/464, loss: 1.8933783769607544 2023-01-24 01:43:17.863085: step: 372/464, loss: 0.5813678503036499 2023-01-24 01:43:18.567128: step: 374/464, loss: 0.8357667922973633 2023-01-24 01:43:19.322879: step: 376/464, loss: 2.265641927719116 2023-01-24 01:43:20.143231: step: 378/464, loss: 0.6052720546722412 2023-01-24 01:43:20.847156: step: 380/464, loss: 3.890368938446045 2023-01-24 01:43:21.594696: step: 382/464, loss: 2.141329526901245 2023-01-24 01:43:22.354807: step: 384/464, loss: 0.5587427616119385 2023-01-24 01:43:23.052830: step: 386/464, loss: 2.0051937103271484 2023-01-24 01:43:23.779366: step: 388/464, loss: 1.6070865392684937 2023-01-24 01:43:24.657314: step: 390/464, loss: 2.655045747756958 2023-01-24 01:43:25.439093: step: 392/464, loss: 1.8066232204437256 2023-01-24 01:43:26.126861: step: 394/464, loss: 2.347830295562744 2023-01-24 01:43:26.848067: step: 396/464, loss: 6.169438362121582 2023-01-24 01:43:27.665216: step: 398/464, loss: 5.859373569488525 2023-01-24 01:43:28.370300: step: 400/464, loss: 0.5093414783477783 2023-01-24 01:43:29.151778: step: 402/464, loss: 1.0215308666229248 2023-01-24 01:43:29.882449: step: 404/464, loss: 2.536679267883301 2023-01-24 01:43:30.629115: step: 406/464, loss: 0.9781687259674072 2023-01-24 01:43:31.366448: step: 408/464, loss: 1.8663051128387451 2023-01-24 01:43:32.128708: step: 410/464, loss: 1.7260236740112305 2023-01-24 01:43:32.863305: step: 412/464, loss: 0.708279013633728 2023-01-24 01:43:33.628847: step: 414/464, loss: 0.8949853181838989 2023-01-24 01:43:34.358564: step: 416/464, loss: 8.901383399963379 2023-01-24 01:43:35.143710: step: 418/464, loss: 4.989002227783203 2023-01-24 01:43:35.990199: step: 420/464, loss: 6.246890544891357 2023-01-24 01:43:36.720961: step: 422/464, loss: 1.0378128290176392 2023-01-24 01:43:37.544447: step: 424/464, loss: 1.038848876953125 2023-01-24 01:43:38.251285: step: 426/464, loss: 1.004784345626831 2023-01-24 01:43:38.990969: step: 428/464, loss: 0.6553089022636414 2023-01-24 01:43:39.742853: step: 430/464, loss: 2.451887845993042 2023-01-24 01:43:40.506770: step: 432/464, loss: 0.3229205012321472 2023-01-24 01:43:41.209345: step: 434/464, loss: 0.18707747757434845 2023-01-24 01:43:41.939396: step: 436/464, loss: 2.7739617824554443 2023-01-24 01:43:42.694693: step: 438/464, loss: 5.1485137939453125 2023-01-24 01:43:43.540773: step: 440/464, loss: 1.6739176511764526 2023-01-24 01:43:44.218922: step: 442/464, loss: 0.21984398365020752 2023-01-24 01:43:45.025918: step: 444/464, loss: 4.9870991706848145 2023-01-24 01:43:45.674129: step: 446/464, loss: 2.1164140701293945 2023-01-24 01:43:46.400356: step: 448/464, loss: 3.631910800933838 2023-01-24 01:43:47.202737: step: 450/464, loss: 0.6704921722412109 2023-01-24 01:43:48.000348: step: 452/464, loss: 0.7273821234703064 2023-01-24 01:43:48.740865: step: 454/464, loss: 1.1897422075271606 2023-01-24 01:43:49.469592: step: 456/464, loss: 0.177559033036232 2023-01-24 01:43:50.184335: step: 458/464, loss: 3.297564744949341 2023-01-24 01:43:50.914029: step: 460/464, loss: 1.4739949703216553 2023-01-24 01:43:51.720658: step: 462/464, loss: 1.0736751556396484 2023-01-24 01:43:52.452546: step: 464/464, loss: 0.20888689160346985 2023-01-24 01:43:53.162281: step: 466/464, loss: 2.5224709510803223 2023-01-24 01:43:53.978895: step: 468/464, loss: 4.367872714996338 2023-01-24 01:43:54.732712: step: 470/464, loss: 0.2336680293083191 2023-01-24 01:43:55.497400: step: 472/464, loss: 0.4050363302230835 2023-01-24 01:43:56.281790: step: 474/464, loss: 0.3901623487472534 2023-01-24 01:43:57.012655: step: 476/464, loss: 0.5768817067146301 2023-01-24 01:43:57.756642: step: 478/464, loss: 1.0388317108154297 2023-01-24 01:43:58.590818: step: 480/464, loss: 2.2464306354522705 2023-01-24 01:43:59.460230: step: 482/464, loss: 10.363744735717773 2023-01-24 01:44:00.238935: step: 484/464, loss: 0.3566378951072693 2023-01-24 01:44:00.971790: step: 486/464, loss: 0.7015933990478516 2023-01-24 01:44:01.694964: step: 488/464, loss: 1.5301611423492432 2023-01-24 01:44:02.547662: step: 490/464, loss: 1.2292773723602295 2023-01-24 01:44:03.370016: step: 492/464, loss: 0.82595294713974 2023-01-24 01:44:04.062722: step: 494/464, loss: 0.7543503046035767 2023-01-24 01:44:04.893393: step: 496/464, loss: 0.8798842430114746 2023-01-24 01:44:05.604944: step: 498/464, loss: 1.4138880968093872 2023-01-24 01:44:06.309070: step: 500/464, loss: 0.45087796449661255 2023-01-24 01:44:07.055002: step: 502/464, loss: 1.0304077863693237 2023-01-24 01:44:07.919805: step: 504/464, loss: 3.080888032913208 2023-01-24 01:44:08.693274: step: 506/464, loss: 3.425476551055908 2023-01-24 01:44:09.420769: step: 508/464, loss: 0.5018462538719177 2023-01-24 01:44:10.105934: step: 510/464, loss: 0.4485335350036621 2023-01-24 01:44:10.859556: step: 512/464, loss: 2.6635360717773438 2023-01-24 01:44:11.621667: step: 514/464, loss: 3.398329257965088 2023-01-24 01:44:12.330821: step: 516/464, loss: 0.505752444267273 2023-01-24 01:44:13.058950: step: 518/464, loss: 1.5625126361846924 2023-01-24 01:44:13.807339: step: 520/464, loss: 0.7278554439544678 2023-01-24 01:44:14.590266: step: 522/464, loss: 1.5637742280960083 2023-01-24 01:44:15.369249: step: 524/464, loss: 1.576052188873291 2023-01-24 01:44:16.186112: step: 526/464, loss: 1.3697073459625244 2023-01-24 01:44:16.915153: step: 528/464, loss: 0.9000505208969116 2023-01-24 01:44:17.614660: step: 530/464, loss: 3.3905420303344727 2023-01-24 01:44:18.392784: step: 532/464, loss: 0.975693941116333 2023-01-24 01:44:19.098117: step: 534/464, loss: 6.041010856628418 2023-01-24 01:44:19.877006: step: 536/464, loss: 0.629315197467804 2023-01-24 01:44:20.679258: step: 538/464, loss: 2.6904046535491943 2023-01-24 01:44:21.412787: step: 540/464, loss: 1.1637006998062134 2023-01-24 01:44:22.152878: step: 542/464, loss: 4.338569164276123 2023-01-24 01:44:22.851512: step: 544/464, loss: 10.183465957641602 2023-01-24 01:44:23.591557: step: 546/464, loss: 1.6647273302078247 2023-01-24 01:44:24.358629: step: 548/464, loss: 0.8222893476486206 2023-01-24 01:44:25.117825: step: 550/464, loss: 1.9054585695266724 2023-01-24 01:44:25.835624: step: 552/464, loss: 2.1345303058624268 2023-01-24 01:44:26.621241: step: 554/464, loss: 4.587452411651611 2023-01-24 01:44:27.396831: step: 556/464, loss: 0.9287194609642029 2023-01-24 01:44:28.046078: step: 558/464, loss: 0.9194431900978088 2023-01-24 01:44:28.929807: step: 560/464, loss: 1.7446281909942627 2023-01-24 01:44:29.618976: step: 562/464, loss: 0.6097180843353271 2023-01-24 01:44:30.416563: step: 564/464, loss: 2.0353572368621826 2023-01-24 01:44:31.242398: step: 566/464, loss: 8.625846862792969 2023-01-24 01:44:32.045174: step: 568/464, loss: 0.5222208499908447 2023-01-24 01:44:32.894270: step: 570/464, loss: 1.3264281749725342 2023-01-24 01:44:33.632023: step: 572/464, loss: 2.493659734725952 2023-01-24 01:44:34.517139: step: 574/464, loss: 0.6286075711250305 2023-01-24 01:44:35.218359: step: 576/464, loss: 0.6821500062942505 2023-01-24 01:44:35.946908: step: 578/464, loss: 2.119264602661133 2023-01-24 01:44:36.707496: step: 580/464, loss: 3.8549129962921143 2023-01-24 01:44:37.448328: step: 582/464, loss: 1.0581588745117188 2023-01-24 01:44:38.275055: step: 584/464, loss: 0.9843960404396057 2023-01-24 01:44:39.057363: step: 586/464, loss: 0.9626711010932922 2023-01-24 01:44:39.794072: step: 588/464, loss: 1.1887109279632568 2023-01-24 01:44:40.544899: step: 590/464, loss: 1.310795545578003 2023-01-24 01:44:41.296333: step: 592/464, loss: 1.8952412605285645 2023-01-24 01:44:42.108150: step: 594/464, loss: 0.790389895439148 2023-01-24 01:44:42.856666: step: 596/464, loss: 1.7052712440490723 2023-01-24 01:44:43.602772: step: 598/464, loss: 0.49568837881088257 2023-01-24 01:44:44.370521: step: 600/464, loss: 0.4807586669921875 2023-01-24 01:44:45.181792: step: 602/464, loss: 2.6345295906066895 2023-01-24 01:44:45.889641: step: 604/464, loss: 0.7684303522109985 2023-01-24 01:44:46.654300: step: 606/464, loss: 0.34246817231178284 2023-01-24 01:44:47.455011: step: 608/464, loss: 1.8327733278274536 2023-01-24 01:44:48.253179: step: 610/464, loss: 0.8029569387435913 2023-01-24 01:44:49.011390: step: 612/464, loss: 2.389103412628174 2023-01-24 01:44:49.742298: step: 614/464, loss: 1.6085231304168701 2023-01-24 01:44:50.555041: step: 616/464, loss: 2.841780185699463 2023-01-24 01:44:51.365223: step: 618/464, loss: 0.411793053150177 2023-01-24 01:44:52.113987: step: 620/464, loss: 2.203213930130005 2023-01-24 01:44:52.890612: step: 622/464, loss: 7.631477355957031 2023-01-24 01:44:53.745984: step: 624/464, loss: 0.8750141263008118 2023-01-24 01:44:54.571988: step: 626/464, loss: 0.9731322526931763 2023-01-24 01:44:55.364215: step: 628/464, loss: 1.101901888847351 2023-01-24 01:44:56.099400: step: 630/464, loss: 0.8351380825042725 2023-01-24 01:44:56.850058: step: 632/464, loss: 1.7162944078445435 2023-01-24 01:44:57.670987: step: 634/464, loss: 4.447570323944092 2023-01-24 01:44:58.373517: step: 636/464, loss: 1.5265611410140991 2023-01-24 01:44:59.170443: step: 638/464, loss: 3.4335930347442627 2023-01-24 01:44:59.853413: step: 640/464, loss: 0.40369030833244324 2023-01-24 01:45:00.602574: step: 642/464, loss: 1.4695643186569214 2023-01-24 01:45:01.340985: step: 644/464, loss: 0.5647727251052856 2023-01-24 01:45:02.105951: step: 646/464, loss: 1.4111278057098389 2023-01-24 01:45:02.852013: step: 648/464, loss: 3.0106451511383057 2023-01-24 01:45:03.618122: step: 650/464, loss: 1.3463108539581299 2023-01-24 01:45:04.328399: step: 652/464, loss: 1.3156379461288452 2023-01-24 01:45:05.040701: step: 654/464, loss: 2.6676833629608154 2023-01-24 01:45:05.807451: step: 656/464, loss: 1.5572566986083984 2023-01-24 01:45:06.668580: step: 658/464, loss: 0.9726899862289429 2023-01-24 01:45:07.419063: step: 660/464, loss: 4.307257652282715 2023-01-24 01:45:08.177385: step: 662/464, loss: 0.39950403571128845 2023-01-24 01:45:08.890951: step: 664/464, loss: 7.1081109046936035 2023-01-24 01:45:09.738245: step: 666/464, loss: 0.9712538719177246 2023-01-24 01:45:10.497974: step: 668/464, loss: 0.19049298763275146 2023-01-24 01:45:11.281926: step: 670/464, loss: 0.5407162308692932 2023-01-24 01:45:12.073093: step: 672/464, loss: 1.2553319931030273 2023-01-24 01:45:12.857331: step: 674/464, loss: 0.3443169891834259 2023-01-24 01:45:13.616771: step: 676/464, loss: 1.114622950553894 2023-01-24 01:45:14.368171: step: 678/464, loss: 1.3746274709701538 2023-01-24 01:45:15.158163: step: 680/464, loss: 5.34721040725708 2023-01-24 01:45:15.911888: step: 682/464, loss: 0.7788916230201721 2023-01-24 01:45:16.643488: step: 684/464, loss: 0.7353830337524414 2023-01-24 01:45:17.393102: step: 686/464, loss: 0.2988717257976532 2023-01-24 01:45:18.259683: step: 688/464, loss: 0.9029651284217834 2023-01-24 01:45:19.030477: step: 690/464, loss: 5.1300811767578125 2023-01-24 01:45:19.826280: step: 692/464, loss: 1.1903573274612427 2023-01-24 01:45:20.691299: step: 694/464, loss: 7.861872673034668 2023-01-24 01:45:21.492534: step: 696/464, loss: 0.6495417356491089 2023-01-24 01:45:22.272165: step: 698/464, loss: 1.1668031215667725 2023-01-24 01:45:23.020944: step: 700/464, loss: 0.5051676630973816 2023-01-24 01:45:23.770560: step: 702/464, loss: 3.5430235862731934 2023-01-24 01:45:24.557807: step: 704/464, loss: 0.5422537922859192 2023-01-24 01:45:25.358290: step: 706/464, loss: 0.7244465351104736 2023-01-24 01:45:26.193559: step: 708/464, loss: 2.217958450317383 2023-01-24 01:45:26.912685: step: 710/464, loss: 4.301735877990723 2023-01-24 01:45:27.584435: step: 712/464, loss: 7.51944637298584 2023-01-24 01:45:28.347306: step: 714/464, loss: 1.5007673501968384 2023-01-24 01:45:29.133100: step: 716/464, loss: 1.6526864767074585 2023-01-24 01:45:29.955514: step: 718/464, loss: 1.4878666400909424 2023-01-24 01:45:30.750640: step: 720/464, loss: 1.9696204662322998 2023-01-24 01:45:31.614004: step: 722/464, loss: 1.068194031715393 2023-01-24 01:45:32.428040: step: 724/464, loss: 1.606663465499878 2023-01-24 01:45:33.242467: step: 726/464, loss: 0.4832223951816559 2023-01-24 01:45:34.086347: step: 728/464, loss: 0.7201308608055115 2023-01-24 01:45:34.875692: step: 730/464, loss: 3.175027847290039 2023-01-24 01:45:35.684891: step: 732/464, loss: 4.3967437744140625 2023-01-24 01:45:36.485616: step: 734/464, loss: 2.772202491760254 2023-01-24 01:45:37.203497: step: 736/464, loss: 0.44274598360061646 2023-01-24 01:45:37.945438: step: 738/464, loss: 1.7568962574005127 2023-01-24 01:45:38.730567: step: 740/464, loss: 2.24100923538208 2023-01-24 01:45:39.475388: step: 742/464, loss: 0.6800580024719238 2023-01-24 01:45:40.241029: step: 744/464, loss: 0.685122013092041 2023-01-24 01:45:40.953906: step: 746/464, loss: 0.48600563406944275 2023-01-24 01:45:41.739266: step: 748/464, loss: 2.582446336746216 2023-01-24 01:45:42.514525: step: 750/464, loss: 0.6945471167564392 2023-01-24 01:45:43.235941: step: 752/464, loss: 3.8455324172973633 2023-01-24 01:45:43.923258: step: 754/464, loss: 0.5839530229568481 2023-01-24 01:45:44.632712: step: 756/464, loss: 1.003867745399475 2023-01-24 01:45:45.451520: step: 758/464, loss: 1.6268566846847534 2023-01-24 01:45:46.202983: step: 760/464, loss: 0.4314429461956024 2023-01-24 01:45:47.040948: step: 762/464, loss: 1.3930310010910034 2023-01-24 01:45:47.766708: step: 764/464, loss: 1.0327585935592651 2023-01-24 01:45:48.594181: step: 766/464, loss: 1.0543372631072998 2023-01-24 01:45:49.365795: step: 768/464, loss: 1.979860544204712 2023-01-24 01:45:50.293311: step: 770/464, loss: 0.5045173168182373 2023-01-24 01:45:51.004873: step: 772/464, loss: 1.0678372383117676 2023-01-24 01:45:51.758812: step: 774/464, loss: 0.7546037435531616 2023-01-24 01:45:52.434431: step: 776/464, loss: 0.40795233845710754 2023-01-24 01:45:53.190213: step: 778/464, loss: 1.6976772546768188 2023-01-24 01:45:53.932718: step: 780/464, loss: 0.39988434314727783 2023-01-24 01:45:54.745463: step: 782/464, loss: 8.160738945007324 2023-01-24 01:45:55.596267: step: 784/464, loss: 1.8919651508331299 2023-01-24 01:45:56.377070: step: 786/464, loss: 1.4552507400512695 2023-01-24 01:45:57.112053: step: 788/464, loss: 1.8836709260940552 2023-01-24 01:45:57.863077: step: 790/464, loss: 2.472114086151123 2023-01-24 01:45:58.672420: step: 792/464, loss: 4.261412143707275 2023-01-24 01:45:59.478447: step: 794/464, loss: 0.5101083517074585 2023-01-24 01:46:00.231281: step: 796/464, loss: 0.31527039408683777 2023-01-24 01:46:00.978494: step: 798/464, loss: 0.5338462591171265 2023-01-24 01:46:01.689392: step: 800/464, loss: 4.2285871505737305 2023-01-24 01:46:02.478121: step: 802/464, loss: 0.4452413022518158 2023-01-24 01:46:03.331082: step: 804/464, loss: 2.7038815021514893 2023-01-24 01:46:04.102320: step: 806/464, loss: 1.2515664100646973 2023-01-24 01:46:04.901242: step: 808/464, loss: 0.48019105195999146 2023-01-24 01:46:05.690874: step: 810/464, loss: 0.627007246017456 2023-01-24 01:46:06.504661: step: 812/464, loss: 0.7928240895271301 2023-01-24 01:46:07.286132: step: 814/464, loss: 1.1529955863952637 2023-01-24 01:46:08.159841: step: 816/464, loss: 0.727571964263916 2023-01-24 01:46:08.919087: step: 818/464, loss: 2.150031566619873 2023-01-24 01:46:09.654647: step: 820/464, loss: 1.0720735788345337 2023-01-24 01:46:10.480509: step: 822/464, loss: 1.3180612325668335 2023-01-24 01:46:11.328901: step: 824/464, loss: 1.2374560832977295 2023-01-24 01:46:12.072881: step: 826/464, loss: 10.174234390258789 2023-01-24 01:46:12.807619: step: 828/464, loss: 0.2928467392921448 2023-01-24 01:46:13.638551: step: 830/464, loss: 2.6930325031280518 2023-01-24 01:46:14.421126: step: 832/464, loss: 4.566496849060059 2023-01-24 01:46:15.187044: step: 834/464, loss: 0.7894934415817261 2023-01-24 01:46:16.055031: step: 836/464, loss: 13.314796447753906 2023-01-24 01:46:16.790727: step: 838/464, loss: 1.932656168937683 2023-01-24 01:46:17.512453: step: 840/464, loss: 0.557920515537262 2023-01-24 01:46:18.310852: step: 842/464, loss: 0.49012115597724915 2023-01-24 01:46:19.067981: step: 844/464, loss: 0.7082429528236389 2023-01-24 01:46:19.874409: step: 846/464, loss: 1.2856661081314087 2023-01-24 01:46:20.669119: step: 848/464, loss: 5.341327667236328 2023-01-24 01:46:21.482271: step: 850/464, loss: 0.5253156423568726 2023-01-24 01:46:22.184526: step: 852/464, loss: 7.842291831970215 2023-01-24 01:46:22.929931: step: 854/464, loss: 0.8897383809089661 2023-01-24 01:46:23.727907: step: 856/464, loss: 1.7032040357589722 2023-01-24 01:46:24.438347: step: 858/464, loss: 0.3025331497192383 2023-01-24 01:46:25.197317: step: 860/464, loss: 0.7118395566940308 2023-01-24 01:46:26.042719: step: 862/464, loss: 0.8716322183609009 2023-01-24 01:46:26.776355: step: 864/464, loss: 2.1756081581115723 2023-01-24 01:46:27.555951: step: 866/464, loss: 0.9115775227546692 2023-01-24 01:46:28.311165: step: 868/464, loss: 0.44515085220336914 2023-01-24 01:46:29.042753: step: 870/464, loss: 0.5697401762008667 2023-01-24 01:46:29.762629: step: 872/464, loss: 0.3869156539440155 2023-01-24 01:46:30.535050: step: 874/464, loss: 0.9719885587692261 2023-01-24 01:46:31.329335: step: 876/464, loss: 0.6325796842575073 2023-01-24 01:46:32.069054: step: 878/464, loss: 0.21479357779026031 2023-01-24 01:46:32.837722: step: 880/464, loss: 1.4217106103897095 2023-01-24 01:46:33.566064: step: 882/464, loss: 2.153522491455078 2023-01-24 01:46:34.317066: step: 884/464, loss: 1.7959179878234863 2023-01-24 01:46:35.128554: step: 886/464, loss: 1.5518654584884644 2023-01-24 01:46:35.878406: step: 888/464, loss: 1.2342078685760498 2023-01-24 01:46:36.744923: step: 890/464, loss: 1.6774334907531738 2023-01-24 01:46:37.632951: step: 892/464, loss: 0.7303564548492432 2023-01-24 01:46:38.385632: step: 894/464, loss: 1.8460235595703125 2023-01-24 01:46:39.149532: step: 896/464, loss: 1.0837057828903198 2023-01-24 01:46:39.889440: step: 898/464, loss: 2.979775905609131 2023-01-24 01:46:40.658000: step: 900/464, loss: 1.2387704849243164 2023-01-24 01:46:41.365145: step: 902/464, loss: 0.5870174169540405 2023-01-24 01:46:42.166629: step: 904/464, loss: 3.3540124893188477 2023-01-24 01:46:42.904127: step: 906/464, loss: 5.47309684753418 2023-01-24 01:46:43.733321: step: 908/464, loss: 0.7924412488937378 2023-01-24 01:46:44.439212: step: 910/464, loss: 1.008704662322998 2023-01-24 01:46:45.146782: step: 912/464, loss: 2.3228940963745117 2023-01-24 01:46:45.908917: step: 914/464, loss: 1.8138552904129028 2023-01-24 01:46:46.697908: step: 916/464, loss: 6.6115570068359375 2023-01-24 01:46:47.600546: step: 918/464, loss: 0.5387253761291504 2023-01-24 01:46:48.302178: step: 920/464, loss: 0.24979518353939056 2023-01-24 01:46:49.080873: step: 922/464, loss: 0.9564343690872192 2023-01-24 01:46:49.826973: step: 924/464, loss: 1.6989599466323853 2023-01-24 01:46:50.589915: step: 926/464, loss: 0.4422229528427124 2023-01-24 01:46:51.349227: step: 928/464, loss: 7.08799934387207 2023-01-24 01:46:51.960597: step: 930/464, loss: 0.38450002670288086 ================================================== Loss: 2.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34236263736263733, 'r': 0.21073457792207792, 'f1': 0.2608859487523028}, 'combined': 0.19223175171222312, 'epoch': 1} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.36726590072571547, 'r': 0.1625070357193431, 'f1': 0.22531650351270888}, 'combined': 0.139933407444735, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3116872797936871, 'r': 0.20188835168454733, 'f1': 0.24505068894124363}, 'combined': 0.1805636655356532, 'epoch': 1} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3493962708615903, 'r': 0.1556307873159296, 'f1': 0.21534219142898015}, 'combined': 0.13373883467694558, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3198857459914847, 'r': 0.2011402796764639, 'f1': 0.24698155271900682}, 'combined': 0.1819864072666366, 'epoch': 1} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.360859680625419, 'r': 0.16286587355660503, 'f1': 0.2244371184377606}, 'combined': 0.13938726302976712, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24702380952380953, 'r': 0.14821428571428572, 'f1': 0.18526785714285715}, 'combined': 0.12351190476190477, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.034482758620689655, 'f1': 0.0625}, 'combined': 0.041666666666666664, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34236263736263733, 'r': 0.21073457792207792, 'f1': 0.2608859487523028}, 'combined': 0.19223175171222312, 'epoch': 1} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.36726590072571547, 'r': 0.1625070357193431, 'f1': 0.22531650351270888}, 'combined': 0.139933407444735, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24702380952380953, 'r': 0.14821428571428572, 'f1': 0.18526785714285715}, 'combined': 0.12351190476190477, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3116872797936871, 'r': 0.20188835168454733, 'f1': 0.24505068894124363}, 'combined': 0.1805636655356532, 'epoch': 1} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3493962708615903, 'r': 0.1556307873159296, 'f1': 0.21534219142898015}, 'combined': 0.13373883467694558, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3198857459914847, 'r': 0.2011402796764639, 'f1': 0.24698155271900682}, 'combined': 0.1819864072666366, 'epoch': 1} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.360859680625419, 'r': 0.16286587355660503, 'f1': 0.2244371184377606}, 'combined': 0.13938726302976712, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.034482758620689655, 'f1': 0.0625}, 'combined': 0.041666666666666664, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:50:03.739527: step: 2/464, loss: 1.2311524152755737 2023-01-24 01:50:04.548783: step: 4/464, loss: 1.7830935716629028 2023-01-24 01:50:05.258761: step: 6/464, loss: 2.093751907348633 2023-01-24 01:50:05.933277: step: 8/464, loss: 2.221421718597412 2023-01-24 01:50:06.702524: step: 10/464, loss: 0.6308695077896118 2023-01-24 01:50:07.406589: step: 12/464, loss: 0.4722578525543213 2023-01-24 01:50:08.187820: step: 14/464, loss: 0.9641572833061218 2023-01-24 01:50:08.894168: step: 16/464, loss: 0.20689396560192108 2023-01-24 01:50:09.647988: step: 18/464, loss: 0.521108865737915 2023-01-24 01:50:10.358636: step: 20/464, loss: 0.6766486167907715 2023-01-24 01:50:11.179039: step: 22/464, loss: 2.2151641845703125 2023-01-24 01:50:12.014068: step: 24/464, loss: 1.4389357566833496 2023-01-24 01:50:12.743560: step: 26/464, loss: 0.9313511848449707 2023-01-24 01:50:13.461352: step: 28/464, loss: 2.294585943222046 2023-01-24 01:50:14.164450: step: 30/464, loss: 0.44173091650009155 2023-01-24 01:50:14.908095: step: 32/464, loss: 3.0902254581451416 2023-01-24 01:50:15.676995: step: 34/464, loss: 0.5559873580932617 2023-01-24 01:50:16.405484: step: 36/464, loss: 3.523479461669922 2023-01-24 01:50:17.261320: step: 38/464, loss: 1.3762128353118896 2023-01-24 01:50:18.010129: step: 40/464, loss: 0.8632673025131226 2023-01-24 01:50:18.799127: step: 42/464, loss: 0.735785722732544 2023-01-24 01:50:19.588702: step: 44/464, loss: 0.5903066992759705 2023-01-24 01:50:20.351457: step: 46/464, loss: 2.5188724994659424 2023-01-24 01:50:21.082683: step: 48/464, loss: 0.8308882117271423 2023-01-24 01:50:21.819040: step: 50/464, loss: 6.881199836730957 2023-01-24 01:50:22.559447: step: 52/464, loss: 2.8523025512695312 2023-01-24 01:50:23.276106: step: 54/464, loss: 2.872032880783081 2023-01-24 01:50:23.991328: step: 56/464, loss: 0.8633500337600708 2023-01-24 01:50:24.756270: step: 58/464, loss: 0.66408371925354 2023-01-24 01:50:25.522515: step: 60/464, loss: 1.057525396347046 2023-01-24 01:50:26.338742: step: 62/464, loss: 0.3248614966869354 2023-01-24 01:50:27.051695: step: 64/464, loss: 1.3190155029296875 2023-01-24 01:50:27.731471: step: 66/464, loss: 0.5487788915634155 2023-01-24 01:50:28.474420: step: 68/464, loss: 1.3203892707824707 2023-01-24 01:50:29.194524: step: 70/464, loss: 0.4012579023838043 2023-01-24 01:50:30.030568: step: 72/464, loss: 1.2286897897720337 2023-01-24 01:50:30.736214: step: 74/464, loss: 2.457897663116455 2023-01-24 01:50:31.478165: step: 76/464, loss: 3.04465913772583 2023-01-24 01:50:32.262465: step: 78/464, loss: 4.860772609710693 2023-01-24 01:50:33.001178: step: 80/464, loss: 1.2226388454437256 2023-01-24 01:50:33.757927: step: 82/464, loss: 0.30012208223342896 2023-01-24 01:50:34.501730: step: 84/464, loss: 1.0310789346694946 2023-01-24 01:50:35.260994: step: 86/464, loss: 3.1720633506774902 2023-01-24 01:50:36.009713: step: 88/464, loss: 1.8114345073699951 2023-01-24 01:50:36.709398: step: 90/464, loss: 1.5391186475753784 2023-01-24 01:50:37.383099: step: 92/464, loss: 0.7284096479415894 2023-01-24 01:50:38.207271: step: 94/464, loss: 6.220029830932617 2023-01-24 01:50:38.994856: step: 96/464, loss: 0.8167260885238647 2023-01-24 01:50:39.787181: step: 98/464, loss: 1.1842533349990845 2023-01-24 01:50:40.611404: step: 100/464, loss: 6.020644664764404 2023-01-24 01:50:41.299858: step: 102/464, loss: 0.5480703711509705 2023-01-24 01:50:42.057800: step: 104/464, loss: 1.0576692819595337 2023-01-24 01:50:42.769167: step: 106/464, loss: 1.0002667903900146 2023-01-24 01:50:43.507952: step: 108/464, loss: 1.7153160572052002 2023-01-24 01:50:44.231743: step: 110/464, loss: 0.47342249751091003 2023-01-24 01:50:44.996703: step: 112/464, loss: 3.2831356525421143 2023-01-24 01:50:45.710971: step: 114/464, loss: 0.616359293460846 2023-01-24 01:50:46.462962: step: 116/464, loss: 7.425902843475342 2023-01-24 01:50:47.272823: step: 118/464, loss: 0.6359020471572876 2023-01-24 01:50:48.078490: step: 120/464, loss: 0.44187021255493164 2023-01-24 01:50:48.848238: step: 122/464, loss: 1.5610920190811157 2023-01-24 01:50:49.575598: step: 124/464, loss: 1.3820793628692627 2023-01-24 01:50:50.437110: step: 126/464, loss: 1.758582592010498 2023-01-24 01:50:51.218272: step: 128/464, loss: 0.8005374670028687 2023-01-24 01:50:52.077024: step: 130/464, loss: 4.700216293334961 2023-01-24 01:50:52.896614: step: 132/464, loss: 1.1407063007354736 2023-01-24 01:50:53.635781: step: 134/464, loss: 0.48220837116241455 2023-01-24 01:50:54.414817: step: 136/464, loss: 2.5536394119262695 2023-01-24 01:50:55.192753: step: 138/464, loss: 2.0750999450683594 2023-01-24 01:50:55.901260: step: 140/464, loss: 2.2614126205444336 2023-01-24 01:50:56.686931: step: 142/464, loss: 1.667802095413208 2023-01-24 01:50:57.446415: step: 144/464, loss: 1.616926908493042 2023-01-24 01:50:58.228691: step: 146/464, loss: 0.33585309982299805 2023-01-24 01:50:58.958294: step: 148/464, loss: 0.7217862010002136 2023-01-24 01:50:59.676837: step: 150/464, loss: 0.8636781573295593 2023-01-24 01:51:00.454659: step: 152/464, loss: 0.3174082636833191 2023-01-24 01:51:01.190094: step: 154/464, loss: 4.170505523681641 2023-01-24 01:51:01.931470: step: 156/464, loss: 5.086087226867676 2023-01-24 01:51:02.649966: step: 158/464, loss: 2.6408884525299072 2023-01-24 01:51:03.384670: step: 160/464, loss: 9.706635475158691 2023-01-24 01:51:04.129909: step: 162/464, loss: 0.5258678793907166 2023-01-24 01:51:04.821704: step: 164/464, loss: 1.1015256643295288 2023-01-24 01:51:05.579048: step: 166/464, loss: 0.4202771484851837 2023-01-24 01:51:06.360236: step: 168/464, loss: 0.8112895488739014 2023-01-24 01:51:07.061824: step: 170/464, loss: 0.44624030590057373 2023-01-24 01:51:07.746984: step: 172/464, loss: 0.385553240776062 2023-01-24 01:51:08.648777: step: 174/464, loss: 1.3247578144073486 2023-01-24 01:51:09.425087: step: 176/464, loss: 1.6399046182632446 2023-01-24 01:51:10.151355: step: 178/464, loss: 0.32270684838294983 2023-01-24 01:51:11.006778: step: 180/464, loss: 0.5270733833312988 2023-01-24 01:51:11.823385: step: 182/464, loss: 0.6382239460945129 2023-01-24 01:51:12.607546: step: 184/464, loss: 0.5134657621383667 2023-01-24 01:51:13.428381: step: 186/464, loss: 1.0302172899246216 2023-01-24 01:51:14.149366: step: 188/464, loss: 0.4852708578109741 2023-01-24 01:51:14.925228: step: 190/464, loss: 6.4960784912109375 2023-01-24 01:51:15.652578: step: 192/464, loss: 0.7964613437652588 2023-01-24 01:51:16.421784: step: 194/464, loss: 0.3542407155036926 2023-01-24 01:51:17.121372: step: 196/464, loss: 3.117703437805176 2023-01-24 01:51:17.849152: step: 198/464, loss: 0.8800047636032104 2023-01-24 01:51:18.659170: step: 200/464, loss: 0.6491445899009705 2023-01-24 01:51:19.407169: step: 202/464, loss: 0.549182116985321 2023-01-24 01:51:20.132992: step: 204/464, loss: 1.4039323329925537 2023-01-24 01:51:20.889816: step: 206/464, loss: 0.6293597221374512 2023-01-24 01:51:21.650591: step: 208/464, loss: 3.826097249984741 2023-01-24 01:51:22.577572: step: 210/464, loss: 0.357878178358078 2023-01-24 01:51:23.327765: step: 212/464, loss: 0.6146247982978821 2023-01-24 01:51:24.058422: step: 214/464, loss: 0.48976725339889526 2023-01-24 01:51:24.920486: step: 216/464, loss: 0.2621183395385742 2023-01-24 01:51:25.752211: step: 218/464, loss: 0.9402350187301636 2023-01-24 01:51:26.555319: step: 220/464, loss: 2.0679545402526855 2023-01-24 01:51:27.224723: step: 222/464, loss: 2.71578049659729 2023-01-24 01:51:27.989332: step: 224/464, loss: 1.4649124145507812 2023-01-24 01:51:28.804660: step: 226/464, loss: 1.0338512659072876 2023-01-24 01:51:29.586848: step: 228/464, loss: 0.4438144564628601 2023-01-24 01:51:30.326339: step: 230/464, loss: 0.5852053761482239 2023-01-24 01:51:31.135960: step: 232/464, loss: 0.9666265249252319 2023-01-24 01:51:31.880194: step: 234/464, loss: 0.9432559609413147 2023-01-24 01:51:32.688382: step: 236/464, loss: 0.6843967437744141 2023-01-24 01:51:33.419597: step: 238/464, loss: 2.344256639480591 2023-01-24 01:51:34.194448: step: 240/464, loss: 1.774417757987976 2023-01-24 01:51:34.896814: step: 242/464, loss: 1.28359854221344 2023-01-24 01:51:35.696062: step: 244/464, loss: 1.9311877489089966 2023-01-24 01:51:36.420843: step: 246/464, loss: 1.1652761697769165 2023-01-24 01:51:37.199502: step: 248/464, loss: 1.9170876741409302 2023-01-24 01:51:37.970700: step: 250/464, loss: 0.16579344868659973 2023-01-24 01:51:38.795603: step: 252/464, loss: 0.22092676162719727 2023-01-24 01:51:39.557902: step: 254/464, loss: 0.44775503873825073 2023-01-24 01:51:40.363274: step: 256/464, loss: 0.511645495891571 2023-01-24 01:51:41.112477: step: 258/464, loss: 1.7976717948913574 2023-01-24 01:51:41.950323: step: 260/464, loss: 1.1174240112304688 2023-01-24 01:51:42.672598: step: 262/464, loss: 0.29902729392051697 2023-01-24 01:51:43.419172: step: 264/464, loss: 1.1252002716064453 2023-01-24 01:51:44.175173: step: 266/464, loss: 1.2579469680786133 2023-01-24 01:51:44.925505: step: 268/464, loss: 1.7518572807312012 2023-01-24 01:51:45.723125: step: 270/464, loss: 5.4651265144348145 2023-01-24 01:51:46.494170: step: 272/464, loss: 1.3823081254959106 2023-01-24 01:51:47.220060: step: 274/464, loss: 0.7875335216522217 2023-01-24 01:51:48.000970: step: 276/464, loss: 0.34622815251350403 2023-01-24 01:51:48.767076: step: 278/464, loss: 0.7284860014915466 2023-01-24 01:51:49.543541: step: 280/464, loss: 0.7788302302360535 2023-01-24 01:51:50.252425: step: 282/464, loss: 0.44866272807121277 2023-01-24 01:51:51.051780: step: 284/464, loss: 3.9130313396453857 2023-01-24 01:51:51.901625: step: 286/464, loss: 0.5465903282165527 2023-01-24 01:51:52.793533: step: 288/464, loss: 1.2728848457336426 2023-01-24 01:51:53.541641: step: 290/464, loss: 3.584545612335205 2023-01-24 01:51:54.287588: step: 292/464, loss: 0.26818153262138367 2023-01-24 01:51:55.049200: step: 294/464, loss: 0.5165697336196899 2023-01-24 01:51:55.872359: step: 296/464, loss: 0.5162287950515747 2023-01-24 01:51:56.605915: step: 298/464, loss: 0.7398544549942017 2023-01-24 01:51:57.359497: step: 300/464, loss: 1.1116465330123901 2023-01-24 01:51:58.038893: step: 302/464, loss: 1.6432995796203613 2023-01-24 01:51:58.855146: step: 304/464, loss: 2.452953338623047 2023-01-24 01:51:59.702841: step: 306/464, loss: 0.43862515687942505 2023-01-24 01:52:00.494230: step: 308/464, loss: 0.7555627226829529 2023-01-24 01:52:01.320062: step: 310/464, loss: 10.338842391967773 2023-01-24 01:52:02.070877: step: 312/464, loss: 1.2393325567245483 2023-01-24 01:52:02.906550: step: 314/464, loss: 3.2715816497802734 2023-01-24 01:52:03.620774: step: 316/464, loss: 2.1629157066345215 2023-01-24 01:52:04.396708: step: 318/464, loss: 1.165483832359314 2023-01-24 01:52:05.145864: step: 320/464, loss: 1.7454993724822998 2023-01-24 01:52:05.894341: step: 322/464, loss: 0.9971399307250977 2023-01-24 01:52:06.702723: step: 324/464, loss: 0.9380273818969727 2023-01-24 01:52:07.545487: step: 326/464, loss: 2.0600996017456055 2023-01-24 01:52:08.267744: step: 328/464, loss: 0.7213514447212219 2023-01-24 01:52:09.055632: step: 330/464, loss: 0.18026822805404663 2023-01-24 01:52:09.805460: step: 332/464, loss: 0.91179358959198 2023-01-24 01:52:10.526608: step: 334/464, loss: 0.45584264397621155 2023-01-24 01:52:11.263925: step: 336/464, loss: 1.7639589309692383 2023-01-24 01:52:12.067955: step: 338/464, loss: 0.46160751581192017 2023-01-24 01:52:12.855041: step: 340/464, loss: 0.4666309058666229 2023-01-24 01:52:13.718886: step: 342/464, loss: 1.0047087669372559 2023-01-24 01:52:14.564892: step: 344/464, loss: 3.103390693664551 2023-01-24 01:52:15.413438: step: 346/464, loss: 1.8842649459838867 2023-01-24 01:52:16.146313: step: 348/464, loss: 1.3954133987426758 2023-01-24 01:52:16.875310: step: 350/464, loss: 0.9122724533081055 2023-01-24 01:52:17.684043: step: 352/464, loss: 1.214568853378296 2023-01-24 01:52:18.386709: step: 354/464, loss: 0.7801638245582581 2023-01-24 01:52:19.276856: step: 356/464, loss: 0.8779234290122986 2023-01-24 01:52:20.054779: step: 358/464, loss: 0.21076428890228271 2023-01-24 01:52:20.815331: step: 360/464, loss: 0.5160536170005798 2023-01-24 01:52:21.507203: step: 362/464, loss: 0.2467440962791443 2023-01-24 01:52:22.300725: step: 364/464, loss: 3.439088821411133 2023-01-24 01:52:23.041128: step: 366/464, loss: 1.7350010871887207 2023-01-24 01:52:23.732798: step: 368/464, loss: 1.040642499923706 2023-01-24 01:52:24.507087: step: 370/464, loss: 2.3546676635742188 2023-01-24 01:52:25.283812: step: 372/464, loss: 1.243515968322754 2023-01-24 01:52:25.993725: step: 374/464, loss: 1.7401405572891235 2023-01-24 01:52:26.702326: step: 376/464, loss: 0.2650654911994934 2023-01-24 01:52:27.517185: step: 378/464, loss: 5.185362339019775 2023-01-24 01:52:28.219060: step: 380/464, loss: 0.27703002095222473 2023-01-24 01:52:28.981832: step: 382/464, loss: 1.7378602027893066 2023-01-24 01:52:29.720977: step: 384/464, loss: 0.3635416626930237 2023-01-24 01:52:30.521151: step: 386/464, loss: 0.9020249843597412 2023-01-24 01:52:31.241126: step: 388/464, loss: 1.87539803981781 2023-01-24 01:52:31.958384: step: 390/464, loss: 2.89346981048584 2023-01-24 01:52:32.698120: step: 392/464, loss: 4.191920757293701 2023-01-24 01:52:33.449213: step: 394/464, loss: 4.365891456604004 2023-01-24 01:52:34.175313: step: 396/464, loss: 0.9762228727340698 2023-01-24 01:52:34.903128: step: 398/464, loss: 1.7938779592514038 2023-01-24 01:52:35.688405: step: 400/464, loss: 0.5622841119766235 2023-01-24 01:52:36.478472: step: 402/464, loss: 1.528921365737915 2023-01-24 01:52:37.283918: step: 404/464, loss: 0.5592197179794312 2023-01-24 01:52:38.007468: step: 406/464, loss: 0.3098835349082947 2023-01-24 01:52:38.800156: step: 408/464, loss: 1.2348670959472656 2023-01-24 01:52:39.482157: step: 410/464, loss: 2.2563488483428955 2023-01-24 01:52:40.230599: step: 412/464, loss: 0.5252102017402649 2023-01-24 01:52:40.993059: step: 414/464, loss: 0.188716322183609 2023-01-24 01:52:41.874860: step: 416/464, loss: 0.5975791811943054 2023-01-24 01:52:42.704148: step: 418/464, loss: 0.9677351117134094 2023-01-24 01:52:43.405532: step: 420/464, loss: 1.548161268234253 2023-01-24 01:52:44.110359: step: 422/464, loss: 0.8674664497375488 2023-01-24 01:52:44.861546: step: 424/464, loss: 0.962982714176178 2023-01-24 01:52:45.595275: step: 426/464, loss: 1.0416945219039917 2023-01-24 01:52:46.466683: step: 428/464, loss: 1.573225975036621 2023-01-24 01:52:47.192032: step: 430/464, loss: 1.4529905319213867 2023-01-24 01:52:48.036557: step: 432/464, loss: 0.43926820158958435 2023-01-24 01:52:48.830718: step: 434/464, loss: 0.9773903489112854 2023-01-24 01:52:49.570687: step: 436/464, loss: 4.343841552734375 2023-01-24 01:52:50.293748: step: 438/464, loss: 9.071602821350098 2023-01-24 01:52:51.015800: step: 440/464, loss: 0.8627247214317322 2023-01-24 01:52:51.775005: step: 442/464, loss: 0.36518824100494385 2023-01-24 01:52:52.531925: step: 444/464, loss: 1.3917042016983032 2023-01-24 01:52:53.336601: step: 446/464, loss: 0.9935654401779175 2023-01-24 01:52:54.163765: step: 448/464, loss: 0.8858616948127747 2023-01-24 01:52:54.977007: step: 450/464, loss: 0.7505353093147278 2023-01-24 01:52:55.697968: step: 452/464, loss: 1.6640448570251465 2023-01-24 01:52:56.483280: step: 454/464, loss: 1.4120301008224487 2023-01-24 01:52:57.218110: step: 456/464, loss: 1.7707470655441284 2023-01-24 01:52:57.972787: step: 458/464, loss: 0.7131549715995789 2023-01-24 01:52:58.772633: step: 460/464, loss: 1.9891457557678223 2023-01-24 01:52:59.638371: step: 462/464, loss: 0.7496906518936157 2023-01-24 01:53:00.389420: step: 464/464, loss: 1.9933056831359863 2023-01-24 01:53:01.205382: step: 466/464, loss: 0.29916486144065857 2023-01-24 01:53:01.980970: step: 468/464, loss: 0.9164116382598877 2023-01-24 01:53:02.755667: step: 470/464, loss: 1.3954243659973145 2023-01-24 01:53:03.511909: step: 472/464, loss: 1.375968098640442 2023-01-24 01:53:04.184750: step: 474/464, loss: 0.18370366096496582 2023-01-24 01:53:05.028270: step: 476/464, loss: 0.6455228328704834 2023-01-24 01:53:05.797345: step: 478/464, loss: 5.674762725830078 2023-01-24 01:53:06.571018: step: 480/464, loss: 1.7659356594085693 2023-01-24 01:53:07.371122: step: 482/464, loss: 13.219393730163574 2023-01-24 01:53:08.070272: step: 484/464, loss: 3.6019363403320312 2023-01-24 01:53:08.831564: step: 486/464, loss: 1.4204580783843994 2023-01-24 01:53:09.555878: step: 488/464, loss: 0.5861679315567017 2023-01-24 01:53:10.296158: step: 490/464, loss: 1.4343019723892212 2023-01-24 01:53:11.114152: step: 492/464, loss: 0.4973408579826355 2023-01-24 01:53:11.872046: step: 494/464, loss: 0.45284712314605713 2023-01-24 01:53:12.566887: step: 496/464, loss: 0.6855869293212891 2023-01-24 01:53:13.314491: step: 498/464, loss: 0.5692172050476074 2023-01-24 01:53:14.039644: step: 500/464, loss: 0.7531363368034363 2023-01-24 01:53:14.801076: step: 502/464, loss: 1.243901252746582 2023-01-24 01:53:15.490421: step: 504/464, loss: 1.8031026124954224 2023-01-24 01:53:16.227329: step: 506/464, loss: 4.986914157867432 2023-01-24 01:53:17.010016: step: 508/464, loss: 2.2970335483551025 2023-01-24 01:53:17.709292: step: 510/464, loss: 1.328364372253418 2023-01-24 01:53:18.565280: step: 512/464, loss: 0.7237935066223145 2023-01-24 01:53:19.340071: step: 514/464, loss: 0.4380602240562439 2023-01-24 01:53:20.146563: step: 516/464, loss: 0.8923473954200745 2023-01-24 01:53:20.987960: step: 518/464, loss: 0.30256325006484985 2023-01-24 01:53:21.793427: step: 520/464, loss: 1.0971101522445679 2023-01-24 01:53:22.583425: step: 522/464, loss: 0.7510563135147095 2023-01-24 01:53:23.423086: step: 524/464, loss: 7.840592384338379 2023-01-24 01:53:24.110815: step: 526/464, loss: 0.3427618145942688 2023-01-24 01:53:24.907034: step: 528/464, loss: 0.6645991206169128 2023-01-24 01:53:25.633513: step: 530/464, loss: 3.503472328186035 2023-01-24 01:53:26.329027: step: 532/464, loss: 1.8764781951904297 2023-01-24 01:53:27.173058: step: 534/464, loss: 8.248307228088379 2023-01-24 01:53:27.954678: step: 536/464, loss: 1.3547332286834717 2023-01-24 01:53:28.766054: step: 538/464, loss: 2.041508436203003 2023-01-24 01:53:29.502525: step: 540/464, loss: 3.7953057289123535 2023-01-24 01:53:30.230325: step: 542/464, loss: 1.6800973415374756 2023-01-24 01:53:31.039361: step: 544/464, loss: 1.61313796043396 2023-01-24 01:53:31.764440: step: 546/464, loss: 1.4671369791030884 2023-01-24 01:53:32.535677: step: 548/464, loss: 1.2521719932556152 2023-01-24 01:53:33.351373: step: 550/464, loss: 5.39314079284668 2023-01-24 01:53:34.043770: step: 552/464, loss: 1.7202141284942627 2023-01-24 01:53:34.861952: step: 554/464, loss: 1.8110288381576538 2023-01-24 01:53:35.711280: step: 556/464, loss: 1.6474603414535522 2023-01-24 01:53:36.460017: step: 558/464, loss: 1.1023237705230713 2023-01-24 01:53:37.253176: step: 560/464, loss: 1.2070395946502686 2023-01-24 01:53:38.020616: step: 562/464, loss: 2.496333360671997 2023-01-24 01:53:38.768413: step: 564/464, loss: 1.7204372882843018 2023-01-24 01:53:39.512343: step: 566/464, loss: 1.6716899871826172 2023-01-24 01:53:40.262911: step: 568/464, loss: 1.0436874628067017 2023-01-24 01:53:41.017298: step: 570/464, loss: 1.597758412361145 2023-01-24 01:53:41.813954: step: 572/464, loss: 1.1227065324783325 2023-01-24 01:53:42.600398: step: 574/464, loss: 0.37439507246017456 2023-01-24 01:53:43.299622: step: 576/464, loss: 0.45976024866104126 2023-01-24 01:53:44.068734: step: 578/464, loss: 1.6979976892471313 2023-01-24 01:53:44.798260: step: 580/464, loss: 0.21175535023212433 2023-01-24 01:53:45.619145: step: 582/464, loss: 0.5808183550834656 2023-01-24 01:53:46.390468: step: 584/464, loss: 2.6092567443847656 2023-01-24 01:53:47.137578: step: 586/464, loss: 1.6025683879852295 2023-01-24 01:53:47.909595: step: 588/464, loss: 0.41113734245300293 2023-01-24 01:53:48.643905: step: 590/464, loss: 0.741346538066864 2023-01-24 01:53:49.432157: step: 592/464, loss: 1.8383212089538574 2023-01-24 01:53:50.157249: step: 594/464, loss: 2.8973169326782227 2023-01-24 01:53:50.931040: step: 596/464, loss: 2.1495635509490967 2023-01-24 01:53:51.722295: step: 598/464, loss: 3.4064736366271973 2023-01-24 01:53:52.461114: step: 600/464, loss: 1.1322810649871826 2023-01-24 01:53:53.349270: step: 602/464, loss: 2.7991814613342285 2023-01-24 01:53:54.065857: step: 604/464, loss: 0.3369024395942688 2023-01-24 01:53:54.846714: step: 606/464, loss: 1.2564321756362915 2023-01-24 01:53:55.527217: step: 608/464, loss: 2.30317759513855 2023-01-24 01:53:56.251298: step: 610/464, loss: 1.2888926267623901 2023-01-24 01:53:56.996084: step: 612/464, loss: 1.0882058143615723 2023-01-24 01:53:57.766672: step: 614/464, loss: 1.2446033954620361 2023-01-24 01:53:58.571081: step: 616/464, loss: 0.1993260234594345 2023-01-24 01:53:59.316592: step: 618/464, loss: 0.8716303110122681 2023-01-24 01:54:00.075048: step: 620/464, loss: 1.3918261528015137 2023-01-24 01:54:00.800972: step: 622/464, loss: 0.903435230255127 2023-01-24 01:54:01.588682: step: 624/464, loss: 0.8670368194580078 2023-01-24 01:54:02.323866: step: 626/464, loss: 0.6818031072616577 2023-01-24 01:54:03.073895: step: 628/464, loss: 0.5814501047134399 2023-01-24 01:54:03.843560: step: 630/464, loss: 2.889941453933716 2023-01-24 01:54:04.627459: step: 632/464, loss: 0.46588319540023804 2023-01-24 01:54:05.343613: step: 634/464, loss: 1.2020893096923828 2023-01-24 01:54:06.208216: step: 636/464, loss: 4.312961101531982 2023-01-24 01:54:06.998247: step: 638/464, loss: 2.40643310546875 2023-01-24 01:54:07.781508: step: 640/464, loss: 0.5957890152931213 2023-01-24 01:54:08.500425: step: 642/464, loss: 0.5673452615737915 2023-01-24 01:54:09.158532: step: 644/464, loss: 1.2440736293792725 2023-01-24 01:54:09.870355: step: 646/464, loss: 2.3645412921905518 2023-01-24 01:54:10.661903: step: 648/464, loss: 3.939936876296997 2023-01-24 01:54:11.418869: step: 650/464, loss: 1.5717930793762207 2023-01-24 01:54:12.180022: step: 652/464, loss: 0.369645893573761 2023-01-24 01:54:12.917477: step: 654/464, loss: 0.5531639456748962 2023-01-24 01:54:13.634123: step: 656/464, loss: 2.1650702953338623 2023-01-24 01:54:14.480977: step: 658/464, loss: 4.002811431884766 2023-01-24 01:54:15.202020: step: 660/464, loss: 0.5150967836380005 2023-01-24 01:54:15.868979: step: 662/464, loss: 0.8544698357582092 2023-01-24 01:54:16.612157: step: 664/464, loss: 1.0840424299240112 2023-01-24 01:54:17.395707: step: 666/464, loss: 0.2541203498840332 2023-01-24 01:54:18.195395: step: 668/464, loss: 2.227607488632202 2023-01-24 01:54:18.941532: step: 670/464, loss: 0.5744845867156982 2023-01-24 01:54:19.747920: step: 672/464, loss: 1.1762940883636475 2023-01-24 01:54:20.674349: step: 674/464, loss: 1.3462352752685547 2023-01-24 01:54:21.441268: step: 676/464, loss: 1.058624505996704 2023-01-24 01:54:22.223476: step: 678/464, loss: 0.7258554697036743 2023-01-24 01:54:22.993915: step: 680/464, loss: 0.5475314855575562 2023-01-24 01:54:23.727454: step: 682/464, loss: 1.2779431343078613 2023-01-24 01:54:24.528775: step: 684/464, loss: 1.897872805595398 2023-01-24 01:54:25.309004: step: 686/464, loss: 0.41958799958229065 2023-01-24 01:54:26.130650: step: 688/464, loss: 0.3179838955402374 2023-01-24 01:54:26.931865: step: 690/464, loss: 0.5948276519775391 2023-01-24 01:54:27.766828: step: 692/464, loss: 0.8153871297836304 2023-01-24 01:54:28.479376: step: 694/464, loss: 0.4472903311252594 2023-01-24 01:54:29.261368: step: 696/464, loss: 0.3059772849082947 2023-01-24 01:54:30.002385: step: 698/464, loss: 1.5691204071044922 2023-01-24 01:54:30.797318: step: 700/464, loss: 0.8117853403091431 2023-01-24 01:54:31.586647: step: 702/464, loss: 0.5508538484573364 2023-01-24 01:54:32.357927: step: 704/464, loss: 0.8707534670829773 2023-01-24 01:54:33.179243: step: 706/464, loss: 1.5053908824920654 2023-01-24 01:54:33.895040: step: 708/464, loss: 1.3240344524383545 2023-01-24 01:54:34.683193: step: 710/464, loss: 0.18909026682376862 2023-01-24 01:54:35.446204: step: 712/464, loss: 0.9368202090263367 2023-01-24 01:54:36.239958: step: 714/464, loss: 0.8139339685440063 2023-01-24 01:54:37.021867: step: 716/464, loss: 2.7701938152313232 2023-01-24 01:54:37.750790: step: 718/464, loss: 0.41402795910835266 2023-01-24 01:54:38.603825: step: 720/464, loss: 2.5215907096862793 2023-01-24 01:54:39.497698: step: 722/464, loss: 0.7189838290214539 2023-01-24 01:54:40.227768: step: 724/464, loss: 0.9132556915283203 2023-01-24 01:54:40.975315: step: 726/464, loss: 1.678982138633728 2023-01-24 01:54:41.842651: step: 728/464, loss: 1.4411935806274414 2023-01-24 01:54:42.589642: step: 730/464, loss: 0.815687358379364 2023-01-24 01:54:43.326897: step: 732/464, loss: 1.181318759918213 2023-01-24 01:54:44.027257: step: 734/464, loss: 2.313734531402588 2023-01-24 01:54:44.735141: step: 736/464, loss: 4.424801349639893 2023-01-24 01:54:45.457158: step: 738/464, loss: 2.7652111053466797 2023-01-24 01:54:46.186125: step: 740/464, loss: 1.9522521495819092 2023-01-24 01:54:46.972455: step: 742/464, loss: 1.2239620685577393 2023-01-24 01:54:47.774640: step: 744/464, loss: 1.2581905126571655 2023-01-24 01:54:48.532513: step: 746/464, loss: 1.5191899538040161 2023-01-24 01:54:49.263716: step: 748/464, loss: 2.230410099029541 2023-01-24 01:54:50.056412: step: 750/464, loss: 1.4481711387634277 2023-01-24 01:54:50.779204: step: 752/464, loss: 1.096886157989502 2023-01-24 01:54:51.556888: step: 754/464, loss: 0.3762080669403076 2023-01-24 01:54:52.264727: step: 756/464, loss: 1.5608004331588745 2023-01-24 01:54:53.042799: step: 758/464, loss: 1.5761467218399048 2023-01-24 01:54:53.822067: step: 760/464, loss: 0.257263720035553 2023-01-24 01:54:54.612472: step: 762/464, loss: 0.40274691581726074 2023-01-24 01:54:55.401507: step: 764/464, loss: 0.15530270338058472 2023-01-24 01:54:56.212989: step: 766/464, loss: 0.3401526212692261 2023-01-24 01:54:56.994756: step: 768/464, loss: 4.480698585510254 2023-01-24 01:54:57.777984: step: 770/464, loss: 2.8207991123199463 2023-01-24 01:54:58.574511: step: 772/464, loss: 0.4728001356124878 2023-01-24 01:54:59.269531: step: 774/464, loss: 3.435164451599121 2023-01-24 01:55:00.117419: step: 776/464, loss: 0.5584661364555359 2023-01-24 01:55:00.857478: step: 778/464, loss: 10.358461380004883 2023-01-24 01:55:01.652963: step: 780/464, loss: 1.0920209884643555 2023-01-24 01:55:02.422886: step: 782/464, loss: 5.505377769470215 2023-01-24 01:55:03.289117: step: 784/464, loss: 0.4454532265663147 2023-01-24 01:55:04.087573: step: 786/464, loss: 0.917724609375 2023-01-24 01:55:04.831985: step: 788/464, loss: 0.9360051155090332 2023-01-24 01:55:05.581152: step: 790/464, loss: 1.591188907623291 2023-01-24 01:55:06.339115: step: 792/464, loss: 0.5870780348777771 2023-01-24 01:55:07.090303: step: 794/464, loss: 0.6872538924217224 2023-01-24 01:55:07.789886: step: 796/464, loss: 0.4402296841144562 2023-01-24 01:55:08.533149: step: 798/464, loss: 1.5215715169906616 2023-01-24 01:55:09.442069: step: 800/464, loss: 1.1026206016540527 2023-01-24 01:55:10.216220: step: 802/464, loss: 0.8330729603767395 2023-01-24 01:55:11.219251: step: 804/464, loss: 0.5337406396865845 2023-01-24 01:55:11.974994: step: 806/464, loss: 1.0772619247436523 2023-01-24 01:55:12.773073: step: 808/464, loss: 0.3447778522968292 2023-01-24 01:55:13.543402: step: 810/464, loss: 1.0803074836730957 2023-01-24 01:55:14.408426: step: 812/464, loss: 3.355602741241455 2023-01-24 01:55:15.155328: step: 814/464, loss: 0.5117831230163574 2023-01-24 01:55:15.940533: step: 816/464, loss: 4.319705009460449 2023-01-24 01:55:16.672528: step: 818/464, loss: 1.5117748975753784 2023-01-24 01:55:17.426294: step: 820/464, loss: 2.1120705604553223 2023-01-24 01:55:18.177171: step: 822/464, loss: 0.9269815683364868 2023-01-24 01:55:18.902248: step: 824/464, loss: 0.7869834899902344 2023-01-24 01:55:19.628162: step: 826/464, loss: 2.005740165710449 2023-01-24 01:55:20.405419: step: 828/464, loss: 0.9282411336898804 2023-01-24 01:55:21.254236: step: 830/464, loss: 0.7959149479866028 2023-01-24 01:55:21.994246: step: 832/464, loss: 4.79236364364624 2023-01-24 01:55:22.727818: step: 834/464, loss: 0.7582805752754211 2023-01-24 01:55:23.484961: step: 836/464, loss: 0.9575939178466797 2023-01-24 01:55:24.241924: step: 838/464, loss: 2.441572666168213 2023-01-24 01:55:25.039792: step: 840/464, loss: 7.67648983001709 2023-01-24 01:55:25.800496: step: 842/464, loss: 0.6135707497596741 2023-01-24 01:55:26.575033: step: 844/464, loss: 0.34217751026153564 2023-01-24 01:55:27.307053: step: 846/464, loss: 1.6250519752502441 2023-01-24 01:55:28.169932: step: 848/464, loss: 5.8050079345703125 2023-01-24 01:55:28.933457: step: 850/464, loss: 0.6835314631462097 2023-01-24 01:55:29.659940: step: 852/464, loss: 0.7090028524398804 2023-01-24 01:55:30.441752: step: 854/464, loss: 0.9978798031806946 2023-01-24 01:55:31.209506: step: 856/464, loss: 0.6591278910636902 2023-01-24 01:55:32.046818: step: 858/464, loss: 0.9084625244140625 2023-01-24 01:55:32.866482: step: 860/464, loss: 0.6810509562492371 2023-01-24 01:55:33.559921: step: 862/464, loss: 0.6677621006965637 2023-01-24 01:55:34.393856: step: 864/464, loss: 1.1423194408416748 2023-01-24 01:55:35.194440: step: 866/464, loss: 4.478385925292969 2023-01-24 01:55:36.042300: step: 868/464, loss: 2.2984297275543213 2023-01-24 01:55:36.784835: step: 870/464, loss: 8.030241966247559 2023-01-24 01:55:37.557239: step: 872/464, loss: 0.6332724690437317 2023-01-24 01:55:38.413321: step: 874/464, loss: 11.14112377166748 2023-01-24 01:55:39.206062: step: 876/464, loss: 0.5625685453414917 2023-01-24 01:55:39.970750: step: 878/464, loss: 0.28894609212875366 2023-01-24 01:55:40.698575: step: 880/464, loss: 0.6808298826217651 2023-01-24 01:55:41.456267: step: 882/464, loss: 0.6325333118438721 2023-01-24 01:55:42.262670: step: 884/464, loss: 1.1005043983459473 2023-01-24 01:55:42.961619: step: 886/464, loss: 1.6734281778335571 2023-01-24 01:55:43.757462: step: 888/464, loss: 1.63663911819458 2023-01-24 01:55:44.528177: step: 890/464, loss: 0.4760796129703522 2023-01-24 01:55:45.275796: step: 892/464, loss: 4.4341254234313965 2023-01-24 01:55:45.959983: step: 894/464, loss: 2.1422908306121826 2023-01-24 01:55:46.746440: step: 896/464, loss: 3.9849419593811035 2023-01-24 01:55:47.508358: step: 898/464, loss: 1.9513940811157227 2023-01-24 01:55:48.187641: step: 900/464, loss: 2.256234645843506 2023-01-24 01:55:48.880459: step: 902/464, loss: 1.4129538536071777 2023-01-24 01:55:49.660755: step: 904/464, loss: 0.4391446113586426 2023-01-24 01:55:50.437876: step: 906/464, loss: 1.476486086845398 2023-01-24 01:55:51.132936: step: 908/464, loss: 1.112925410270691 2023-01-24 01:55:51.923874: step: 910/464, loss: 2.4584295749664307 2023-01-24 01:55:52.658587: step: 912/464, loss: 0.6789507865905762 2023-01-24 01:55:53.470030: step: 914/464, loss: 3.559232473373413 2023-01-24 01:55:54.259266: step: 916/464, loss: 0.7847874760627747 2023-01-24 01:55:55.118190: step: 918/464, loss: 0.9742597341537476 2023-01-24 01:55:55.916041: step: 920/464, loss: 1.0963793992996216 2023-01-24 01:55:56.744598: step: 922/464, loss: 0.8131226897239685 2023-01-24 01:55:57.451712: step: 924/464, loss: 1.1673150062561035 2023-01-24 01:55:58.243002: step: 926/464, loss: 1.7198338508605957 2023-01-24 01:55:58.968555: step: 928/464, loss: 1.1896001100540161 2023-01-24 01:55:59.634481: step: 930/464, loss: 0.19970275461673737 ================================================== Loss: 1.627 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30406624933354615, 'r': 0.24946077922638757, 'f1': 0.2740700980493438}, 'combined': 0.20194638803635856, 'epoch': 2} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.29459183041838544, 'r': 0.20772500862834872, 'f1': 0.24364737854152177}, 'combined': 0.15131784562052406, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2874021141479805, 'r': 0.24385633927707437, 'f1': 0.2638445638079821}, 'combined': 0.19441178385851313, 'epoch': 2} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.29405894880932776, 'r': 0.21008361659006322, 'f1': 0.24507737174541094}, 'combined': 0.15220594666293943, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31612442374595123, 'r': 0.25744981479310425, 'f1': 0.2837860171414594}, 'combined': 0.20910548631475956, 'epoch': 2} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.29958252192114637, 'r': 0.2133122098886269, 'f1': 0.24919191339523925}, 'combined': 0.1547612935823065, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.41346153846153844, 'r': 0.30714285714285716, 'f1': 0.3524590163934426}, 'combined': 0.2349726775956284, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.19, 'r': 0.20652173913043478, 'f1': 0.19791666666666669}, 'combined': 0.09895833333333334, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 2} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30406624933354615, 'r': 0.24946077922638757, 'f1': 0.2740700980493438}, 'combined': 0.20194638803635856, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.29459183041838544, 'r': 0.20772500862834872, 'f1': 0.24364737854152177}, 'combined': 0.15131784562052406, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.41346153846153844, 'r': 0.30714285714285716, 'f1': 0.3524590163934426}, 'combined': 0.2349726775956284, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2874021141479805, 'r': 0.24385633927707437, 'f1': 0.2638445638079821}, 'combined': 0.19441178385851313, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.29405894880932776, 'r': 0.21008361659006322, 'f1': 0.24507737174541094}, 'combined': 0.15220594666293943, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.19, 'r': 0.20652173913043478, 'f1': 0.19791666666666669}, 'combined': 0.09895833333333334, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31612442374595123, 'r': 0.25744981479310425, 'f1': 0.2837860171414594}, 'combined': 0.20910548631475956, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.29958252192114637, 'r': 0.2133122098886269, 'f1': 0.24919191339523925}, 'combined': 0.1547612935823065, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:59:13.214255: step: 2/464, loss: 0.5728092789649963 2023-01-24 01:59:13.913467: step: 4/464, loss: 1.6666297912597656 2023-01-24 01:59:14.735179: step: 6/464, loss: 0.9216357469558716 2023-01-24 01:59:15.548237: step: 8/464, loss: 0.5197516679763794 2023-01-24 01:59:16.301029: step: 10/464, loss: 1.2050963640213013 2023-01-24 01:59:17.071398: step: 12/464, loss: 0.5885632038116455 2023-01-24 01:59:17.871890: step: 14/464, loss: 3.2682347297668457 2023-01-24 01:59:18.599445: step: 16/464, loss: 0.785646378993988 2023-01-24 01:59:19.427786: step: 18/464, loss: 1.124619483947754 2023-01-24 01:59:20.308429: step: 20/464, loss: 1.2299696207046509 2023-01-24 01:59:21.100972: step: 22/464, loss: 0.6963951587677002 2023-01-24 01:59:21.911061: step: 24/464, loss: 0.9705716967582703 2023-01-24 01:59:22.724554: step: 26/464, loss: 0.5654642581939697 2023-01-24 01:59:23.477749: step: 28/464, loss: 0.9723125100135803 2023-01-24 01:59:24.265094: step: 30/464, loss: 1.4342992305755615 2023-01-24 01:59:25.004475: step: 32/464, loss: 1.0085419416427612 2023-01-24 01:59:25.762625: step: 34/464, loss: 0.2719883620738983 2023-01-24 01:59:26.486627: step: 36/464, loss: 0.6373543739318848 2023-01-24 01:59:27.241114: step: 38/464, loss: 1.7300281524658203 2023-01-24 01:59:28.061371: step: 40/464, loss: 0.2606988251209259 2023-01-24 01:59:28.904135: step: 42/464, loss: 0.26028841733932495 2023-01-24 01:59:29.680433: step: 44/464, loss: 0.4146405756473541 2023-01-24 01:59:30.445377: step: 46/464, loss: 0.3497615456581116 2023-01-24 01:59:31.291463: step: 48/464, loss: 0.6876152753829956 2023-01-24 01:59:32.022773: step: 50/464, loss: 0.3188195824623108 2023-01-24 01:59:32.804727: step: 52/464, loss: 0.5253841280937195 2023-01-24 01:59:33.568060: step: 54/464, loss: 0.6125118136405945 2023-01-24 01:59:34.334371: step: 56/464, loss: 0.7966494560241699 2023-01-24 01:59:35.131558: step: 58/464, loss: 2.040208339691162 2023-01-24 01:59:35.964722: step: 60/464, loss: 1.6140918731689453 2023-01-24 01:59:36.788018: step: 62/464, loss: 1.486433982849121 2023-01-24 01:59:37.583906: step: 64/464, loss: 0.5457777380943298 2023-01-24 01:59:38.352831: step: 66/464, loss: 0.5147194266319275 2023-01-24 01:59:39.040951: step: 68/464, loss: 0.9083465933799744 2023-01-24 01:59:39.776000: step: 70/464, loss: 0.44538620114326477 2023-01-24 01:59:40.533459: step: 72/464, loss: 0.8497267961502075 2023-01-24 01:59:41.372642: step: 74/464, loss: 0.9923356175422668 2023-01-24 01:59:42.176201: step: 76/464, loss: 0.22544407844543457 2023-01-24 01:59:42.885858: step: 78/464, loss: 0.7999193668365479 2023-01-24 01:59:43.556963: step: 80/464, loss: 1.3178879022598267 2023-01-24 01:59:44.358184: step: 82/464, loss: 1.2020602226257324 2023-01-24 01:59:45.106842: step: 84/464, loss: 0.2553924322128296 2023-01-24 01:59:45.850271: step: 86/464, loss: 0.285842627286911 2023-01-24 01:59:46.638444: step: 88/464, loss: 0.6283391714096069 2023-01-24 01:59:47.339468: step: 90/464, loss: 1.5153621435165405 2023-01-24 01:59:48.121700: step: 92/464, loss: 0.7195795178413391 2023-01-24 01:59:48.873805: step: 94/464, loss: 0.4200820326805115 2023-01-24 01:59:49.668086: step: 96/464, loss: 0.575192391872406 2023-01-24 01:59:50.541366: step: 98/464, loss: 3.373455762863159 2023-01-24 01:59:51.242113: step: 100/464, loss: 1.1632630825042725 2023-01-24 01:59:51.963166: step: 102/464, loss: 0.7568671703338623 2023-01-24 01:59:52.685079: step: 104/464, loss: 0.4086417257785797 2023-01-24 01:59:53.457699: step: 106/464, loss: 2.0888540744781494 2023-01-24 01:59:54.187554: step: 108/464, loss: 0.4860653281211853 2023-01-24 01:59:54.999784: step: 110/464, loss: 2.1116738319396973 2023-01-24 01:59:55.780058: step: 112/464, loss: 1.112531065940857 2023-01-24 01:59:56.567531: step: 114/464, loss: 2.252843141555786 2023-01-24 01:59:57.355619: step: 116/464, loss: 0.5225369930267334 2023-01-24 01:59:58.119748: step: 118/464, loss: 1.0584449768066406 2023-01-24 01:59:58.874219: step: 120/464, loss: 6.0835371017456055 2023-01-24 01:59:59.704981: step: 122/464, loss: 2.733490467071533 2023-01-24 02:00:00.473330: step: 124/464, loss: 1.4847700595855713 2023-01-24 02:00:01.309504: step: 126/464, loss: 2.487853527069092 2023-01-24 02:00:02.149487: step: 128/464, loss: 1.2639483213424683 2023-01-24 02:00:02.926013: step: 130/464, loss: 1.7387046813964844 2023-01-24 02:00:03.673499: step: 132/464, loss: 1.5289192199707031 2023-01-24 02:00:04.439885: step: 134/464, loss: 1.3074873685836792 2023-01-24 02:00:05.167063: step: 136/464, loss: 0.5716285109519958 2023-01-24 02:00:05.907127: step: 138/464, loss: 0.24294021725654602 2023-01-24 02:00:06.557949: step: 140/464, loss: 1.5488613843917847 2023-01-24 02:00:07.331791: step: 142/464, loss: 1.200225591659546 2023-01-24 02:00:08.055650: step: 144/464, loss: 1.0220304727554321 2023-01-24 02:00:08.804769: step: 146/464, loss: 0.27229607105255127 2023-01-24 02:00:09.565414: step: 148/464, loss: 1.827118158340454 2023-01-24 02:00:10.378628: step: 150/464, loss: 0.7478753924369812 2023-01-24 02:00:11.207239: step: 152/464, loss: 1.1500465869903564 2023-01-24 02:00:11.970465: step: 154/464, loss: 0.29159173369407654 2023-01-24 02:00:12.809588: step: 156/464, loss: 2.232395648956299 2023-01-24 02:00:13.547116: step: 158/464, loss: 0.5495754480361938 2023-01-24 02:00:14.320829: step: 160/464, loss: 1.171030879020691 2023-01-24 02:00:15.118181: step: 162/464, loss: 3.065204620361328 2023-01-24 02:00:15.839357: step: 164/464, loss: 1.0704265832901 2023-01-24 02:00:16.647093: step: 166/464, loss: 1.6894574165344238 2023-01-24 02:00:17.417569: step: 168/464, loss: 0.600529134273529 2023-01-24 02:00:18.193012: step: 170/464, loss: 6.212244510650635 2023-01-24 02:00:18.970567: step: 172/464, loss: 0.6440250277519226 2023-01-24 02:00:19.726123: step: 174/464, loss: 1.3235870599746704 2023-01-24 02:00:20.454420: step: 176/464, loss: 0.802613377571106 2023-01-24 02:00:21.264705: step: 178/464, loss: 1.1508985757827759 2023-01-24 02:00:22.033236: step: 180/464, loss: 0.834412693977356 2023-01-24 02:00:22.827962: step: 182/464, loss: 1.041805624961853 2023-01-24 02:00:23.548263: step: 184/464, loss: 0.36926427483558655 2023-01-24 02:00:24.265604: step: 186/464, loss: 2.562335968017578 2023-01-24 02:00:25.067303: step: 188/464, loss: 2.016711711883545 2023-01-24 02:00:25.855424: step: 190/464, loss: 1.2292652130126953 2023-01-24 02:00:26.565041: step: 192/464, loss: 0.31911700963974 2023-01-24 02:00:27.289624: step: 194/464, loss: 1.648099422454834 2023-01-24 02:00:28.041858: step: 196/464, loss: 0.5986658334732056 2023-01-24 02:00:28.750265: step: 198/464, loss: 1.1526308059692383 2023-01-24 02:00:29.529124: step: 200/464, loss: 1.5631656646728516 2023-01-24 02:00:30.410711: step: 202/464, loss: 0.9817005395889282 2023-01-24 02:00:31.161690: step: 204/464, loss: 0.5580400228500366 2023-01-24 02:00:31.890943: step: 206/464, loss: 0.6199046969413757 2023-01-24 02:00:32.542715: step: 208/464, loss: 0.47845181822776794 2023-01-24 02:00:33.242627: step: 210/464, loss: 0.6486523747444153 2023-01-24 02:00:33.953152: step: 212/464, loss: 1.9843801259994507 2023-01-24 02:00:34.722052: step: 214/464, loss: 0.5011245608329773 2023-01-24 02:00:35.366756: step: 216/464, loss: 0.6039958000183105 2023-01-24 02:00:36.074431: step: 218/464, loss: 0.7514966726303101 2023-01-24 02:00:36.796829: step: 220/464, loss: 0.9103838801383972 2023-01-24 02:00:37.557918: step: 222/464, loss: 0.17597933113574982 2023-01-24 02:00:38.303841: step: 224/464, loss: 1.262900710105896 2023-01-24 02:00:38.945252: step: 226/464, loss: 0.26214057207107544 2023-01-24 02:00:39.691549: step: 228/464, loss: 0.3917402923107147 2023-01-24 02:00:40.521434: step: 230/464, loss: 1.3757576942443848 2023-01-24 02:00:41.297147: step: 232/464, loss: 0.43058058619499207 2023-01-24 02:00:42.121831: step: 234/464, loss: 1.0661262273788452 2023-01-24 02:00:42.914828: step: 236/464, loss: 0.6610551476478577 2023-01-24 02:00:43.705606: step: 238/464, loss: 5.103069305419922 2023-01-24 02:00:44.459890: step: 240/464, loss: 1.5642491579055786 2023-01-24 02:00:45.291139: step: 242/464, loss: 3.216989040374756 2023-01-24 02:00:46.010597: step: 244/464, loss: 1.0435258150100708 2023-01-24 02:00:46.843087: step: 246/464, loss: 0.7400312423706055 2023-01-24 02:00:47.637457: step: 248/464, loss: 1.1300452947616577 2023-01-24 02:00:48.423412: step: 250/464, loss: 3.0344204902648926 2023-01-24 02:00:49.163207: step: 252/464, loss: 0.28440696001052856 2023-01-24 02:00:49.870616: step: 254/464, loss: 2.7709171772003174 2023-01-24 02:00:50.607951: step: 256/464, loss: 1.6949918270111084 2023-01-24 02:00:51.317200: step: 258/464, loss: 0.7783820033073425 2023-01-24 02:00:52.110854: step: 260/464, loss: 1.269107699394226 2023-01-24 02:00:52.892240: step: 262/464, loss: 0.2354559451341629 2023-01-24 02:00:53.605661: step: 264/464, loss: 0.3233901858329773 2023-01-24 02:00:54.399142: step: 266/464, loss: 6.524602890014648 2023-01-24 02:00:55.212971: step: 268/464, loss: 1.039006233215332 2023-01-24 02:00:55.935835: step: 270/464, loss: 0.36432382464408875 2023-01-24 02:00:56.809560: step: 272/464, loss: 0.6748474836349487 2023-01-24 02:00:57.561204: step: 274/464, loss: 2.2763798236846924 2023-01-24 02:00:58.313216: step: 276/464, loss: 0.3138844072818756 2023-01-24 02:00:59.054569: step: 278/464, loss: 1.578597068786621 2023-01-24 02:00:59.870115: step: 280/464, loss: 0.9769968390464783 2023-01-24 02:01:00.634551: step: 282/464, loss: 1.1653294563293457 2023-01-24 02:01:01.557623: step: 284/464, loss: 1.2490097284317017 2023-01-24 02:01:02.351937: step: 286/464, loss: 1.5943344831466675 2023-01-24 02:01:03.142963: step: 288/464, loss: 0.7358044981956482 2023-01-24 02:01:03.940409: step: 290/464, loss: 0.6281088590621948 2023-01-24 02:01:04.720411: step: 292/464, loss: 0.30778172612190247 2023-01-24 02:01:05.586406: step: 294/464, loss: 1.1002516746520996 2023-01-24 02:01:06.297008: step: 296/464, loss: 0.5382761359214783 2023-01-24 02:01:07.094377: step: 298/464, loss: 1.0521222352981567 2023-01-24 02:01:07.799332: step: 300/464, loss: 0.6565387845039368 2023-01-24 02:01:08.553263: step: 302/464, loss: 0.42484602332115173 2023-01-24 02:01:09.334324: step: 304/464, loss: 1.4506440162658691 2023-01-24 02:01:10.150440: step: 306/464, loss: 0.7423726320266724 2023-01-24 02:01:10.981594: step: 308/464, loss: 0.9235724806785583 2023-01-24 02:01:11.756067: step: 310/464, loss: 0.973768949508667 2023-01-24 02:01:12.541893: step: 312/464, loss: 5.143934726715088 2023-01-24 02:01:13.286117: step: 314/464, loss: 0.5139178037643433 2023-01-24 02:01:13.991207: step: 316/464, loss: 0.7528850436210632 2023-01-24 02:01:14.711709: step: 318/464, loss: 1.8455528020858765 2023-01-24 02:01:15.429773: step: 320/464, loss: 0.9531939029693604 2023-01-24 02:01:16.132189: step: 322/464, loss: 2.141551971435547 2023-01-24 02:01:16.925645: step: 324/464, loss: 1.0747900009155273 2023-01-24 02:01:17.773844: step: 326/464, loss: 1.205752968788147 2023-01-24 02:01:18.601078: step: 328/464, loss: 1.5321749448776245 2023-01-24 02:01:19.383774: step: 330/464, loss: 1.215419054031372 2023-01-24 02:01:20.136244: step: 332/464, loss: 0.4505934417247772 2023-01-24 02:01:20.869132: step: 334/464, loss: 0.421829491853714 2023-01-24 02:01:21.631640: step: 336/464, loss: 0.3405742645263672 2023-01-24 02:01:22.319819: step: 338/464, loss: 0.27169865369796753 2023-01-24 02:01:22.943975: step: 340/464, loss: 1.4433740377426147 2023-01-24 02:01:23.729749: step: 342/464, loss: 0.1989186704158783 2023-01-24 02:01:24.582756: step: 344/464, loss: 3.2341744899749756 2023-01-24 02:01:25.303569: step: 346/464, loss: 1.6050325632095337 2023-01-24 02:01:26.034304: step: 348/464, loss: 2.6557462215423584 2023-01-24 02:01:26.945454: step: 350/464, loss: 0.30212539434432983 2023-01-24 02:01:27.680225: step: 352/464, loss: 0.28420719504356384 2023-01-24 02:01:28.468286: step: 354/464, loss: 0.18610507249832153 2023-01-24 02:01:29.311406: step: 356/464, loss: 1.3019359111785889 2023-01-24 02:01:30.084814: step: 358/464, loss: 1.315913200378418 2023-01-24 02:01:30.769977: step: 360/464, loss: 1.4917500019073486 2023-01-24 02:01:31.605080: step: 362/464, loss: 0.5647187829017639 2023-01-24 02:01:32.361758: step: 364/464, loss: 0.34709829092025757 2023-01-24 02:01:33.155631: step: 366/464, loss: 1.1877985000610352 2023-01-24 02:01:33.879367: step: 368/464, loss: 0.29472312331199646 2023-01-24 02:01:34.651579: step: 370/464, loss: 0.6145370006561279 2023-01-24 02:01:35.404265: step: 372/464, loss: 2.30469012260437 2023-01-24 02:01:36.191923: step: 374/464, loss: 1.8337349891662598 2023-01-24 02:01:36.977656: step: 376/464, loss: 0.39600124955177307 2023-01-24 02:01:37.808350: step: 378/464, loss: 0.4111950695514679 2023-01-24 02:01:38.524788: step: 380/464, loss: 2.2897562980651855 2023-01-24 02:01:39.402785: step: 382/464, loss: 0.4143930673599243 2023-01-24 02:01:40.194170: step: 384/464, loss: 1.055375099182129 2023-01-24 02:01:41.061941: step: 386/464, loss: 0.27740585803985596 2023-01-24 02:01:41.877768: step: 388/464, loss: 0.8603242635726929 2023-01-24 02:01:42.625565: step: 390/464, loss: 0.9055214524269104 2023-01-24 02:01:43.373416: step: 392/464, loss: 0.43781185150146484 2023-01-24 02:01:44.322224: step: 394/464, loss: 1.1656153202056885 2023-01-24 02:01:45.027392: step: 396/464, loss: 0.13810645043849945 2023-01-24 02:01:45.724402: step: 398/464, loss: 1.0912761688232422 2023-01-24 02:01:46.517007: step: 400/464, loss: 0.5084050297737122 2023-01-24 02:01:47.323294: step: 402/464, loss: 3.217175006866455 2023-01-24 02:01:48.046499: step: 404/464, loss: 1.1894950866699219 2023-01-24 02:01:48.842975: step: 406/464, loss: 0.6266841888427734 2023-01-24 02:01:49.555831: step: 408/464, loss: 0.862480640411377 2023-01-24 02:01:50.273881: step: 410/464, loss: 0.3887926936149597 2023-01-24 02:01:51.103842: step: 412/464, loss: 1.5715738534927368 2023-01-24 02:01:51.895600: step: 414/464, loss: 0.43553680181503296 2023-01-24 02:01:52.655534: step: 416/464, loss: 0.9947322010993958 2023-01-24 02:01:53.400663: step: 418/464, loss: 0.2606787383556366 2023-01-24 02:01:54.152515: step: 420/464, loss: 0.36131685972213745 2023-01-24 02:01:54.915245: step: 422/464, loss: 1.416828989982605 2023-01-24 02:01:55.609993: step: 424/464, loss: 0.21712100505828857 2023-01-24 02:01:56.422045: step: 426/464, loss: 8.038595199584961 2023-01-24 02:01:57.161676: step: 428/464, loss: 0.4072921872138977 2023-01-24 02:01:57.868044: step: 430/464, loss: 1.0790162086486816 2023-01-24 02:01:58.629240: step: 432/464, loss: 0.5021647810935974 2023-01-24 02:01:59.458402: step: 434/464, loss: 0.18573813140392303 2023-01-24 02:02:00.290201: step: 436/464, loss: 0.41294416785240173 2023-01-24 02:02:01.019628: step: 438/464, loss: 0.49419182538986206 2023-01-24 02:02:01.729490: step: 440/464, loss: 0.7594491839408875 2023-01-24 02:02:02.505058: step: 442/464, loss: 0.5722238421440125 2023-01-24 02:02:03.327573: step: 444/464, loss: 0.5605573654174805 2023-01-24 02:02:04.076591: step: 446/464, loss: 0.43364423513412476 2023-01-24 02:02:04.853186: step: 448/464, loss: 0.632633626461029 2023-01-24 02:02:05.599163: step: 450/464, loss: 1.6663942337036133 2023-01-24 02:02:06.303420: step: 452/464, loss: 3.8243656158447266 2023-01-24 02:02:07.144619: step: 454/464, loss: 0.572558581829071 2023-01-24 02:02:07.972044: step: 456/464, loss: 0.7511688470840454 2023-01-24 02:02:08.738329: step: 458/464, loss: 1.0489780902862549 2023-01-24 02:02:09.535642: step: 460/464, loss: 1.9205814599990845 2023-01-24 02:02:10.289446: step: 462/464, loss: 0.6067019104957581 2023-01-24 02:02:11.002495: step: 464/464, loss: 0.5245887041091919 2023-01-24 02:02:11.816288: step: 466/464, loss: 0.38798803091049194 2023-01-24 02:02:12.590234: step: 468/464, loss: 1.0211551189422607 2023-01-24 02:02:13.314326: step: 470/464, loss: 0.848628044128418 2023-01-24 02:02:14.106023: step: 472/464, loss: 4.330991744995117 2023-01-24 02:02:14.865492: step: 474/464, loss: 0.36634135246276855 2023-01-24 02:02:15.736050: step: 476/464, loss: 0.8512176275253296 2023-01-24 02:02:16.505837: step: 478/464, loss: 3.9317703247070312 2023-01-24 02:02:17.331261: step: 480/464, loss: 0.7550517320632935 2023-01-24 02:02:18.072541: step: 482/464, loss: 0.6175093650817871 2023-01-24 02:02:18.793933: step: 484/464, loss: 0.4945186376571655 2023-01-24 02:02:19.566044: step: 486/464, loss: 0.8519952297210693 2023-01-24 02:02:20.311663: step: 488/464, loss: 1.2580838203430176 2023-01-24 02:02:21.071294: step: 490/464, loss: 1.7508350610733032 2023-01-24 02:02:21.803170: step: 492/464, loss: 2.554095983505249 2023-01-24 02:02:22.583210: step: 494/464, loss: 0.6756281852722168 2023-01-24 02:02:23.344773: step: 496/464, loss: 0.5863864421844482 2023-01-24 02:02:24.130581: step: 498/464, loss: 0.5947279334068298 2023-01-24 02:02:24.933581: step: 500/464, loss: 0.8904541730880737 2023-01-24 02:02:25.700381: step: 502/464, loss: 5.003479957580566 2023-01-24 02:02:26.502785: step: 504/464, loss: 1.011299967765808 2023-01-24 02:02:27.285862: step: 506/464, loss: 0.562726616859436 2023-01-24 02:02:27.963246: step: 508/464, loss: 0.868272602558136 2023-01-24 02:02:28.696439: step: 510/464, loss: 0.799949049949646 2023-01-24 02:02:29.481905: step: 512/464, loss: 1.1345824003219604 2023-01-24 02:02:30.177586: step: 514/464, loss: 1.110640287399292 2023-01-24 02:02:30.943852: step: 516/464, loss: 6.441725254058838 2023-01-24 02:02:31.629052: step: 518/464, loss: 0.31550291180610657 2023-01-24 02:02:32.411232: step: 520/464, loss: 0.36972540616989136 2023-01-24 02:02:33.180126: step: 522/464, loss: 0.3183565139770508 2023-01-24 02:02:33.956736: step: 524/464, loss: 1.7634189128875732 2023-01-24 02:02:34.745055: step: 526/464, loss: 0.6870174407958984 2023-01-24 02:02:35.508172: step: 528/464, loss: 0.8734363317489624 2023-01-24 02:02:36.225792: step: 530/464, loss: 3.1973259449005127 2023-01-24 02:02:36.948553: step: 532/464, loss: 0.20919574797153473 2023-01-24 02:02:37.773212: step: 534/464, loss: 4.95903205871582 2023-01-24 02:02:38.489360: step: 536/464, loss: 0.2242266833782196 2023-01-24 02:02:39.244964: step: 538/464, loss: 0.9058042168617249 2023-01-24 02:02:40.001741: step: 540/464, loss: 0.5100103616714478 2023-01-24 02:02:40.754403: step: 542/464, loss: 4.897339820861816 2023-01-24 02:02:41.495594: step: 544/464, loss: 1.1864523887634277 2023-01-24 02:02:42.256924: step: 546/464, loss: 1.217078447341919 2023-01-24 02:02:43.082610: step: 548/464, loss: 1.9085209369659424 2023-01-24 02:02:43.839864: step: 550/464, loss: 1.4283592700958252 2023-01-24 02:02:44.628640: step: 552/464, loss: 1.6112210750579834 2023-01-24 02:02:45.328736: step: 554/464, loss: 1.0772231817245483 2023-01-24 02:02:46.113847: step: 556/464, loss: 0.282481849193573 2023-01-24 02:02:46.885067: step: 558/464, loss: 2.148629665374756 2023-01-24 02:02:47.668728: step: 560/464, loss: 1.2082405090332031 2023-01-24 02:02:48.347390: step: 562/464, loss: 0.44685670733451843 2023-01-24 02:02:49.014785: step: 564/464, loss: 1.7282108068466187 2023-01-24 02:02:49.749297: step: 566/464, loss: 1.388054609298706 2023-01-24 02:02:50.566520: step: 568/464, loss: 1.1820733547210693 2023-01-24 02:02:51.384426: step: 570/464, loss: 0.8305114507675171 2023-01-24 02:02:52.151513: step: 572/464, loss: 2.3896570205688477 2023-01-24 02:02:52.952121: step: 574/464, loss: 0.38784927129745483 2023-01-24 02:02:53.719925: step: 576/464, loss: 0.8815006017684937 2023-01-24 02:02:54.490341: step: 578/464, loss: 1.1910144090652466 2023-01-24 02:02:55.184583: step: 580/464, loss: 1.0457309484481812 2023-01-24 02:02:55.974004: step: 582/464, loss: 1.1997699737548828 2023-01-24 02:02:56.704421: step: 584/464, loss: 2.263993978500366 2023-01-24 02:02:57.522446: step: 586/464, loss: 1.034746527671814 2023-01-24 02:02:58.399890: step: 588/464, loss: 3.714615821838379 2023-01-24 02:02:59.193540: step: 590/464, loss: 1.3886295557022095 2023-01-24 02:02:59.941954: step: 592/464, loss: 0.8830761909484863 2023-01-24 02:03:00.699627: step: 594/464, loss: 0.5461584329605103 2023-01-24 02:03:01.492880: step: 596/464, loss: 1.3844913244247437 2023-01-24 02:03:02.272231: step: 598/464, loss: 1.209411859512329 2023-01-24 02:03:02.965148: step: 600/464, loss: 0.11756696552038193 2023-01-24 02:03:03.727926: step: 602/464, loss: 0.6817506551742554 2023-01-24 02:03:04.486579: step: 604/464, loss: 0.7887982726097107 2023-01-24 02:03:05.264971: step: 606/464, loss: 10.397603988647461 2023-01-24 02:03:06.062655: step: 608/464, loss: 2.0216078758239746 2023-01-24 02:03:06.880300: step: 610/464, loss: 3.20801043510437 2023-01-24 02:03:07.702286: step: 612/464, loss: 1.7752575874328613 2023-01-24 02:03:08.478600: step: 614/464, loss: 0.9904509782791138 2023-01-24 02:03:09.227216: step: 616/464, loss: 1.2129480838775635 2023-01-24 02:03:09.951890: step: 618/464, loss: 0.8754247426986694 2023-01-24 02:03:10.676947: step: 620/464, loss: 2.8872084617614746 2023-01-24 02:03:11.373472: step: 622/464, loss: 0.9464370012283325 2023-01-24 02:03:12.184108: step: 624/464, loss: 0.6756294965744019 2023-01-24 02:03:12.912799: step: 626/464, loss: 0.8634769916534424 2023-01-24 02:03:13.585490: step: 628/464, loss: 2.143869400024414 2023-01-24 02:03:14.385922: step: 630/464, loss: 0.4692254066467285 2023-01-24 02:03:15.117494: step: 632/464, loss: 0.5207785367965698 2023-01-24 02:03:15.842934: step: 634/464, loss: 0.5450736284255981 2023-01-24 02:03:16.642437: step: 636/464, loss: 0.4662814140319824 2023-01-24 02:03:17.433767: step: 638/464, loss: 0.448432594537735 2023-01-24 02:03:18.197652: step: 640/464, loss: 0.6444108486175537 2023-01-24 02:03:18.864953: step: 642/464, loss: 1.191615343093872 2023-01-24 02:03:19.592929: step: 644/464, loss: 2.963665008544922 2023-01-24 02:03:20.314781: step: 646/464, loss: 1.7940711975097656 2023-01-24 02:03:21.078327: step: 648/464, loss: 0.5459948778152466 2023-01-24 02:03:21.796794: step: 650/464, loss: 1.9765942096710205 2023-01-24 02:03:22.480788: step: 652/464, loss: 0.784062922000885 2023-01-24 02:03:23.244676: step: 654/464, loss: 0.6920633912086487 2023-01-24 02:03:24.065419: step: 656/464, loss: 0.2723618149757385 2023-01-24 02:03:24.722933: step: 658/464, loss: 7.097645282745361 2023-01-24 02:03:25.505297: step: 660/464, loss: 1.8126413822174072 2023-01-24 02:03:26.288734: step: 662/464, loss: 1.025843620300293 2023-01-24 02:03:27.032203: step: 664/464, loss: 0.48338747024536133 2023-01-24 02:03:27.727379: step: 666/464, loss: 1.12424898147583 2023-01-24 02:03:28.429120: step: 668/464, loss: 2.855159282684326 2023-01-24 02:03:29.206054: step: 670/464, loss: 0.46674615144729614 2023-01-24 02:03:29.974382: step: 672/464, loss: 4.845136642456055 2023-01-24 02:03:30.755284: step: 674/464, loss: 0.7831045389175415 2023-01-24 02:03:31.599025: step: 676/464, loss: 1.4002931118011475 2023-01-24 02:03:32.393843: step: 678/464, loss: 0.4147849678993225 2023-01-24 02:03:33.148931: step: 680/464, loss: 1.0630037784576416 2023-01-24 02:03:34.039780: step: 682/464, loss: 2.586699962615967 2023-01-24 02:03:34.720236: step: 684/464, loss: 0.8683406114578247 2023-01-24 02:03:35.478530: step: 686/464, loss: 1.7670503854751587 2023-01-24 02:03:36.235305: step: 688/464, loss: 0.2609246075153351 2023-01-24 02:03:37.044962: step: 690/464, loss: 0.4074583947658539 2023-01-24 02:03:37.735643: step: 692/464, loss: 1.5731773376464844 2023-01-24 02:03:38.466676: step: 694/464, loss: 0.8332323431968689 2023-01-24 02:03:39.254514: step: 696/464, loss: 1.06688392162323 2023-01-24 02:03:40.059667: step: 698/464, loss: 17.2744083404541 2023-01-24 02:03:40.831111: step: 700/464, loss: 1.0277228355407715 2023-01-24 02:03:41.482253: step: 702/464, loss: 1.0320792198181152 2023-01-24 02:03:42.283248: step: 704/464, loss: 0.6775314807891846 2023-01-24 02:03:43.059221: step: 706/464, loss: 0.8533573150634766 2023-01-24 02:03:43.882156: step: 708/464, loss: 1.7036771774291992 2023-01-24 02:03:44.592507: step: 710/464, loss: 0.3614809513092041 2023-01-24 02:03:45.366082: step: 712/464, loss: 0.7562206983566284 2023-01-24 02:03:46.220572: step: 714/464, loss: 1.420016884803772 2023-01-24 02:03:47.005125: step: 716/464, loss: 1.6971116065979004 2023-01-24 02:03:47.789201: step: 718/464, loss: 0.3137013614177704 2023-01-24 02:03:48.494613: step: 720/464, loss: 0.7519422769546509 2023-01-24 02:03:49.245200: step: 722/464, loss: 1.1476322412490845 2023-01-24 02:03:50.014847: step: 724/464, loss: 0.9484537243843079 2023-01-24 02:03:50.861195: step: 726/464, loss: 0.9967315196990967 2023-01-24 02:03:51.597134: step: 728/464, loss: 1.7490859031677246 2023-01-24 02:03:52.366061: step: 730/464, loss: 0.9392281174659729 2023-01-24 02:03:53.159563: step: 732/464, loss: 2.6961867809295654 2023-01-24 02:03:53.918670: step: 734/464, loss: 0.4748970866203308 2023-01-24 02:03:54.621794: step: 736/464, loss: 0.4516463279724121 2023-01-24 02:03:55.381538: step: 738/464, loss: 0.8927331566810608 2023-01-24 02:03:56.111956: step: 740/464, loss: 1.5245000123977661 2023-01-24 02:03:56.847600: step: 742/464, loss: 2.048813581466675 2023-01-24 02:03:57.664615: step: 744/464, loss: 0.941074788570404 2023-01-24 02:03:58.398218: step: 746/464, loss: 0.9800759553909302 2023-01-24 02:03:59.146340: step: 748/464, loss: 0.7823781371116638 2023-01-24 02:03:59.896245: step: 750/464, loss: 2.4357547760009766 2023-01-24 02:04:00.658361: step: 752/464, loss: 2.9422607421875 2023-01-24 02:04:01.461931: step: 754/464, loss: 1.5935968160629272 2023-01-24 02:04:02.259451: step: 756/464, loss: 0.6326267123222351 2023-01-24 02:04:02.947763: step: 758/464, loss: 0.5571883320808411 2023-01-24 02:04:03.701796: step: 760/464, loss: 0.4855504631996155 2023-01-24 02:04:04.424523: step: 762/464, loss: 2.940896511077881 2023-01-24 02:04:05.143396: step: 764/464, loss: 0.7516842484474182 2023-01-24 02:04:05.936819: step: 766/464, loss: 0.41791966557502747 2023-01-24 02:04:06.656906: step: 768/464, loss: 1.0552831888198853 2023-01-24 02:04:07.399484: step: 770/464, loss: 2.5800318717956543 2023-01-24 02:04:08.161462: step: 772/464, loss: 0.5675879716873169 2023-01-24 02:04:09.022622: step: 774/464, loss: 0.8680490255355835 2023-01-24 02:04:09.804265: step: 776/464, loss: 1.4004287719726562 2023-01-24 02:04:10.610411: step: 778/464, loss: 0.8454161882400513 2023-01-24 02:04:11.388714: step: 780/464, loss: 0.904163122177124 2023-01-24 02:04:12.200097: step: 782/464, loss: 0.6053056120872498 2023-01-24 02:04:12.935562: step: 784/464, loss: 0.21278899908065796 2023-01-24 02:04:13.699007: step: 786/464, loss: 4.879207611083984 2023-01-24 02:04:14.433161: step: 788/464, loss: 0.8809518814086914 2023-01-24 02:04:15.205987: step: 790/464, loss: 1.1817303895950317 2023-01-24 02:04:15.971916: step: 792/464, loss: 2.4634742736816406 2023-01-24 02:04:16.749759: step: 794/464, loss: 0.7877818942070007 2023-01-24 02:04:17.502078: step: 796/464, loss: 0.3282577693462372 2023-01-24 02:04:18.270850: step: 798/464, loss: 0.5703331828117371 2023-01-24 02:04:18.996301: step: 800/464, loss: 1.4936720132827759 2023-01-24 02:04:19.760000: step: 802/464, loss: 2.9826011657714844 2023-01-24 02:04:20.586790: step: 804/464, loss: 0.5595707893371582 2023-01-24 02:04:21.335314: step: 806/464, loss: 3.8043880462646484 2023-01-24 02:04:22.094971: step: 808/464, loss: 1.1721092462539673 2023-01-24 02:04:22.813349: step: 810/464, loss: 0.980811595916748 2023-01-24 02:04:23.578679: step: 812/464, loss: 4.484066963195801 2023-01-24 02:04:24.474808: step: 814/464, loss: 0.5161080360412598 2023-01-24 02:04:25.281997: step: 816/464, loss: 0.8435653448104858 2023-01-24 02:04:26.131772: step: 818/464, loss: 0.9036491513252258 2023-01-24 02:04:26.917180: step: 820/464, loss: 1.5572112798690796 2023-01-24 02:04:27.648730: step: 822/464, loss: 0.7679738402366638 2023-01-24 02:04:28.419776: step: 824/464, loss: 1.4088101387023926 2023-01-24 02:04:29.235272: step: 826/464, loss: 0.3526294529438019 2023-01-24 02:04:29.981886: step: 828/464, loss: 0.7417711019515991 2023-01-24 02:04:30.732972: step: 830/464, loss: 0.29044491052627563 2023-01-24 02:04:31.492418: step: 832/464, loss: 0.6436646580696106 2023-01-24 02:04:32.210253: step: 834/464, loss: 0.7028468251228333 2023-01-24 02:04:32.964537: step: 836/464, loss: 0.35998618602752686 2023-01-24 02:04:33.679515: step: 838/464, loss: 0.3419780135154724 2023-01-24 02:04:34.351630: step: 840/464, loss: 1.7909822463989258 2023-01-24 02:04:35.132404: step: 842/464, loss: 1.2623323202133179 2023-01-24 02:04:35.924796: step: 844/464, loss: 2.257474899291992 2023-01-24 02:04:36.723317: step: 846/464, loss: 0.32528308033943176 2023-01-24 02:04:37.627030: step: 848/464, loss: 0.24421803653240204 2023-01-24 02:04:38.318363: step: 850/464, loss: 0.41921886801719666 2023-01-24 02:04:39.185379: step: 852/464, loss: 4.022235870361328 2023-01-24 02:04:39.908879: step: 854/464, loss: 1.883143424987793 2023-01-24 02:04:40.635631: step: 856/464, loss: 0.9430786371231079 2023-01-24 02:04:41.321305: step: 858/464, loss: 0.6863318681716919 2023-01-24 02:04:42.222856: step: 860/464, loss: 5.270090103149414 2023-01-24 02:04:42.996688: step: 862/464, loss: 0.574114978313446 2023-01-24 02:04:43.745990: step: 864/464, loss: 0.9139434695243835 2023-01-24 02:04:44.458643: step: 866/464, loss: 1.3902381658554077 2023-01-24 02:04:45.214730: step: 868/464, loss: 0.6280410289764404 2023-01-24 02:04:45.963325: step: 870/464, loss: 0.24456611275672913 2023-01-24 02:04:46.763882: step: 872/464, loss: 2.4077582359313965 2023-01-24 02:04:47.452536: step: 874/464, loss: 1.9177312850952148 2023-01-24 02:04:48.188939: step: 876/464, loss: 0.7885171175003052 2023-01-24 02:04:48.989417: step: 878/464, loss: 0.9026452302932739 2023-01-24 02:04:49.828515: step: 880/464, loss: 2.1258692741394043 2023-01-24 02:04:50.626850: step: 882/464, loss: 2.132807970046997 2023-01-24 02:04:51.352950: step: 884/464, loss: 0.6072775721549988 2023-01-24 02:04:52.160512: step: 886/464, loss: 1.4739820957183838 2023-01-24 02:04:52.983291: step: 888/464, loss: 0.6416248679161072 2023-01-24 02:04:53.727345: step: 890/464, loss: 2.314173936843872 2023-01-24 02:04:54.525949: step: 892/464, loss: 0.9566014409065247 2023-01-24 02:04:55.390384: step: 894/464, loss: 0.6913099884986877 2023-01-24 02:04:56.164725: step: 896/464, loss: 2.844928741455078 2023-01-24 02:04:56.950816: step: 898/464, loss: 0.2614089846611023 2023-01-24 02:04:57.765912: step: 900/464, loss: 5.129838943481445 2023-01-24 02:04:58.498476: step: 902/464, loss: 0.91115802526474 2023-01-24 02:04:59.245272: step: 904/464, loss: 2.6563570499420166 2023-01-24 02:05:00.067326: step: 906/464, loss: 1.552895188331604 2023-01-24 02:05:00.791009: step: 908/464, loss: 0.2228650450706482 2023-01-24 02:05:01.582354: step: 910/464, loss: 1.1201412677764893 2023-01-24 02:05:02.312544: step: 912/464, loss: 0.28336161375045776 2023-01-24 02:05:03.109521: step: 914/464, loss: 0.6156604290008545 2023-01-24 02:05:03.833475: step: 916/464, loss: 2.0127663612365723 2023-01-24 02:05:04.618328: step: 918/464, loss: 0.4022485017776489 2023-01-24 02:05:05.394902: step: 920/464, loss: 0.40107741951942444 2023-01-24 02:05:06.163497: step: 922/464, loss: 0.6638324856758118 2023-01-24 02:05:06.976884: step: 924/464, loss: 0.5233751535415649 2023-01-24 02:05:07.688583: step: 926/464, loss: 0.5563598871231079 2023-01-24 02:05:08.416293: step: 928/464, loss: 1.0427556037902832 2023-01-24 02:05:09.074234: step: 930/464, loss: 2.201897144317627 ================================================== Loss: 1.302 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3452653372561839, 'r': 0.286301617421162, 'f1': 0.31303102153724555}, 'combined': 0.2306544369221809, 'epoch': 3} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32751673390386515, 'r': 0.23172132556834726, 'f1': 0.27141433041107343}, 'combined': 0.16856258415003508, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3148025954030573, 'r': 0.25865184783590855, 'f1': 0.28397817460317454}, 'combined': 0.20924707602339176, 'epoch': 3} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3242456997204659, 'r': 0.22950065814370588, 'f1': 0.268767822774074}, 'combined': 0.16691896361758282, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34441328494446277, 'r': 0.28298093430920757, 'f1': 0.31068948412698416}, 'combined': 0.22892909356725147, 'epoch': 3} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.33689085298832067, 'r': 0.23978115005387876, 'f1': 0.2801595213432286}, 'combined': 0.1739938079921104, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2714285714285714, 'r': 0.2714285714285714, 'f1': 0.2714285714285714}, 'combined': 0.18095238095238092, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.30434782608695654, 'f1': 0.27450980392156865}, 'combined': 0.13725490196078433, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4861111111111111, 'r': 0.15086206896551724, 'f1': 0.23026315789473686}, 'combined': 0.15350877192982457, 'epoch': 3} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30406624933354615, 'r': 0.24946077922638757, 'f1': 0.2740700980493438}, 'combined': 0.20194638803635856, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.29459183041838544, 'r': 0.20772500862834872, 'f1': 0.24364737854152177}, 'combined': 0.15131784562052406, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.41346153846153844, 'r': 0.30714285714285716, 'f1': 0.3524590163934426}, 'combined': 0.2349726775956284, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3148025954030573, 'r': 0.25865184783590855, 'f1': 0.28397817460317454}, 'combined': 0.20924707602339176, 'epoch': 3} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3242456997204659, 'r': 0.22950065814370588, 'f1': 0.268767822774074}, 'combined': 0.16691896361758282, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.30434782608695654, 'f1': 0.27450980392156865}, 'combined': 0.13725490196078433, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31612442374595123, 'r': 0.25744981479310425, 'f1': 0.2837860171414594}, 'combined': 0.20910548631475956, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.29958252192114637, 'r': 0.2133122098886269, 'f1': 0.24919191339523925}, 'combined': 0.1547612935823065, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:08:02.579785: step: 2/464, loss: 0.40228208899497986 2023-01-24 02:08:03.334082: step: 4/464, loss: 0.761715829372406 2023-01-24 02:08:04.087243: step: 6/464, loss: 0.7494341731071472 2023-01-24 02:08:04.872961: step: 8/464, loss: 0.24450060725212097 2023-01-24 02:08:05.566033: step: 10/464, loss: 0.5414758920669556 2023-01-24 02:08:06.384057: step: 12/464, loss: 1.0487775802612305 2023-01-24 02:08:07.084643: step: 14/464, loss: 0.7465416193008423 2023-01-24 02:08:07.872341: step: 16/464, loss: 0.2710585296154022 2023-01-24 02:08:08.630253: step: 18/464, loss: 0.36387255787849426 2023-01-24 02:08:09.319878: step: 20/464, loss: 1.130811333656311 2023-01-24 02:08:10.022817: step: 22/464, loss: 0.6743142604827881 2023-01-24 02:08:10.782597: step: 24/464, loss: 1.241051197052002 2023-01-24 02:08:11.515329: step: 26/464, loss: 0.5547807216644287 2023-01-24 02:08:12.408625: step: 28/464, loss: 0.6198810935020447 2023-01-24 02:08:13.114186: step: 30/464, loss: 1.1782160997390747 2023-01-24 02:08:13.916934: step: 32/464, loss: 0.813835620880127 2023-01-24 02:08:14.673540: step: 34/464, loss: 0.641745388507843 2023-01-24 02:08:15.435077: step: 36/464, loss: 0.4335750937461853 2023-01-24 02:08:16.216646: step: 38/464, loss: 0.7220770120620728 2023-01-24 02:08:16.946899: step: 40/464, loss: 0.1818157434463501 2023-01-24 02:08:17.726270: step: 42/464, loss: 0.573371171951294 2023-01-24 02:08:18.623225: step: 44/464, loss: 2.2441868782043457 2023-01-24 02:08:19.382570: step: 46/464, loss: 0.2036203145980835 2023-01-24 02:08:20.202842: step: 48/464, loss: 0.3498281240463257 2023-01-24 02:08:21.002331: step: 50/464, loss: 3.470280647277832 2023-01-24 02:08:21.766681: step: 52/464, loss: 2.04329776763916 2023-01-24 02:08:22.534827: step: 54/464, loss: 0.6625763773918152 2023-01-24 02:08:23.293546: step: 56/464, loss: 1.1685752868652344 2023-01-24 02:08:24.044841: step: 58/464, loss: 2.4842872619628906 2023-01-24 02:08:24.835208: step: 60/464, loss: 1.0792351961135864 2023-01-24 02:08:25.555878: step: 62/464, loss: 0.49406152963638306 2023-01-24 02:08:26.280336: step: 64/464, loss: 0.2838814854621887 2023-01-24 02:08:27.064352: step: 66/464, loss: 0.40156522393226624 2023-01-24 02:08:27.853925: step: 68/464, loss: 1.2194373607635498 2023-01-24 02:08:28.685339: step: 70/464, loss: 0.8869839310646057 2023-01-24 02:08:29.449335: step: 72/464, loss: 0.5222635865211487 2023-01-24 02:08:30.186758: step: 74/464, loss: 0.6052136421203613 2023-01-24 02:08:30.967542: step: 76/464, loss: 0.8005256056785583 2023-01-24 02:08:31.724138: step: 78/464, loss: 0.8984832763671875 2023-01-24 02:08:32.516834: step: 80/464, loss: 0.41829895973205566 2023-01-24 02:08:33.306283: step: 82/464, loss: 0.3826153874397278 2023-01-24 02:08:34.024495: step: 84/464, loss: 0.2728906273841858 2023-01-24 02:08:34.827953: step: 86/464, loss: 0.673712968826294 2023-01-24 02:08:35.629522: step: 88/464, loss: 0.44470030069351196 2023-01-24 02:08:36.465827: step: 90/464, loss: 2.178097724914551 2023-01-24 02:08:37.155656: step: 92/464, loss: 0.7721235752105713 2023-01-24 02:08:37.938428: step: 94/464, loss: 0.8557058572769165 2023-01-24 02:08:38.762780: step: 96/464, loss: 0.9540102481842041 2023-01-24 02:08:39.560201: step: 98/464, loss: 4.250099182128906 2023-01-24 02:08:40.322239: step: 100/464, loss: 0.47582846879959106 2023-01-24 02:08:41.123672: step: 102/464, loss: 1.3785321712493896 2023-01-24 02:08:41.978821: step: 104/464, loss: 0.26402074098587036 2023-01-24 02:08:42.745616: step: 106/464, loss: 0.7524766325950623 2023-01-24 02:08:43.592754: step: 108/464, loss: 1.1828628778457642 2023-01-24 02:08:44.292903: step: 110/464, loss: 0.8786439299583435 2023-01-24 02:08:45.054482: step: 112/464, loss: 1.3983309268951416 2023-01-24 02:08:45.842415: step: 114/464, loss: 0.604883074760437 2023-01-24 02:08:46.578921: step: 116/464, loss: 1.3533883094787598 2023-01-24 02:08:47.344297: step: 118/464, loss: 0.4021783173084259 2023-01-24 02:08:48.053436: step: 120/464, loss: 0.9296446442604065 2023-01-24 02:08:48.849161: step: 122/464, loss: 0.7201257944107056 2023-01-24 02:08:49.612819: step: 124/464, loss: 0.46312540769577026 2023-01-24 02:08:50.431119: step: 126/464, loss: 1.0364785194396973 2023-01-24 02:08:51.312813: step: 128/464, loss: 0.2913583517074585 2023-01-24 02:08:52.083816: step: 130/464, loss: 0.6095572710037231 2023-01-24 02:08:52.859467: step: 132/464, loss: 1.744209885597229 2023-01-24 02:08:53.614852: step: 134/464, loss: 0.7447730898857117 2023-01-24 02:08:54.338440: step: 136/464, loss: 1.452962040901184 2023-01-24 02:08:55.116344: step: 138/464, loss: 1.8778823614120483 2023-01-24 02:08:55.815817: step: 140/464, loss: 0.7671042084693909 2023-01-24 02:08:56.646679: step: 142/464, loss: 1.4017772674560547 2023-01-24 02:08:57.418322: step: 144/464, loss: 0.5678072571754456 2023-01-24 02:08:58.208342: step: 146/464, loss: 3.1386330127716064 2023-01-24 02:08:58.995400: step: 148/464, loss: 0.49076610803604126 2023-01-24 02:08:59.732188: step: 150/464, loss: 1.209444284439087 2023-01-24 02:09:00.664515: step: 152/464, loss: 1.068770408630371 2023-01-24 02:09:01.446962: step: 154/464, loss: 1.9193202257156372 2023-01-24 02:09:02.226178: step: 156/464, loss: 0.46384406089782715 2023-01-24 02:09:03.012695: step: 158/464, loss: 1.053743839263916 2023-01-24 02:09:03.766288: step: 160/464, loss: 0.545936107635498 2023-01-24 02:09:04.498349: step: 162/464, loss: 0.8476034998893738 2023-01-24 02:09:05.294630: step: 164/464, loss: 0.8386257886886597 2023-01-24 02:09:06.061340: step: 166/464, loss: 0.4921552538871765 2023-01-24 02:09:06.802129: step: 168/464, loss: 0.7412563562393188 2023-01-24 02:09:07.605449: step: 170/464, loss: 0.6649913191795349 2023-01-24 02:09:08.377729: step: 172/464, loss: 0.16923581063747406 2023-01-24 02:09:09.229939: step: 174/464, loss: 0.6418856978416443 2023-01-24 02:09:10.002636: step: 176/464, loss: 0.5956724882125854 2023-01-24 02:09:10.777151: step: 178/464, loss: 1.5731823444366455 2023-01-24 02:09:11.551654: step: 180/464, loss: 0.6780312061309814 2023-01-24 02:09:12.346477: step: 182/464, loss: 0.3298395574092865 2023-01-24 02:09:13.174477: step: 184/464, loss: 0.8208402395248413 2023-01-24 02:09:13.888123: step: 186/464, loss: 1.238532304763794 2023-01-24 02:09:14.601286: step: 188/464, loss: 0.22493049502372742 2023-01-24 02:09:15.339516: step: 190/464, loss: 1.5633842945098877 2023-01-24 02:09:16.070866: step: 192/464, loss: 0.8262635469436646 2023-01-24 02:09:16.832399: step: 194/464, loss: 0.917330801486969 2023-01-24 02:09:17.579060: step: 196/464, loss: 0.9680485129356384 2023-01-24 02:09:18.358363: step: 198/464, loss: 0.3854012191295624 2023-01-24 02:09:19.218208: step: 200/464, loss: 1.4094256162643433 2023-01-24 02:09:19.952131: step: 202/464, loss: 0.3496444821357727 2023-01-24 02:09:20.674263: step: 204/464, loss: 1.3187754154205322 2023-01-24 02:09:21.406486: step: 206/464, loss: 1.3401422500610352 2023-01-24 02:09:22.130689: step: 208/464, loss: 1.0585306882858276 2023-01-24 02:09:22.803772: step: 210/464, loss: 0.3400452733039856 2023-01-24 02:09:23.611916: step: 212/464, loss: 0.5716167688369751 2023-01-24 02:09:24.397509: step: 214/464, loss: 0.9066051840782166 2023-01-24 02:09:25.112465: step: 216/464, loss: 0.510635495185852 2023-01-24 02:09:25.893206: step: 218/464, loss: 0.5335964560508728 2023-01-24 02:09:26.687211: step: 220/464, loss: 0.7558530569076538 2023-01-24 02:09:27.409947: step: 222/464, loss: 0.4452095031738281 2023-01-24 02:09:28.089216: step: 224/464, loss: 0.32481619715690613 2023-01-24 02:09:28.826313: step: 226/464, loss: 1.4661684036254883 2023-01-24 02:09:29.700209: step: 228/464, loss: 0.3202166259288788 2023-01-24 02:09:30.429517: step: 230/464, loss: 0.694557785987854 2023-01-24 02:09:31.208070: step: 232/464, loss: 1.280537486076355 2023-01-24 02:09:31.916543: step: 234/464, loss: 1.118554711341858 2023-01-24 02:09:32.667811: step: 236/464, loss: 0.620063066482544 2023-01-24 02:09:33.415556: step: 238/464, loss: 0.40609267354011536 2023-01-24 02:09:34.183229: step: 240/464, loss: 3.694124460220337 2023-01-24 02:09:34.912435: step: 242/464, loss: 0.3537047505378723 2023-01-24 02:09:35.670023: step: 244/464, loss: 0.5573844909667969 2023-01-24 02:09:36.440766: step: 246/464, loss: 0.921636164188385 2023-01-24 02:09:37.151087: step: 248/464, loss: 1.0031071901321411 2023-01-24 02:09:37.936465: step: 250/464, loss: 0.6408367156982422 2023-01-24 02:09:38.766110: step: 252/464, loss: 0.6051366329193115 2023-01-24 02:09:39.563983: step: 254/464, loss: 0.29460883140563965 2023-01-24 02:09:40.264896: step: 256/464, loss: 0.6332646608352661 2023-01-24 02:09:41.066360: step: 258/464, loss: 3.4274678230285645 2023-01-24 02:09:41.866605: step: 260/464, loss: 0.6829164028167725 2023-01-24 02:09:42.624405: step: 262/464, loss: 0.6332787275314331 2023-01-24 02:09:43.461459: step: 264/464, loss: 0.2608512043952942 2023-01-24 02:09:44.201632: step: 266/464, loss: 1.234724760055542 2023-01-24 02:09:44.979228: step: 268/464, loss: 0.23023658990859985 2023-01-24 02:09:45.740727: step: 270/464, loss: 2.5275135040283203 2023-01-24 02:09:46.520610: step: 272/464, loss: 0.7357670068740845 2023-01-24 02:09:47.341152: step: 274/464, loss: 1.2567230463027954 2023-01-24 02:09:48.068685: step: 276/464, loss: 0.9408270716667175 2023-01-24 02:09:48.821075: step: 278/464, loss: 0.8275658488273621 2023-01-24 02:09:49.576815: step: 280/464, loss: 1.6424264907836914 2023-01-24 02:09:50.333238: step: 282/464, loss: 0.5935283899307251 2023-01-24 02:09:51.013758: step: 284/464, loss: 0.923167884349823 2023-01-24 02:09:51.722674: step: 286/464, loss: 4.628636360168457 2023-01-24 02:09:52.556750: step: 288/464, loss: 0.6874059438705444 2023-01-24 02:09:53.319265: step: 290/464, loss: 3.098637104034424 2023-01-24 02:09:54.093273: step: 292/464, loss: 0.7319610118865967 2023-01-24 02:09:54.812143: step: 294/464, loss: 2.2061524391174316 2023-01-24 02:09:55.613185: step: 296/464, loss: 0.7146373987197876 2023-01-24 02:09:56.318204: step: 298/464, loss: 0.8566915392875671 2023-01-24 02:09:57.052720: step: 300/464, loss: 2.7071781158447266 2023-01-24 02:09:57.773408: step: 302/464, loss: 0.4175330400466919 2023-01-24 02:09:58.492689: step: 304/464, loss: 2.2370574474334717 2023-01-24 02:09:59.321776: step: 306/464, loss: 0.8507087230682373 2023-01-24 02:10:00.102217: step: 308/464, loss: 1.7574455738067627 2023-01-24 02:10:00.895284: step: 310/464, loss: 1.437440037727356 2023-01-24 02:10:01.602148: step: 312/464, loss: 1.0773636102676392 2023-01-24 02:10:02.265778: step: 314/464, loss: 4.062402248382568 2023-01-24 02:10:03.010471: step: 316/464, loss: 1.0066360235214233 2023-01-24 02:10:03.799854: step: 318/464, loss: 0.920922040939331 2023-01-24 02:10:04.500109: step: 320/464, loss: 1.5979911088943481 2023-01-24 02:10:05.403160: step: 322/464, loss: 3.2422804832458496 2023-01-24 02:10:06.153721: step: 324/464, loss: 0.5224069356918335 2023-01-24 02:10:06.923431: step: 326/464, loss: 1.6979987621307373 2023-01-24 02:10:07.653753: step: 328/464, loss: 0.4662569761276245 2023-01-24 02:10:08.473762: step: 330/464, loss: 0.4052315950393677 2023-01-24 02:10:09.198882: step: 332/464, loss: 0.2515992224216461 2023-01-24 02:10:09.916495: step: 334/464, loss: 0.7200693488121033 2023-01-24 02:10:10.689844: step: 336/464, loss: 1.927569031715393 2023-01-24 02:10:11.485237: step: 338/464, loss: 1.7271506786346436 2023-01-24 02:10:12.250303: step: 340/464, loss: 0.10802609473466873 2023-01-24 02:10:13.048108: step: 342/464, loss: 0.7540997266769409 2023-01-24 02:10:13.900269: step: 344/464, loss: 2.4743459224700928 2023-01-24 02:10:14.604273: step: 346/464, loss: 1.7580007314682007 2023-01-24 02:10:15.432821: step: 348/464, loss: 0.2616334557533264 2023-01-24 02:10:16.143779: step: 350/464, loss: 0.4886997938156128 2023-01-24 02:10:16.923326: step: 352/464, loss: 0.6108362078666687 2023-01-24 02:10:17.672420: step: 354/464, loss: 1.4357883930206299 2023-01-24 02:10:18.448017: step: 356/464, loss: 0.7157084345817566 2023-01-24 02:10:19.137502: step: 358/464, loss: 0.188466876745224 2023-01-24 02:10:19.898648: step: 360/464, loss: 0.6241902112960815 2023-01-24 02:10:20.648189: step: 362/464, loss: 0.7048114538192749 2023-01-24 02:10:21.364989: step: 364/464, loss: 0.7779271006584167 2023-01-24 02:10:22.121513: step: 366/464, loss: 0.2853296101093292 2023-01-24 02:10:22.873489: step: 368/464, loss: 1.675212025642395 2023-01-24 02:10:23.659285: step: 370/464, loss: 0.7650219202041626 2023-01-24 02:10:24.422222: step: 372/464, loss: 0.40282338857650757 2023-01-24 02:10:25.193124: step: 374/464, loss: 0.49744731187820435 2023-01-24 02:10:26.001920: step: 376/464, loss: 2.2720847129821777 2023-01-24 02:10:26.757225: step: 378/464, loss: 0.8903071284294128 2023-01-24 02:10:27.480739: step: 380/464, loss: 1.1935958862304688 2023-01-24 02:10:28.263564: step: 382/464, loss: 1.7125489711761475 2023-01-24 02:10:29.047892: step: 384/464, loss: 1.872301459312439 2023-01-24 02:10:29.809627: step: 386/464, loss: 0.4071800708770752 2023-01-24 02:10:30.589112: step: 388/464, loss: 0.4406774938106537 2023-01-24 02:10:31.357589: step: 390/464, loss: 0.8730248212814331 2023-01-24 02:10:32.135449: step: 392/464, loss: 0.19085253775119781 2023-01-24 02:10:32.894043: step: 394/464, loss: 0.2352760285139084 2023-01-24 02:10:33.654697: step: 396/464, loss: 2.909917116165161 2023-01-24 02:10:34.457869: step: 398/464, loss: 4.20819616317749 2023-01-24 02:10:35.222871: step: 400/464, loss: 1.4049768447875977 2023-01-24 02:10:36.025006: step: 402/464, loss: 0.6060394644737244 2023-01-24 02:10:36.737774: step: 404/464, loss: 0.3055975139141083 2023-01-24 02:10:37.504567: step: 406/464, loss: 0.778168261051178 2023-01-24 02:10:38.240524: step: 408/464, loss: 0.9540801644325256 2023-01-24 02:10:39.054409: step: 410/464, loss: 0.276130735874176 2023-01-24 02:10:39.851815: step: 412/464, loss: 0.6118130087852478 2023-01-24 02:10:40.553612: step: 414/464, loss: 0.8638632893562317 2023-01-24 02:10:41.303519: step: 416/464, loss: 0.24808137118816376 2023-01-24 02:10:42.137023: step: 418/464, loss: 1.0742295980453491 2023-01-24 02:10:42.966975: step: 420/464, loss: 0.5043665170669556 2023-01-24 02:10:43.712840: step: 422/464, loss: 0.34764429926872253 2023-01-24 02:10:44.499455: step: 424/464, loss: 1.4734704494476318 2023-01-24 02:10:45.243945: step: 426/464, loss: 0.7792679071426392 2023-01-24 02:10:46.167405: step: 428/464, loss: 1.2594319581985474 2023-01-24 02:10:46.997932: step: 430/464, loss: 0.8206206560134888 2023-01-24 02:10:47.721251: step: 432/464, loss: 0.9869316816329956 2023-01-24 02:10:48.445676: step: 434/464, loss: 2.3212811946868896 2023-01-24 02:10:49.111851: step: 436/464, loss: 0.2014824002981186 2023-01-24 02:10:49.806927: step: 438/464, loss: 0.7801494598388672 2023-01-24 02:10:50.633933: step: 440/464, loss: 0.9981138110160828 2023-01-24 02:10:51.425809: step: 442/464, loss: 0.5376265645027161 2023-01-24 02:10:52.318287: step: 444/464, loss: 0.3107282519340515 2023-01-24 02:10:53.111029: step: 446/464, loss: 0.45178577303886414 2023-01-24 02:10:53.957703: step: 448/464, loss: 0.47116056084632874 2023-01-24 02:10:54.674128: step: 450/464, loss: 1.1121314764022827 2023-01-24 02:10:55.389812: step: 452/464, loss: 1.8235584497451782 2023-01-24 02:10:56.087921: step: 454/464, loss: 1.2838335037231445 2023-01-24 02:10:56.927737: step: 456/464, loss: 0.43410632014274597 2023-01-24 02:10:57.680921: step: 458/464, loss: 1.8817094564437866 2023-01-24 02:10:58.467114: step: 460/464, loss: 0.8303155899047852 2023-01-24 02:10:59.230393: step: 462/464, loss: 0.7997901439666748 2023-01-24 02:10:59.982832: step: 464/464, loss: 0.4771265685558319 2023-01-24 02:11:00.780062: step: 466/464, loss: 0.8618097901344299 2023-01-24 02:11:01.526046: step: 468/464, loss: 0.3874833285808563 2023-01-24 02:11:02.361898: step: 470/464, loss: 0.8758236169815063 2023-01-24 02:11:03.099472: step: 472/464, loss: 0.2645411193370819 2023-01-24 02:11:03.872290: step: 474/464, loss: 0.423144668340683 2023-01-24 02:11:04.579400: step: 476/464, loss: 0.4718470573425293 2023-01-24 02:11:05.316244: step: 478/464, loss: 0.7281615734100342 2023-01-24 02:11:06.091224: step: 480/464, loss: 1.2971644401550293 2023-01-24 02:11:06.819906: step: 482/464, loss: 0.3497962951660156 2023-01-24 02:11:07.623355: step: 484/464, loss: 3.243353843688965 2023-01-24 02:11:08.344206: step: 486/464, loss: 1.0315594673156738 2023-01-24 02:11:09.031631: step: 488/464, loss: 2.403261184692383 2023-01-24 02:11:09.777507: step: 490/464, loss: 0.3512910008430481 2023-01-24 02:11:10.535227: step: 492/464, loss: 4.593682289123535 2023-01-24 02:11:11.305769: step: 494/464, loss: 0.6878775954246521 2023-01-24 02:11:12.089990: step: 496/464, loss: 0.7801576852798462 2023-01-24 02:11:12.859313: step: 498/464, loss: 0.2250710427761078 2023-01-24 02:11:13.599394: step: 500/464, loss: 1.9546105861663818 2023-01-24 02:11:14.440135: step: 502/464, loss: 0.5993082523345947 2023-01-24 02:11:15.214783: step: 504/464, loss: 0.9544249773025513 2023-01-24 02:11:15.909814: step: 506/464, loss: 0.6948167085647583 2023-01-24 02:11:16.667062: step: 508/464, loss: 1.0587568283081055 2023-01-24 02:11:17.425655: step: 510/464, loss: 1.2667818069458008 2023-01-24 02:11:18.284243: step: 512/464, loss: 0.41677990555763245 2023-01-24 02:11:19.001026: step: 514/464, loss: 1.2263845205307007 2023-01-24 02:11:19.796458: step: 516/464, loss: 1.315137505531311 2023-01-24 02:11:20.577163: step: 518/464, loss: 1.9078733921051025 2023-01-24 02:11:21.346668: step: 520/464, loss: 1.1094090938568115 2023-01-24 02:11:22.054894: step: 522/464, loss: 0.9098461866378784 2023-01-24 02:11:22.885326: step: 524/464, loss: 1.0443501472473145 2023-01-24 02:11:23.626898: step: 526/464, loss: 1.2700201272964478 2023-01-24 02:11:24.511360: step: 528/464, loss: 0.7448489665985107 2023-01-24 02:11:25.230806: step: 530/464, loss: 0.4656221270561218 2023-01-24 02:11:25.954171: step: 532/464, loss: 0.850081205368042 2023-01-24 02:11:26.800110: step: 534/464, loss: 0.5706437230110168 2023-01-24 02:11:27.578244: step: 536/464, loss: 0.5199675559997559 2023-01-24 02:11:28.389692: step: 538/464, loss: 1.1948305368423462 2023-01-24 02:11:29.129381: step: 540/464, loss: 0.8280027508735657 2023-01-24 02:11:29.878216: step: 542/464, loss: 0.7765970826148987 2023-01-24 02:11:30.630548: step: 544/464, loss: 1.8322759866714478 2023-01-24 02:11:31.390551: step: 546/464, loss: 0.9865900278091431 2023-01-24 02:11:32.115256: step: 548/464, loss: 1.1756948232650757 2023-01-24 02:11:32.855613: step: 550/464, loss: 0.7664552927017212 2023-01-24 02:11:33.624966: step: 552/464, loss: 0.8237978219985962 2023-01-24 02:11:34.309005: step: 554/464, loss: 1.9098732471466064 2023-01-24 02:11:35.086232: step: 556/464, loss: 0.3626733422279358 2023-01-24 02:11:35.820030: step: 558/464, loss: 0.9509762525558472 2023-01-24 02:11:36.610433: step: 560/464, loss: 2.97688364982605 2023-01-24 02:11:37.490312: step: 562/464, loss: 0.4221142530441284 2023-01-24 02:11:38.173725: step: 564/464, loss: 0.9137882590293884 2023-01-24 02:11:38.931295: step: 566/464, loss: 0.972124457359314 2023-01-24 02:11:39.787715: step: 568/464, loss: 0.9981567859649658 2023-01-24 02:11:40.501381: step: 570/464, loss: 0.873939037322998 2023-01-24 02:11:41.235623: step: 572/464, loss: 0.6504555344581604 2023-01-24 02:11:42.017020: step: 574/464, loss: 0.5079010128974915 2023-01-24 02:11:42.772835: step: 576/464, loss: 0.9543384313583374 2023-01-24 02:11:43.522618: step: 578/464, loss: 0.23754742741584778 2023-01-24 02:11:44.322642: step: 580/464, loss: 0.7818069458007812 2023-01-24 02:11:45.089146: step: 582/464, loss: 0.7598432898521423 2023-01-24 02:11:45.795542: step: 584/464, loss: 0.978622317314148 2023-01-24 02:11:46.522094: step: 586/464, loss: 0.7816836833953857 2023-01-24 02:11:47.337104: step: 588/464, loss: 1.2809606790542603 2023-01-24 02:11:48.099257: step: 590/464, loss: 0.5787965059280396 2023-01-24 02:11:48.881684: step: 592/464, loss: 1.170480489730835 2023-01-24 02:11:49.669335: step: 594/464, loss: 0.41068509221076965 2023-01-24 02:11:50.385984: step: 596/464, loss: 1.8384788036346436 2023-01-24 02:11:51.097534: step: 598/464, loss: 0.7644957304000854 2023-01-24 02:11:51.811635: step: 600/464, loss: 0.5268754363059998 2023-01-24 02:11:52.559173: step: 602/464, loss: 0.7573223114013672 2023-01-24 02:11:53.315715: step: 604/464, loss: 0.5257716774940491 2023-01-24 02:11:54.105030: step: 606/464, loss: 0.8698573112487793 2023-01-24 02:11:54.778571: step: 608/464, loss: 0.6615485548973083 2023-01-24 02:11:55.553303: step: 610/464, loss: 0.40329596400260925 2023-01-24 02:11:56.303609: step: 612/464, loss: 0.7785581350326538 2023-01-24 02:11:57.033740: step: 614/464, loss: 0.8779846429824829 2023-01-24 02:11:57.798646: step: 616/464, loss: 0.89288729429245 2023-01-24 02:11:58.535564: step: 618/464, loss: 0.8979387879371643 2023-01-24 02:11:59.282180: step: 620/464, loss: 0.37278124690055847 2023-01-24 02:12:00.055424: step: 622/464, loss: 0.32420939207077026 2023-01-24 02:12:00.795282: step: 624/464, loss: 0.5986490845680237 2023-01-24 02:12:01.564493: step: 626/464, loss: 1.7960107326507568 2023-01-24 02:12:02.319518: step: 628/464, loss: 0.23462745547294617 2023-01-24 02:12:03.093689: step: 630/464, loss: 0.7956054210662842 2023-01-24 02:12:03.884309: step: 632/464, loss: 2.865629196166992 2023-01-24 02:12:04.644252: step: 634/464, loss: 0.3457973301410675 2023-01-24 02:12:05.468870: step: 636/464, loss: 0.4045425057411194 2023-01-24 02:12:06.273801: step: 638/464, loss: 0.5609143376350403 2023-01-24 02:12:07.103865: step: 640/464, loss: 1.1414687633514404 2023-01-24 02:12:07.847255: step: 642/464, loss: 1.053191900253296 2023-01-24 02:12:08.715149: step: 644/464, loss: 0.66423499584198 2023-01-24 02:12:09.521611: step: 646/464, loss: 1.7464537620544434 2023-01-24 02:12:10.241162: step: 648/464, loss: 0.8280461430549622 2023-01-24 02:12:10.969437: step: 650/464, loss: 1.8533332347869873 2023-01-24 02:12:11.743333: step: 652/464, loss: 0.9674221277236938 2023-01-24 02:12:12.554515: step: 654/464, loss: 0.37536531686782837 2023-01-24 02:12:13.347311: step: 656/464, loss: 2.653020143508911 2023-01-24 02:12:14.143677: step: 658/464, loss: 4.386117458343506 2023-01-24 02:12:14.931076: step: 660/464, loss: 1.0777931213378906 2023-01-24 02:12:15.807146: step: 662/464, loss: 1.7315013408660889 2023-01-24 02:12:16.513258: step: 664/464, loss: 1.4899041652679443 2023-01-24 02:12:17.328365: step: 666/464, loss: 1.2888059616088867 2023-01-24 02:12:18.149309: step: 668/464, loss: 0.9007691144943237 2023-01-24 02:12:18.900199: step: 670/464, loss: 8.064138412475586 2023-01-24 02:12:19.706274: step: 672/464, loss: 0.3794676959514618 2023-01-24 02:12:20.479029: step: 674/464, loss: 0.6640487909317017 2023-01-24 02:12:21.256068: step: 676/464, loss: 1.3708484172821045 2023-01-24 02:12:22.006130: step: 678/464, loss: 0.2608605921268463 2023-01-24 02:12:22.705960: step: 680/464, loss: 0.4714253544807434 2023-01-24 02:12:23.483429: step: 682/464, loss: 0.598812997341156 2023-01-24 02:12:24.259833: step: 684/464, loss: 0.558146595954895 2023-01-24 02:12:25.079529: step: 686/464, loss: 0.0715576708316803 2023-01-24 02:12:25.806343: step: 688/464, loss: 0.5410977005958557 2023-01-24 02:12:26.536048: step: 690/464, loss: 0.7585949897766113 2023-01-24 02:12:27.362081: step: 692/464, loss: 0.643237292766571 2023-01-24 02:12:28.175924: step: 694/464, loss: 0.5445526242256165 2023-01-24 02:12:28.932734: step: 696/464, loss: 0.9776055812835693 2023-01-24 02:12:29.678422: step: 698/464, loss: 0.6344531774520874 2023-01-24 02:12:30.425765: step: 700/464, loss: 0.9945721626281738 2023-01-24 02:12:31.187288: step: 702/464, loss: 0.7680280804634094 2023-01-24 02:12:32.023926: step: 704/464, loss: 0.3382861614227295 2023-01-24 02:12:32.778746: step: 706/464, loss: 0.6660884618759155 2023-01-24 02:12:33.591162: step: 708/464, loss: 1.070008397102356 2023-01-24 02:12:34.377769: step: 710/464, loss: 0.7828474044799805 2023-01-24 02:12:35.170144: step: 712/464, loss: 1.4197325706481934 2023-01-24 02:12:35.886023: step: 714/464, loss: 0.23748788237571716 2023-01-24 02:12:36.666894: step: 716/464, loss: 8.611396789550781 2023-01-24 02:12:37.463533: step: 718/464, loss: 1.331899881362915 2023-01-24 02:12:38.248663: step: 720/464, loss: 2.305028200149536 2023-01-24 02:12:39.120481: step: 722/464, loss: 1.1942238807678223 2023-01-24 02:12:39.908511: step: 724/464, loss: 2.60715913772583 2023-01-24 02:12:40.643866: step: 726/464, loss: 0.2433922439813614 2023-01-24 02:12:41.434281: step: 728/464, loss: 0.3391481637954712 2023-01-24 02:12:42.213451: step: 730/464, loss: 3.575850486755371 2023-01-24 02:12:42.872302: step: 732/464, loss: 1.3523809909820557 2023-01-24 02:12:43.603868: step: 734/464, loss: 1.4300650358200073 2023-01-24 02:12:44.367910: step: 736/464, loss: 0.24131545424461365 2023-01-24 02:12:45.168827: step: 738/464, loss: 1.5541698932647705 2023-01-24 02:12:45.887928: step: 740/464, loss: 0.812211275100708 2023-01-24 02:12:46.730084: step: 742/464, loss: 0.7140324115753174 2023-01-24 02:12:47.450671: step: 744/464, loss: 0.3727984130382538 2023-01-24 02:12:48.215367: step: 746/464, loss: 0.7878866195678711 2023-01-24 02:12:48.994975: step: 748/464, loss: 0.2013627290725708 2023-01-24 02:12:49.679772: step: 750/464, loss: 0.2744179964065552 2023-01-24 02:12:50.469197: step: 752/464, loss: 2.1657464504241943 2023-01-24 02:12:51.198700: step: 754/464, loss: 0.2591649293899536 2023-01-24 02:12:52.006914: step: 756/464, loss: 5.291714668273926 2023-01-24 02:12:52.847287: step: 758/464, loss: 0.6014288663864136 2023-01-24 02:12:53.607874: step: 760/464, loss: 1.4996731281280518 2023-01-24 02:12:54.342440: step: 762/464, loss: 1.117444634437561 2023-01-24 02:12:55.157881: step: 764/464, loss: 0.1649799793958664 2023-01-24 02:12:55.895328: step: 766/464, loss: 1.094132661819458 2023-01-24 02:12:56.668738: step: 768/464, loss: 0.41113656759262085 2023-01-24 02:12:57.440217: step: 770/464, loss: 1.1057980060577393 2023-01-24 02:12:58.279967: step: 772/464, loss: 2.4089059829711914 2023-01-24 02:12:59.112019: step: 774/464, loss: 0.7369115948677063 2023-01-24 02:12:59.812762: step: 776/464, loss: 1.485062599182129 2023-01-24 02:13:00.532269: step: 778/464, loss: 1.239599585533142 2023-01-24 02:13:01.302870: step: 780/464, loss: 1.2481592893600464 2023-01-24 02:13:02.046921: step: 782/464, loss: 0.5724455118179321 2023-01-24 02:13:02.826197: step: 784/464, loss: 0.562556803226471 2023-01-24 02:13:03.651382: step: 786/464, loss: 0.19182422757148743 2023-01-24 02:13:04.417323: step: 788/464, loss: 0.9646680951118469 2023-01-24 02:13:05.173992: step: 790/464, loss: 0.4203810691833496 2023-01-24 02:13:05.922953: step: 792/464, loss: 0.7610728740692139 2023-01-24 02:13:06.633583: step: 794/464, loss: 1.6422280073165894 2023-01-24 02:13:07.294540: step: 796/464, loss: 1.0750470161437988 2023-01-24 02:13:07.970607: step: 798/464, loss: 0.2180919051170349 2023-01-24 02:13:08.767647: step: 800/464, loss: 3.362419366836548 2023-01-24 02:13:09.572972: step: 802/464, loss: 0.798356294631958 2023-01-24 02:13:10.320263: step: 804/464, loss: 0.351754367351532 2023-01-24 02:13:11.301911: step: 806/464, loss: 0.20373542606830597 2023-01-24 02:13:12.101639: step: 808/464, loss: 0.25572940707206726 2023-01-24 02:13:12.847867: step: 810/464, loss: 0.6978384852409363 2023-01-24 02:13:13.509416: step: 812/464, loss: 0.8130373954772949 2023-01-24 02:13:14.313164: step: 814/464, loss: 1.1461539268493652 2023-01-24 02:13:15.085789: step: 816/464, loss: 0.3477003574371338 2023-01-24 02:13:15.871184: step: 818/464, loss: 0.8022814989089966 2023-01-24 02:13:16.742687: step: 820/464, loss: 0.9525044560432434 2023-01-24 02:13:17.456802: step: 822/464, loss: 0.5294411182403564 2023-01-24 02:13:18.280132: step: 824/464, loss: 0.6476122140884399 2023-01-24 02:13:19.109471: step: 826/464, loss: 0.4958522617816925 2023-01-24 02:13:19.930487: step: 828/464, loss: 1.3305891752243042 2023-01-24 02:13:20.699291: step: 830/464, loss: 2.5851356983184814 2023-01-24 02:13:21.504723: step: 832/464, loss: 1.622355341911316 2023-01-24 02:13:22.272779: step: 834/464, loss: 0.9364479184150696 2023-01-24 02:13:23.029101: step: 836/464, loss: 0.2947760224342346 2023-01-24 02:13:23.785844: step: 838/464, loss: 0.1488008052110672 2023-01-24 02:13:24.478974: step: 840/464, loss: 0.41798877716064453 2023-01-24 02:13:25.194611: step: 842/464, loss: 0.5885865688323975 2023-01-24 02:13:25.921650: step: 844/464, loss: 1.004331350326538 2023-01-24 02:13:26.602171: step: 846/464, loss: 0.27680039405822754 2023-01-24 02:13:27.382612: step: 848/464, loss: 0.32235682010650635 2023-01-24 02:13:28.109406: step: 850/464, loss: 1.527274250984192 2023-01-24 02:13:28.878260: step: 852/464, loss: 0.4541100263595581 2023-01-24 02:13:29.594989: step: 854/464, loss: 1.2514349222183228 2023-01-24 02:13:30.286878: step: 856/464, loss: 1.489627718925476 2023-01-24 02:13:31.211709: step: 858/464, loss: 0.9419457316398621 2023-01-24 02:13:31.968889: step: 860/464, loss: 0.7608920931816101 2023-01-24 02:13:32.712318: step: 862/464, loss: 0.3095041513442993 2023-01-24 02:13:33.476465: step: 864/464, loss: 2.6239664554595947 2023-01-24 02:13:34.238435: step: 866/464, loss: 5.607526779174805 2023-01-24 02:13:35.108371: step: 868/464, loss: 1.5280218124389648 2023-01-24 02:13:35.895447: step: 870/464, loss: 0.5562810897827148 2023-01-24 02:13:36.644574: step: 872/464, loss: 0.493901789188385 2023-01-24 02:13:37.344986: step: 874/464, loss: 0.5550905466079712 2023-01-24 02:13:38.271895: step: 876/464, loss: 0.6465400457382202 2023-01-24 02:13:39.090179: step: 878/464, loss: 0.8182223439216614 2023-01-24 02:13:39.794701: step: 880/464, loss: 0.44080498814582825 2023-01-24 02:13:40.546011: step: 882/464, loss: 0.8962292671203613 2023-01-24 02:13:41.200836: step: 884/464, loss: 0.7163071632385254 2023-01-24 02:13:42.016196: step: 886/464, loss: 0.781548023223877 2023-01-24 02:13:42.845295: step: 888/464, loss: 0.6812397241592407 2023-01-24 02:13:43.643430: step: 890/464, loss: 1.0619182586669922 2023-01-24 02:13:44.381122: step: 892/464, loss: 0.78568434715271 2023-01-24 02:13:45.138136: step: 894/464, loss: 0.39763808250427246 2023-01-24 02:13:45.868702: step: 896/464, loss: 1.4507485628128052 2023-01-24 02:13:46.623182: step: 898/464, loss: 0.8367409706115723 2023-01-24 02:13:47.369568: step: 900/464, loss: 0.7433250546455383 2023-01-24 02:13:48.137431: step: 902/464, loss: 1.7983527183532715 2023-01-24 02:13:48.866166: step: 904/464, loss: 0.7389869689941406 2023-01-24 02:13:49.619994: step: 906/464, loss: 0.3523313105106354 2023-01-24 02:13:50.295997: step: 908/464, loss: 0.7650223970413208 2023-01-24 02:13:51.049759: step: 910/464, loss: 0.5269652009010315 2023-01-24 02:13:51.828865: step: 912/464, loss: 4.824460029602051 2023-01-24 02:13:52.615014: step: 914/464, loss: 2.659306526184082 2023-01-24 02:13:53.374192: step: 916/464, loss: 0.8726629018783569 2023-01-24 02:13:54.110238: step: 918/464, loss: 1.209802508354187 2023-01-24 02:13:54.881328: step: 920/464, loss: 1.4985301494598389 2023-01-24 02:13:55.634276: step: 922/464, loss: 0.36272984743118286 2023-01-24 02:13:56.353560: step: 924/464, loss: 0.9055819511413574 2023-01-24 02:13:57.096519: step: 926/464, loss: 0.9490545392036438 2023-01-24 02:13:57.935521: step: 928/464, loss: 1.9158504009246826 2023-01-24 02:13:58.598267: step: 930/464, loss: 0.1756402850151062 ================================================== Loss: 1.044 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3345195392377826, 'r': 0.3041086720343478, 'f1': 0.3185900373693168}, 'combined': 0.23475055385107552, 'epoch': 4} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3174410049953423, 'r': 0.22522000156784164, 'f1': 0.26349438333717423}, 'combined': 0.163643880177824, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3181875044571755, 'r': 0.29947059243028284, 'f1': 0.3085454588675641}, 'combined': 0.22734928548136302, 'epoch': 4} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3159954405150347, 'r': 0.22388214510798404, 'f1': 0.26208066032305366}, 'combined': 0.1627658837795807, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3360532326750742, 'r': 0.30544496859840714, 'f1': 0.3200188836011144}, 'combined': 0.2358033879166106, 'epoch': 4} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3238311636676841, 'r': 0.22674568659177094, 'f1': 0.26672868469148}, 'combined': 0.16565255154523495, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.3142857142857143, 'f1': 0.3235294117647059}, 'combined': 0.21568627450980393, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24193548387096775, 'r': 0.32608695652173914, 'f1': 0.2777777777777778}, 'combined': 0.1388888888888889, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5446428571428571, 'r': 0.2629310344827586, 'f1': 0.3546511627906977}, 'combined': 0.2364341085271318, 'epoch': 4} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3345195392377826, 'r': 0.3041086720343478, 'f1': 0.3185900373693168}, 'combined': 0.23475055385107552, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3174410049953423, 'r': 0.22522000156784164, 'f1': 0.26349438333717423}, 'combined': 0.163643880177824, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.3142857142857143, 'f1': 0.3235294117647059}, 'combined': 0.21568627450980393, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3181875044571755, 'r': 0.29947059243028284, 'f1': 0.3085454588675641}, 'combined': 0.22734928548136302, 'epoch': 4} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3159954405150347, 'r': 0.22388214510798404, 'f1': 0.26208066032305366}, 'combined': 0.1627658837795807, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24193548387096775, 'r': 0.32608695652173914, 'f1': 0.2777777777777778}, 'combined': 0.1388888888888889, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3360532326750742, 'r': 0.30544496859840714, 'f1': 0.3200188836011144}, 'combined': 0.2358033879166106, 'epoch': 4} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3238311636676841, 'r': 0.22674568659177094, 'f1': 0.26672868469148}, 'combined': 0.16565255154523495, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5446428571428571, 'r': 0.2629310344827586, 'f1': 0.3546511627906977}, 'combined': 0.2364341085271318, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:16:59.165583: step: 2/464, loss: 0.6496837139129639 2023-01-24 02:17:00.006257: step: 4/464, loss: 0.6422550678253174 2023-01-24 02:17:00.744452: step: 6/464, loss: 1.8030405044555664 2023-01-24 02:17:01.502190: step: 8/464, loss: 0.9896953105926514 2023-01-24 02:17:02.248058: step: 10/464, loss: 0.6033563613891602 2023-01-24 02:17:03.047761: step: 12/464, loss: 1.4169790744781494 2023-01-24 02:17:03.785687: step: 14/464, loss: 0.4817117750644684 2023-01-24 02:17:04.495823: step: 16/464, loss: 0.47974684834480286 2023-01-24 02:17:05.272470: step: 18/464, loss: 5.893843650817871 2023-01-24 02:17:06.050820: step: 20/464, loss: 1.6159776449203491 2023-01-24 02:17:06.738784: step: 22/464, loss: 1.6918559074401855 2023-01-24 02:17:07.460149: step: 24/464, loss: 0.6972661018371582 2023-01-24 02:17:08.231244: step: 26/464, loss: 0.8587067127227783 2023-01-24 02:17:08.987868: step: 28/464, loss: 0.42765671014785767 2023-01-24 02:17:09.759461: step: 30/464, loss: 0.6781284809112549 2023-01-24 02:17:10.541681: step: 32/464, loss: 0.2493610978126526 2023-01-24 02:17:11.443577: step: 34/464, loss: 0.9981824159622192 2023-01-24 02:17:12.243127: step: 36/464, loss: 0.4823274314403534 2023-01-24 02:17:12.958165: step: 38/464, loss: 0.4170873761177063 2023-01-24 02:17:13.738646: step: 40/464, loss: 0.6056738495826721 2023-01-24 02:17:14.445929: step: 42/464, loss: 0.22396408021450043 2023-01-24 02:17:15.464076: step: 44/464, loss: 1.1149306297302246 2023-01-24 02:17:16.161148: step: 46/464, loss: 0.6650685667991638 2023-01-24 02:17:16.869836: step: 48/464, loss: 0.6071967482566833 2023-01-24 02:17:17.673817: step: 50/464, loss: 0.4230685234069824 2023-01-24 02:17:18.543497: step: 52/464, loss: 2.148970603942871 2023-01-24 02:17:19.261775: step: 54/464, loss: 0.40129613876342773 2023-01-24 02:17:20.054404: step: 56/464, loss: 0.5853995680809021 2023-01-24 02:17:20.753526: step: 58/464, loss: 0.43005678057670593 2023-01-24 02:17:21.623784: step: 60/464, loss: 0.14942124485969543 2023-01-24 02:17:22.440683: step: 62/464, loss: 0.8140151500701904 2023-01-24 02:17:23.230470: step: 64/464, loss: 0.20544444024562836 2023-01-24 02:17:23.976857: step: 66/464, loss: 0.3382854759693146 2023-01-24 02:17:24.727610: step: 68/464, loss: 0.4904332160949707 2023-01-24 02:17:25.486946: step: 70/464, loss: 0.642681360244751 2023-01-24 02:17:26.192808: step: 72/464, loss: 0.7253457307815552 2023-01-24 02:17:26.931183: step: 74/464, loss: 0.7383750677108765 2023-01-24 02:17:27.623163: step: 76/464, loss: 0.6518418788909912 2023-01-24 02:17:28.378834: step: 78/464, loss: 1.713513731956482 2023-01-24 02:17:29.089404: step: 80/464, loss: 0.5307286381721497 2023-01-24 02:17:29.781025: step: 82/464, loss: 0.26544585824012756 2023-01-24 02:17:30.516754: step: 84/464, loss: 0.14702025055885315 2023-01-24 02:17:31.306253: step: 86/464, loss: 0.34280622005462646 2023-01-24 02:17:32.039231: step: 88/464, loss: 0.8543655276298523 2023-01-24 02:17:32.747179: step: 90/464, loss: 1.2161846160888672 2023-01-24 02:17:33.501959: step: 92/464, loss: 0.30192968249320984 2023-01-24 02:17:34.221516: step: 94/464, loss: 1.5269676446914673 2023-01-24 02:17:34.945958: step: 96/464, loss: 0.28412994742393494 2023-01-24 02:17:35.758134: step: 98/464, loss: 6.153512001037598 2023-01-24 02:17:36.564648: step: 100/464, loss: 1.3124456405639648 2023-01-24 02:17:37.393032: step: 102/464, loss: 1.2780077457427979 2023-01-24 02:17:38.172715: step: 104/464, loss: 0.7583173513412476 2023-01-24 02:17:38.984495: step: 106/464, loss: 0.5176824331283569 2023-01-24 02:17:39.775757: step: 108/464, loss: 0.8629632592201233 2023-01-24 02:17:40.500324: step: 110/464, loss: 0.9889530539512634 2023-01-24 02:17:41.188926: step: 112/464, loss: 0.48290348052978516 2023-01-24 02:17:41.942344: step: 114/464, loss: 1.1069570779800415 2023-01-24 02:17:42.731451: step: 116/464, loss: 0.26499226689338684 2023-01-24 02:17:43.530859: step: 118/464, loss: 0.9873026609420776 2023-01-24 02:17:44.259730: step: 120/464, loss: 0.37587887048721313 2023-01-24 02:17:44.975936: step: 122/464, loss: 2.3691699504852295 2023-01-24 02:17:45.863457: step: 124/464, loss: 0.31297823786735535 2023-01-24 02:17:46.572786: step: 126/464, loss: 0.3409603536128998 2023-01-24 02:17:47.314683: step: 128/464, loss: 1.7431035041809082 2023-01-24 02:17:48.111508: step: 130/464, loss: 0.7571587562561035 2023-01-24 02:17:48.975996: step: 132/464, loss: 0.7416355609893799 2023-01-24 02:17:49.727233: step: 134/464, loss: 0.8539094924926758 2023-01-24 02:17:50.475264: step: 136/464, loss: 1.114874243736267 2023-01-24 02:17:51.306386: step: 138/464, loss: 1.1200034618377686 2023-01-24 02:17:52.036176: step: 140/464, loss: 0.6744300723075867 2023-01-24 02:17:52.787396: step: 142/464, loss: 0.36426013708114624 2023-01-24 02:17:53.500715: step: 144/464, loss: 0.6616504788398743 2023-01-24 02:17:54.188421: step: 146/464, loss: 0.7991723418235779 2023-01-24 02:17:54.948310: step: 148/464, loss: 0.3798237442970276 2023-01-24 02:17:55.689537: step: 150/464, loss: 0.30810824036598206 2023-01-24 02:17:56.441166: step: 152/464, loss: 0.7624216079711914 2023-01-24 02:17:57.240306: step: 154/464, loss: 0.20280928909778595 2023-01-24 02:17:58.065664: step: 156/464, loss: 0.28102943301200867 2023-01-24 02:17:58.812212: step: 158/464, loss: 0.8951029181480408 2023-01-24 02:17:59.558034: step: 160/464, loss: 2.415346622467041 2023-01-24 02:18:00.337653: step: 162/464, loss: 1.141200065612793 2023-01-24 02:18:01.046845: step: 164/464, loss: 0.28005051612854004 2023-01-24 02:18:01.742866: step: 166/464, loss: 1.085160732269287 2023-01-24 02:18:02.515186: step: 168/464, loss: 0.33003517985343933 2023-01-24 02:18:03.231007: step: 170/464, loss: 0.5919443964958191 2023-01-24 02:18:03.986171: step: 172/464, loss: 0.7433185577392578 2023-01-24 02:18:04.696341: step: 174/464, loss: 0.7050065994262695 2023-01-24 02:18:05.515018: step: 176/464, loss: 0.946040153503418 2023-01-24 02:18:06.223804: step: 178/464, loss: 0.3946067988872528 2023-01-24 02:18:07.018527: step: 180/464, loss: 0.8988597989082336 2023-01-24 02:18:07.850406: step: 182/464, loss: 0.24341298639774323 2023-01-24 02:18:08.613922: step: 184/464, loss: 0.634074330329895 2023-01-24 02:18:09.389974: step: 186/464, loss: 0.6246568560600281 2023-01-24 02:18:10.187409: step: 188/464, loss: 0.496273934841156 2023-01-24 02:18:10.907177: step: 190/464, loss: 0.5497092008590698 2023-01-24 02:18:11.642715: step: 192/464, loss: 1.4893434047698975 2023-01-24 02:18:12.397491: step: 194/464, loss: 0.7866815328598022 2023-01-24 02:18:13.189811: step: 196/464, loss: 1.3385767936706543 2023-01-24 02:18:13.946347: step: 198/464, loss: 2.3288447856903076 2023-01-24 02:18:14.745074: step: 200/464, loss: 0.9130333662033081 2023-01-24 02:18:15.512469: step: 202/464, loss: 0.2995184361934662 2023-01-24 02:18:16.361856: step: 204/464, loss: 0.38763970136642456 2023-01-24 02:18:17.220697: step: 206/464, loss: 0.16953855752944946 2023-01-24 02:18:17.947879: step: 208/464, loss: 0.316944420337677 2023-01-24 02:18:18.662835: step: 210/464, loss: 7.582270622253418 2023-01-24 02:18:19.443068: step: 212/464, loss: 1.0553747415542603 2023-01-24 02:18:20.196767: step: 214/464, loss: 0.27340349555015564 2023-01-24 02:18:21.034153: step: 216/464, loss: 0.4284445345401764 2023-01-24 02:18:21.880276: step: 218/464, loss: 0.3169950246810913 2023-01-24 02:18:22.614581: step: 220/464, loss: 0.38640186190605164 2023-01-24 02:18:23.369905: step: 222/464, loss: 1.3186635971069336 2023-01-24 02:18:24.178667: step: 224/464, loss: 0.8435964584350586 2023-01-24 02:18:25.077327: step: 226/464, loss: 0.3275609016418457 2023-01-24 02:18:25.816392: step: 228/464, loss: 0.5751467347145081 2023-01-24 02:18:26.619182: step: 230/464, loss: 0.3827030658721924 2023-01-24 02:18:27.367323: step: 232/464, loss: 0.6583121418952942 2023-01-24 02:18:28.083949: step: 234/464, loss: 0.6310871243476868 2023-01-24 02:18:28.935133: step: 236/464, loss: 1.0897424221038818 2023-01-24 02:18:29.708554: step: 238/464, loss: 0.5430383682250977 2023-01-24 02:18:30.446065: step: 240/464, loss: 4.718739986419678 2023-01-24 02:18:31.260242: step: 242/464, loss: 0.5165674090385437 2023-01-24 02:18:31.970924: step: 244/464, loss: 0.4952998757362366 2023-01-24 02:18:32.679354: step: 246/464, loss: 0.6369624137878418 2023-01-24 02:18:33.522474: step: 248/464, loss: 0.6831679344177246 2023-01-24 02:18:34.222957: step: 250/464, loss: 0.6541798710823059 2023-01-24 02:18:34.998808: step: 252/464, loss: 3.147430181503296 2023-01-24 02:18:35.697223: step: 254/464, loss: 0.6115115880966187 2023-01-24 02:18:36.516555: step: 256/464, loss: 0.977453351020813 2023-01-24 02:18:37.268785: step: 258/464, loss: 0.7032570838928223 2023-01-24 02:18:38.042437: step: 260/464, loss: 0.6393693089485168 2023-01-24 02:18:38.746681: step: 262/464, loss: 0.581774115562439 2023-01-24 02:18:39.426955: step: 264/464, loss: 1.0034289360046387 2023-01-24 02:18:40.230864: step: 266/464, loss: 0.18100430071353912 2023-01-24 02:18:41.018222: step: 268/464, loss: 0.1348925530910492 2023-01-24 02:18:41.818573: step: 270/464, loss: 0.5453640818595886 2023-01-24 02:18:42.542878: step: 272/464, loss: 0.20027294754981995 2023-01-24 02:18:43.513152: step: 274/464, loss: 0.9242507219314575 2023-01-24 02:18:44.284595: step: 276/464, loss: 0.49561578035354614 2023-01-24 02:18:44.948433: step: 278/464, loss: 1.211693286895752 2023-01-24 02:18:45.754713: step: 280/464, loss: 0.40580329298973083 2023-01-24 02:18:46.508585: step: 282/464, loss: 1.4561851024627686 2023-01-24 02:18:47.280343: step: 284/464, loss: 1.5789389610290527 2023-01-24 02:18:48.085207: step: 286/464, loss: 0.38022440671920776 2023-01-24 02:18:48.844843: step: 288/464, loss: 1.5119850635528564 2023-01-24 02:18:49.606850: step: 290/464, loss: 0.8197949528694153 2023-01-24 02:18:50.409391: step: 292/464, loss: 0.18052376806735992 2023-01-24 02:18:51.179910: step: 294/464, loss: 1.1562833786010742 2023-01-24 02:18:51.931379: step: 296/464, loss: 0.34153157472610474 2023-01-24 02:18:52.684039: step: 298/464, loss: 0.3635067939758301 2023-01-24 02:18:53.504474: step: 300/464, loss: 2.3166756629943848 2023-01-24 02:18:54.250955: step: 302/464, loss: 0.3704625070095062 2023-01-24 02:18:55.040861: step: 304/464, loss: 0.5335472226142883 2023-01-24 02:18:55.784692: step: 306/464, loss: 0.5522812008857727 2023-01-24 02:18:56.587691: step: 308/464, loss: 0.7435985207557678 2023-01-24 02:18:57.349153: step: 310/464, loss: 0.5566904544830322 2023-01-24 02:18:58.122890: step: 312/464, loss: 0.5143800377845764 2023-01-24 02:18:58.886450: step: 314/464, loss: 2.127366065979004 2023-01-24 02:18:59.591792: step: 316/464, loss: 1.1592189073562622 2023-01-24 02:19:00.397539: step: 318/464, loss: 0.45830056071281433 2023-01-24 02:19:01.153783: step: 320/464, loss: 1.5356149673461914 2023-01-24 02:19:01.900129: step: 322/464, loss: 0.8824881315231323 2023-01-24 02:19:02.654179: step: 324/464, loss: 0.9135230779647827 2023-01-24 02:19:03.593879: step: 326/464, loss: 0.4782622158527374 2023-01-24 02:19:04.227736: step: 328/464, loss: 0.4283546209335327 2023-01-24 02:19:04.984005: step: 330/464, loss: 1.4542497396469116 2023-01-24 02:19:05.704588: step: 332/464, loss: 1.019738793373108 2023-01-24 02:19:06.454432: step: 334/464, loss: 0.6632002592086792 2023-01-24 02:19:07.209361: step: 336/464, loss: 0.8099590539932251 2023-01-24 02:19:08.005142: step: 338/464, loss: 1.555448055267334 2023-01-24 02:19:08.715572: step: 340/464, loss: 0.9206175804138184 2023-01-24 02:19:09.454098: step: 342/464, loss: 0.35792335867881775 2023-01-24 02:19:10.231964: step: 344/464, loss: 0.3381599187850952 2023-01-24 02:19:11.032658: step: 346/464, loss: 0.16699723899364471 2023-01-24 02:19:11.708255: step: 348/464, loss: 0.5061295628547668 2023-01-24 02:19:12.594177: step: 350/464, loss: 1.0372631549835205 2023-01-24 02:19:13.403693: step: 352/464, loss: 0.6776844263076782 2023-01-24 02:19:14.277746: step: 354/464, loss: 0.32754576206207275 2023-01-24 02:19:15.030355: step: 356/464, loss: 0.9929206371307373 2023-01-24 02:19:15.799708: step: 358/464, loss: 1.648221731185913 2023-01-24 02:19:16.540702: step: 360/464, loss: 0.7910313606262207 2023-01-24 02:19:17.310172: step: 362/464, loss: 0.3531991243362427 2023-01-24 02:19:18.083364: step: 364/464, loss: 7.432271957397461 2023-01-24 02:19:18.902506: step: 366/464, loss: 0.783743143081665 2023-01-24 02:19:19.714366: step: 368/464, loss: 0.7211997509002686 2023-01-24 02:19:20.480427: step: 370/464, loss: 4.978240966796875 2023-01-24 02:19:21.229015: step: 372/464, loss: 1.7499102354049683 2023-01-24 02:19:22.086658: step: 374/464, loss: 0.985282301902771 2023-01-24 02:19:22.890626: step: 376/464, loss: 0.3552509546279907 2023-01-24 02:19:23.650253: step: 378/464, loss: 3.0569443702697754 2023-01-24 02:19:24.502149: step: 380/464, loss: 0.9991600513458252 2023-01-24 02:19:25.324079: step: 382/464, loss: 0.6255131363868713 2023-01-24 02:19:26.056972: step: 384/464, loss: 0.280425488948822 2023-01-24 02:19:26.801784: step: 386/464, loss: 0.9654706716537476 2023-01-24 02:19:27.613275: step: 388/464, loss: 0.7268888354301453 2023-01-24 02:19:28.337484: step: 390/464, loss: 0.5317648649215698 2023-01-24 02:19:29.140939: step: 392/464, loss: 0.3457382917404175 2023-01-24 02:19:29.947529: step: 394/464, loss: 1.3924022912979126 2023-01-24 02:19:30.761505: step: 396/464, loss: 0.32296717166900635 2023-01-24 02:19:31.600170: step: 398/464, loss: 0.6169033050537109 2023-01-24 02:19:32.435035: step: 400/464, loss: 0.20771922171115875 2023-01-24 02:19:33.177530: step: 402/464, loss: 0.8123390674591064 2023-01-24 02:19:34.081162: step: 404/464, loss: 0.37989774346351624 2023-01-24 02:19:34.877439: step: 406/464, loss: 1.4054900407791138 2023-01-24 02:19:35.660433: step: 408/464, loss: 1.6585242748260498 2023-01-24 02:19:36.421248: step: 410/464, loss: 0.742591381072998 2023-01-24 02:19:37.294113: step: 412/464, loss: 3.3262979984283447 2023-01-24 02:19:37.974104: step: 414/464, loss: 0.4885295331478119 2023-01-24 02:19:38.783405: step: 416/464, loss: 0.6670538187026978 2023-01-24 02:19:39.542898: step: 418/464, loss: 0.8947754502296448 2023-01-24 02:19:40.408815: step: 420/464, loss: 1.3545454740524292 2023-01-24 02:19:41.205518: step: 422/464, loss: 1.00066077709198 2023-01-24 02:19:41.982835: step: 424/464, loss: 1.192989468574524 2023-01-24 02:19:42.703861: step: 426/464, loss: 0.8917605876922607 2023-01-24 02:19:43.408609: step: 428/464, loss: 0.6383041143417358 2023-01-24 02:19:44.163247: step: 430/464, loss: 1.215901255607605 2023-01-24 02:19:44.892696: step: 432/464, loss: 0.7293286323547363 2023-01-24 02:19:45.635570: step: 434/464, loss: 0.6771726012229919 2023-01-24 02:19:46.581684: step: 436/464, loss: 0.7097762823104858 2023-01-24 02:19:47.254239: step: 438/464, loss: 0.4456166923046112 2023-01-24 02:19:48.009362: step: 440/464, loss: 0.5973028540611267 2023-01-24 02:19:48.720375: step: 442/464, loss: 0.6711571216583252 2023-01-24 02:19:49.594432: step: 444/464, loss: 0.19482938945293427 2023-01-24 02:19:50.370652: step: 446/464, loss: 0.2720489203929901 2023-01-24 02:19:51.112042: step: 448/464, loss: 2.246628999710083 2023-01-24 02:19:51.854245: step: 450/464, loss: 0.6945271492004395 2023-01-24 02:19:52.598934: step: 452/464, loss: 1.3300384283065796 2023-01-24 02:19:53.328986: step: 454/464, loss: 0.28272974491119385 2023-01-24 02:19:54.149100: step: 456/464, loss: 1.008739709854126 2023-01-24 02:19:54.881298: step: 458/464, loss: 1.3190908432006836 2023-01-24 02:19:55.566806: step: 460/464, loss: 1.1253019571304321 2023-01-24 02:19:56.293911: step: 462/464, loss: 0.7815342545509338 2023-01-24 02:19:57.136778: step: 464/464, loss: 0.5137872695922852 2023-01-24 02:19:57.902150: step: 466/464, loss: 0.7086704969406128 2023-01-24 02:19:58.772962: step: 468/464, loss: 0.1647951453924179 2023-01-24 02:19:59.522806: step: 470/464, loss: 1.607774019241333 2023-01-24 02:20:00.336675: step: 472/464, loss: 0.23846077919006348 2023-01-24 02:20:01.062364: step: 474/464, loss: 0.39363813400268555 2023-01-24 02:20:01.883222: step: 476/464, loss: 0.29684215784072876 2023-01-24 02:20:02.616875: step: 478/464, loss: 0.08082844316959381 2023-01-24 02:20:03.333252: step: 480/464, loss: 0.9043727517127991 2023-01-24 02:20:04.075489: step: 482/464, loss: 1.3557593822479248 2023-01-24 02:20:04.822477: step: 484/464, loss: 3.4816226959228516 2023-01-24 02:20:05.608146: step: 486/464, loss: 0.8969863653182983 2023-01-24 02:20:06.373831: step: 488/464, loss: 0.7074337601661682 2023-01-24 02:20:07.127663: step: 490/464, loss: 0.8259276747703552 2023-01-24 02:20:07.878060: step: 492/464, loss: 0.4520622491836548 2023-01-24 02:20:08.656845: step: 494/464, loss: 0.7717126607894897 2023-01-24 02:20:09.433538: step: 496/464, loss: 0.25604677200317383 2023-01-24 02:20:10.165876: step: 498/464, loss: 0.6380689740180969 2023-01-24 02:20:10.959297: step: 500/464, loss: 0.42727962136268616 2023-01-24 02:20:11.729339: step: 502/464, loss: 0.3959901034832001 2023-01-24 02:20:12.533183: step: 504/464, loss: 0.4253363311290741 2023-01-24 02:20:13.296553: step: 506/464, loss: 0.8880304098129272 2023-01-24 02:20:14.072824: step: 508/464, loss: 0.2152801901102066 2023-01-24 02:20:14.855176: step: 510/464, loss: 0.9369227886199951 2023-01-24 02:20:15.598806: step: 512/464, loss: 2.3638715744018555 2023-01-24 02:20:16.348348: step: 514/464, loss: 0.42224061489105225 2023-01-24 02:20:17.119491: step: 516/464, loss: 0.343061238527298 2023-01-24 02:20:17.891024: step: 518/464, loss: 0.5075637102127075 2023-01-24 02:20:18.569074: step: 520/464, loss: 0.6902667880058289 2023-01-24 02:20:19.451480: step: 522/464, loss: 0.6213053464889526 2023-01-24 02:20:20.305271: step: 524/464, loss: 15.072422981262207 2023-01-24 02:20:21.041424: step: 526/464, loss: 2.107042074203491 2023-01-24 02:20:21.788003: step: 528/464, loss: 0.17740926146507263 2023-01-24 02:20:22.545852: step: 530/464, loss: 2.396019697189331 2023-01-24 02:20:23.273486: step: 532/464, loss: 1.1157536506652832 2023-01-24 02:20:23.973897: step: 534/464, loss: 0.7146736979484558 2023-01-24 02:20:24.773179: step: 536/464, loss: 0.8112752437591553 2023-01-24 02:20:25.559044: step: 538/464, loss: 0.6888623833656311 2023-01-24 02:20:26.341415: step: 540/464, loss: 0.2979349195957184 2023-01-24 02:20:27.089865: step: 542/464, loss: 0.2549689710140228 2023-01-24 02:20:27.871960: step: 544/464, loss: 2.5347049236297607 2023-01-24 02:20:28.685792: step: 546/464, loss: 0.4879702925682068 2023-01-24 02:20:29.405498: step: 548/464, loss: 0.24486508965492249 2023-01-24 02:20:30.122800: step: 550/464, loss: 1.338243842124939 2023-01-24 02:20:30.848150: step: 552/464, loss: 2.470813274383545 2023-01-24 02:20:31.602633: step: 554/464, loss: 0.2586001455783844 2023-01-24 02:20:32.416745: step: 556/464, loss: 0.21788036823272705 2023-01-24 02:20:33.123938: step: 558/464, loss: 0.6774846315383911 2023-01-24 02:20:33.886673: step: 560/464, loss: 0.31087690591812134 2023-01-24 02:20:34.707357: step: 562/464, loss: 1.0382733345031738 2023-01-24 02:20:35.432026: step: 564/464, loss: 0.28306156396865845 2023-01-24 02:20:36.151858: step: 566/464, loss: 0.713935911655426 2023-01-24 02:20:37.025721: step: 568/464, loss: 0.38265261054039 2023-01-24 02:20:37.783510: step: 570/464, loss: 0.7601750493049622 2023-01-24 02:20:38.515259: step: 572/464, loss: 0.7853434681892395 2023-01-24 02:20:39.259696: step: 574/464, loss: 0.24956616759300232 2023-01-24 02:20:40.015348: step: 576/464, loss: 0.48518678545951843 2023-01-24 02:20:40.776989: step: 578/464, loss: 0.6642841100692749 2023-01-24 02:20:41.497468: step: 580/464, loss: 0.8533198237419128 2023-01-24 02:20:42.238496: step: 582/464, loss: 0.4441028833389282 2023-01-24 02:20:42.952218: step: 584/464, loss: 0.8509069085121155 2023-01-24 02:20:43.752028: step: 586/464, loss: 0.816931426525116 2023-01-24 02:20:44.516658: step: 588/464, loss: 0.8874893188476562 2023-01-24 02:20:45.325013: step: 590/464, loss: 1.2424025535583496 2023-01-24 02:20:46.062026: step: 592/464, loss: 0.39771029353141785 2023-01-24 02:20:46.824096: step: 594/464, loss: 0.2105729579925537 2023-01-24 02:20:47.594957: step: 596/464, loss: 1.876196265220642 2023-01-24 02:20:48.448715: step: 598/464, loss: 0.22702628374099731 2023-01-24 02:20:49.124465: step: 600/464, loss: 4.492393970489502 2023-01-24 02:20:49.913181: step: 602/464, loss: 0.18425554037094116 2023-01-24 02:20:50.626848: step: 604/464, loss: 0.5201539993286133 2023-01-24 02:20:51.465940: step: 606/464, loss: 0.30435648560523987 2023-01-24 02:20:52.208943: step: 608/464, loss: 0.6498906016349792 2023-01-24 02:20:52.961694: step: 610/464, loss: 1.6036509275436401 2023-01-24 02:20:53.734111: step: 612/464, loss: 1.4098213911056519 2023-01-24 02:20:54.491305: step: 614/464, loss: 0.24880897998809814 2023-01-24 02:20:55.262291: step: 616/464, loss: 1.1465108394622803 2023-01-24 02:20:56.080698: step: 618/464, loss: 2.580686569213867 2023-01-24 02:20:56.887163: step: 620/464, loss: 0.45269080996513367 2023-01-24 02:20:57.694131: step: 622/464, loss: 5.887742042541504 2023-01-24 02:20:58.475030: step: 624/464, loss: 0.5069065690040588 2023-01-24 02:20:59.237632: step: 626/464, loss: 0.25332337617874146 2023-01-24 02:20:59.973359: step: 628/464, loss: 1.8866604566574097 2023-01-24 02:21:00.678280: step: 630/464, loss: 0.4924744963645935 2023-01-24 02:21:01.404992: step: 632/464, loss: 1.128131628036499 2023-01-24 02:21:02.133422: step: 634/464, loss: 0.3855154812335968 2023-01-24 02:21:02.922842: step: 636/464, loss: 0.3899562954902649 2023-01-24 02:21:03.656014: step: 638/464, loss: 2.0404696464538574 2023-01-24 02:21:04.474979: step: 640/464, loss: 0.23224273324012756 2023-01-24 02:21:05.219525: step: 642/464, loss: 0.9769107103347778 2023-01-24 02:21:06.053680: step: 644/464, loss: 1.0341360569000244 2023-01-24 02:21:06.821591: step: 646/464, loss: 1.0036234855651855 2023-01-24 02:21:07.557766: step: 648/464, loss: 0.36653128266334534 2023-01-24 02:21:08.296457: step: 650/464, loss: 0.6084499955177307 2023-01-24 02:21:09.122930: step: 652/464, loss: 0.6824120283126831 2023-01-24 02:21:09.909217: step: 654/464, loss: 1.4943143129348755 2023-01-24 02:21:10.599444: step: 656/464, loss: 0.5834726691246033 2023-01-24 02:21:11.353017: step: 658/464, loss: 0.48704636096954346 2023-01-24 02:21:12.080524: step: 660/464, loss: 1.881295919418335 2023-01-24 02:21:12.811580: step: 662/464, loss: 0.5884661078453064 2023-01-24 02:21:13.590549: step: 664/464, loss: 0.8509942293167114 2023-01-24 02:21:14.336973: step: 666/464, loss: 0.8986581563949585 2023-01-24 02:21:15.186622: step: 668/464, loss: 0.965084433555603 2023-01-24 02:21:15.965771: step: 670/464, loss: 0.47922876477241516 2023-01-24 02:21:16.784720: step: 672/464, loss: 0.6889102458953857 2023-01-24 02:21:17.546034: step: 674/464, loss: 0.4863603413105011 2023-01-24 02:21:18.289078: step: 676/464, loss: 0.4932281970977783 2023-01-24 02:21:19.028350: step: 678/464, loss: 0.7120741009712219 2023-01-24 02:21:19.805616: step: 680/464, loss: 1.7616684436798096 2023-01-24 02:21:20.520247: step: 682/464, loss: 5.395840644836426 2023-01-24 02:21:21.291815: step: 684/464, loss: 0.5092490315437317 2023-01-24 02:21:22.037011: step: 686/464, loss: 0.4408661723136902 2023-01-24 02:21:22.743550: step: 688/464, loss: 0.6526728272438049 2023-01-24 02:21:23.498755: step: 690/464, loss: 0.4350407123565674 2023-01-24 02:21:24.227200: step: 692/464, loss: 0.9150271415710449 2023-01-24 02:21:25.008278: step: 694/464, loss: 0.9353266358375549 2023-01-24 02:21:25.794859: step: 696/464, loss: 0.6587817072868347 2023-01-24 02:21:26.578637: step: 698/464, loss: 0.34132251143455505 2023-01-24 02:21:27.347970: step: 700/464, loss: 0.9389505982398987 2023-01-24 02:21:28.060479: step: 702/464, loss: 0.8931823968887329 2023-01-24 02:21:28.810624: step: 704/464, loss: 0.45848965644836426 2023-01-24 02:21:29.507001: step: 706/464, loss: 0.5228652358055115 2023-01-24 02:21:30.278690: step: 708/464, loss: 0.7406331896781921 2023-01-24 02:21:31.095064: step: 710/464, loss: 1.119754672050476 2023-01-24 02:21:31.881346: step: 712/464, loss: 0.46346792578697205 2023-01-24 02:21:32.572095: step: 714/464, loss: 0.7823930382728577 2023-01-24 02:21:33.301110: step: 716/464, loss: 0.3420368731021881 2023-01-24 02:21:34.080345: step: 718/464, loss: 0.6503040194511414 2023-01-24 02:21:34.821252: step: 720/464, loss: 0.41361570358276367 2023-01-24 02:21:35.622198: step: 722/464, loss: 0.8351732492446899 2023-01-24 02:21:36.436495: step: 724/464, loss: 0.37005409598350525 2023-01-24 02:21:37.192070: step: 726/464, loss: 6.30983829498291 2023-01-24 02:21:37.958413: step: 728/464, loss: 1.8675658702850342 2023-01-24 02:21:38.718744: step: 730/464, loss: 0.3663023114204407 2023-01-24 02:21:39.454658: step: 732/464, loss: 1.7332795858383179 2023-01-24 02:21:40.212591: step: 734/464, loss: 0.4731151759624481 2023-01-24 02:21:41.052436: step: 736/464, loss: 1.2002259492874146 2023-01-24 02:21:41.811901: step: 738/464, loss: 0.8168594241142273 2023-01-24 02:21:42.504697: step: 740/464, loss: 0.66452556848526 2023-01-24 02:21:43.265983: step: 742/464, loss: 0.3836321532726288 2023-01-24 02:21:43.944518: step: 744/464, loss: 1.8308157920837402 2023-01-24 02:21:44.692731: step: 746/464, loss: 0.5129436254501343 2023-01-24 02:21:45.491831: step: 748/464, loss: 2.239680290222168 2023-01-24 02:21:46.276442: step: 750/464, loss: 0.9776561260223389 2023-01-24 02:21:46.933616: step: 752/464, loss: 1.0558445453643799 2023-01-24 02:21:47.687557: step: 754/464, loss: 0.875616192817688 2023-01-24 02:21:48.392815: step: 756/464, loss: 0.948789656162262 2023-01-24 02:21:49.252670: step: 758/464, loss: 0.5340040922164917 2023-01-24 02:21:49.973519: step: 760/464, loss: 0.20830850303173065 2023-01-24 02:21:50.787063: step: 762/464, loss: 2.270432472229004 2023-01-24 02:21:51.455949: step: 764/464, loss: 0.6409857273101807 2023-01-24 02:21:52.116675: step: 766/464, loss: 1.0566260814666748 2023-01-24 02:21:52.867121: step: 768/464, loss: 0.5132637023925781 2023-01-24 02:21:53.573161: step: 770/464, loss: 0.6227934956550598 2023-01-24 02:21:54.415635: step: 772/464, loss: 0.2820763885974884 2023-01-24 02:21:55.162834: step: 774/464, loss: 0.7153021097183228 2023-01-24 02:21:55.945687: step: 776/464, loss: 1.080057144165039 2023-01-24 02:21:56.761093: step: 778/464, loss: 1.1948184967041016 2023-01-24 02:21:57.617133: step: 780/464, loss: 0.8179348707199097 2023-01-24 02:21:58.424666: step: 782/464, loss: 1.160705804824829 2023-01-24 02:21:59.147583: step: 784/464, loss: 1.1092203855514526 2023-01-24 02:21:59.882537: step: 786/464, loss: 0.7730402946472168 2023-01-24 02:22:00.668745: step: 788/464, loss: 4.028815746307373 2023-01-24 02:22:01.413569: step: 790/464, loss: 1.1363494396209717 2023-01-24 02:22:02.330916: step: 792/464, loss: 1.3450284004211426 2023-01-24 02:22:03.101612: step: 794/464, loss: 0.47559836506843567 2023-01-24 02:22:03.855475: step: 796/464, loss: 0.6122507452964783 2023-01-24 02:22:04.543521: step: 798/464, loss: 0.22464758157730103 2023-01-24 02:22:05.366514: step: 800/464, loss: 1.3674689531326294 2023-01-24 02:22:06.108532: step: 802/464, loss: 0.24557270109653473 2023-01-24 02:22:06.950268: step: 804/464, loss: 1.0580368041992188 2023-01-24 02:22:07.742982: step: 806/464, loss: 1.3049439191818237 2023-01-24 02:22:08.633669: step: 808/464, loss: 1.3354514837265015 2023-01-24 02:22:09.362993: step: 810/464, loss: 0.13791459798812866 2023-01-24 02:22:10.144906: step: 812/464, loss: 0.5022804737091064 2023-01-24 02:22:10.919981: step: 814/464, loss: 2.687323570251465 2023-01-24 02:22:11.653339: step: 816/464, loss: 0.39061281085014343 2023-01-24 02:22:12.393685: step: 818/464, loss: 0.26048630475997925 2023-01-24 02:22:13.204845: step: 820/464, loss: 0.9743564128875732 2023-01-24 02:22:13.966135: step: 822/464, loss: 0.20935381948947906 2023-01-24 02:22:14.677884: step: 824/464, loss: 0.782504677772522 2023-01-24 02:22:15.438384: step: 826/464, loss: 0.5422369837760925 2023-01-24 02:22:16.230276: step: 828/464, loss: 0.569701611995697 2023-01-24 02:22:17.029252: step: 830/464, loss: 0.788164496421814 2023-01-24 02:22:17.869040: step: 832/464, loss: 0.47551649808883667 2023-01-24 02:22:18.620168: step: 834/464, loss: 0.6161491870880127 2023-01-24 02:22:19.354284: step: 836/464, loss: 0.2869093418121338 2023-01-24 02:22:20.117808: step: 838/464, loss: 0.30319467186927795 2023-01-24 02:22:20.829182: step: 840/464, loss: 0.7133747935295105 2023-01-24 02:22:21.581747: step: 842/464, loss: 0.6851193904876709 2023-01-24 02:22:22.449558: step: 844/464, loss: 1.2948142290115356 2023-01-24 02:22:23.253944: step: 846/464, loss: 1.523021936416626 2023-01-24 02:22:23.994412: step: 848/464, loss: 0.5034209489822388 2023-01-24 02:22:24.820995: step: 850/464, loss: 0.7313014268875122 2023-01-24 02:22:25.592255: step: 852/464, loss: 0.7082140445709229 2023-01-24 02:22:26.280022: step: 854/464, loss: 1.2561655044555664 2023-01-24 02:22:27.046918: step: 856/464, loss: 0.7107453942298889 2023-01-24 02:22:27.706471: step: 858/464, loss: 0.4908789396286011 2023-01-24 02:22:28.519733: step: 860/464, loss: 0.49198484420776367 2023-01-24 02:22:29.285440: step: 862/464, loss: 1.3563846349716187 2023-01-24 02:22:30.009274: step: 864/464, loss: 0.3984234929084778 2023-01-24 02:22:30.779244: step: 866/464, loss: 1.1371181011199951 2023-01-24 02:22:31.536203: step: 868/464, loss: 1.318985939025879 2023-01-24 02:22:32.269331: step: 870/464, loss: 0.7706924080848694 2023-01-24 02:22:33.029775: step: 872/464, loss: 0.45280876755714417 2023-01-24 02:22:33.836223: step: 874/464, loss: 0.3999007046222687 2023-01-24 02:22:34.536786: step: 876/464, loss: 0.28994327783584595 2023-01-24 02:22:35.288418: step: 878/464, loss: 0.5507034063339233 2023-01-24 02:22:36.033330: step: 880/464, loss: 0.23364470899105072 2023-01-24 02:22:36.716748: step: 882/464, loss: 3.641611099243164 2023-01-24 02:22:37.408863: step: 884/464, loss: 0.41873595118522644 2023-01-24 02:22:38.249555: step: 886/464, loss: 0.6705647706985474 2023-01-24 02:22:39.000812: step: 888/464, loss: 0.30085307359695435 2023-01-24 02:22:39.790506: step: 890/464, loss: 1.7496700286865234 2023-01-24 02:22:40.632170: step: 892/464, loss: 2.0429694652557373 2023-01-24 02:22:41.386318: step: 894/464, loss: 1.0652697086334229 2023-01-24 02:22:42.092748: step: 896/464, loss: 0.562082052230835 2023-01-24 02:22:42.801504: step: 898/464, loss: 0.5753889083862305 2023-01-24 02:22:43.568727: step: 900/464, loss: 0.5420715808868408 2023-01-24 02:22:44.351627: step: 902/464, loss: 1.3072800636291504 2023-01-24 02:22:45.102164: step: 904/464, loss: 0.48481813073158264 2023-01-24 02:22:45.858373: step: 906/464, loss: 1.3669205904006958 2023-01-24 02:22:46.601596: step: 908/464, loss: 0.38139575719833374 2023-01-24 02:22:47.268256: step: 910/464, loss: 0.4156199097633362 2023-01-24 02:22:48.006691: step: 912/464, loss: 0.3172839283943176 2023-01-24 02:22:48.711649: step: 914/464, loss: 0.29000595211982727 2023-01-24 02:22:49.418027: step: 916/464, loss: 0.2754070460796356 2023-01-24 02:22:50.112924: step: 918/464, loss: 0.8639828562736511 2023-01-24 02:22:51.025895: step: 920/464, loss: 0.6062501072883606 2023-01-24 02:22:51.743475: step: 922/464, loss: 0.9811422228813171 2023-01-24 02:22:52.494200: step: 924/464, loss: 1.0257622003555298 2023-01-24 02:22:53.330265: step: 926/464, loss: 1.0060298442840576 2023-01-24 02:22:54.042897: step: 928/464, loss: 0.8957557082176208 2023-01-24 02:22:54.740439: step: 930/464, loss: 0.33803051710128784 ================================================== Loss: 0.956 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3246428352687052, 'r': 0.3295616661061098, 'f1': 0.3270837588421541}, 'combined': 0.24100908546263988, 'epoch': 5} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.30310024449034256, 'r': 0.2528328791651919, 'f1': 0.27569397911123733}, 'combined': 0.1712204712375053, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3005869784133561, 'r': 0.3199796866980887, 'f1': 0.30998032148877347}, 'combined': 0.22840655267593835, 'epoch': 5} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.2939622802187892, 'r': 0.2496070249195044, 'f1': 0.269974958152936}, 'combined': 0.1676686582212971, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3179826577921608, 'r': 0.3234028167317999, 'f1': 0.3206698351819537}, 'combined': 0.2362830364498606, 'epoch': 5} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.30834291055018004, 'r': 0.25275005081551705, 'f1': 0.2777924218492147}, 'combined': 0.1725237146221439, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2526041666666667, 'r': 0.3464285714285714, 'f1': 0.2921686746987952}, 'combined': 0.19477911646586346, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2357142857142857, 'r': 0.358695652173913, 'f1': 0.2844827586206896}, 'combined': 0.1422413793103448, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5673076923076923, 'r': 0.2543103448275862, 'f1': 0.3511904761904762}, 'combined': 0.23412698412698413, 'epoch': 5} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3345195392377826, 'r': 0.3041086720343478, 'f1': 0.3185900373693168}, 'combined': 0.23475055385107552, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3174410049953423, 'r': 0.22522000156784164, 'f1': 0.26349438333717423}, 'combined': 0.163643880177824, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.3142857142857143, 'f1': 0.3235294117647059}, 'combined': 0.21568627450980393, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3005869784133561, 'r': 0.3199796866980887, 'f1': 0.30998032148877347}, 'combined': 0.22840655267593835, 'epoch': 5} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.2939622802187892, 'r': 0.2496070249195044, 'f1': 0.269974958152936}, 'combined': 0.1676686582212971, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2357142857142857, 'r': 0.358695652173913, 'f1': 0.2844827586206896}, 'combined': 0.1422413793103448, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3360532326750742, 'r': 0.30544496859840714, 'f1': 0.3200188836011144}, 'combined': 0.2358033879166106, 'epoch': 4} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3238311636676841, 'r': 0.22674568659177094, 'f1': 0.26672868469148}, 'combined': 0.16565255154523495, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5446428571428571, 'r': 0.2629310344827586, 'f1': 0.3546511627906977}, 'combined': 0.2364341085271318, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:25:49.659442: step: 2/464, loss: 0.12472327053546906 2023-01-24 02:25:50.391737: step: 4/464, loss: 0.4600578546524048 2023-01-24 02:25:51.176050: step: 6/464, loss: 0.5284104943275452 2023-01-24 02:25:51.929732: step: 8/464, loss: 0.2502985894680023 2023-01-24 02:25:52.650234: step: 10/464, loss: 0.3811435401439667 2023-01-24 02:25:53.346429: step: 12/464, loss: 1.4870409965515137 2023-01-24 02:25:54.094548: step: 14/464, loss: 0.32735830545425415 2023-01-24 02:25:54.904720: step: 16/464, loss: 0.19058562815189362 2023-01-24 02:25:55.663197: step: 18/464, loss: 0.4643670618534088 2023-01-24 02:25:56.422454: step: 20/464, loss: 0.7304343581199646 2023-01-24 02:25:57.130001: step: 22/464, loss: 1.3959012031555176 2023-01-24 02:25:57.901126: step: 24/464, loss: 0.8194465637207031 2023-01-24 02:25:58.763304: step: 26/464, loss: 0.5014887452125549 2023-01-24 02:25:59.641808: step: 28/464, loss: 0.1794343739748001 2023-01-24 02:26:00.345844: step: 30/464, loss: 0.03984064981341362 2023-01-24 02:26:01.043764: step: 32/464, loss: 0.23485831916332245 2023-01-24 02:26:02.003023: step: 34/464, loss: 0.7319287657737732 2023-01-24 02:26:02.771763: step: 36/464, loss: 0.5528953671455383 2023-01-24 02:26:03.447756: step: 38/464, loss: 0.6319125890731812 2023-01-24 02:26:04.322876: step: 40/464, loss: 0.5621069073677063 2023-01-24 02:26:05.021776: step: 42/464, loss: 0.9505723118782043 2023-01-24 02:26:05.827432: step: 44/464, loss: 1.7991302013397217 2023-01-24 02:26:06.535552: step: 46/464, loss: 6.067421913146973 2023-01-24 02:26:07.249818: step: 48/464, loss: 0.37295812368392944 2023-01-24 02:26:07.982093: step: 50/464, loss: 0.19374096393585205 2023-01-24 02:26:08.735811: step: 52/464, loss: 0.2651880979537964 2023-01-24 02:26:09.555031: step: 54/464, loss: 0.6591582298278809 2023-01-24 02:26:10.447012: step: 56/464, loss: 1.1974310874938965 2023-01-24 02:26:11.208756: step: 58/464, loss: 0.44687268137931824 2023-01-24 02:26:12.066507: step: 60/464, loss: 0.8164681196212769 2023-01-24 02:26:12.873476: step: 62/464, loss: 2.135221004486084 2023-01-24 02:26:13.685951: step: 64/464, loss: 1.5907602310180664 2023-01-24 02:26:14.437840: step: 66/464, loss: 0.6092633008956909 2023-01-24 02:26:15.135267: step: 68/464, loss: 0.4194296598434448 2023-01-24 02:26:15.853739: step: 70/464, loss: 0.2739868462085724 2023-01-24 02:26:16.572213: step: 72/464, loss: 0.4589228630065918 2023-01-24 02:26:17.297708: step: 74/464, loss: 1.0880742073059082 2023-01-24 02:26:18.066676: step: 76/464, loss: 3.139005184173584 2023-01-24 02:26:18.851923: step: 78/464, loss: 0.6059046387672424 2023-01-24 02:26:19.563784: step: 80/464, loss: 4.217742919921875 2023-01-24 02:26:20.348211: step: 82/464, loss: 0.2161814421415329 2023-01-24 02:26:21.114630: step: 84/464, loss: 0.6917901039123535 2023-01-24 02:26:21.914684: step: 86/464, loss: 0.4375626742839813 2023-01-24 02:26:22.719138: step: 88/464, loss: 1.4799258708953857 2023-01-24 02:26:23.551588: step: 90/464, loss: 0.49118438363075256 2023-01-24 02:26:24.307904: step: 92/464, loss: 0.2365902066230774 2023-01-24 02:26:25.147196: step: 94/464, loss: 0.541080117225647 2023-01-24 02:26:25.992884: step: 96/464, loss: 0.5610907673835754 2023-01-24 02:26:26.783596: step: 98/464, loss: 0.505711019039154 2023-01-24 02:26:27.510289: step: 100/464, loss: 0.6612415909767151 2023-01-24 02:26:28.253761: step: 102/464, loss: 0.38282832503318787 2023-01-24 02:26:29.056551: step: 104/464, loss: 7.262805938720703 2023-01-24 02:26:29.796141: step: 106/464, loss: 0.750267744064331 2023-01-24 02:26:30.582454: step: 108/464, loss: 0.32267841696739197 2023-01-24 02:26:31.325576: step: 110/464, loss: 0.5233975052833557 2023-01-24 02:26:32.063515: step: 112/464, loss: 1.1282964944839478 2023-01-24 02:26:32.783629: step: 114/464, loss: 0.3750358521938324 2023-01-24 02:26:33.541320: step: 116/464, loss: 0.2865687906742096 2023-01-24 02:26:34.249615: step: 118/464, loss: 0.23131638765335083 2023-01-24 02:26:35.070844: step: 120/464, loss: 0.7558478116989136 2023-01-24 02:26:35.864473: step: 122/464, loss: 0.8477310538291931 2023-01-24 02:26:36.593507: step: 124/464, loss: 0.24333900213241577 2023-01-24 02:26:37.330168: step: 126/464, loss: 0.3285759687423706 2023-01-24 02:26:38.059334: step: 128/464, loss: 0.7810271382331848 2023-01-24 02:26:38.859527: step: 130/464, loss: 0.33401158452033997 2023-01-24 02:26:39.631160: step: 132/464, loss: 0.24617628753185272 2023-01-24 02:26:40.376547: step: 134/464, loss: 0.30306100845336914 2023-01-24 02:26:41.119177: step: 136/464, loss: 6.122437477111816 2023-01-24 02:26:41.917632: step: 138/464, loss: 0.17895404994487762 2023-01-24 02:26:42.667457: step: 140/464, loss: 0.3743041157722473 2023-01-24 02:26:43.363688: step: 142/464, loss: 0.9956777095794678 2023-01-24 02:26:44.185587: step: 144/464, loss: 0.5255347490310669 2023-01-24 02:26:44.994856: step: 146/464, loss: 1.6436017751693726 2023-01-24 02:26:45.780219: step: 148/464, loss: 0.44622185826301575 2023-01-24 02:26:46.470569: step: 150/464, loss: 0.4024694561958313 2023-01-24 02:26:47.191170: step: 152/464, loss: 1.1937528848648071 2023-01-24 02:26:48.029273: step: 154/464, loss: 0.5776351690292358 2023-01-24 02:26:48.871150: step: 156/464, loss: 0.6195184588432312 2023-01-24 02:26:49.737066: step: 158/464, loss: 0.24424467980861664 2023-01-24 02:26:50.496986: step: 160/464, loss: 2.263171672821045 2023-01-24 02:26:51.197630: step: 162/464, loss: 1.060383915901184 2023-01-24 02:26:52.009479: step: 164/464, loss: 0.5988831520080566 2023-01-24 02:26:52.804546: step: 166/464, loss: 0.6007016897201538 2023-01-24 02:26:53.565021: step: 168/464, loss: 1.3479111194610596 2023-01-24 02:26:54.337939: step: 170/464, loss: 2.0738675594329834 2023-01-24 02:26:55.083164: step: 172/464, loss: 1.2061904668807983 2023-01-24 02:26:55.857177: step: 174/464, loss: 0.7636981010437012 2023-01-24 02:26:56.591498: step: 176/464, loss: 0.5179991126060486 2023-01-24 02:26:57.340168: step: 178/464, loss: 1.457108497619629 2023-01-24 02:26:58.115204: step: 180/464, loss: 0.7868736386299133 2023-01-24 02:26:58.886894: step: 182/464, loss: 0.5773451328277588 2023-01-24 02:26:59.608026: step: 184/464, loss: 0.21230916678905487 2023-01-24 02:27:00.396112: step: 186/464, loss: 0.5763471126556396 2023-01-24 02:27:01.191725: step: 188/464, loss: 0.23866167664527893 2023-01-24 02:27:01.956213: step: 190/464, loss: 0.3690454661846161 2023-01-24 02:27:02.727599: step: 192/464, loss: 0.975060224533081 2023-01-24 02:27:03.485678: step: 194/464, loss: 0.6560547947883606 2023-01-24 02:27:04.227240: step: 196/464, loss: 0.3782944679260254 2023-01-24 02:27:04.992441: step: 198/464, loss: 0.33345192670822144 2023-01-24 02:27:05.734138: step: 200/464, loss: 0.810796856880188 2023-01-24 02:27:06.498005: step: 202/464, loss: 0.35488876700401306 2023-01-24 02:27:07.227068: step: 204/464, loss: 1.0040911436080933 2023-01-24 02:27:07.968963: step: 206/464, loss: 0.3226962089538574 2023-01-24 02:27:08.727272: step: 208/464, loss: 0.6221941113471985 2023-01-24 02:27:09.486235: step: 210/464, loss: 0.4910554885864258 2023-01-24 02:27:10.401973: step: 212/464, loss: 0.7612470984458923 2023-01-24 02:27:11.147878: step: 214/464, loss: 0.5407028794288635 2023-01-24 02:27:11.882310: step: 216/464, loss: 3.4458680152893066 2023-01-24 02:27:12.664619: step: 218/464, loss: 0.4417225122451782 2023-01-24 02:27:13.373826: step: 220/464, loss: 0.5013437271118164 2023-01-24 02:27:14.123560: step: 222/464, loss: 0.36002710461616516 2023-01-24 02:27:14.841480: step: 224/464, loss: 0.42992568016052246 2023-01-24 02:27:15.548397: step: 226/464, loss: 0.6748926639556885 2023-01-24 02:27:16.335004: step: 228/464, loss: 0.6752133369445801 2023-01-24 02:27:17.138989: step: 230/464, loss: 1.1018412113189697 2023-01-24 02:27:17.833536: step: 232/464, loss: 1.4467039108276367 2023-01-24 02:27:18.538834: step: 234/464, loss: 0.5036592483520508 2023-01-24 02:27:19.332858: step: 236/464, loss: 1.1971505880355835 2023-01-24 02:27:20.073198: step: 238/464, loss: 0.7818788886070251 2023-01-24 02:27:20.936366: step: 240/464, loss: 0.3119359016418457 2023-01-24 02:27:21.702847: step: 242/464, loss: 1.0075072050094604 2023-01-24 02:27:22.413926: step: 244/464, loss: 0.7762327194213867 2023-01-24 02:27:23.099808: step: 246/464, loss: 0.29996880888938904 2023-01-24 02:27:23.847747: step: 248/464, loss: 0.5244670510292053 2023-01-24 02:27:24.585543: step: 250/464, loss: 0.4027194082736969 2023-01-24 02:27:25.392449: step: 252/464, loss: 0.3438206911087036 2023-01-24 02:27:26.159294: step: 254/464, loss: 0.3174258768558502 2023-01-24 02:27:26.921753: step: 256/464, loss: 0.44195500016212463 2023-01-24 02:27:27.661066: step: 258/464, loss: 0.7431162595748901 2023-01-24 02:27:28.414180: step: 260/464, loss: 0.390606552362442 2023-01-24 02:27:29.177825: step: 262/464, loss: 0.21001914143562317 2023-01-24 02:27:29.948443: step: 264/464, loss: 0.7312803864479065 2023-01-24 02:27:30.673880: step: 266/464, loss: 0.41264137625694275 2023-01-24 02:27:31.424652: step: 268/464, loss: 1.7871190309524536 2023-01-24 02:27:32.177365: step: 270/464, loss: 1.6481120586395264 2023-01-24 02:27:33.006624: step: 272/464, loss: 0.2945783734321594 2023-01-24 02:27:33.803624: step: 274/464, loss: 0.9895068407058716 2023-01-24 02:27:34.567625: step: 276/464, loss: 1.1285908222198486 2023-01-24 02:27:35.292944: step: 278/464, loss: 2.258369207382202 2023-01-24 02:27:36.061125: step: 280/464, loss: 1.4946790933609009 2023-01-24 02:27:36.919758: step: 282/464, loss: 1.9093759059906006 2023-01-24 02:27:37.667638: step: 284/464, loss: 0.6587912440299988 2023-01-24 02:27:38.432911: step: 286/464, loss: 0.5902923345565796 2023-01-24 02:27:39.295845: step: 288/464, loss: 0.2788597345352173 2023-01-24 02:27:40.035913: step: 290/464, loss: 0.7567580938339233 2023-01-24 02:27:40.745006: step: 292/464, loss: 0.2813103497028351 2023-01-24 02:27:41.448109: step: 294/464, loss: 0.3289432227611542 2023-01-24 02:27:42.225626: step: 296/464, loss: 0.3027788996696472 2023-01-24 02:27:43.022538: step: 298/464, loss: 0.5907791256904602 2023-01-24 02:27:43.835141: step: 300/464, loss: 0.9173389673233032 2023-01-24 02:27:44.571023: step: 302/464, loss: 0.2431243360042572 2023-01-24 02:27:45.386508: step: 304/464, loss: 1.1197048425674438 2023-01-24 02:27:46.169471: step: 306/464, loss: 0.37354084849357605 2023-01-24 02:27:46.869673: step: 308/464, loss: 0.1799282729625702 2023-01-24 02:27:47.623445: step: 310/464, loss: 0.6075442433357239 2023-01-24 02:27:48.489578: step: 312/464, loss: 0.3274470865726471 2023-01-24 02:27:49.254331: step: 314/464, loss: 0.6684872508049011 2023-01-24 02:27:49.991793: step: 316/464, loss: 1.3049945831298828 2023-01-24 02:27:50.741263: step: 318/464, loss: 0.777603268623352 2023-01-24 02:27:51.497437: step: 320/464, loss: 0.567020833492279 2023-01-24 02:27:52.240648: step: 322/464, loss: 0.32762405276298523 2023-01-24 02:27:52.982481: step: 324/464, loss: 0.5362238883972168 2023-01-24 02:27:53.802705: step: 326/464, loss: 0.2942344546318054 2023-01-24 02:27:54.555520: step: 328/464, loss: 0.3326949179172516 2023-01-24 02:27:55.279449: step: 330/464, loss: 0.7896073460578918 2023-01-24 02:27:56.082577: step: 332/464, loss: 0.25079795718193054 2023-01-24 02:27:56.872733: step: 334/464, loss: 0.3522076904773712 2023-01-24 02:27:57.694366: step: 336/464, loss: 0.6076290607452393 2023-01-24 02:27:58.395280: step: 338/464, loss: 0.27255961298942566 2023-01-24 02:27:59.279021: step: 340/464, loss: 0.5010226368904114 2023-01-24 02:27:59.990661: step: 342/464, loss: 0.2654639780521393 2023-01-24 02:28:00.705517: step: 344/464, loss: 1.3633586168289185 2023-01-24 02:28:01.442022: step: 346/464, loss: 0.7960776686668396 2023-01-24 02:28:02.218670: step: 348/464, loss: 0.8121628165245056 2023-01-24 02:28:03.095950: step: 350/464, loss: 0.8419029116630554 2023-01-24 02:28:03.915334: step: 352/464, loss: 2.2198326587677 2023-01-24 02:28:04.738832: step: 354/464, loss: 0.5667412281036377 2023-01-24 02:28:05.535531: step: 356/464, loss: 0.6649174690246582 2023-01-24 02:28:06.317118: step: 358/464, loss: 0.24803166091442108 2023-01-24 02:28:07.052686: step: 360/464, loss: 0.6043193340301514 2023-01-24 02:28:07.829874: step: 362/464, loss: 0.651032030582428 2023-01-24 02:28:08.551651: step: 364/464, loss: 0.4023246765136719 2023-01-24 02:28:09.341508: step: 366/464, loss: 1.8113951683044434 2023-01-24 02:28:10.095657: step: 368/464, loss: 0.25779157876968384 2023-01-24 02:28:10.827426: step: 370/464, loss: 1.8274005651474 2023-01-24 02:28:11.574324: step: 372/464, loss: 1.1966185569763184 2023-01-24 02:28:12.323899: step: 374/464, loss: 0.8468092083930969 2023-01-24 02:28:13.068681: step: 376/464, loss: 0.6065338850021362 2023-01-24 02:28:13.871966: step: 378/464, loss: 0.2458256334066391 2023-01-24 02:28:14.768728: step: 380/464, loss: 2.1111176013946533 2023-01-24 02:28:15.562412: step: 382/464, loss: 0.22330960631370544 2023-01-24 02:28:16.308166: step: 384/464, loss: 1.5689202547073364 2023-01-24 02:28:17.111720: step: 386/464, loss: 0.49095427989959717 2023-01-24 02:28:17.920346: step: 388/464, loss: 0.340639591217041 2023-01-24 02:28:18.774225: step: 390/464, loss: 1.0026850700378418 2023-01-24 02:28:19.569859: step: 392/464, loss: 0.7220879793167114 2023-01-24 02:28:20.349637: step: 394/464, loss: 0.8729772567749023 2023-01-24 02:28:21.115832: step: 396/464, loss: 0.26813337206840515 2023-01-24 02:28:21.923062: step: 398/464, loss: 0.6239210963249207 2023-01-24 02:28:22.765392: step: 400/464, loss: 0.20718388259410858 2023-01-24 02:28:23.620877: step: 402/464, loss: 0.2550746202468872 2023-01-24 02:28:24.439976: step: 404/464, loss: 0.849943995475769 2023-01-24 02:28:25.204166: step: 406/464, loss: 0.5320936441421509 2023-01-24 02:28:25.954609: step: 408/464, loss: 0.2525803744792938 2023-01-24 02:28:26.772175: step: 410/464, loss: 0.4498633146286011 2023-01-24 02:28:27.478285: step: 412/464, loss: 0.7323371171951294 2023-01-24 02:28:28.211452: step: 414/464, loss: 0.20026081800460815 2023-01-24 02:28:29.045968: step: 416/464, loss: 0.7177056670188904 2023-01-24 02:28:29.900254: step: 418/464, loss: 0.39402830600738525 2023-01-24 02:28:30.606760: step: 420/464, loss: 0.22038528323173523 2023-01-24 02:28:31.348331: step: 422/464, loss: 1.1169726848602295 2023-01-24 02:28:32.109533: step: 424/464, loss: 0.6293752193450928 2023-01-24 02:28:32.828162: step: 426/464, loss: 0.3213549554347992 2023-01-24 02:28:33.575798: step: 428/464, loss: 1.2308756113052368 2023-01-24 02:28:34.265730: step: 430/464, loss: 1.4570958614349365 2023-01-24 02:28:34.972501: step: 432/464, loss: 0.6932898759841919 2023-01-24 02:28:35.717152: step: 434/464, loss: 0.6488903164863586 2023-01-24 02:28:36.426987: step: 436/464, loss: 0.09309423714876175 2023-01-24 02:28:37.211589: step: 438/464, loss: 1.178100347518921 2023-01-24 02:28:37.953008: step: 440/464, loss: 2.2519099712371826 2023-01-24 02:28:38.741672: step: 442/464, loss: 1.376326322555542 2023-01-24 02:28:39.547835: step: 444/464, loss: 0.41640615463256836 2023-01-24 02:28:40.330227: step: 446/464, loss: 0.6458289623260498 2023-01-24 02:28:41.161083: step: 448/464, loss: 0.5401825308799744 2023-01-24 02:28:41.914902: step: 450/464, loss: 1.0886940956115723 2023-01-24 02:28:42.715443: step: 452/464, loss: 1.7589441537857056 2023-01-24 02:28:43.465397: step: 454/464, loss: 0.406145304441452 2023-01-24 02:28:44.243812: step: 456/464, loss: 0.26355504989624023 2023-01-24 02:28:44.959184: step: 458/464, loss: 0.23833003640174866 2023-01-24 02:28:45.715795: step: 460/464, loss: 0.5560361742973328 2023-01-24 02:28:46.468476: step: 462/464, loss: 0.5383360981941223 2023-01-24 02:28:47.246288: step: 464/464, loss: 1.2050065994262695 2023-01-24 02:28:48.036361: step: 466/464, loss: 0.5009714365005493 2023-01-24 02:28:48.729938: step: 468/464, loss: 0.5247203707695007 2023-01-24 02:28:49.523599: step: 470/464, loss: 0.24868924915790558 2023-01-24 02:28:50.264665: step: 472/464, loss: 0.8285905122756958 2023-01-24 02:28:51.067118: step: 474/464, loss: 0.3940313458442688 2023-01-24 02:28:51.798461: step: 476/464, loss: 0.5545944571495056 2023-01-24 02:28:52.510077: step: 478/464, loss: 1.8019726276397705 2023-01-24 02:28:53.344197: step: 480/464, loss: 1.1479442119598389 2023-01-24 02:28:54.141475: step: 482/464, loss: 0.43199148774147034 2023-01-24 02:28:54.964130: step: 484/464, loss: 0.1521385759115219 2023-01-24 02:28:55.738003: step: 486/464, loss: 3.1033084392547607 2023-01-24 02:28:56.446372: step: 488/464, loss: 0.5504053235054016 2023-01-24 02:28:57.255667: step: 490/464, loss: 2.8585193157196045 2023-01-24 02:28:58.023839: step: 492/464, loss: 0.5221114158630371 2023-01-24 02:28:58.791925: step: 494/464, loss: 0.24164840579032898 2023-01-24 02:28:59.574398: step: 496/464, loss: 0.649586021900177 2023-01-24 02:29:00.371231: step: 498/464, loss: 0.20573021471500397 2023-01-24 02:29:01.157840: step: 500/464, loss: 0.356124609708786 2023-01-24 02:29:01.969430: step: 502/464, loss: 1.0682035684585571 2023-01-24 02:29:02.709428: step: 504/464, loss: 0.2562498450279236 2023-01-24 02:29:03.371408: step: 506/464, loss: 0.9069114327430725 2023-01-24 02:29:04.106866: step: 508/464, loss: 0.260059118270874 2023-01-24 02:29:04.904068: step: 510/464, loss: 0.5727384090423584 2023-01-24 02:29:05.662376: step: 512/464, loss: 0.6543835997581482 2023-01-24 02:29:06.411932: step: 514/464, loss: 1.037026047706604 2023-01-24 02:29:07.145223: step: 516/464, loss: 0.5035011768341064 2023-01-24 02:29:07.897302: step: 518/464, loss: 0.40311571955680847 2023-01-24 02:29:08.681826: step: 520/464, loss: 0.6194513440132141 2023-01-24 02:29:09.451652: step: 522/464, loss: 0.9201607704162598 2023-01-24 02:29:10.216499: step: 524/464, loss: 0.37401604652404785 2023-01-24 02:29:11.022149: step: 526/464, loss: 0.3895934224128723 2023-01-24 02:29:11.797139: step: 528/464, loss: 0.2508663535118103 2023-01-24 02:29:12.493117: step: 530/464, loss: 0.6469005942344666 2023-01-24 02:29:13.265156: step: 532/464, loss: 4.749858856201172 2023-01-24 02:29:14.132717: step: 534/464, loss: 0.15832938253879547 2023-01-24 02:29:14.865104: step: 536/464, loss: 0.6010364890098572 2023-01-24 02:29:15.624593: step: 538/464, loss: 0.9541651010513306 2023-01-24 02:29:16.510116: step: 540/464, loss: 0.7156731486320496 2023-01-24 02:29:17.224885: step: 542/464, loss: 0.26916003227233887 2023-01-24 02:29:17.987851: step: 544/464, loss: 0.34205710887908936 2023-01-24 02:29:18.750955: step: 546/464, loss: 0.38013705611228943 2023-01-24 02:29:19.547772: step: 548/464, loss: 4.3453240394592285 2023-01-24 02:29:20.291383: step: 550/464, loss: 1.071614384651184 2023-01-24 02:29:21.081004: step: 552/464, loss: 0.4483928978443146 2023-01-24 02:29:21.817126: step: 554/464, loss: 1.2307825088500977 2023-01-24 02:29:22.628331: step: 556/464, loss: 0.25892454385757446 2023-01-24 02:29:23.370791: step: 558/464, loss: 2.0551559925079346 2023-01-24 02:29:24.251967: step: 560/464, loss: 5.914419174194336 2023-01-24 02:29:25.044939: step: 562/464, loss: 0.41530847549438477 2023-01-24 02:29:25.813346: step: 564/464, loss: 0.9857712388038635 2023-01-24 02:29:26.635794: step: 566/464, loss: 0.3567578196525574 2023-01-24 02:29:27.366580: step: 568/464, loss: 0.18065162003040314 2023-01-24 02:29:28.182871: step: 570/464, loss: 0.5749038457870483 2023-01-24 02:29:28.915881: step: 572/464, loss: 0.13398727774620056 2023-01-24 02:29:29.611224: step: 574/464, loss: 0.3920579254627228 2023-01-24 02:29:30.489825: step: 576/464, loss: 1.651190161705017 2023-01-24 02:29:31.218767: step: 578/464, loss: 0.17924638092517853 2023-01-24 02:29:31.930567: step: 580/464, loss: 0.8294220566749573 2023-01-24 02:29:32.715827: step: 582/464, loss: 0.4700474143028259 2023-01-24 02:29:33.504064: step: 584/464, loss: 0.7171123623847961 2023-01-24 02:29:34.299979: step: 586/464, loss: 0.5684266090393066 2023-01-24 02:29:35.008735: step: 588/464, loss: 0.5454313158988953 2023-01-24 02:29:35.780345: step: 590/464, loss: 0.331861287355423 2023-01-24 02:29:36.506661: step: 592/464, loss: 0.1441434621810913 2023-01-24 02:29:37.288096: step: 594/464, loss: 0.8970625996589661 2023-01-24 02:29:38.034376: step: 596/464, loss: 0.77960604429245 2023-01-24 02:29:38.850134: step: 598/464, loss: 1.413642406463623 2023-01-24 02:29:39.577583: step: 600/464, loss: 0.2677861154079437 2023-01-24 02:29:40.267074: step: 602/464, loss: 0.2316088229417801 2023-01-24 02:29:40.964992: step: 604/464, loss: 0.7844187021255493 2023-01-24 02:29:41.675960: step: 606/464, loss: 0.8072429299354553 2023-01-24 02:29:42.442668: step: 608/464, loss: 0.3729170262813568 2023-01-24 02:29:43.160332: step: 610/464, loss: 1.2633850574493408 2023-01-24 02:29:43.908348: step: 612/464, loss: 0.3410780429840088 2023-01-24 02:29:44.737039: step: 614/464, loss: 0.32746434211730957 2023-01-24 02:29:45.568909: step: 616/464, loss: 0.283867210149765 2023-01-24 02:29:46.330240: step: 618/464, loss: 0.36813536286354065 2023-01-24 02:29:47.034335: step: 620/464, loss: 0.30180618166923523 2023-01-24 02:29:47.745670: step: 622/464, loss: 0.547936201095581 2023-01-24 02:29:48.447170: step: 624/464, loss: 0.12627914547920227 2023-01-24 02:29:49.171597: step: 626/464, loss: 0.2586286664009094 2023-01-24 02:29:49.982175: step: 628/464, loss: 0.186224102973938 2023-01-24 02:29:50.799225: step: 630/464, loss: 0.5099416375160217 2023-01-24 02:29:51.565535: step: 632/464, loss: 0.3976011574268341 2023-01-24 02:29:52.321438: step: 634/464, loss: 1.3149917125701904 2023-01-24 02:29:53.043381: step: 636/464, loss: 0.35117021203041077 2023-01-24 02:29:53.785629: step: 638/464, loss: 0.4128319323062897 2023-01-24 02:29:54.492778: step: 640/464, loss: 1.0195266008377075 2023-01-24 02:29:55.231865: step: 642/464, loss: 0.30473464727401733 2023-01-24 02:29:55.973902: step: 644/464, loss: 0.833469808101654 2023-01-24 02:29:56.738518: step: 646/464, loss: 0.20172801613807678 2023-01-24 02:29:57.467266: step: 648/464, loss: 0.29524433612823486 2023-01-24 02:29:58.279503: step: 650/464, loss: 0.34519845247268677 2023-01-24 02:29:59.045070: step: 652/464, loss: 0.18610112369060516 2023-01-24 02:29:59.771961: step: 654/464, loss: 0.448944091796875 2023-01-24 02:30:00.700494: step: 656/464, loss: 0.7930111289024353 2023-01-24 02:30:01.470026: step: 658/464, loss: 0.4779053032398224 2023-01-24 02:30:02.235292: step: 660/464, loss: 1.270046353340149 2023-01-24 02:30:02.988742: step: 662/464, loss: 0.6414569020271301 2023-01-24 02:30:03.812493: step: 664/464, loss: 1.1048862934112549 2023-01-24 02:30:04.575436: step: 666/464, loss: 0.19807268679141998 2023-01-24 02:30:05.331517: step: 668/464, loss: 1.0151479244232178 2023-01-24 02:30:06.070721: step: 670/464, loss: 1.5276272296905518 2023-01-24 02:30:06.841063: step: 672/464, loss: 0.37346214056015015 2023-01-24 02:30:07.574593: step: 674/464, loss: 0.4739722013473511 2023-01-24 02:30:08.364438: step: 676/464, loss: 0.37458914518356323 2023-01-24 02:30:09.187536: step: 678/464, loss: 0.9428262114524841 2023-01-24 02:30:09.984304: step: 680/464, loss: 2.244079828262329 2023-01-24 02:30:10.771946: step: 682/464, loss: 0.25588732957839966 2023-01-24 02:30:11.512670: step: 684/464, loss: 0.09564242511987686 2023-01-24 02:30:12.325278: step: 686/464, loss: 0.44685637950897217 2023-01-24 02:30:13.086736: step: 688/464, loss: 0.34719905257225037 2023-01-24 02:30:13.813019: step: 690/464, loss: 0.3859795033931732 2023-01-24 02:30:14.523690: step: 692/464, loss: 0.5400804877281189 2023-01-24 02:30:15.234291: step: 694/464, loss: 0.5231386423110962 2023-01-24 02:30:15.970353: step: 696/464, loss: 0.30685046315193176 2023-01-24 02:30:16.730417: step: 698/464, loss: 0.6772754192352295 2023-01-24 02:30:17.466225: step: 700/464, loss: 0.259883314371109 2023-01-24 02:30:18.183307: step: 702/464, loss: 0.6134689450263977 2023-01-24 02:30:19.015287: step: 704/464, loss: 0.29171958565711975 2023-01-24 02:30:19.786809: step: 706/464, loss: 0.130640909075737 2023-01-24 02:30:20.580238: step: 708/464, loss: 0.11610277742147446 2023-01-24 02:30:21.410569: step: 710/464, loss: 0.304213285446167 2023-01-24 02:30:22.118966: step: 712/464, loss: 0.7409196496009827 2023-01-24 02:30:22.791687: step: 714/464, loss: 0.31891071796417236 2023-01-24 02:30:23.566773: step: 716/464, loss: 0.728702962398529 2023-01-24 02:30:24.294841: step: 718/464, loss: 2.555067539215088 2023-01-24 02:30:25.039787: step: 720/464, loss: 0.33662861585617065 2023-01-24 02:30:25.767284: step: 722/464, loss: 0.1771378517150879 2023-01-24 02:30:26.645934: step: 724/464, loss: 2.431415319442749 2023-01-24 02:30:27.439744: step: 726/464, loss: 0.11213550716638565 2023-01-24 02:30:28.239429: step: 728/464, loss: 0.7706308960914612 2023-01-24 02:30:29.049900: step: 730/464, loss: 0.31764960289001465 2023-01-24 02:30:29.897207: step: 732/464, loss: 0.5397222638130188 2023-01-24 02:30:30.668833: step: 734/464, loss: 0.24206602573394775 2023-01-24 02:30:31.393270: step: 736/464, loss: 0.4958699941635132 2023-01-24 02:30:32.149875: step: 738/464, loss: 0.6772986054420471 2023-01-24 02:30:32.985056: step: 740/464, loss: 2.2552149295806885 2023-01-24 02:30:33.734638: step: 742/464, loss: 0.2647966146469116 2023-01-24 02:30:34.466950: step: 744/464, loss: 0.6256701946258545 2023-01-24 02:30:35.168010: step: 746/464, loss: 0.27616631984710693 2023-01-24 02:30:35.965259: step: 748/464, loss: 0.3815680146217346 2023-01-24 02:30:36.659819: step: 750/464, loss: 0.36198049783706665 2023-01-24 02:30:37.412041: step: 752/464, loss: 0.10101629048585892 2023-01-24 02:30:38.093585: step: 754/464, loss: 1.5349050760269165 2023-01-24 02:30:38.766664: step: 756/464, loss: 1.0652238130569458 2023-01-24 02:30:39.562286: step: 758/464, loss: 0.4184451401233673 2023-01-24 02:30:40.231108: step: 760/464, loss: 0.4156523644924164 2023-01-24 02:30:41.011048: step: 762/464, loss: 0.3656218945980072 2023-01-24 02:30:41.795321: step: 764/464, loss: 1.1675796508789062 2023-01-24 02:30:42.579101: step: 766/464, loss: 0.6156561970710754 2023-01-24 02:30:43.410051: step: 768/464, loss: 0.32256415486335754 2023-01-24 02:30:44.283452: step: 770/464, loss: 1.1748402118682861 2023-01-24 02:30:44.999622: step: 772/464, loss: 0.9681693315505981 2023-01-24 02:30:45.830424: step: 774/464, loss: 0.25069674849510193 2023-01-24 02:30:46.606036: step: 776/464, loss: 0.368958979845047 2023-01-24 02:30:47.386596: step: 778/464, loss: 0.7839964628219604 2023-01-24 02:30:48.110397: step: 780/464, loss: 0.20218144357204437 2023-01-24 02:30:48.889624: step: 782/464, loss: 1.0326519012451172 2023-01-24 02:30:49.672147: step: 784/464, loss: 0.5584985613822937 2023-01-24 02:30:50.418084: step: 786/464, loss: 0.9903871417045593 2023-01-24 02:30:51.160510: step: 788/464, loss: 2.6020190715789795 2023-01-24 02:30:51.893923: step: 790/464, loss: 2.3147616386413574 2023-01-24 02:30:52.749170: step: 792/464, loss: 1.7034763097763062 2023-01-24 02:30:53.469416: step: 794/464, loss: 0.5222653746604919 2023-01-24 02:30:54.225202: step: 796/464, loss: 0.92058265209198 2023-01-24 02:30:54.949626: step: 798/464, loss: 0.3047405183315277 2023-01-24 02:30:55.738542: step: 800/464, loss: 0.3361084759235382 2023-01-24 02:30:56.452326: step: 802/464, loss: 0.63560950756073 2023-01-24 02:30:57.230079: step: 804/464, loss: 0.42710667848587036 2023-01-24 02:30:57.987524: step: 806/464, loss: 1.273741364479065 2023-01-24 02:30:58.681100: step: 808/464, loss: 0.6142624020576477 2023-01-24 02:30:59.495273: step: 810/464, loss: 1.2906978130340576 2023-01-24 02:31:00.216343: step: 812/464, loss: 0.656749427318573 2023-01-24 02:31:01.016398: step: 814/464, loss: 0.5131410956382751 2023-01-24 02:31:01.762231: step: 816/464, loss: 1.9324876070022583 2023-01-24 02:31:02.587742: step: 818/464, loss: 0.3766292631626129 2023-01-24 02:31:03.274136: step: 820/464, loss: 1.3447926044464111 2023-01-24 02:31:04.047097: step: 822/464, loss: 0.8682775497436523 2023-01-24 02:31:04.765038: step: 824/464, loss: 0.21632665395736694 2023-01-24 02:31:05.527892: step: 826/464, loss: 0.4097214937210083 2023-01-24 02:31:06.280596: step: 828/464, loss: 0.25114351511001587 2023-01-24 02:31:07.077199: step: 830/464, loss: 0.2567175328731537 2023-01-24 02:31:07.781085: step: 832/464, loss: 1.210261344909668 2023-01-24 02:31:08.640414: step: 834/464, loss: 0.6198481321334839 2023-01-24 02:31:09.391657: step: 836/464, loss: 0.06847158074378967 2023-01-24 02:31:10.160956: step: 838/464, loss: 0.43360579013824463 2023-01-24 02:31:10.933739: step: 840/464, loss: 0.6006065011024475 2023-01-24 02:31:11.713427: step: 842/464, loss: 0.17225876450538635 2023-01-24 02:31:12.549045: step: 844/464, loss: 0.733666181564331 2023-01-24 02:31:13.271961: step: 846/464, loss: 0.3815063238143921 2023-01-24 02:31:14.076214: step: 848/464, loss: 0.484394371509552 2023-01-24 02:31:14.885581: step: 850/464, loss: 0.6926419734954834 2023-01-24 02:31:15.690074: step: 852/464, loss: 1.4232854843139648 2023-01-24 02:31:16.520299: step: 854/464, loss: 2.2661728858947754 2023-01-24 02:31:17.402800: step: 856/464, loss: 0.2177295684814453 2023-01-24 02:31:18.113044: step: 858/464, loss: 2.403341293334961 2023-01-24 02:31:18.818133: step: 860/464, loss: 0.27968692779541016 2023-01-24 02:31:19.604719: step: 862/464, loss: 0.810583233833313 2023-01-24 02:31:20.391612: step: 864/464, loss: 1.0074455738067627 2023-01-24 02:31:21.179806: step: 866/464, loss: 1.2238481044769287 2023-01-24 02:31:21.952060: step: 868/464, loss: 0.25915518403053284 2023-01-24 02:31:22.677876: step: 870/464, loss: 0.522385835647583 2023-01-24 02:31:23.446921: step: 872/464, loss: 0.3643008768558502 2023-01-24 02:31:24.324312: step: 874/464, loss: 1.0815117359161377 2023-01-24 02:31:25.104144: step: 876/464, loss: 0.21362431347370148 2023-01-24 02:31:25.827910: step: 878/464, loss: 0.5841333866119385 2023-01-24 02:31:26.556550: step: 880/464, loss: 0.7333189249038696 2023-01-24 02:31:27.257467: step: 882/464, loss: 0.6409208178520203 2023-01-24 02:31:27.975406: step: 884/464, loss: 0.18310889601707458 2023-01-24 02:31:28.697830: step: 886/464, loss: 0.2629651427268982 2023-01-24 02:31:29.447894: step: 888/464, loss: 0.22283713519573212 2023-01-24 02:31:30.194649: step: 890/464, loss: 7.620507717132568 2023-01-24 02:31:30.964345: step: 892/464, loss: 0.3329012095928192 2023-01-24 02:31:31.665527: step: 894/464, loss: 0.2923496663570404 2023-01-24 02:31:32.618822: step: 896/464, loss: 0.5430619120597839 2023-01-24 02:31:33.306407: step: 898/464, loss: 0.46999070048332214 2023-01-24 02:31:34.020569: step: 900/464, loss: 0.30711182951927185 2023-01-24 02:31:34.806548: step: 902/464, loss: 0.360378623008728 2023-01-24 02:31:35.645776: step: 904/464, loss: 0.7840194702148438 2023-01-24 02:31:36.345903: step: 906/464, loss: 0.7028653621673584 2023-01-24 02:31:37.064043: step: 908/464, loss: 0.2442149817943573 2023-01-24 02:31:37.911648: step: 910/464, loss: 0.49424415826797485 2023-01-24 02:31:38.669879: step: 912/464, loss: 0.33642280101776123 2023-01-24 02:31:39.458955: step: 914/464, loss: 1.1186965703964233 2023-01-24 02:31:40.290222: step: 916/464, loss: 0.18287938833236694 2023-01-24 02:31:41.059728: step: 918/464, loss: 0.21742171049118042 2023-01-24 02:31:41.923007: step: 920/464, loss: 3.1498212814331055 2023-01-24 02:31:42.723176: step: 922/464, loss: 0.5353689193725586 2023-01-24 02:31:43.437617: step: 924/464, loss: 0.5739856958389282 2023-01-24 02:31:44.259455: step: 926/464, loss: 1.5230512619018555 2023-01-24 02:31:44.998279: step: 928/464, loss: 1.3895797729492188 2023-01-24 02:31:45.682925: step: 930/464, loss: 0.4182075262069702 ================================================== Loss: 0.784 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32278934300993123, 'r': 0.32278934300993123, 'f1': 0.32278934300993123}, 'combined': 0.2378447790599493, 'epoch': 6} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.320918981405015, 'r': 0.25850926833809695, 'f1': 0.2863530769015771}, 'combined': 0.17784033197045315, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2953953382084095, 'r': 0.30660578747628087, 'f1': 0.3008961824953445}, 'combined': 0.221712976575517, 'epoch': 6} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.30699447077773995, 'r': 0.2503232308612174, 'f1': 0.27577752350452767}, 'combined': 0.17127235670281193, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3139869700243046, 'r': 0.31517857142857136, 'f1': 0.31458164231601726}, 'combined': 0.23179699960127587, 'epoch': 6} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3211181125921073, 'r': 0.2608886541592343, 'f1': 0.28788693536307663}, 'combined': 0.17879293880443708, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25735294117647056, 'r': 0.25, 'f1': 0.25362318840579706}, 'combined': 0.1690821256038647, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.234375, 'r': 0.32608695652173914, 'f1': 0.2727272727272727}, 'combined': 0.13636363636363635, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5113636363636364, 'r': 0.1939655172413793, 'f1': 0.28125}, 'combined': 0.1875, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3345195392377826, 'r': 0.3041086720343478, 'f1': 0.3185900373693168}, 'combined': 0.23475055385107552, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3174410049953423, 'r': 0.22522000156784164, 'f1': 0.26349438333717423}, 'combined': 0.163643880177824, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.3142857142857143, 'f1': 0.3235294117647059}, 'combined': 0.21568627450980393, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3005869784133561, 'r': 0.3199796866980887, 'f1': 0.30998032148877347}, 'combined': 0.22840655267593835, 'epoch': 5} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.2939622802187892, 'r': 0.2496070249195044, 'f1': 0.269974958152936}, 'combined': 0.1676686582212971, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2357142857142857, 'r': 0.358695652173913, 'f1': 0.2844827586206896}, 'combined': 0.1422413793103448, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3360532326750742, 'r': 0.30544496859840714, 'f1': 0.3200188836011144}, 'combined': 0.2358033879166106, 'epoch': 4} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3238311636676841, 'r': 0.22674568659177094, 'f1': 0.26672868469148}, 'combined': 0.16565255154523495, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5446428571428571, 'r': 0.2629310344827586, 'f1': 0.3546511627906977}, 'combined': 0.2364341085271318, 'epoch': 4} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:34:30.878649: step: 2/464, loss: 0.36327677965164185 2023-01-24 02:34:31.656644: step: 4/464, loss: 1.713686466217041 2023-01-24 02:34:32.396385: step: 6/464, loss: 0.20647361874580383 2023-01-24 02:34:33.100356: step: 8/464, loss: 0.32153114676475525 2023-01-24 02:34:33.934420: step: 10/464, loss: 0.21674101054668427 2023-01-24 02:34:34.750687: step: 12/464, loss: 0.2844339609146118 2023-01-24 02:34:35.520840: step: 14/464, loss: 0.13163089752197266 2023-01-24 02:34:36.247487: step: 16/464, loss: 0.12527650594711304 2023-01-24 02:34:36.899818: step: 18/464, loss: 1.9080713987350464 2023-01-24 02:34:37.761598: step: 20/464, loss: 0.2996969521045685 2023-01-24 02:34:38.497350: step: 22/464, loss: 0.4937852919101715 2023-01-24 02:34:39.265477: step: 24/464, loss: 0.17886342108249664 2023-01-24 02:34:39.994642: step: 26/464, loss: 1.044276475906372 2023-01-24 02:34:40.720753: step: 28/464, loss: 0.2835633456707001 2023-01-24 02:34:41.550978: step: 30/464, loss: 0.5925331115722656 2023-01-24 02:34:42.324867: step: 32/464, loss: 0.5911035537719727 2023-01-24 02:34:43.122908: step: 34/464, loss: 1.7006969451904297 2023-01-24 02:34:43.839824: step: 36/464, loss: 0.7910258769989014 2023-01-24 02:34:44.585663: step: 38/464, loss: 1.1063692569732666 2023-01-24 02:34:45.301334: step: 40/464, loss: 0.3510200083255768 2023-01-24 02:34:46.027455: step: 42/464, loss: 0.4138941764831543 2023-01-24 02:34:46.690772: step: 44/464, loss: 0.6433913111686707 2023-01-24 02:34:47.381157: step: 46/464, loss: 0.19204004108905792 2023-01-24 02:34:48.103285: step: 48/464, loss: 0.41403651237487793 2023-01-24 02:34:48.801115: step: 50/464, loss: 0.9922606945037842 2023-01-24 02:34:49.601873: step: 52/464, loss: 0.2410106509923935 2023-01-24 02:34:50.352994: step: 54/464, loss: 0.993074893951416 2023-01-24 02:34:51.142377: step: 56/464, loss: 0.3063466548919678 2023-01-24 02:34:51.977877: step: 58/464, loss: 0.6400434970855713 2023-01-24 02:34:52.776286: step: 60/464, loss: 0.3732597827911377 2023-01-24 02:34:53.513013: step: 62/464, loss: 0.2601960301399231 2023-01-24 02:34:54.299944: step: 64/464, loss: 0.4548768699169159 2023-01-24 02:34:55.039692: step: 66/464, loss: 0.5139056444168091 2023-01-24 02:34:55.759852: step: 68/464, loss: 1.3860743045806885 2023-01-24 02:34:56.462188: step: 70/464, loss: 0.3661994934082031 2023-01-24 02:34:57.298056: step: 72/464, loss: 0.9115047454833984 2023-01-24 02:34:58.008338: step: 74/464, loss: 0.32631921768188477 2023-01-24 02:34:58.740651: step: 76/464, loss: 0.8258537650108337 2023-01-24 02:34:59.547141: step: 78/464, loss: 0.5958231687545776 2023-01-24 02:35:00.376667: step: 80/464, loss: 1.1353201866149902 2023-01-24 02:35:01.148436: step: 82/464, loss: 0.9883450269699097 2023-01-24 02:35:01.948287: step: 84/464, loss: 0.6555312871932983 2023-01-24 02:35:02.717431: step: 86/464, loss: 0.585017740726471 2023-01-24 02:35:03.397212: step: 88/464, loss: 0.1543656438589096 2023-01-24 02:35:04.186525: step: 90/464, loss: 0.2782514691352844 2023-01-24 02:35:04.942475: step: 92/464, loss: 5.046449184417725 2023-01-24 02:35:05.619515: step: 94/464, loss: 0.4302979111671448 2023-01-24 02:35:06.279344: step: 96/464, loss: 0.570051908493042 2023-01-24 02:35:07.067588: step: 98/464, loss: 0.4938129186630249 2023-01-24 02:35:07.828937: step: 100/464, loss: 0.5796581506729126 2023-01-24 02:35:08.585342: step: 102/464, loss: 0.3145013749599457 2023-01-24 02:35:09.343371: step: 104/464, loss: 0.49091294407844543 2023-01-24 02:35:10.056792: step: 106/464, loss: 2.0131349563598633 2023-01-24 02:35:10.770288: step: 108/464, loss: 0.31967562437057495 2023-01-24 02:35:11.549078: step: 110/464, loss: 0.2807406485080719 2023-01-24 02:35:12.352150: step: 112/464, loss: 0.3552629351615906 2023-01-24 02:35:13.095839: step: 114/464, loss: 0.15171876549720764 2023-01-24 02:35:13.832869: step: 116/464, loss: 0.29429763555526733 2023-01-24 02:35:14.631077: step: 118/464, loss: 0.8097703456878662 2023-01-24 02:35:15.351607: step: 120/464, loss: 0.12587079405784607 2023-01-24 02:35:16.125236: step: 122/464, loss: 0.15087245404720306 2023-01-24 02:35:16.874804: step: 124/464, loss: 0.16236327588558197 2023-01-24 02:35:17.656310: step: 126/464, loss: 0.6959328651428223 2023-01-24 02:35:18.483494: step: 128/464, loss: 0.2463809698820114 2023-01-24 02:35:19.162265: step: 130/464, loss: 0.21326425671577454 2023-01-24 02:35:19.870458: step: 132/464, loss: 0.5366947054862976 2023-01-24 02:35:20.568226: step: 134/464, loss: 0.3185555934906006 2023-01-24 02:35:21.318157: step: 136/464, loss: 0.20601888000965118 2023-01-24 02:35:22.017469: step: 138/464, loss: 0.3682593107223511 2023-01-24 02:35:22.725598: step: 140/464, loss: 0.21474689245224 2023-01-24 02:35:23.428561: step: 142/464, loss: 1.0219560861587524 2023-01-24 02:35:24.144060: step: 144/464, loss: 1.8815715312957764 2023-01-24 02:35:24.918980: step: 146/464, loss: 0.5428639054298401 2023-01-24 02:35:25.625281: step: 148/464, loss: 0.5084086656570435 2023-01-24 02:35:26.293293: step: 150/464, loss: 0.5396155714988708 2023-01-24 02:35:27.010020: step: 152/464, loss: 0.21716253459453583 2023-01-24 02:35:27.718708: step: 154/464, loss: 0.9855729937553406 2023-01-24 02:35:28.512351: step: 156/464, loss: 0.3413148820400238 2023-01-24 02:35:29.425166: step: 158/464, loss: 0.25234007835388184 2023-01-24 02:35:30.275943: step: 160/464, loss: 0.3375931680202484 2023-01-24 02:35:30.963296: step: 162/464, loss: 0.4368821084499359 2023-01-24 02:35:31.704122: step: 164/464, loss: 0.6206197142601013 2023-01-24 02:35:32.488581: step: 166/464, loss: 0.24986518919467926 2023-01-24 02:35:33.212095: step: 168/464, loss: 1.968464970588684 2023-01-24 02:35:33.917037: step: 170/464, loss: 0.4953523576259613 2023-01-24 02:35:34.672148: step: 172/464, loss: 0.102138951420784 2023-01-24 02:35:35.380174: step: 174/464, loss: 0.6342282891273499 2023-01-24 02:35:36.117862: step: 176/464, loss: 0.5496773719787598 2023-01-24 02:35:36.816161: step: 178/464, loss: 0.21457117795944214 2023-01-24 02:35:37.610317: step: 180/464, loss: 0.7042529582977295 2023-01-24 02:35:38.335280: step: 182/464, loss: 0.8047784566879272 2023-01-24 02:35:39.224033: step: 184/464, loss: 0.2432776391506195 2023-01-24 02:35:39.990307: step: 186/464, loss: 0.27830901741981506 2023-01-24 02:35:40.682425: step: 188/464, loss: 0.03338748216629028 2023-01-24 02:35:41.429543: step: 190/464, loss: 0.8855580687522888 2023-01-24 02:35:42.154613: step: 192/464, loss: 0.1724783480167389 2023-01-24 02:35:42.915930: step: 194/464, loss: 0.39989563822746277 2023-01-24 02:35:43.683667: step: 196/464, loss: 0.5461003184318542 2023-01-24 02:35:44.469309: step: 198/464, loss: 3.4108574390411377 2023-01-24 02:35:45.202972: step: 200/464, loss: 0.42918553948402405 2023-01-24 02:35:45.996912: step: 202/464, loss: 0.8983579277992249 2023-01-24 02:35:46.805944: step: 204/464, loss: 0.3279099464416504 2023-01-24 02:35:47.513927: step: 206/464, loss: 0.2302406132221222 2023-01-24 02:35:48.191925: step: 208/464, loss: 1.0292377471923828 2023-01-24 02:35:48.914174: step: 210/464, loss: 0.5882940888404846 2023-01-24 02:35:49.663114: step: 212/464, loss: 0.17929045855998993 2023-01-24 02:35:50.443474: step: 214/464, loss: 0.1669246256351471 2023-01-24 02:35:51.236126: step: 216/464, loss: 0.5283328294754028 2023-01-24 02:35:52.014868: step: 218/464, loss: 1.1631416082382202 2023-01-24 02:35:52.757771: step: 220/464, loss: 0.6150327920913696 2023-01-24 02:35:53.523840: step: 222/464, loss: 0.4816374182701111 2023-01-24 02:35:54.194414: step: 224/464, loss: 0.18757089972496033 2023-01-24 02:35:54.891348: step: 226/464, loss: 0.2773834466934204 2023-01-24 02:35:55.605100: step: 228/464, loss: 0.2142343968153 2023-01-24 02:35:56.361816: step: 230/464, loss: 0.19133417308330536 2023-01-24 02:35:57.098502: step: 232/464, loss: 0.4470163881778717 2023-01-24 02:35:57.857628: step: 234/464, loss: 0.5169656276702881 2023-01-24 02:35:58.675252: step: 236/464, loss: 1.0284165143966675 2023-01-24 02:35:59.440360: step: 238/464, loss: 1.306230902671814 2023-01-24 02:36:00.160521: step: 240/464, loss: 0.20086947083473206 2023-01-24 02:36:00.916006: step: 242/464, loss: 0.9084507822990417 2023-01-24 02:36:01.547801: step: 244/464, loss: 1.1629050970077515 2023-01-24 02:36:02.294631: step: 246/464, loss: 0.24601887166500092 2023-01-24 02:36:03.023103: step: 248/464, loss: 0.6524565815925598 2023-01-24 02:36:03.757941: step: 250/464, loss: 0.9458284974098206 2023-01-24 02:36:04.573559: step: 252/464, loss: 0.5239224433898926 2023-01-24 02:36:05.357769: step: 254/464, loss: 1.2422142028808594 2023-01-24 02:36:06.066581: step: 256/464, loss: 0.9221518635749817 2023-01-24 02:36:06.764670: step: 258/464, loss: 0.17750589549541473 2023-01-24 02:36:07.525154: step: 260/464, loss: 0.5422233939170837 2023-01-24 02:36:08.246115: step: 262/464, loss: 0.26078078150749207 2023-01-24 02:36:08.964190: step: 264/464, loss: 0.7859283089637756 2023-01-24 02:36:09.734239: step: 266/464, loss: 0.4978753328323364 2023-01-24 02:36:10.460544: step: 268/464, loss: 0.19332841038703918 2023-01-24 02:36:11.151809: step: 270/464, loss: 0.04691997542977333 2023-01-24 02:36:11.977571: step: 272/464, loss: 1.331789255142212 2023-01-24 02:36:12.768262: step: 274/464, loss: 0.511642575263977 2023-01-24 02:36:13.481831: step: 276/464, loss: 0.16044507920742035 2023-01-24 02:36:14.261268: step: 278/464, loss: 0.45334386825561523 2023-01-24 02:36:14.920706: step: 280/464, loss: 0.25107744336128235 2023-01-24 02:36:15.673642: step: 282/464, loss: 0.14941652119159698 2023-01-24 02:36:16.438506: step: 284/464, loss: 0.753431499004364 2023-01-24 02:36:17.365726: step: 286/464, loss: 0.21342191100120544 2023-01-24 02:36:18.050322: step: 288/464, loss: 0.4793633222579956 2023-01-24 02:36:18.811892: step: 290/464, loss: 1.7517832517623901 2023-01-24 02:36:19.501812: step: 292/464, loss: 0.23970462381839752 2023-01-24 02:36:20.243071: step: 294/464, loss: 1.064223051071167 2023-01-24 02:36:21.072619: step: 296/464, loss: 0.584053099155426 2023-01-24 02:36:21.836038: step: 298/464, loss: 1.830169439315796 2023-01-24 02:36:22.633582: step: 300/464, loss: 0.1358712762594223 2023-01-24 02:36:23.364523: step: 302/464, loss: 1.2534114122390747 2023-01-24 02:36:24.138648: step: 304/464, loss: 0.5717884302139282 2023-01-24 02:36:24.871409: step: 306/464, loss: 0.220110222697258 2023-01-24 02:36:25.624246: step: 308/464, loss: 0.2528989613056183 2023-01-24 02:36:26.456902: step: 310/464, loss: 0.46339473128318787 2023-01-24 02:36:27.116318: step: 312/464, loss: 0.20236316323280334 2023-01-24 02:36:27.809217: step: 314/464, loss: 0.32077664136886597 2023-01-24 02:36:28.531115: step: 316/464, loss: 0.1633232831954956 2023-01-24 02:36:29.237244: step: 318/464, loss: 0.7350283861160278 2023-01-24 02:36:29.967146: step: 320/464, loss: 0.8395072221755981 2023-01-24 02:36:30.602741: step: 322/464, loss: 0.597608745098114 2023-01-24 02:36:31.331389: step: 324/464, loss: 0.34893375635147095 2023-01-24 02:36:32.040805: step: 326/464, loss: 0.7595546841621399 2023-01-24 02:36:32.815454: step: 328/464, loss: 0.7179768085479736 2023-01-24 02:36:33.569255: step: 330/464, loss: 0.24666252732276917 2023-01-24 02:36:34.246242: step: 332/464, loss: 0.28931012749671936 2023-01-24 02:36:35.027226: step: 334/464, loss: 0.7932789325714111 2023-01-24 02:36:35.860045: step: 336/464, loss: 0.14844614267349243 2023-01-24 02:36:36.603700: step: 338/464, loss: 0.1988063007593155 2023-01-24 02:36:37.307807: step: 340/464, loss: 0.466692179441452 2023-01-24 02:36:38.031792: step: 342/464, loss: 0.20156744122505188 2023-01-24 02:36:38.730899: step: 344/464, loss: 0.4504646360874176 2023-01-24 02:36:39.469669: step: 346/464, loss: 0.7976289987564087 2023-01-24 02:36:40.197381: step: 348/464, loss: 0.5727869272232056 2023-01-24 02:36:41.017023: step: 350/464, loss: 0.13022443652153015 2023-01-24 02:36:41.815699: step: 352/464, loss: 0.1916579008102417 2023-01-24 02:36:42.617545: step: 354/464, loss: 0.6014117002487183 2023-01-24 02:36:43.364523: step: 356/464, loss: 0.18349871039390564 2023-01-24 02:36:44.076820: step: 358/464, loss: 0.382188081741333 2023-01-24 02:36:44.809568: step: 360/464, loss: 1.9701430797576904 2023-01-24 02:36:45.584750: step: 362/464, loss: 0.9048128128051758 2023-01-24 02:36:46.394828: step: 364/464, loss: 0.5793383717536926 2023-01-24 02:36:47.140382: step: 366/464, loss: 0.9921724796295166 2023-01-24 02:36:47.945131: step: 368/464, loss: 0.44221946597099304 2023-01-24 02:36:48.638234: step: 370/464, loss: 0.42944034934043884 2023-01-24 02:36:49.340620: step: 372/464, loss: 0.3041359484195709 2023-01-24 02:36:50.093045: step: 374/464, loss: 0.3399038314819336 2023-01-24 02:36:50.756249: step: 376/464, loss: 0.35421428084373474 2023-01-24 02:36:51.454686: step: 378/464, loss: 0.26342979073524475 2023-01-24 02:36:52.189248: step: 380/464, loss: 0.10741574317216873 2023-01-24 02:36:53.013720: step: 382/464, loss: 0.5449819564819336 2023-01-24 02:36:53.840323: step: 384/464, loss: 1.1310272216796875 2023-01-24 02:36:54.645596: step: 386/464, loss: 1.4800504446029663 2023-01-24 02:36:55.399225: step: 388/464, loss: 0.13597045838832855 2023-01-24 02:36:56.146919: step: 390/464, loss: 0.8671262860298157 2023-01-24 02:36:56.887808: step: 392/464, loss: 0.5314968228340149 2023-01-24 02:36:57.633482: step: 394/464, loss: 0.5388355255126953 2023-01-24 02:36:58.382797: step: 396/464, loss: 2.4609920978546143 2023-01-24 02:36:59.153259: step: 398/464, loss: 0.32738056778907776 2023-01-24 02:36:59.923010: step: 400/464, loss: 0.4142988324165344 2023-01-24 02:37:00.717156: step: 402/464, loss: 0.41231632232666016 2023-01-24 02:37:01.468498: step: 404/464, loss: 0.8846166133880615 2023-01-24 02:37:02.190270: step: 406/464, loss: 0.2211620807647705 2023-01-24 02:37:02.936496: step: 408/464, loss: 0.31446048617362976 2023-01-24 02:37:03.656315: step: 410/464, loss: 1.0601859092712402 2023-01-24 02:37:04.413415: step: 412/464, loss: 1.1913033723831177 2023-01-24 02:37:05.228404: step: 414/464, loss: 0.6423701047897339 2023-01-24 02:37:06.010669: step: 416/464, loss: 0.6132628917694092 2023-01-24 02:37:06.824313: step: 418/464, loss: 1.12965726852417 2023-01-24 02:37:07.537672: step: 420/464, loss: 0.36937347054481506 2023-01-24 02:37:08.291177: step: 422/464, loss: 0.21653147041797638 2023-01-24 02:37:08.981697: step: 424/464, loss: 1.2950403690338135 2023-01-24 02:37:09.675734: step: 426/464, loss: 1.0248606204986572 2023-01-24 02:37:10.353393: step: 428/464, loss: 0.16431821882724762 2023-01-24 02:37:11.059634: step: 430/464, loss: 0.9855753779411316 2023-01-24 02:37:11.780787: step: 432/464, loss: 0.7400719523429871 2023-01-24 02:37:12.561037: step: 434/464, loss: 1.0649687051773071 2023-01-24 02:37:13.313542: step: 436/464, loss: 0.1411045640707016 2023-01-24 02:37:13.994831: step: 438/464, loss: 0.8169607520103455 2023-01-24 02:37:14.773898: step: 440/464, loss: 0.3585193455219269 2023-01-24 02:37:15.448695: step: 442/464, loss: 0.4122265577316284 2023-01-24 02:37:16.218615: step: 444/464, loss: 0.8370229601860046 2023-01-24 02:37:17.015560: step: 446/464, loss: 1.1493605375289917 2023-01-24 02:37:17.803668: step: 448/464, loss: 0.2955279052257538 2023-01-24 02:37:18.578934: step: 450/464, loss: 0.16914665699005127 2023-01-24 02:37:19.314234: step: 452/464, loss: 0.3054179847240448 2023-01-24 02:37:20.035390: step: 454/464, loss: 0.31248268485069275 2023-01-24 02:37:20.777228: step: 456/464, loss: 1.0347895622253418 2023-01-24 02:37:21.481954: step: 458/464, loss: 0.4280281662940979 2023-01-24 02:37:22.267838: step: 460/464, loss: 0.2224617600440979 2023-01-24 02:37:23.007824: step: 462/464, loss: 0.4653012454509735 2023-01-24 02:37:23.784537: step: 464/464, loss: 1.165769338607788 2023-01-24 02:37:24.515392: step: 466/464, loss: 0.2663632333278656 2023-01-24 02:37:25.219530: step: 468/464, loss: 0.09995938092470169 2023-01-24 02:37:26.026976: step: 470/464, loss: 0.2406713366508484 2023-01-24 02:37:26.750301: step: 472/464, loss: 0.1282961070537567 2023-01-24 02:37:27.509461: step: 474/464, loss: 0.2567034959793091 2023-01-24 02:37:28.178586: step: 476/464, loss: 1.0045839548110962 2023-01-24 02:37:28.927747: step: 478/464, loss: 0.4196160137653351 2023-01-24 02:37:29.801925: step: 480/464, loss: 1.2827622890472412 2023-01-24 02:37:30.571527: step: 482/464, loss: 2.9640793800354004 2023-01-24 02:37:31.333494: step: 484/464, loss: 0.22173580527305603 2023-01-24 02:37:32.044316: step: 486/464, loss: 0.25746065378189087 2023-01-24 02:37:32.843650: step: 488/464, loss: 1.617734432220459 2023-01-24 02:37:33.658828: step: 490/464, loss: 0.5065245032310486 2023-01-24 02:37:34.400435: step: 492/464, loss: 0.17967504262924194 2023-01-24 02:37:35.116358: step: 494/464, loss: 0.39161476492881775 2023-01-24 02:37:35.774046: step: 496/464, loss: 0.4677339494228363 2023-01-24 02:37:36.494657: step: 498/464, loss: 0.4482991695404053 2023-01-24 02:37:37.349011: step: 500/464, loss: 0.6047283411026001 2023-01-24 02:37:38.199511: step: 502/464, loss: 0.8439066410064697 2023-01-24 02:37:38.931634: step: 504/464, loss: 0.3559965193271637 2023-01-24 02:37:39.694171: step: 506/464, loss: 0.7783767580986023 2023-01-24 02:37:40.418639: step: 508/464, loss: 0.42967337369918823 2023-01-24 02:37:41.214082: step: 510/464, loss: 0.4983972907066345 2023-01-24 02:37:42.013108: step: 512/464, loss: 0.7379698157310486 2023-01-24 02:37:42.876338: step: 514/464, loss: 0.19468754529953003 2023-01-24 02:37:43.594248: step: 516/464, loss: 0.3303523659706116 2023-01-24 02:37:44.247646: step: 518/464, loss: 0.40996867418289185 2023-01-24 02:37:44.920043: step: 520/464, loss: 1.7850862741470337 2023-01-24 02:37:45.617850: step: 522/464, loss: 0.6261616945266724 2023-01-24 02:37:46.350109: step: 524/464, loss: 0.45365774631500244 2023-01-24 02:37:47.126419: step: 526/464, loss: 1.1625231504440308 2023-01-24 02:37:47.780167: step: 528/464, loss: 0.4727645516395569 2023-01-24 02:37:48.461275: step: 530/464, loss: 3.4565367698669434 2023-01-24 02:37:49.214984: step: 532/464, loss: 0.5150865316390991 2023-01-24 02:37:49.962291: step: 534/464, loss: 4.625498294830322 2023-01-24 02:37:50.681719: step: 536/464, loss: 0.5529197454452515 2023-01-24 02:37:51.494129: step: 538/464, loss: 0.3276940584182739 2023-01-24 02:37:52.201586: step: 540/464, loss: 0.3311607539653778 2023-01-24 02:37:52.898966: step: 542/464, loss: 1.1282060146331787 2023-01-24 02:37:53.639993: step: 544/464, loss: 0.18877974152565002 2023-01-24 02:37:54.368710: step: 546/464, loss: 0.5707278251647949 2023-01-24 02:37:55.093720: step: 548/464, loss: 0.24991458654403687 2023-01-24 02:37:55.959910: step: 550/464, loss: 0.7295461893081665 2023-01-24 02:37:56.689057: step: 552/464, loss: 2.1148874759674072 2023-01-24 02:37:57.418117: step: 554/464, loss: 1.4030309915542603 2023-01-24 02:37:58.180926: step: 556/464, loss: 0.21013569831848145 2023-01-24 02:37:58.976532: step: 558/464, loss: 1.9200247526168823 2023-01-24 02:37:59.711889: step: 560/464, loss: 0.3160833716392517 2023-01-24 02:38:00.402676: step: 562/464, loss: 0.34151044487953186 2023-01-24 02:38:01.143669: step: 564/464, loss: 1.735850214958191 2023-01-24 02:38:01.878257: step: 566/464, loss: 0.7673071026802063 2023-01-24 02:38:02.672663: step: 568/464, loss: 0.1795767992734909 2023-01-24 02:38:03.359816: step: 570/464, loss: 0.2937583327293396 2023-01-24 02:38:04.177686: step: 572/464, loss: 0.28772732615470886 2023-01-24 02:38:04.901871: step: 574/464, loss: 0.5424758791923523 2023-01-24 02:38:05.596868: step: 576/464, loss: 1.393399953842163 2023-01-24 02:38:06.346080: step: 578/464, loss: 0.27327778935432434 2023-01-24 02:38:07.014903: step: 580/464, loss: 0.2959989011287689 2023-01-24 02:38:07.805718: step: 582/464, loss: 1.3786649703979492 2023-01-24 02:38:08.577259: step: 584/464, loss: 0.6207864284515381 2023-01-24 02:38:09.302707: step: 586/464, loss: 0.14585839211940765 2023-01-24 02:38:09.999352: step: 588/464, loss: 0.752159595489502 2023-01-24 02:38:10.689305: step: 590/464, loss: 0.6962541937828064 2023-01-24 02:38:11.405488: step: 592/464, loss: 0.22196218371391296 2023-01-24 02:38:12.202214: step: 594/464, loss: 0.4736698865890503 2023-01-24 02:38:12.885161: step: 596/464, loss: 0.827335000038147 2023-01-24 02:38:13.622456: step: 598/464, loss: 0.9425488710403442 2023-01-24 02:38:14.339045: step: 600/464, loss: 0.35686421394348145 2023-01-24 02:38:15.068509: step: 602/464, loss: 0.2859801650047302 2023-01-24 02:38:15.812217: step: 604/464, loss: 0.21103359758853912 2023-01-24 02:38:16.461608: step: 606/464, loss: 0.19106243550777435 2023-01-24 02:38:17.320493: step: 608/464, loss: 0.6681709885597229 2023-01-24 02:38:18.108957: step: 610/464, loss: 0.1334463208913803 2023-01-24 02:38:18.893100: step: 612/464, loss: 0.3725147247314453 2023-01-24 02:38:19.725324: step: 614/464, loss: 0.8420844078063965 2023-01-24 02:38:20.515126: step: 616/464, loss: 1.3648327589035034 2023-01-24 02:38:21.246551: step: 618/464, loss: 1.5693870782852173 2023-01-24 02:38:22.032711: step: 620/464, loss: 0.35302260518074036 2023-01-24 02:38:22.872535: step: 622/464, loss: 0.11129667609930038 2023-01-24 02:38:23.586019: step: 624/464, loss: 0.5374612212181091 2023-01-24 02:38:24.288602: step: 626/464, loss: 0.34831130504608154 2023-01-24 02:38:25.047031: step: 628/464, loss: 0.1023213267326355 2023-01-24 02:38:25.724310: step: 630/464, loss: 0.8473554849624634 2023-01-24 02:38:26.469079: step: 632/464, loss: 0.3351025879383087 2023-01-24 02:38:27.229576: step: 634/464, loss: 0.3838156461715698 2023-01-24 02:38:28.022078: step: 636/464, loss: 0.2758370339870453 2023-01-24 02:38:28.847441: step: 638/464, loss: 0.29054591059684753 2023-01-24 02:38:29.632913: step: 640/464, loss: 0.2623771131038666 2023-01-24 02:38:30.344357: step: 642/464, loss: 0.4661090672016144 2023-01-24 02:38:31.081312: step: 644/464, loss: 0.16628088057041168 2023-01-24 02:38:31.834707: step: 646/464, loss: 0.32660287618637085 2023-01-24 02:38:32.575616: step: 648/464, loss: 0.49122801423072815 2023-01-24 02:38:33.243710: step: 650/464, loss: 0.3161919116973877 2023-01-24 02:38:33.899127: step: 652/464, loss: 0.1668117344379425 2023-01-24 02:38:34.601530: step: 654/464, loss: 0.32301071286201477 2023-01-24 02:38:35.364898: step: 656/464, loss: 0.39199984073638916 2023-01-24 02:38:36.082159: step: 658/464, loss: 0.6262532472610474 2023-01-24 02:38:36.794423: step: 660/464, loss: 0.22475136816501617 2023-01-24 02:38:37.538108: step: 662/464, loss: 1.1950969696044922 2023-01-24 02:38:38.284963: step: 664/464, loss: 0.5805909037590027 2023-01-24 02:38:39.040440: step: 666/464, loss: 0.42047902941703796 2023-01-24 02:38:39.777560: step: 668/464, loss: 0.7445780634880066 2023-01-24 02:38:40.498862: step: 670/464, loss: 0.9380486607551575 2023-01-24 02:38:41.206192: step: 672/464, loss: 0.49395236372947693 2023-01-24 02:38:41.935008: step: 674/464, loss: 0.2148665338754654 2023-01-24 02:38:42.715785: step: 676/464, loss: 0.16246622800827026 2023-01-24 02:38:43.443652: step: 678/464, loss: 0.36869409680366516 2023-01-24 02:38:44.115619: step: 680/464, loss: 0.34773388504981995 2023-01-24 02:38:44.844753: step: 682/464, loss: 0.4818330705165863 2023-01-24 02:38:45.584698: step: 684/464, loss: 0.3407643139362335 2023-01-24 02:38:46.418674: step: 686/464, loss: 0.5388532876968384 2023-01-24 02:38:47.192835: step: 688/464, loss: 0.9964404702186584 2023-01-24 02:38:47.920809: step: 690/464, loss: 0.3177805244922638 2023-01-24 02:38:48.668081: step: 692/464, loss: 1.2455317974090576 2023-01-24 02:38:49.419724: step: 694/464, loss: 0.6586301326751709 2023-01-24 02:38:50.160638: step: 696/464, loss: 0.932684600353241 2023-01-24 02:38:50.853273: step: 698/464, loss: 0.2300596386194229 2023-01-24 02:38:51.626068: step: 700/464, loss: 0.5244414210319519 2023-01-24 02:38:52.414823: step: 702/464, loss: 0.15712791681289673 2023-01-24 02:38:53.191377: step: 704/464, loss: 0.5612965226173401 2023-01-24 02:38:53.896603: step: 706/464, loss: 0.4752296805381775 2023-01-24 02:38:54.572265: step: 708/464, loss: 0.8295560479164124 2023-01-24 02:38:55.300745: step: 710/464, loss: 0.5479645133018494 2023-01-24 02:38:56.084550: step: 712/464, loss: 0.1347873955965042 2023-01-24 02:38:56.865494: step: 714/464, loss: 0.41366565227508545 2023-01-24 02:38:57.550280: step: 716/464, loss: 0.4166448414325714 2023-01-24 02:38:58.306180: step: 718/464, loss: 0.4311206340789795 2023-01-24 02:38:59.023144: step: 720/464, loss: 0.37743064761161804 2023-01-24 02:38:59.738523: step: 722/464, loss: 0.804713785648346 2023-01-24 02:39:00.491899: step: 724/464, loss: 0.38146859407424927 2023-01-24 02:39:01.247906: step: 726/464, loss: 0.2579302489757538 2023-01-24 02:39:02.024294: step: 728/464, loss: 0.23407909274101257 2023-01-24 02:39:02.793762: step: 730/464, loss: 0.618119478225708 2023-01-24 02:39:03.484664: step: 732/464, loss: 0.6577721834182739 2023-01-24 02:39:04.246398: step: 734/464, loss: 1.7237317562103271 2023-01-24 02:39:04.987980: step: 736/464, loss: 0.2821105122566223 2023-01-24 02:39:05.817628: step: 738/464, loss: 0.8553675413131714 2023-01-24 02:39:06.497442: step: 740/464, loss: 0.9743956327438354 2023-01-24 02:39:07.200359: step: 742/464, loss: 0.7038723826408386 2023-01-24 02:39:07.868434: step: 744/464, loss: 0.6714287996292114 2023-01-24 02:39:08.568883: step: 746/464, loss: 0.48815155029296875 2023-01-24 02:39:09.333498: step: 748/464, loss: 0.14024285972118378 2023-01-24 02:39:10.149594: step: 750/464, loss: 0.35646864771842957 2023-01-24 02:39:10.905761: step: 752/464, loss: 0.4604167342185974 2023-01-24 02:39:11.691174: step: 754/464, loss: 0.1984981894493103 2023-01-24 02:39:12.438813: step: 756/464, loss: 0.36041873693466187 2023-01-24 02:39:13.221150: step: 758/464, loss: 0.5603522062301636 2023-01-24 02:39:14.034567: step: 760/464, loss: 0.6194506883621216 2023-01-24 02:39:14.739290: step: 762/464, loss: 0.30803316831588745 2023-01-24 02:39:15.516128: step: 764/464, loss: 1.318713665008545 2023-01-24 02:39:16.355238: step: 766/464, loss: 0.13423074781894684 2023-01-24 02:39:17.178280: step: 768/464, loss: 0.3748876452445984 2023-01-24 02:39:17.951858: step: 770/464, loss: 1.5057021379470825 2023-01-24 02:39:18.726831: step: 772/464, loss: 0.6628919243812561 2023-01-24 02:39:19.431549: step: 774/464, loss: 0.6508646607398987 2023-01-24 02:39:20.160062: step: 776/464, loss: 0.5535852313041687 2023-01-24 02:39:20.904324: step: 778/464, loss: 0.6056692600250244 2023-01-24 02:39:21.694935: step: 780/464, loss: 0.1761878877878189 2023-01-24 02:39:22.544651: step: 782/464, loss: 1.3329436779022217 2023-01-24 02:39:23.283105: step: 784/464, loss: 0.22690290212631226 2023-01-24 02:39:24.004375: step: 786/464, loss: 0.4519822299480438 2023-01-24 02:39:24.734075: step: 788/464, loss: 1.4893604516983032 2023-01-24 02:39:25.418858: step: 790/464, loss: 0.43520382046699524 2023-01-24 02:39:26.208585: step: 792/464, loss: 2.284243106842041 2023-01-24 02:39:27.006213: step: 794/464, loss: 1.342424750328064 2023-01-24 02:39:27.797507: step: 796/464, loss: 0.18607650697231293 2023-01-24 02:39:28.564128: step: 798/464, loss: 1.91313898563385 2023-01-24 02:39:29.384734: step: 800/464, loss: 0.14298135042190552 2023-01-24 02:39:30.086116: step: 802/464, loss: 0.16389590501785278 2023-01-24 02:39:30.814632: step: 804/464, loss: 1.263809323310852 2023-01-24 02:39:31.592570: step: 806/464, loss: 0.5659499764442444 2023-01-24 02:39:32.335231: step: 808/464, loss: 0.3437536656856537 2023-01-24 02:39:33.125985: step: 810/464, loss: 0.23270297050476074 2023-01-24 02:39:33.867351: step: 812/464, loss: 0.2002369612455368 2023-01-24 02:39:34.726969: step: 814/464, loss: 0.3684091567993164 2023-01-24 02:39:35.418697: step: 816/464, loss: 0.2902369797229767 2023-01-24 02:39:36.166332: step: 818/464, loss: 0.7540484666824341 2023-01-24 02:39:36.886337: step: 820/464, loss: 0.9410716891288757 2023-01-24 02:39:37.591501: step: 822/464, loss: 0.9295744299888611 2023-01-24 02:39:38.308439: step: 824/464, loss: 0.16161967813968658 2023-01-24 02:39:39.005909: step: 826/464, loss: 0.23595713078975677 2023-01-24 02:39:39.696499: step: 828/464, loss: 1.1932841539382935 2023-01-24 02:39:40.475034: step: 830/464, loss: 0.3627581298351288 2023-01-24 02:39:41.213486: step: 832/464, loss: 0.9675153493881226 2023-01-24 02:39:42.082053: step: 834/464, loss: 0.37052589654922485 2023-01-24 02:39:42.837222: step: 836/464, loss: 0.817292332649231 2023-01-24 02:39:43.577742: step: 838/464, loss: 0.3747046887874603 2023-01-24 02:39:44.468190: step: 840/464, loss: 1.0048964023590088 2023-01-24 02:39:45.279117: step: 842/464, loss: 0.47187212109565735 2023-01-24 02:39:45.972344: step: 844/464, loss: 0.8129963278770447 2023-01-24 02:39:46.789852: step: 846/464, loss: 0.7479146718978882 2023-01-24 02:39:47.553374: step: 848/464, loss: 0.48911774158477783 2023-01-24 02:39:48.248905: step: 850/464, loss: 1.1757187843322754 2023-01-24 02:39:49.044390: step: 852/464, loss: 0.2751547694206238 2023-01-24 02:39:49.732012: step: 854/464, loss: 0.9891265630722046 2023-01-24 02:39:50.478697: step: 856/464, loss: 0.3847293257713318 2023-01-24 02:39:51.222061: step: 858/464, loss: 0.40411749482154846 2023-01-24 02:39:51.973850: step: 860/464, loss: 0.17029529809951782 2023-01-24 02:39:52.632256: step: 862/464, loss: 0.46243488788604736 2023-01-24 02:39:53.326230: step: 864/464, loss: 0.10078753530979156 2023-01-24 02:39:54.128166: step: 866/464, loss: 0.6281033158302307 2023-01-24 02:39:54.880095: step: 868/464, loss: 0.9208279252052307 2023-01-24 02:39:55.657573: step: 870/464, loss: 0.6805605888366699 2023-01-24 02:39:56.376034: step: 872/464, loss: 0.09829024970531464 2023-01-24 02:39:57.122234: step: 874/464, loss: 0.10819683223962784 2023-01-24 02:39:57.816050: step: 876/464, loss: 0.4335554838180542 2023-01-24 02:39:58.572055: step: 878/464, loss: 0.2784618139266968 2023-01-24 02:39:59.356807: step: 880/464, loss: 0.603888213634491 2023-01-24 02:40:00.059595: step: 882/464, loss: 0.15740109980106354 2023-01-24 02:40:00.827093: step: 884/464, loss: 0.9284422993659973 2023-01-24 02:40:01.553411: step: 886/464, loss: 0.9966660737991333 2023-01-24 02:40:02.349988: step: 888/464, loss: 0.4289911985397339 2023-01-24 02:40:03.106521: step: 890/464, loss: 0.796715259552002 2023-01-24 02:40:03.892482: step: 892/464, loss: 1.042264461517334 2023-01-24 02:40:04.617791: step: 894/464, loss: 0.21413186192512512 2023-01-24 02:40:05.312813: step: 896/464, loss: 0.148712158203125 2023-01-24 02:40:06.064528: step: 898/464, loss: 0.34306561946868896 2023-01-24 02:40:06.957921: step: 900/464, loss: 0.1933916062116623 2023-01-24 02:40:07.761339: step: 902/464, loss: 0.8368797898292542 2023-01-24 02:40:08.469896: step: 904/464, loss: 0.1777791827917099 2023-01-24 02:40:09.206235: step: 906/464, loss: 0.17683547735214233 2023-01-24 02:40:09.867789: step: 908/464, loss: 0.19002756476402283 2023-01-24 02:40:10.612340: step: 910/464, loss: 0.2975521385669708 2023-01-24 02:40:11.303933: step: 912/464, loss: 0.48966196179389954 2023-01-24 02:40:12.070309: step: 914/464, loss: 0.8314082622528076 2023-01-24 02:40:12.875736: step: 916/464, loss: 0.25605788826942444 2023-01-24 02:40:13.654182: step: 918/464, loss: 0.255087286233902 2023-01-24 02:40:14.404171: step: 920/464, loss: 0.28800809383392334 2023-01-24 02:40:15.182663: step: 922/464, loss: 0.5026906132698059 2023-01-24 02:40:15.926678: step: 924/464, loss: 0.5396649241447449 2023-01-24 02:40:16.657282: step: 926/464, loss: 1.2712345123291016 2023-01-24 02:40:17.404205: step: 928/464, loss: 0.32311421632766724 2023-01-24 02:40:18.007870: step: 930/464, loss: 0.2566630244255066 ================================================== Loss: 0.608 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33881111588301155, 'r': 0.29517635095868433, 'f1': 0.3154921321987557}, 'combined': 0.23246788688329367, 'epoch': 7} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32550565210866855, 'r': 0.23544479974658636, 'f1': 0.2732455703481025}, 'combined': 0.16969988053197946, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3123058923741133, 'r': 0.28036551701766993, 'f1': 0.2954750358988617}, 'combined': 0.2177184475044244, 'epoch': 7} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.314002992020427, 'r': 0.23022749019679528, 'f1': 0.26566729769573183}, 'combined': 0.1649933743584019, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3281217095992458, 'r': 0.2920780369538741, 'f1': 0.30905251204738576}, 'combined': 0.2277229036138632, 'epoch': 7} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3324537235089592, 'r': 0.24539815361777917, 'f1': 0.28236831320203815}, 'combined': 0.17536558398863422, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2662037037037037, 'r': 0.20535714285714285, 'f1': 0.23185483870967744}, 'combined': 0.15456989247311828, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31, 'r': 0.33695652173913043, 'f1': 0.3229166666666667}, 'combined': 0.16145833333333334, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5446428571428571, 'r': 0.2629310344827586, 'f1': 0.3546511627906977}, 'combined': 0.2364341085271318, 'epoch': 7} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3345195392377826, 'r': 0.3041086720343478, 'f1': 0.3185900373693168}, 'combined': 0.23475055385107552, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3174410049953423, 'r': 0.22522000156784164, 'f1': 0.26349438333717423}, 'combined': 0.163643880177824, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.3142857142857143, 'f1': 0.3235294117647059}, 'combined': 0.21568627450980393, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3123058923741133, 'r': 0.28036551701766993, 'f1': 0.2954750358988617}, 'combined': 0.2177184475044244, 'epoch': 7} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.314002992020427, 'r': 0.23022749019679528, 'f1': 0.26566729769573183}, 'combined': 0.1649933743584019, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31, 'r': 0.33695652173913043, 'f1': 0.3229166666666667}, 'combined': 0.16145833333333334, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3360532326750742, 'r': 0.30544496859840714, 'f1': 0.3200188836011144}, 'combined': 0.2358033879166106, 'epoch': 4} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3238311636676841, 'r': 0.22674568659177094, 'f1': 0.26672868469148}, 'combined': 0.16565255154523495, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5446428571428571, 'r': 0.2629310344827586, 'f1': 0.3546511627906977}, 'combined': 0.2364341085271318, 'epoch': 4} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:43:02.621441: step: 2/464, loss: 0.24098850786685944 2023-01-24 02:43:03.267786: step: 4/464, loss: 0.21834394335746765 2023-01-24 02:43:03.909976: step: 6/464, loss: 0.6970574259757996 2023-01-24 02:43:04.653455: step: 8/464, loss: 0.11427651345729828 2023-01-24 02:43:05.417444: step: 10/464, loss: 0.3398534059524536 2023-01-24 02:43:06.220673: step: 12/464, loss: 0.3831498920917511 2023-01-24 02:43:06.917158: step: 14/464, loss: 0.32490742206573486 2023-01-24 02:43:07.635162: step: 16/464, loss: 0.5013684630393982 2023-01-24 02:43:08.341336: step: 18/464, loss: 0.2335856556892395 2023-01-24 02:43:09.055331: step: 20/464, loss: 1.0744787454605103 2023-01-24 02:43:09.936291: step: 22/464, loss: 0.1543176919221878 2023-01-24 02:43:10.725006: step: 24/464, loss: 0.36627593636512756 2023-01-24 02:43:11.472911: step: 26/464, loss: 0.32450997829437256 2023-01-24 02:43:12.293106: step: 28/464, loss: 0.13919056951999664 2023-01-24 02:43:13.002477: step: 30/464, loss: 1.290597915649414 2023-01-24 02:43:13.746800: step: 32/464, loss: 5.267265319824219 2023-01-24 02:43:14.472117: step: 34/464, loss: 0.13220682740211487 2023-01-24 02:43:15.198146: step: 36/464, loss: 0.8730195760726929 2023-01-24 02:43:16.007243: step: 38/464, loss: 0.9116947650909424 2023-01-24 02:43:16.704736: step: 40/464, loss: 0.31921955943107605 2023-01-24 02:43:17.549369: step: 42/464, loss: 0.09013380110263824 2023-01-24 02:43:18.331764: step: 44/464, loss: 0.518787145614624 2023-01-24 02:43:19.011258: step: 46/464, loss: 0.558215856552124 2023-01-24 02:43:19.705971: step: 48/464, loss: 0.25874605774879456 2023-01-24 02:43:20.454591: step: 50/464, loss: 0.24418827891349792 2023-01-24 02:43:21.202171: step: 52/464, loss: 0.9657977819442749 2023-01-24 02:43:21.975655: step: 54/464, loss: 0.12193039804697037 2023-01-24 02:43:22.738225: step: 56/464, loss: 0.35909783840179443 2023-01-24 02:43:23.504607: step: 58/464, loss: 2.500666618347168 2023-01-24 02:43:24.208203: step: 60/464, loss: 0.5895683765411377 2023-01-24 02:43:24.954183: step: 62/464, loss: 0.21890252828598022 2023-01-24 02:43:25.735258: step: 64/464, loss: 0.5999624133110046 2023-01-24 02:43:26.587353: step: 66/464, loss: 2.444990396499634 2023-01-24 02:43:27.385204: step: 68/464, loss: 0.13536398112773895 2023-01-24 02:43:28.118323: step: 70/464, loss: 1.011974811553955 2023-01-24 02:43:28.868534: step: 72/464, loss: 0.4110199511051178 2023-01-24 02:43:29.559788: step: 74/464, loss: 0.25794094800949097 2023-01-24 02:43:30.332830: step: 76/464, loss: 0.2513432204723358 2023-01-24 02:43:31.147620: step: 78/464, loss: 0.7264769673347473 2023-01-24 02:43:31.838904: step: 80/464, loss: 0.10029570758342743 2023-01-24 02:43:32.533227: step: 82/464, loss: 0.0936921015381813 2023-01-24 02:43:33.230307: step: 84/464, loss: 0.2198677659034729 2023-01-24 02:43:33.969255: step: 86/464, loss: 1.1094175577163696 2023-01-24 02:43:34.621602: step: 88/464, loss: 0.5705043077468872 2023-01-24 02:43:35.371522: step: 90/464, loss: 0.7141909003257751 2023-01-24 02:43:36.054865: step: 92/464, loss: 0.42526358366012573 2023-01-24 02:43:36.743852: step: 94/464, loss: 0.29963523149490356 2023-01-24 02:43:37.532854: step: 96/464, loss: 0.844380795955658 2023-01-24 02:43:38.230920: step: 98/464, loss: 0.2561212182044983 2023-01-24 02:43:38.948847: step: 100/464, loss: 0.28629907965660095 2023-01-24 02:43:39.680401: step: 102/464, loss: 1.0631016492843628 2023-01-24 02:43:40.477194: step: 104/464, loss: 0.21041373908519745 2023-01-24 02:43:41.157451: step: 106/464, loss: 0.6114664077758789 2023-01-24 02:43:41.937033: step: 108/464, loss: 0.1547708362340927 2023-01-24 02:43:42.664150: step: 110/464, loss: 0.24771416187286377 2023-01-24 02:43:43.452495: step: 112/464, loss: 0.4548543393611908 2023-01-24 02:43:44.200595: step: 114/464, loss: 0.4440891146659851 2023-01-24 02:43:44.885630: step: 116/464, loss: 0.609367311000824 2023-01-24 02:43:45.638748: step: 118/464, loss: 0.4446181356906891 2023-01-24 02:43:46.329296: step: 120/464, loss: 0.08416564762592316 2023-01-24 02:43:46.996284: step: 122/464, loss: 0.10157329589128494 2023-01-24 02:43:47.742877: step: 124/464, loss: 0.16663186252117157 2023-01-24 02:43:48.499159: step: 126/464, loss: 0.24829711019992828 2023-01-24 02:43:49.217444: step: 128/464, loss: 0.5671038031578064 2023-01-24 02:43:49.967977: step: 130/464, loss: 0.2176276594400406 2023-01-24 02:43:50.752041: step: 132/464, loss: 0.30462634563446045 2023-01-24 02:43:51.522161: step: 134/464, loss: 0.158929243683815 2023-01-24 02:43:52.248320: step: 136/464, loss: 0.2325487732887268 2023-01-24 02:43:52.986993: step: 138/464, loss: 0.3691418766975403 2023-01-24 02:43:53.715940: step: 140/464, loss: 0.08586454391479492 2023-01-24 02:43:54.449464: step: 142/464, loss: 0.22785647213459015 2023-01-24 02:43:55.175241: step: 144/464, loss: 0.2567114531993866 2023-01-24 02:43:55.917710: step: 146/464, loss: 0.21886906027793884 2023-01-24 02:43:56.661673: step: 148/464, loss: 0.7984110116958618 2023-01-24 02:43:57.404615: step: 150/464, loss: 0.35591015219688416 2023-01-24 02:43:58.089756: step: 152/464, loss: 0.4163927435874939 2023-01-24 02:43:58.837406: step: 154/464, loss: 0.20556171238422394 2023-01-24 02:43:59.743199: step: 156/464, loss: 0.2828492820262909 2023-01-24 02:44:00.497659: step: 158/464, loss: 0.13407738506793976 2023-01-24 02:44:01.277203: step: 160/464, loss: 0.9878560900688171 2023-01-24 02:44:01.990906: step: 162/464, loss: 0.10810502618551254 2023-01-24 02:44:02.783632: step: 164/464, loss: 0.2033417969942093 2023-01-24 02:44:03.519705: step: 166/464, loss: 0.641880452632904 2023-01-24 02:44:04.284887: step: 168/464, loss: 0.2256614863872528 2023-01-24 02:44:04.982545: step: 170/464, loss: 0.49018874764442444 2023-01-24 02:44:05.645533: step: 172/464, loss: 0.47806239128112793 2023-01-24 02:44:06.440873: step: 174/464, loss: 0.7968089580535889 2023-01-24 02:44:07.209952: step: 176/464, loss: 0.13665267825126648 2023-01-24 02:44:07.998583: step: 178/464, loss: 0.14715999364852905 2023-01-24 02:44:08.755476: step: 180/464, loss: 0.19499091804027557 2023-01-24 02:44:09.543032: step: 182/464, loss: 0.26871755719184875 2023-01-24 02:44:10.382793: step: 184/464, loss: 0.5004696846008301 2023-01-24 02:44:11.242268: step: 186/464, loss: 0.17549346387386322 2023-01-24 02:44:12.016835: step: 188/464, loss: 0.29949861764907837 2023-01-24 02:44:12.752956: step: 190/464, loss: 0.29847872257232666 2023-01-24 02:44:13.499687: step: 192/464, loss: 0.4522988200187683 2023-01-24 02:44:14.164626: step: 194/464, loss: 0.4335414469242096 2023-01-24 02:44:14.884306: step: 196/464, loss: 0.1644258052110672 2023-01-24 02:44:15.603052: step: 198/464, loss: 0.5021744966506958 2023-01-24 02:44:16.412696: step: 200/464, loss: 1.0680073499679565 2023-01-24 02:44:17.105306: step: 202/464, loss: 0.1432187706232071 2023-01-24 02:44:17.795433: step: 204/464, loss: 0.48796772956848145 2023-01-24 02:44:18.563857: step: 206/464, loss: 0.3300281763076782 2023-01-24 02:44:19.294998: step: 208/464, loss: 0.9741002321243286 2023-01-24 02:44:20.070907: step: 210/464, loss: 0.9319281578063965 2023-01-24 02:44:20.945503: step: 212/464, loss: 0.9063480496406555 2023-01-24 02:44:21.649666: step: 214/464, loss: 0.17574895918369293 2023-01-24 02:44:22.406553: step: 216/464, loss: 0.392004132270813 2023-01-24 02:44:23.114890: step: 218/464, loss: 0.9362916946411133 2023-01-24 02:44:23.843143: step: 220/464, loss: 0.08633681386709213 2023-01-24 02:44:24.548114: step: 222/464, loss: 0.2805129587650299 2023-01-24 02:44:25.233088: step: 224/464, loss: 0.7117599248886108 2023-01-24 02:44:25.908368: step: 226/464, loss: 0.22947485744953156 2023-01-24 02:44:26.629463: step: 228/464, loss: 0.19838595390319824 2023-01-24 02:44:27.314249: step: 230/464, loss: 0.1607564240694046 2023-01-24 02:44:28.092922: step: 232/464, loss: 0.5248289108276367 2023-01-24 02:44:28.874273: step: 234/464, loss: 0.972169816493988 2023-01-24 02:44:29.563014: step: 236/464, loss: 0.3914636969566345 2023-01-24 02:44:30.461669: step: 238/464, loss: 0.11111956089735031 2023-01-24 02:44:31.256836: step: 240/464, loss: 0.1756807267665863 2023-01-24 02:44:31.912152: step: 242/464, loss: 0.4231186807155609 2023-01-24 02:44:32.573672: step: 244/464, loss: 0.3299257755279541 2023-01-24 02:44:33.279980: step: 246/464, loss: 0.36662188172340393 2023-01-24 02:44:34.087072: step: 248/464, loss: 0.298601895570755 2023-01-24 02:44:34.798688: step: 250/464, loss: 0.15368987619876862 2023-01-24 02:44:35.557156: step: 252/464, loss: 0.5711445212364197 2023-01-24 02:44:36.263065: step: 254/464, loss: 0.26884526014328003 2023-01-24 02:44:37.112863: step: 256/464, loss: 0.4593277871608734 2023-01-24 02:44:37.900284: step: 258/464, loss: 0.3296632766723633 2023-01-24 02:44:38.690586: step: 260/464, loss: 0.23488648235797882 2023-01-24 02:44:39.434818: step: 262/464, loss: 0.4223480522632599 2023-01-24 02:44:40.188604: step: 264/464, loss: 0.43289366364479065 2023-01-24 02:44:40.969247: step: 266/464, loss: 0.13029201328754425 2023-01-24 02:44:41.658736: step: 268/464, loss: 0.8143272995948792 2023-01-24 02:44:42.470977: step: 270/464, loss: 0.3097124695777893 2023-01-24 02:44:43.158718: step: 272/464, loss: 0.7568479180335999 2023-01-24 02:44:43.864261: step: 274/464, loss: 0.1135609894990921 2023-01-24 02:44:44.603717: step: 276/464, loss: 0.29489538073539734 2023-01-24 02:44:45.273304: step: 278/464, loss: 0.40163666009902954 2023-01-24 02:44:45.980418: step: 280/464, loss: 0.3510648012161255 2023-01-24 02:44:46.669616: step: 282/464, loss: 0.4717705547809601 2023-01-24 02:44:47.412823: step: 284/464, loss: 0.2724142074584961 2023-01-24 02:44:48.189152: step: 286/464, loss: 0.24752037227153778 2023-01-24 02:44:48.983900: step: 288/464, loss: 0.6803609132766724 2023-01-24 02:44:49.702737: step: 290/464, loss: 0.25626876950263977 2023-01-24 02:44:50.469150: step: 292/464, loss: 0.3036022186279297 2023-01-24 02:44:51.219235: step: 294/464, loss: 0.21912869811058044 2023-01-24 02:44:51.980195: step: 296/464, loss: 0.2549686133861542 2023-01-24 02:44:52.732385: step: 298/464, loss: 0.11996863037347794 2023-01-24 02:44:53.502575: step: 300/464, loss: 0.1933732032775879 2023-01-24 02:44:54.275686: step: 302/464, loss: 1.3739672899246216 2023-01-24 02:44:55.140735: step: 304/464, loss: 0.7721089124679565 2023-01-24 02:44:55.819133: step: 306/464, loss: 0.16225355863571167 2023-01-24 02:44:56.639680: step: 308/464, loss: 0.24977611005306244 2023-01-24 02:44:57.323480: step: 310/464, loss: 0.1537967324256897 2023-01-24 02:44:57.975707: step: 312/464, loss: 0.43189412355422974 2023-01-24 02:44:58.745088: step: 314/464, loss: 0.2579169273376465 2023-01-24 02:44:59.477866: step: 316/464, loss: 0.42762044072151184 2023-01-24 02:45:00.274806: step: 318/464, loss: 0.5698819160461426 2023-01-24 02:45:01.063417: step: 320/464, loss: 0.22148877382278442 2023-01-24 02:45:01.850645: step: 322/464, loss: 0.44749972224235535 2023-01-24 02:45:02.587379: step: 324/464, loss: 0.18170328438282013 2023-01-24 02:45:03.333268: step: 326/464, loss: 2.2977375984191895 2023-01-24 02:45:04.011287: step: 328/464, loss: 0.22166982293128967 2023-01-24 02:45:04.770885: step: 330/464, loss: 1.137890100479126 2023-01-24 02:45:05.472972: step: 332/464, loss: 0.3206275701522827 2023-01-24 02:45:06.166356: step: 334/464, loss: 0.7786614894866943 2023-01-24 02:45:06.915826: step: 336/464, loss: 0.08940373361110687 2023-01-24 02:45:07.644279: step: 338/464, loss: 0.7511243224143982 2023-01-24 02:45:08.405388: step: 340/464, loss: 0.18221306800842285 2023-01-24 02:45:09.080894: step: 342/464, loss: 0.18981769680976868 2023-01-24 02:45:09.806253: step: 344/464, loss: 0.2692997455596924 2023-01-24 02:45:10.531326: step: 346/464, loss: 0.4894130825996399 2023-01-24 02:45:11.251855: step: 348/464, loss: 0.7022056579589844 2023-01-24 02:45:11.972326: step: 350/464, loss: 0.17065872251987457 2023-01-24 02:45:12.753952: step: 352/464, loss: 0.13291820883750916 2023-01-24 02:45:13.559873: step: 354/464, loss: 0.7096335887908936 2023-01-24 02:45:14.351588: step: 356/464, loss: 0.6142822504043579 2023-01-24 02:45:15.131193: step: 358/464, loss: 0.60723876953125 2023-01-24 02:45:15.885808: step: 360/464, loss: 0.3338114321231842 2023-01-24 02:45:16.672076: step: 362/464, loss: 0.43763482570648193 2023-01-24 02:45:17.508264: step: 364/464, loss: 0.2908762991428375 2023-01-24 02:45:18.267729: step: 366/464, loss: 0.3009243905544281 2023-01-24 02:45:19.007807: step: 368/464, loss: 0.2691645920276642 2023-01-24 02:45:19.731570: step: 370/464, loss: 0.7100043296813965 2023-01-24 02:45:20.457426: step: 372/464, loss: 0.23052287101745605 2023-01-24 02:45:21.174604: step: 374/464, loss: 0.2514415979385376 2023-01-24 02:45:21.940052: step: 376/464, loss: 0.2036939263343811 2023-01-24 02:45:22.588916: step: 378/464, loss: 0.6348828077316284 2023-01-24 02:45:23.320572: step: 380/464, loss: 0.1718049794435501 2023-01-24 02:45:23.996915: step: 382/464, loss: 0.2206181138753891 2023-01-24 02:45:24.721001: step: 384/464, loss: 0.6723924875259399 2023-01-24 02:45:25.464940: step: 386/464, loss: 0.24519896507263184 2023-01-24 02:45:26.062359: step: 388/464, loss: 0.6588399410247803 2023-01-24 02:45:26.819804: step: 390/464, loss: 0.6879944801330566 2023-01-24 02:45:27.654784: step: 392/464, loss: 0.7047319412231445 2023-01-24 02:45:28.481918: step: 394/464, loss: 0.15743254125118256 2023-01-24 02:45:29.154531: step: 396/464, loss: 0.060449376702308655 2023-01-24 02:45:29.954207: step: 398/464, loss: 0.4597553014755249 2023-01-24 02:45:30.702516: step: 400/464, loss: 0.19338826835155487 2023-01-24 02:45:31.374616: step: 402/464, loss: 0.8644437789916992 2023-01-24 02:45:32.169141: step: 404/464, loss: 1.0874934196472168 2023-01-24 02:45:32.897700: step: 406/464, loss: 0.1290365308523178 2023-01-24 02:45:33.676046: step: 408/464, loss: 0.26761525869369507 2023-01-24 02:45:34.341019: step: 410/464, loss: 0.5796024799346924 2023-01-24 02:45:35.045143: step: 412/464, loss: 0.6488765478134155 2023-01-24 02:45:35.768014: step: 414/464, loss: 0.22247929871082306 2023-01-24 02:45:36.550427: step: 416/464, loss: 0.35781481862068176 2023-01-24 02:45:37.253084: step: 418/464, loss: 0.785033643245697 2023-01-24 02:45:38.048284: step: 420/464, loss: 0.23130568861961365 2023-01-24 02:45:38.794313: step: 422/464, loss: 0.10906381905078888 2023-01-24 02:45:39.596702: step: 424/464, loss: 0.16461393237113953 2023-01-24 02:45:40.339040: step: 426/464, loss: 0.10141077637672424 2023-01-24 02:45:41.068909: step: 428/464, loss: 0.4492965042591095 2023-01-24 02:45:41.942545: step: 430/464, loss: 0.42122381925582886 2023-01-24 02:45:42.727882: step: 432/464, loss: 1.8734062910079956 2023-01-24 02:45:43.527607: step: 434/464, loss: 0.2933647334575653 2023-01-24 02:45:44.260592: step: 436/464, loss: 0.05986648052930832 2023-01-24 02:45:44.999661: step: 438/464, loss: 0.8424848914146423 2023-01-24 02:45:45.716902: step: 440/464, loss: 0.14208859205245972 2023-01-24 02:45:46.479660: step: 442/464, loss: 0.43434256315231323 2023-01-24 02:45:47.213165: step: 444/464, loss: 0.524348795413971 2023-01-24 02:45:48.109231: step: 446/464, loss: 0.2798750102519989 2023-01-24 02:45:48.802547: step: 448/464, loss: 0.18711858987808228 2023-01-24 02:45:49.427397: step: 450/464, loss: 0.15931551158428192 2023-01-24 02:45:50.183444: step: 452/464, loss: 0.34590521454811096 2023-01-24 02:45:50.872052: step: 454/464, loss: 0.19958004355430603 2023-01-24 02:45:51.658859: step: 456/464, loss: 0.5248697996139526 2023-01-24 02:45:52.341831: step: 458/464, loss: 0.2061764895915985 2023-01-24 02:45:53.154663: step: 460/464, loss: 0.3065434992313385 2023-01-24 02:45:53.823252: step: 462/464, loss: 0.9484499096870422 2023-01-24 02:45:54.562394: step: 464/464, loss: 0.554010808467865 2023-01-24 02:45:55.297970: step: 466/464, loss: 3.225388526916504 2023-01-24 02:45:56.091390: step: 468/464, loss: 2.29485821723938 2023-01-24 02:45:56.727582: step: 470/464, loss: 0.7562961578369141 2023-01-24 02:45:57.486984: step: 472/464, loss: 0.8341850638389587 2023-01-24 02:45:58.250025: step: 474/464, loss: 0.33977198600769043 2023-01-24 02:45:58.966828: step: 476/464, loss: 0.28051602840423584 2023-01-24 02:45:59.761097: step: 478/464, loss: 0.19837482273578644 2023-01-24 02:46:00.490103: step: 480/464, loss: 0.33840715885162354 2023-01-24 02:46:01.328860: step: 482/464, loss: 0.41176798939704895 2023-01-24 02:46:02.194448: step: 484/464, loss: 0.4754239320755005 2023-01-24 02:46:02.949720: step: 486/464, loss: 0.07769311219453812 2023-01-24 02:46:03.735564: step: 488/464, loss: 0.6456102728843689 2023-01-24 02:46:04.472637: step: 490/464, loss: 0.32670024037361145 2023-01-24 02:46:05.196494: step: 492/464, loss: 0.3997291922569275 2023-01-24 02:46:05.959895: step: 494/464, loss: 0.24537032842636108 2023-01-24 02:46:06.719714: step: 496/464, loss: 0.45198655128479004 2023-01-24 02:46:07.398774: step: 498/464, loss: 0.7440019845962524 2023-01-24 02:46:08.083574: step: 500/464, loss: 0.3995836675167084 2023-01-24 02:46:08.837655: step: 502/464, loss: 0.8018618822097778 2023-01-24 02:46:09.573942: step: 504/464, loss: 0.4375607371330261 2023-01-24 02:46:10.303749: step: 506/464, loss: 0.49072539806365967 2023-01-24 02:46:11.040245: step: 508/464, loss: 0.37828344106674194 2023-01-24 02:46:11.818896: step: 510/464, loss: 1.328326940536499 2023-01-24 02:46:12.514414: step: 512/464, loss: 0.18207770586013794 2023-01-24 02:46:13.207677: step: 514/464, loss: 0.2423122227191925 2023-01-24 02:46:13.898339: step: 516/464, loss: 0.19432279467582703 2023-01-24 02:46:14.669928: step: 518/464, loss: 0.17209100723266602 2023-01-24 02:46:15.414450: step: 520/464, loss: 0.4468704164028168 2023-01-24 02:46:16.131342: step: 522/464, loss: 0.29360130429267883 2023-01-24 02:46:16.960753: step: 524/464, loss: 0.12493553012609482 2023-01-24 02:46:17.650615: step: 526/464, loss: 0.20450644195079803 2023-01-24 02:46:18.462255: step: 528/464, loss: 0.28605949878692627 2023-01-24 02:46:19.169526: step: 530/464, loss: 0.08858560770750046 2023-01-24 02:46:19.845363: step: 532/464, loss: 0.511139988899231 2023-01-24 02:46:20.662535: step: 534/464, loss: 0.33546334505081177 2023-01-24 02:46:21.411904: step: 536/464, loss: 1.009992003440857 2023-01-24 02:46:22.162832: step: 538/464, loss: 0.2257574498653412 2023-01-24 02:46:22.900635: step: 540/464, loss: 0.31822896003723145 2023-01-24 02:46:23.679929: step: 542/464, loss: 0.42033156752586365 2023-01-24 02:46:24.367922: step: 544/464, loss: 0.3264782130718231 2023-01-24 02:46:25.207134: step: 546/464, loss: 2.3610825538635254 2023-01-24 02:46:25.946849: step: 548/464, loss: 0.1951487511396408 2023-01-24 02:46:26.654083: step: 550/464, loss: 0.21507689356803894 2023-01-24 02:46:27.419392: step: 552/464, loss: 0.6023355722427368 2023-01-24 02:46:28.303016: step: 554/464, loss: 0.5097122192382812 2023-01-24 02:46:29.080314: step: 556/464, loss: 0.2306637465953827 2023-01-24 02:46:29.835708: step: 558/464, loss: 0.7728459239006042 2023-01-24 02:46:30.613910: step: 560/464, loss: 0.21024790406227112 2023-01-24 02:46:31.322070: step: 562/464, loss: 0.6667017936706543 2023-01-24 02:46:32.096011: step: 564/464, loss: 0.19177010655403137 2023-01-24 02:46:32.876865: step: 566/464, loss: 0.3377716541290283 2023-01-24 02:46:33.627739: step: 568/464, loss: 0.4583777189254761 2023-01-24 02:46:34.376719: step: 570/464, loss: 0.698870837688446 2023-01-24 02:46:35.051103: step: 572/464, loss: 0.23514105379581451 2023-01-24 02:46:35.826246: step: 574/464, loss: 0.16757991909980774 2023-01-24 02:46:36.582833: step: 576/464, loss: 0.7315901517868042 2023-01-24 02:46:37.359526: step: 578/464, loss: 0.3616211712360382 2023-01-24 02:46:38.108054: step: 580/464, loss: 0.6538026928901672 2023-01-24 02:46:38.956726: step: 582/464, loss: 0.7223403453826904 2023-01-24 02:46:39.727938: step: 584/464, loss: 0.17685629427433014 2023-01-24 02:46:40.406637: step: 586/464, loss: 0.3052135705947876 2023-01-24 02:46:41.164971: step: 588/464, loss: 0.49685677886009216 2023-01-24 02:46:41.941753: step: 590/464, loss: 0.06801187247037888 2023-01-24 02:46:42.697607: step: 592/464, loss: 0.2643454372882843 2023-01-24 02:46:43.371104: step: 594/464, loss: 0.23082764446735382 2023-01-24 02:46:44.165358: step: 596/464, loss: 0.19490766525268555 2023-01-24 02:46:44.909508: step: 598/464, loss: 0.15837804973125458 2023-01-24 02:46:45.654304: step: 600/464, loss: 0.208059623837471 2023-01-24 02:46:46.419962: step: 602/464, loss: 0.4763175845146179 2023-01-24 02:46:47.148054: step: 604/464, loss: 0.41145578026771545 2023-01-24 02:46:47.876833: step: 606/464, loss: 0.19580335915088654 2023-01-24 02:46:48.678025: step: 608/464, loss: 6.187551021575928 2023-01-24 02:46:49.407104: step: 610/464, loss: 0.345478355884552 2023-01-24 02:46:50.182589: step: 612/464, loss: 1.0338616371154785 2023-01-24 02:46:50.903521: step: 614/464, loss: 0.4276840388774872 2023-01-24 02:46:51.557115: step: 616/464, loss: 0.7984152436256409 2023-01-24 02:46:52.228789: step: 618/464, loss: 0.5086597800254822 2023-01-24 02:46:52.960230: step: 620/464, loss: 1.4494423866271973 2023-01-24 02:46:53.742333: step: 622/464, loss: 0.088454470038414 2023-01-24 02:46:54.480850: step: 624/464, loss: 0.5433585047721863 2023-01-24 02:46:55.211937: step: 626/464, loss: 0.19739283621311188 2023-01-24 02:46:55.977584: step: 628/464, loss: 0.23902741074562073 2023-01-24 02:46:56.695319: step: 630/464, loss: 0.38742804527282715 2023-01-24 02:46:57.441514: step: 632/464, loss: 0.6698702573776245 2023-01-24 02:46:58.187422: step: 634/464, loss: 0.9108648300170898 2023-01-24 02:46:58.929372: step: 636/464, loss: 0.13190753757953644 2023-01-24 02:46:59.664495: step: 638/464, loss: 0.2754857540130615 2023-01-24 02:47:00.418147: step: 640/464, loss: 0.920817494392395 2023-01-24 02:47:01.084958: step: 642/464, loss: 0.8862787485122681 2023-01-24 02:47:01.824547: step: 644/464, loss: 0.34673672914505005 2023-01-24 02:47:02.511490: step: 646/464, loss: 0.6815704107284546 2023-01-24 02:47:03.228406: step: 648/464, loss: 0.1537713259458542 2023-01-24 02:47:04.011976: step: 650/464, loss: 0.33302345871925354 2023-01-24 02:47:04.752492: step: 652/464, loss: 0.19951090216636658 2023-01-24 02:47:05.456983: step: 654/464, loss: 0.20980501174926758 2023-01-24 02:47:06.187070: step: 656/464, loss: 0.5005956888198853 2023-01-24 02:47:06.927829: step: 658/464, loss: 0.27629342675209045 2023-01-24 02:47:07.593525: step: 660/464, loss: 0.3143295347690582 2023-01-24 02:47:08.263407: step: 662/464, loss: 0.10405009239912033 2023-01-24 02:47:08.982125: step: 664/464, loss: 0.4270290434360504 2023-01-24 02:47:09.753432: step: 666/464, loss: 0.2317143976688385 2023-01-24 02:47:10.509758: step: 668/464, loss: 0.3168708384037018 2023-01-24 02:47:11.252305: step: 670/464, loss: 0.3464636504650116 2023-01-24 02:47:11.971506: step: 672/464, loss: 0.48676538467407227 2023-01-24 02:47:12.714420: step: 674/464, loss: 0.6338642835617065 2023-01-24 02:47:13.570030: step: 676/464, loss: 0.536266028881073 2023-01-24 02:47:14.254052: step: 678/464, loss: 0.5338266491889954 2023-01-24 02:47:14.931202: step: 680/464, loss: 0.4320400655269623 2023-01-24 02:47:15.668792: step: 682/464, loss: 0.1096869707107544 2023-01-24 02:47:16.442002: step: 684/464, loss: 0.3257814645767212 2023-01-24 02:47:17.198499: step: 686/464, loss: 0.48217248916625977 2023-01-24 02:47:17.907612: step: 688/464, loss: 0.12141574174165726 2023-01-24 02:47:18.631657: step: 690/464, loss: 0.4606741666793823 2023-01-24 02:47:19.291654: step: 692/464, loss: 0.18588869273662567 2023-01-24 02:47:20.153549: step: 694/464, loss: 0.469809353351593 2023-01-24 02:47:20.946582: step: 696/464, loss: 0.20712803304195404 2023-01-24 02:47:21.663628: step: 698/464, loss: 0.3310616612434387 2023-01-24 02:47:22.453948: step: 700/464, loss: 0.6919637322425842 2023-01-24 02:47:23.206407: step: 702/464, loss: 5.140326976776123 2023-01-24 02:47:23.876229: step: 704/464, loss: 0.48945337533950806 2023-01-24 02:47:24.620365: step: 706/464, loss: 0.25606769323349 2023-01-24 02:47:25.321209: step: 708/464, loss: 0.607251763343811 2023-01-24 02:47:26.015034: step: 710/464, loss: 0.3275914192199707 2023-01-24 02:47:26.786556: step: 712/464, loss: 0.23145699501037598 2023-01-24 02:47:27.588412: step: 714/464, loss: 0.2339666187763214 2023-01-24 02:47:28.260008: step: 716/464, loss: 0.14772441983222961 2023-01-24 02:47:28.977727: step: 718/464, loss: 11.812416076660156 2023-01-24 02:47:29.724375: step: 720/464, loss: 0.7451220750808716 2023-01-24 02:47:30.437661: step: 722/464, loss: 0.31856614351272583 2023-01-24 02:47:31.192326: step: 724/464, loss: 0.17423215508460999 2023-01-24 02:47:32.018772: step: 726/464, loss: 0.2563784718513489 2023-01-24 02:47:32.816719: step: 728/464, loss: 0.23273800313472748 2023-01-24 02:47:33.548971: step: 730/464, loss: 0.19138793647289276 2023-01-24 02:47:34.276185: step: 732/464, loss: 0.07046403735876083 2023-01-24 02:47:35.005481: step: 734/464, loss: 0.32704442739486694 2023-01-24 02:47:35.863283: step: 736/464, loss: 1.0046184062957764 2023-01-24 02:47:36.613048: step: 738/464, loss: 0.9572482705116272 2023-01-24 02:47:37.404212: step: 740/464, loss: 0.46071451902389526 2023-01-24 02:47:38.164907: step: 742/464, loss: 0.2304687350988388 2023-01-24 02:47:38.875901: step: 744/464, loss: 0.5303010940551758 2023-01-24 02:47:39.605462: step: 746/464, loss: 0.3964517414569855 2023-01-24 02:47:40.359937: step: 748/464, loss: 0.27344828844070435 2023-01-24 02:47:41.104298: step: 750/464, loss: 0.33394724130630493 2023-01-24 02:47:41.864958: step: 752/464, loss: 0.48939767479896545 2023-01-24 02:47:42.459481: step: 754/464, loss: 0.1414308100938797 2023-01-24 02:47:43.205989: step: 756/464, loss: 0.18101376295089722 2023-01-24 02:47:43.961951: step: 758/464, loss: 0.38260453939437866 2023-01-24 02:47:44.685524: step: 760/464, loss: 1.707888126373291 2023-01-24 02:47:45.452913: step: 762/464, loss: 0.31803902983665466 2023-01-24 02:47:46.246138: step: 764/464, loss: 0.38109463453292847 2023-01-24 02:47:47.063333: step: 766/464, loss: 0.3848925530910492 2023-01-24 02:47:47.892195: step: 768/464, loss: 0.447073757648468 2023-01-24 02:47:48.650850: step: 770/464, loss: 0.822684645652771 2023-01-24 02:47:49.355545: step: 772/464, loss: 0.20294032990932465 2023-01-24 02:47:50.197580: step: 774/464, loss: 0.34626081585884094 2023-01-24 02:47:51.007549: step: 776/464, loss: 0.4519239068031311 2023-01-24 02:47:51.746452: step: 778/464, loss: 0.6026832461357117 2023-01-24 02:47:52.469304: step: 780/464, loss: 0.2907651364803314 2023-01-24 02:47:53.280383: step: 782/464, loss: 0.45342278480529785 2023-01-24 02:47:54.028861: step: 784/464, loss: 0.4344886541366577 2023-01-24 02:47:54.788637: step: 786/464, loss: 0.4853147864341736 2023-01-24 02:47:55.447570: step: 788/464, loss: 0.10905138403177261 2023-01-24 02:47:56.171521: step: 790/464, loss: 0.22025759518146515 2023-01-24 02:47:57.107295: step: 792/464, loss: 0.12082141637802124 2023-01-24 02:47:57.824155: step: 794/464, loss: 0.8412554264068604 2023-01-24 02:47:58.610980: step: 796/464, loss: 0.2237834930419922 2023-01-24 02:47:59.321525: step: 798/464, loss: 0.18725483119487762 2023-01-24 02:48:00.048352: step: 800/464, loss: 0.3585301339626312 2023-01-24 02:48:00.750321: step: 802/464, loss: 0.2320476770401001 2023-01-24 02:48:01.613472: step: 804/464, loss: 0.6213764548301697 2023-01-24 02:48:02.468332: step: 806/464, loss: 0.1954798549413681 2023-01-24 02:48:03.192971: step: 808/464, loss: 1.6411569118499756 2023-01-24 02:48:03.971958: step: 810/464, loss: 0.23510593175888062 2023-01-24 02:48:04.742539: step: 812/464, loss: 0.9333515763282776 2023-01-24 02:48:05.466042: step: 814/464, loss: 0.5875794887542725 2023-01-24 02:48:06.315158: step: 816/464, loss: 0.22823041677474976 2023-01-24 02:48:07.013029: step: 818/464, loss: 0.2510369122028351 2023-01-24 02:48:07.763043: step: 820/464, loss: 0.9232980012893677 2023-01-24 02:48:08.498396: step: 822/464, loss: 0.29697927832603455 2023-01-24 02:48:09.203408: step: 824/464, loss: 0.19320182502269745 2023-01-24 02:48:09.913171: step: 826/464, loss: 0.20587298274040222 2023-01-24 02:48:10.658151: step: 828/464, loss: 0.3217647075653076 2023-01-24 02:48:11.422878: step: 830/464, loss: 0.5316275954246521 2023-01-24 02:48:12.194837: step: 832/464, loss: 0.4806307554244995 2023-01-24 02:48:12.864296: step: 834/464, loss: 0.12461249530315399 2023-01-24 02:48:13.624705: step: 836/464, loss: 0.1817103773355484 2023-01-24 02:48:14.388894: step: 838/464, loss: 0.20273560285568237 2023-01-24 02:48:15.168498: step: 840/464, loss: 0.6376850008964539 2023-01-24 02:48:15.976718: step: 842/464, loss: 0.1536242812871933 2023-01-24 02:48:16.713963: step: 844/464, loss: 0.4686459004878998 2023-01-24 02:48:17.530511: step: 846/464, loss: 0.18846267461776733 2023-01-24 02:48:18.293435: step: 848/464, loss: 1.2304623126983643 2023-01-24 02:48:19.010537: step: 850/464, loss: 0.7672339081764221 2023-01-24 02:48:19.747293: step: 852/464, loss: 0.48824992775917053 2023-01-24 02:48:20.544542: step: 854/464, loss: 0.1030515730381012 2023-01-24 02:48:21.274691: step: 856/464, loss: 1.1871172189712524 2023-01-24 02:48:22.076526: step: 858/464, loss: 0.321567565202713 2023-01-24 02:48:22.812948: step: 860/464, loss: 0.18968945741653442 2023-01-24 02:48:23.547415: step: 862/464, loss: 0.11518923193216324 2023-01-24 02:48:24.276690: step: 864/464, loss: 0.3778989911079407 2023-01-24 02:48:25.054773: step: 866/464, loss: 0.21847502887248993 2023-01-24 02:48:25.763608: step: 868/464, loss: 0.3434563875198364 2023-01-24 02:48:26.542119: step: 870/464, loss: 0.4898814558982849 2023-01-24 02:48:27.341162: step: 872/464, loss: 1.2420252561569214 2023-01-24 02:48:28.064920: step: 874/464, loss: 0.5621987581253052 2023-01-24 02:48:28.826265: step: 876/464, loss: 0.5099846124649048 2023-01-24 02:48:29.483472: step: 878/464, loss: 0.9314126372337341 2023-01-24 02:48:30.185504: step: 880/464, loss: 0.720192551612854 2023-01-24 02:48:30.952975: step: 882/464, loss: 0.15588845312595367 2023-01-24 02:48:31.656974: step: 884/464, loss: 0.18109045922756195 2023-01-24 02:48:32.403926: step: 886/464, loss: 0.513529360294342 2023-01-24 02:48:33.099364: step: 888/464, loss: 0.2132686972618103 2023-01-24 02:48:33.808131: step: 890/464, loss: 0.40210071206092834 2023-01-24 02:48:34.564005: step: 892/464, loss: 0.36502307653427124 2023-01-24 02:48:35.405049: step: 894/464, loss: 0.5589260458946228 2023-01-24 02:48:36.207298: step: 896/464, loss: 0.4219580888748169 2023-01-24 02:48:36.950135: step: 898/464, loss: 0.3626061975955963 2023-01-24 02:48:37.587048: step: 900/464, loss: 0.4053698182106018 2023-01-24 02:48:38.311944: step: 902/464, loss: 1.0465059280395508 2023-01-24 02:48:39.065753: step: 904/464, loss: 0.16469791531562805 2023-01-24 02:48:39.807933: step: 906/464, loss: 0.5895463228225708 2023-01-24 02:48:40.603365: step: 908/464, loss: 0.7060515284538269 2023-01-24 02:48:41.280055: step: 910/464, loss: 0.3695732057094574 2023-01-24 02:48:42.019859: step: 912/464, loss: 0.22272424399852753 2023-01-24 02:48:42.695855: step: 914/464, loss: 0.381093293428421 2023-01-24 02:48:43.374856: step: 916/464, loss: 0.16798549890518188 2023-01-24 02:48:44.065333: step: 918/464, loss: 0.4496923089027405 2023-01-24 02:48:44.854102: step: 920/464, loss: 0.2888336777687073 2023-01-24 02:48:45.594768: step: 922/464, loss: 0.3023048937320709 2023-01-24 02:48:46.381634: step: 924/464, loss: 0.29694077372550964 2023-01-24 02:48:47.098622: step: 926/464, loss: 0.178087055683136 2023-01-24 02:48:47.767029: step: 928/464, loss: 0.3675735592842102 2023-01-24 02:48:48.437726: step: 930/464, loss: 0.22161225974559784 ================================================== Loss: 0.500 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3416673624966836, 'r': 0.3086027790292626, 'f1': 0.32429444575956407}, 'combined': 0.23895380213862613, 'epoch': 8} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3187798260120535, 'r': 0.2592448585058499, 'f1': 0.2859463725426922}, 'combined': 0.17758774715809306, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3320903636026018, 'r': 0.30184304395758305, 'f1': 0.31624509774482357}, 'combined': 0.23302270360144894, 'epoch': 8} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3239951647527646, 'r': 0.26476679965468014, 'f1': 0.2914018501909041}, 'combined': 0.18097588590803518, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.22857142857142856, 'f1': 0.25396825396825395}, 'combined': 0.1693121693121693, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5096153846153846, 'r': 0.22844827586206898, 'f1': 0.31547619047619047}, 'combined': 0.2103174603174603, 'epoch': 8} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3345195392377826, 'r': 0.3041086720343478, 'f1': 0.3185900373693168}, 'combined': 0.23475055385107552, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3174410049953423, 'r': 0.22522000156784164, 'f1': 0.26349438333717423}, 'combined': 0.163643880177824, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.3142857142857143, 'f1': 0.3235294117647059}, 'combined': 0.21568627450980393, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3360532326750742, 'r': 0.30544496859840714, 'f1': 0.3200188836011144}, 'combined': 0.2358033879166106, 'epoch': 4} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3238311636676841, 'r': 0.22674568659177094, 'f1': 0.26672868469148}, 'combined': 0.16565255154523495, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5446428571428571, 'r': 0.2629310344827586, 'f1': 0.3546511627906977}, 'combined': 0.2364341085271318, 'epoch': 4} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:51:36.604736: step: 2/464, loss: 0.7644608020782471 2023-01-24 02:51:37.344375: step: 4/464, loss: 0.1526871919631958 2023-01-24 02:51:38.089647: step: 6/464, loss: 0.13496136665344238 2023-01-24 02:51:38.855459: step: 8/464, loss: 0.38817891478538513 2023-01-24 02:51:39.600634: step: 10/464, loss: 0.09272399544715881 2023-01-24 02:51:40.385575: step: 12/464, loss: 0.9074238538742065 2023-01-24 02:51:41.093976: step: 14/464, loss: 0.2888231575489044 2023-01-24 02:51:41.853025: step: 16/464, loss: 0.22618751227855682 2023-01-24 02:51:42.592018: step: 18/464, loss: 0.14795294404029846 2023-01-24 02:51:43.400547: step: 20/464, loss: 0.29055696725845337 2023-01-24 02:51:44.147840: step: 22/464, loss: 1.158676028251648 2023-01-24 02:51:44.917078: step: 24/464, loss: 0.32179543375968933 2023-01-24 02:51:45.664182: step: 26/464, loss: 0.23352162539958954 2023-01-24 02:51:46.434972: step: 28/464, loss: 0.15808358788490295 2023-01-24 02:51:47.173437: step: 30/464, loss: 0.44342419505119324 2023-01-24 02:51:47.949848: step: 32/464, loss: 0.6043683290481567 2023-01-24 02:51:48.680931: step: 34/464, loss: 0.1392679214477539 2023-01-24 02:51:49.407092: step: 36/464, loss: 0.2797326147556305 2023-01-24 02:51:50.103708: step: 38/464, loss: 0.22573457658290863 2023-01-24 02:51:50.931162: step: 40/464, loss: 0.16020973026752472 2023-01-24 02:51:51.630838: step: 42/464, loss: 0.43194395303726196 2023-01-24 02:51:52.350989: step: 44/464, loss: 0.3286089599132538 2023-01-24 02:51:53.078603: step: 46/464, loss: 0.40341219305992126 2023-01-24 02:51:53.864108: step: 48/464, loss: 0.27753618359565735 2023-01-24 02:51:54.609372: step: 50/464, loss: 0.9163278937339783 2023-01-24 02:51:55.337678: step: 52/464, loss: 0.2896912693977356 2023-01-24 02:51:56.031132: step: 54/464, loss: 0.6954100728034973 2023-01-24 02:51:56.737553: step: 56/464, loss: 0.1046927198767662 2023-01-24 02:51:57.487460: step: 58/464, loss: 0.36709487438201904 2023-01-24 02:51:58.183626: step: 60/464, loss: 0.3054746985435486 2023-01-24 02:51:58.904187: step: 62/464, loss: 0.3704795837402344 2023-01-24 02:51:59.582980: step: 64/464, loss: 0.34034356474876404 2023-01-24 02:52:00.350597: step: 66/464, loss: 0.05187249183654785 2023-01-24 02:52:01.032288: step: 68/464, loss: 0.28469640016555786 2023-01-24 02:52:01.740979: step: 70/464, loss: 0.14327633380889893 2023-01-24 02:52:02.451376: step: 72/464, loss: 0.2619388699531555 2023-01-24 02:52:03.197473: step: 74/464, loss: 0.5715567469596863 2023-01-24 02:52:03.895275: step: 76/464, loss: 0.3016913831233978 2023-01-24 02:52:04.576382: step: 78/464, loss: 0.26843929290771484 2023-01-24 02:52:05.326699: step: 80/464, loss: 0.28098320960998535 2023-01-24 02:52:06.006696: step: 82/464, loss: 0.3540817201137543 2023-01-24 02:52:06.704665: step: 84/464, loss: 0.13923750817775726 2023-01-24 02:52:07.400906: step: 86/464, loss: 0.234792560338974 2023-01-24 02:52:08.073826: step: 88/464, loss: 0.11665306985378265 2023-01-24 02:52:08.880235: step: 90/464, loss: 0.32617178559303284 2023-01-24 02:52:09.612815: step: 92/464, loss: 0.1398487240076065 2023-01-24 02:52:10.359503: step: 94/464, loss: 0.5901756882667542 2023-01-24 02:52:11.118155: step: 96/464, loss: 0.25927284359931946 2023-01-24 02:52:11.840908: step: 98/464, loss: 0.5350198149681091 2023-01-24 02:52:12.575013: step: 100/464, loss: 0.36380600929260254 2023-01-24 02:52:13.434897: step: 102/464, loss: 0.24624530971050262 2023-01-24 02:52:14.247898: step: 104/464, loss: 0.3970596492290497 2023-01-24 02:52:14.992412: step: 106/464, loss: 0.2849036455154419 2023-01-24 02:52:15.763462: step: 108/464, loss: 0.4194733500480652 2023-01-24 02:52:16.436472: step: 110/464, loss: 0.8568342328071594 2023-01-24 02:52:17.236923: step: 112/464, loss: 0.22576601803302765 2023-01-24 02:52:17.977477: step: 114/464, loss: 0.22194799780845642 2023-01-24 02:52:18.774210: step: 116/464, loss: 0.08715911954641342 2023-01-24 02:52:19.492145: step: 118/464, loss: 0.20871415734291077 2023-01-24 02:52:20.332110: step: 120/464, loss: 0.2952080965042114 2023-01-24 02:52:21.044157: step: 122/464, loss: 0.3873203992843628 2023-01-24 02:52:21.758455: step: 124/464, loss: 0.23726877570152283 2023-01-24 02:52:22.414599: step: 126/464, loss: 0.08396518975496292 2023-01-24 02:52:23.171570: step: 128/464, loss: 0.28581172227859497 2023-01-24 02:52:23.892015: step: 130/464, loss: 0.18190300464630127 2023-01-24 02:52:24.784488: step: 132/464, loss: 3.430314064025879 2023-01-24 02:52:25.531950: step: 134/464, loss: 0.25667664408683777 2023-01-24 02:52:26.255179: step: 136/464, loss: 0.46132755279541016 2023-01-24 02:52:27.034580: step: 138/464, loss: 0.20997853577136993 2023-01-24 02:52:27.853478: step: 140/464, loss: 0.5437301993370056 2023-01-24 02:52:28.561272: step: 142/464, loss: 0.18092238903045654 2023-01-24 02:52:29.270985: step: 144/464, loss: 0.16767120361328125 2023-01-24 02:52:30.009982: step: 146/464, loss: 0.24160486459732056 2023-01-24 02:52:30.727969: step: 148/464, loss: 1.0245047807693481 2023-01-24 02:52:31.427253: step: 150/464, loss: 0.1303568333387375 2023-01-24 02:52:32.157326: step: 152/464, loss: 0.21133965253829956 2023-01-24 02:52:32.908607: step: 154/464, loss: 0.3464350700378418 2023-01-24 02:52:33.576649: step: 156/464, loss: 1.610190749168396 2023-01-24 02:52:34.321491: step: 158/464, loss: 0.49895238876342773 2023-01-24 02:52:35.011940: step: 160/464, loss: 0.09356162697076797 2023-01-24 02:52:35.750791: step: 162/464, loss: 0.3734308183193207 2023-01-24 02:52:36.624920: step: 164/464, loss: 5.4781317710876465 2023-01-24 02:52:37.400011: step: 166/464, loss: 0.2703748345375061 2023-01-24 02:52:38.140889: step: 168/464, loss: 0.48195675015449524 2023-01-24 02:52:38.884733: step: 170/464, loss: 0.30958640575408936 2023-01-24 02:52:39.629570: step: 172/464, loss: 0.47871479392051697 2023-01-24 02:52:40.354737: step: 174/464, loss: 0.26660820841789246 2023-01-24 02:52:41.127053: step: 176/464, loss: 0.4663125276565552 2023-01-24 02:52:41.876977: step: 178/464, loss: 0.1425776481628418 2023-01-24 02:52:42.595508: step: 180/464, loss: 0.08970626443624496 2023-01-24 02:52:43.234855: step: 182/464, loss: 0.26843932271003723 2023-01-24 02:52:43.877300: step: 184/464, loss: 0.19826146960258484 2023-01-24 02:52:44.597924: step: 186/464, loss: 0.366080105304718 2023-01-24 02:52:45.353648: step: 188/464, loss: 0.39854663610458374 2023-01-24 02:52:46.084069: step: 190/464, loss: 0.14912031590938568 2023-01-24 02:52:46.757701: step: 192/464, loss: 0.07307624816894531 2023-01-24 02:52:47.457902: step: 194/464, loss: 0.2346281260251999 2023-01-24 02:52:48.252761: step: 196/464, loss: 0.25821825861930847 2023-01-24 02:52:49.025543: step: 198/464, loss: 0.13114692270755768 2023-01-24 02:52:49.774229: step: 200/464, loss: 0.2134842872619629 2023-01-24 02:52:50.475803: step: 202/464, loss: 0.5420844554901123 2023-01-24 02:52:51.303017: step: 204/464, loss: 0.5633516311645508 2023-01-24 02:52:52.019036: step: 206/464, loss: 0.11726406216621399 2023-01-24 02:52:52.688290: step: 208/464, loss: 0.21820184588432312 2023-01-24 02:52:53.472464: step: 210/464, loss: 0.19968479871749878 2023-01-24 02:52:54.140233: step: 212/464, loss: 0.1356292963027954 2023-01-24 02:52:54.874837: step: 214/464, loss: 0.18377524614334106 2023-01-24 02:52:55.626120: step: 216/464, loss: 0.1599075347185135 2023-01-24 02:52:56.451616: step: 218/464, loss: 0.6362401247024536 2023-01-24 02:52:57.205108: step: 220/464, loss: 0.16068458557128906 2023-01-24 02:52:57.896223: step: 222/464, loss: 0.20188620686531067 2023-01-24 02:52:58.610935: step: 224/464, loss: 0.12553514540195465 2023-01-24 02:52:59.423847: step: 226/464, loss: 1.1796537637710571 2023-01-24 02:53:00.234892: step: 228/464, loss: 0.6061399579048157 2023-01-24 02:53:00.951762: step: 230/464, loss: 0.2460578978061676 2023-01-24 02:53:01.620703: step: 232/464, loss: 0.0386289544403553 2023-01-24 02:53:02.422756: step: 234/464, loss: 0.30962854623794556 2023-01-24 02:53:03.144295: step: 236/464, loss: 0.2959977388381958 2023-01-24 02:53:03.922526: step: 238/464, loss: 0.1805409938097 2023-01-24 02:53:04.661894: step: 240/464, loss: 0.2075037956237793 2023-01-24 02:53:05.347491: step: 242/464, loss: 0.5304340124130249 2023-01-24 02:53:06.063104: step: 244/464, loss: 0.10196323692798615 2023-01-24 02:53:06.818249: step: 246/464, loss: 0.41461655497550964 2023-01-24 02:53:07.585302: step: 248/464, loss: 0.36507394909858704 2023-01-24 02:53:08.302667: step: 250/464, loss: 0.15062814950942993 2023-01-24 02:53:09.133891: step: 252/464, loss: 0.5410463213920593 2023-01-24 02:53:09.814894: step: 254/464, loss: 0.32974231243133545 2023-01-24 02:53:10.585831: step: 256/464, loss: 0.2526639699935913 2023-01-24 02:53:11.308875: step: 258/464, loss: 0.22622418403625488 2023-01-24 02:53:12.022531: step: 260/464, loss: 0.15075217187404633 2023-01-24 02:53:12.894171: step: 262/464, loss: 0.2535134255886078 2023-01-24 02:53:13.651338: step: 264/464, loss: 0.2069745510816574 2023-01-24 02:53:14.378651: step: 266/464, loss: 1.1059925556182861 2023-01-24 02:53:15.125016: step: 268/464, loss: 0.16578927636146545 2023-01-24 02:53:15.858057: step: 270/464, loss: 0.1252116560935974 2023-01-24 02:53:16.679818: step: 272/464, loss: 0.4197810888290405 2023-01-24 02:53:17.502653: step: 274/464, loss: 0.7874919176101685 2023-01-24 02:53:18.299752: step: 276/464, loss: 0.43675607442855835 2023-01-24 02:53:19.084841: step: 278/464, loss: 0.15678708255290985 2023-01-24 02:53:19.863136: step: 280/464, loss: 0.3846047818660736 2023-01-24 02:53:20.597808: step: 282/464, loss: 0.35570046305656433 2023-01-24 02:53:21.347209: step: 284/464, loss: 0.10399957001209259 2023-01-24 02:53:22.111015: step: 286/464, loss: 0.3356718122959137 2023-01-24 02:53:22.851209: step: 288/464, loss: 0.2095506191253662 2023-01-24 02:53:23.602685: step: 290/464, loss: 0.1627904325723648 2023-01-24 02:53:24.343139: step: 292/464, loss: 0.47878342866897583 2023-01-24 02:53:25.114619: step: 294/464, loss: 0.29623061418533325 2023-01-24 02:53:25.862635: step: 296/464, loss: 0.31689897179603577 2023-01-24 02:53:26.639679: step: 298/464, loss: 0.14780159294605255 2023-01-24 02:53:27.375953: step: 300/464, loss: 0.11021255701780319 2023-01-24 02:53:28.099483: step: 302/464, loss: 0.151094451546669 2023-01-24 02:53:28.826755: step: 304/464, loss: 0.49748021364212036 2023-01-24 02:53:29.553859: step: 306/464, loss: 0.6771782636642456 2023-01-24 02:53:30.333604: step: 308/464, loss: 0.1410297006368637 2023-01-24 02:53:31.049673: step: 310/464, loss: 0.3992559313774109 2023-01-24 02:53:31.850917: step: 312/464, loss: 0.4508551061153412 2023-01-24 02:53:32.596821: step: 314/464, loss: 0.1885729432106018 2023-01-24 02:53:33.402361: step: 316/464, loss: 0.18776766955852509 2023-01-24 02:53:34.099946: step: 318/464, loss: 0.19099010527133942 2023-01-24 02:53:34.796152: step: 320/464, loss: 0.5384302139282227 2023-01-24 02:53:35.460143: step: 322/464, loss: 0.10380914062261581 2023-01-24 02:53:36.222917: step: 324/464, loss: 0.22476299107074738 2023-01-24 02:53:37.004086: step: 326/464, loss: 0.2729957699775696 2023-01-24 02:53:37.743659: step: 328/464, loss: 0.5379776954650879 2023-01-24 02:53:38.457387: step: 330/464, loss: 0.770466685295105 2023-01-24 02:53:39.236912: step: 332/464, loss: 0.4133603274822235 2023-01-24 02:53:39.967380: step: 334/464, loss: 0.18049293756484985 2023-01-24 02:53:40.751487: step: 336/464, loss: 0.6266219019889832 2023-01-24 02:53:41.511510: step: 338/464, loss: 0.11049497127532959 2023-01-24 02:53:42.225126: step: 340/464, loss: 0.2000427544116974 2023-01-24 02:53:43.018030: step: 342/464, loss: 0.7935618758201599 2023-01-24 02:53:43.791438: step: 344/464, loss: 0.18541643023490906 2023-01-24 02:53:44.495719: step: 346/464, loss: 1.1642208099365234 2023-01-24 02:53:45.361690: step: 348/464, loss: 0.7056695818901062 2023-01-24 02:53:46.140382: step: 350/464, loss: 0.06701252609491348 2023-01-24 02:53:46.895302: step: 352/464, loss: 0.6939871311187744 2023-01-24 02:53:47.668373: step: 354/464, loss: 0.439879834651947 2023-01-24 02:53:48.339255: step: 356/464, loss: 0.29044264554977417 2023-01-24 02:53:49.163876: step: 358/464, loss: 0.3111569583415985 2023-01-24 02:53:49.897237: step: 360/464, loss: 0.17994770407676697 2023-01-24 02:53:50.670591: step: 362/464, loss: 0.22458504140377045 2023-01-24 02:53:51.401800: step: 364/464, loss: 0.22156009078025818 2023-01-24 02:53:52.162093: step: 366/464, loss: 0.6971669793128967 2023-01-24 02:53:52.901992: step: 368/464, loss: 0.14607200026512146 2023-01-24 02:53:53.651909: step: 370/464, loss: 0.41244274377822876 2023-01-24 02:53:54.389343: step: 372/464, loss: 0.21590563654899597 2023-01-24 02:53:55.143414: step: 374/464, loss: 0.3225278854370117 2023-01-24 02:53:55.898003: step: 376/464, loss: 0.09340378642082214 2023-01-24 02:53:56.688212: step: 378/464, loss: 0.23559413850307465 2023-01-24 02:53:57.380369: step: 380/464, loss: 0.23710493743419647 2023-01-24 02:53:58.197776: step: 382/464, loss: 0.18201129138469696 2023-01-24 02:53:58.983613: step: 384/464, loss: 0.7326669692993164 2023-01-24 02:53:59.749454: step: 386/464, loss: 0.37326520681381226 2023-01-24 02:54:00.564537: step: 388/464, loss: 0.08627074211835861 2023-01-24 02:54:01.209358: step: 390/464, loss: 0.13408063352108002 2023-01-24 02:54:01.900340: step: 392/464, loss: 0.21325339376926422 2023-01-24 02:54:02.711434: step: 394/464, loss: 0.21826452016830444 2023-01-24 02:54:03.504584: step: 396/464, loss: 0.9736225008964539 2023-01-24 02:54:04.210728: step: 398/464, loss: 0.23253558576107025 2023-01-24 02:54:04.986174: step: 400/464, loss: 0.060269854962825775 2023-01-24 02:54:05.818370: step: 402/464, loss: 1.2768745422363281 2023-01-24 02:54:06.552154: step: 404/464, loss: 7.715400695800781 2023-01-24 02:54:07.234647: step: 406/464, loss: 0.39205795526504517 2023-01-24 02:54:07.937722: step: 408/464, loss: 0.2768790125846863 2023-01-24 02:54:08.661648: step: 410/464, loss: 0.14054222404956818 2023-01-24 02:54:09.350896: step: 412/464, loss: 0.9549347162246704 2023-01-24 02:54:10.092489: step: 414/464, loss: 0.30312371253967285 2023-01-24 02:54:10.752040: step: 416/464, loss: 0.2272430658340454 2023-01-24 02:54:11.517213: step: 418/464, loss: 0.41558340191841125 2023-01-24 02:54:12.440814: step: 420/464, loss: 0.6737045645713806 2023-01-24 02:54:13.167838: step: 422/464, loss: 0.2897966206073761 2023-01-24 02:54:13.848519: step: 424/464, loss: 0.7693721055984497 2023-01-24 02:54:14.605591: step: 426/464, loss: 0.12149006128311157 2023-01-24 02:54:15.305483: step: 428/464, loss: 0.21027140319347382 2023-01-24 02:54:15.996865: step: 430/464, loss: 0.13245093822479248 2023-01-24 02:54:16.738046: step: 432/464, loss: 0.503391444683075 2023-01-24 02:54:17.520205: step: 434/464, loss: 0.164590984582901 2023-01-24 02:54:18.278642: step: 436/464, loss: 0.42156076431274414 2023-01-24 02:54:19.053242: step: 438/464, loss: 0.20272719860076904 2023-01-24 02:54:19.803317: step: 440/464, loss: 0.5416241884231567 2023-01-24 02:54:20.538422: step: 442/464, loss: 0.20643553137779236 2023-01-24 02:54:21.327001: step: 444/464, loss: 0.2214449644088745 2023-01-24 02:54:22.026242: step: 446/464, loss: 0.14966677129268646 2023-01-24 02:54:22.776846: step: 448/464, loss: 0.17533740401268005 2023-01-24 02:54:23.491441: step: 450/464, loss: 0.37993356585502625 2023-01-24 02:54:24.239975: step: 452/464, loss: 0.09402307868003845 2023-01-24 02:54:24.965453: step: 454/464, loss: 0.17073950171470642 2023-01-24 02:54:25.710370: step: 456/464, loss: 0.2114734798669815 2023-01-24 02:54:26.395586: step: 458/464, loss: 0.21925882995128632 2023-01-24 02:54:27.110796: step: 460/464, loss: 0.6372971534729004 2023-01-24 02:54:27.926867: step: 462/464, loss: 0.8937779664993286 2023-01-24 02:54:28.770151: step: 464/464, loss: 0.1505538523197174 2023-01-24 02:54:29.578723: step: 466/464, loss: 0.1130121722817421 2023-01-24 02:54:30.399795: step: 468/464, loss: 0.3290260434150696 2023-01-24 02:54:31.155625: step: 470/464, loss: 0.5424768328666687 2023-01-24 02:54:31.895417: step: 472/464, loss: 0.1475936770439148 2023-01-24 02:54:32.656403: step: 474/464, loss: 0.5609605312347412 2023-01-24 02:54:33.339178: step: 476/464, loss: 1.326806664466858 2023-01-24 02:54:34.088417: step: 478/464, loss: 0.3607947826385498 2023-01-24 02:54:34.798658: step: 480/464, loss: 0.825629472732544 2023-01-24 02:54:35.604311: step: 482/464, loss: 0.40424904227256775 2023-01-24 02:54:36.366886: step: 484/464, loss: 0.25395286083221436 2023-01-24 02:54:37.125791: step: 486/464, loss: 0.2789684534072876 2023-01-24 02:54:37.832124: step: 488/464, loss: 0.5685667991638184 2023-01-24 02:54:38.580784: step: 490/464, loss: 0.25906193256378174 2023-01-24 02:54:39.336893: step: 492/464, loss: 0.14646874368190765 2023-01-24 02:54:40.116241: step: 494/464, loss: 0.10087735950946808 2023-01-24 02:54:40.851666: step: 496/464, loss: 0.10177915543317795 2023-01-24 02:54:41.591810: step: 498/464, loss: 1.3080449104309082 2023-01-24 02:54:42.351744: step: 500/464, loss: 0.9278597831726074 2023-01-24 02:54:43.243403: step: 502/464, loss: 0.2622971534729004 2023-01-24 02:54:44.039748: step: 504/464, loss: 0.07536578923463821 2023-01-24 02:54:44.847767: step: 506/464, loss: 0.19723260402679443 2023-01-24 02:54:45.649290: step: 508/464, loss: 0.12470276653766632 2023-01-24 02:54:46.459952: step: 510/464, loss: 0.2333928346633911 2023-01-24 02:54:47.188082: step: 512/464, loss: 0.1920052170753479 2023-01-24 02:54:47.942130: step: 514/464, loss: 0.22460505366325378 2023-01-24 02:54:48.738022: step: 516/464, loss: 0.2736990451812744 2023-01-24 02:54:49.435426: step: 518/464, loss: 0.3205868601799011 2023-01-24 02:54:50.267705: step: 520/464, loss: 0.25555187463760376 2023-01-24 02:54:51.046153: step: 522/464, loss: 0.3476880192756653 2023-01-24 02:54:51.829201: step: 524/464, loss: 0.15089766681194305 2023-01-24 02:54:52.575081: step: 526/464, loss: 0.1468602567911148 2023-01-24 02:54:53.317094: step: 528/464, loss: 3.05962872505188 2023-01-24 02:54:54.037228: step: 530/464, loss: 0.14920800924301147 2023-01-24 02:54:54.706833: step: 532/464, loss: 0.1825367510318756 2023-01-24 02:54:55.390312: step: 534/464, loss: 0.08955138921737671 2023-01-24 02:54:56.097072: step: 536/464, loss: 0.08840145170688629 2023-01-24 02:54:56.803357: step: 538/464, loss: 1.535651445388794 2023-01-24 02:54:57.515614: step: 540/464, loss: 0.14180992543697357 2023-01-24 02:54:58.284318: step: 542/464, loss: 0.39354032278060913 2023-01-24 02:54:58.942866: step: 544/464, loss: 0.2640589475631714 2023-01-24 02:54:59.694603: step: 546/464, loss: 0.17533175647258759 2023-01-24 02:55:00.451418: step: 548/464, loss: 0.2676864266395569 2023-01-24 02:55:01.207536: step: 550/464, loss: 0.16309544444084167 2023-01-24 02:55:01.964238: step: 552/464, loss: 0.30232033133506775 2023-01-24 02:55:02.628962: step: 554/464, loss: 0.5454744100570679 2023-01-24 02:55:03.362175: step: 556/464, loss: 0.28288885951042175 2023-01-24 02:55:04.180854: step: 558/464, loss: 0.2505795359611511 2023-01-24 02:55:04.801103: step: 560/464, loss: 0.4683348834514618 2023-01-24 02:55:05.620358: step: 562/464, loss: 0.2200739085674286 2023-01-24 02:55:06.341546: step: 564/464, loss: 0.12059260159730911 2023-01-24 02:55:07.118136: step: 566/464, loss: 0.5861608982086182 2023-01-24 02:55:07.843446: step: 568/464, loss: 0.14045344293117523 2023-01-24 02:55:08.582662: step: 570/464, loss: 0.38023900985717773 2023-01-24 02:55:09.328910: step: 572/464, loss: 0.508532702922821 2023-01-24 02:55:10.059432: step: 574/464, loss: 0.145177960395813 2023-01-24 02:55:10.762084: step: 576/464, loss: 0.2807120382785797 2023-01-24 02:55:11.529455: step: 578/464, loss: 0.22880499064922333 2023-01-24 02:55:12.377122: step: 580/464, loss: 0.14477400481700897 2023-01-24 02:55:13.140051: step: 582/464, loss: 0.40797674655914307 2023-01-24 02:55:13.824253: step: 584/464, loss: 0.8094741702079773 2023-01-24 02:55:14.560379: step: 586/464, loss: 0.11368077993392944 2023-01-24 02:55:15.295131: step: 588/464, loss: 0.37009745836257935 2023-01-24 02:55:16.012389: step: 590/464, loss: 0.9669628739356995 2023-01-24 02:55:16.877225: step: 592/464, loss: 1.0077821016311646 2023-01-24 02:55:17.596060: step: 594/464, loss: 0.2285720556974411 2023-01-24 02:55:18.443785: step: 596/464, loss: 0.2633154094219208 2023-01-24 02:55:19.099497: step: 598/464, loss: 0.16029880940914154 2023-01-24 02:55:19.865081: step: 600/464, loss: 0.22524085640907288 2023-01-24 02:55:20.651136: step: 602/464, loss: 0.6412642598152161 2023-01-24 02:55:21.442739: step: 604/464, loss: 0.2362058162689209 2023-01-24 02:55:22.167009: step: 606/464, loss: 2.0845179557800293 2023-01-24 02:55:22.877897: step: 608/464, loss: 0.2981489598751068 2023-01-24 02:55:23.597234: step: 610/464, loss: 0.1880742907524109 2023-01-24 02:55:24.325986: step: 612/464, loss: 0.4543037414550781 2023-01-24 02:55:25.105573: step: 614/464, loss: 0.44105908274650574 2023-01-24 02:55:25.878453: step: 616/464, loss: 0.2084379643201828 2023-01-24 02:55:26.699852: step: 618/464, loss: 0.30074912309646606 2023-01-24 02:55:27.423017: step: 620/464, loss: 0.837376058101654 2023-01-24 02:55:28.272951: step: 622/464, loss: 0.09070264548063278 2023-01-24 02:55:29.053105: step: 624/464, loss: 0.7012251615524292 2023-01-24 02:55:29.712225: step: 626/464, loss: 0.2049962729215622 2023-01-24 02:55:30.503296: step: 628/464, loss: 0.27639079093933105 2023-01-24 02:55:31.295286: step: 630/464, loss: 0.16173185408115387 2023-01-24 02:55:32.016232: step: 632/464, loss: 0.1771915704011917 2023-01-24 02:55:32.799612: step: 634/464, loss: 0.0934867411851883 2023-01-24 02:55:33.566152: step: 636/464, loss: 0.23240536451339722 2023-01-24 02:55:34.276799: step: 638/464, loss: 0.9698227047920227 2023-01-24 02:55:34.971687: step: 640/464, loss: 0.49567627906799316 2023-01-24 02:55:35.683295: step: 642/464, loss: 0.07443743199110031 2023-01-24 02:55:36.338021: step: 644/464, loss: 0.30980682373046875 2023-01-24 02:55:37.172941: step: 646/464, loss: 0.22703073918819427 2023-01-24 02:55:38.043603: step: 648/464, loss: 0.271577388048172 2023-01-24 02:55:38.779366: step: 650/464, loss: 0.3126218914985657 2023-01-24 02:55:39.485418: step: 652/464, loss: 0.5985322594642639 2023-01-24 02:55:40.181958: step: 654/464, loss: 0.24709294736385345 2023-01-24 02:55:40.947746: step: 656/464, loss: 0.7609562873840332 2023-01-24 02:55:41.602024: step: 658/464, loss: 0.2991550862789154 2023-01-24 02:55:42.403969: step: 660/464, loss: 0.28040316700935364 2023-01-24 02:55:43.188748: step: 662/464, loss: 0.09969662874937057 2023-01-24 02:55:43.914107: step: 664/464, loss: 0.8616743087768555 2023-01-24 02:55:44.654651: step: 666/464, loss: 0.730205774307251 2023-01-24 02:55:45.308379: step: 668/464, loss: 0.2294856160879135 2023-01-24 02:55:46.005773: step: 670/464, loss: 0.17386649549007416 2023-01-24 02:55:46.763272: step: 672/464, loss: 0.21030853688716888 2023-01-24 02:55:47.499980: step: 674/464, loss: 0.4977702796459198 2023-01-24 02:55:48.190007: step: 676/464, loss: 0.23179440200328827 2023-01-24 02:55:48.842834: step: 678/464, loss: 0.3305434286594391 2023-01-24 02:55:49.653532: step: 680/464, loss: 0.5477005839347839 2023-01-24 02:55:50.350617: step: 682/464, loss: 0.4550781548023224 2023-01-24 02:55:51.070216: step: 684/464, loss: 0.127974271774292 2023-01-24 02:55:51.783042: step: 686/464, loss: 0.22048044204711914 2023-01-24 02:55:52.506453: step: 688/464, loss: 0.08273495733737946 2023-01-24 02:55:53.262620: step: 690/464, loss: 0.8862178921699524 2023-01-24 02:55:54.022081: step: 692/464, loss: 0.27741530537605286 2023-01-24 02:55:54.819971: step: 694/464, loss: 0.3920413553714752 2023-01-24 02:55:55.556891: step: 696/464, loss: 0.21315898001194 2023-01-24 02:55:56.398124: step: 698/464, loss: 0.28585731983184814 2023-01-24 02:55:57.133826: step: 700/464, loss: 0.2642350196838379 2023-01-24 02:55:57.865850: step: 702/464, loss: 0.21909445524215698 2023-01-24 02:55:58.546566: step: 704/464, loss: 0.13675111532211304 2023-01-24 02:55:59.245838: step: 706/464, loss: 0.6145305633544922 2023-01-24 02:56:00.078130: step: 708/464, loss: 0.633683979511261 2023-01-24 02:56:00.828561: step: 710/464, loss: 0.23416614532470703 2023-01-24 02:56:01.501485: step: 712/464, loss: 0.43624699115753174 2023-01-24 02:56:02.225638: step: 714/464, loss: 0.29760614037513733 2023-01-24 02:56:02.994245: step: 716/464, loss: 0.11144375801086426 2023-01-24 02:56:03.669902: step: 718/464, loss: 0.5339705348014832 2023-01-24 02:56:04.453807: step: 720/464, loss: 0.1553748995065689 2023-01-24 02:56:05.227723: step: 722/464, loss: 0.15754304826259613 2023-01-24 02:56:05.939553: step: 724/464, loss: 0.15813219547271729 2023-01-24 02:56:06.643470: step: 726/464, loss: 0.16239820420742035 2023-01-24 02:56:07.314066: step: 728/464, loss: 0.29819780588150024 2023-01-24 02:56:08.148556: step: 730/464, loss: 0.7145152688026428 2023-01-24 02:56:09.003433: step: 732/464, loss: 0.384068101644516 2023-01-24 02:56:09.748698: step: 734/464, loss: 0.4480460584163666 2023-01-24 02:56:10.497657: step: 736/464, loss: 0.5774493217468262 2023-01-24 02:56:11.331690: step: 738/464, loss: 0.22652669250965118 2023-01-24 02:56:12.084212: step: 740/464, loss: 0.485545814037323 2023-01-24 02:56:12.782045: step: 742/464, loss: 0.051977403461933136 2023-01-24 02:56:13.510730: step: 744/464, loss: 0.1289929449558258 2023-01-24 02:56:14.278732: step: 746/464, loss: 0.15977248549461365 2023-01-24 02:56:15.045411: step: 748/464, loss: 0.14681187272071838 2023-01-24 02:56:15.872291: step: 750/464, loss: 0.14676165580749512 2023-01-24 02:56:16.557030: step: 752/464, loss: 0.4534044563770294 2023-01-24 02:56:17.447491: step: 754/464, loss: 0.31766563653945923 2023-01-24 02:56:18.199083: step: 756/464, loss: 0.5398532152175903 2023-01-24 02:56:18.900325: step: 758/464, loss: 0.14937244355678558 2023-01-24 02:56:19.707189: step: 760/464, loss: 0.26810702681541443 2023-01-24 02:56:20.433718: step: 762/464, loss: 0.4462334215641022 2023-01-24 02:56:21.197443: step: 764/464, loss: 0.44105684757232666 2023-01-24 02:56:22.015849: step: 766/464, loss: 0.3404810130596161 2023-01-24 02:56:22.792404: step: 768/464, loss: 0.4538693130016327 2023-01-24 02:56:23.492606: step: 770/464, loss: 0.17481106519699097 2023-01-24 02:56:24.132625: step: 772/464, loss: 0.22036492824554443 2023-01-24 02:56:24.830268: step: 774/464, loss: 0.2719525396823883 2023-01-24 02:56:25.579533: step: 776/464, loss: 0.21486765146255493 2023-01-24 02:56:26.327988: step: 778/464, loss: 0.4466085433959961 2023-01-24 02:56:27.062607: step: 780/464, loss: 0.21514861285686493 2023-01-24 02:56:27.735474: step: 782/464, loss: 0.4557275176048279 2023-01-24 02:56:28.531854: step: 784/464, loss: 0.12855775654315948 2023-01-24 02:56:29.278494: step: 786/464, loss: 0.24510878324508667 2023-01-24 02:56:30.061893: step: 788/464, loss: 0.12076195329427719 2023-01-24 02:56:30.880766: step: 790/464, loss: 0.5561219453811646 2023-01-24 02:56:31.591852: step: 792/464, loss: 0.5087153911590576 2023-01-24 02:56:32.281955: step: 794/464, loss: 1.0408319234848022 2023-01-24 02:56:33.069152: step: 796/464, loss: 0.18620692193508148 2023-01-24 02:56:33.709814: step: 798/464, loss: 0.156281515955925 2023-01-24 02:56:34.438151: step: 800/464, loss: 0.48612117767333984 2023-01-24 02:56:35.222911: step: 802/464, loss: 0.6834096908569336 2023-01-24 02:56:35.908316: step: 804/464, loss: 0.2013227492570877 2023-01-24 02:56:36.701040: step: 806/464, loss: 0.5944108366966248 2023-01-24 02:56:37.428340: step: 808/464, loss: 0.1534864604473114 2023-01-24 02:56:38.132994: step: 810/464, loss: 0.2654111385345459 2023-01-24 02:56:38.885450: step: 812/464, loss: 0.11527702957391739 2023-01-24 02:56:39.646293: step: 814/464, loss: 0.4222082197666168 2023-01-24 02:56:40.348926: step: 816/464, loss: 0.19088996946811676 2023-01-24 02:56:41.140671: step: 818/464, loss: 0.459178626537323 2023-01-24 02:56:41.925883: step: 820/464, loss: 0.0953294038772583 2023-01-24 02:56:42.720724: step: 822/464, loss: 0.5236583352088928 2023-01-24 02:56:43.549335: step: 824/464, loss: 0.3258075416088104 2023-01-24 02:56:44.275708: step: 826/464, loss: 0.1721639782190323 2023-01-24 02:56:45.019897: step: 828/464, loss: 0.39053988456726074 2023-01-24 02:56:45.778484: step: 830/464, loss: 0.23920369148254395 2023-01-24 02:56:46.493272: step: 832/464, loss: 0.3683702349662781 2023-01-24 02:56:47.271673: step: 834/464, loss: 0.32811301946640015 2023-01-24 02:56:47.954892: step: 836/464, loss: 0.5699950456619263 2023-01-24 02:56:48.694391: step: 838/464, loss: 0.3164607584476471 2023-01-24 02:56:49.418704: step: 840/464, loss: 0.16344623267650604 2023-01-24 02:56:50.139450: step: 842/464, loss: 0.33790773153305054 2023-01-24 02:56:50.892964: step: 844/464, loss: 0.17128928005695343 2023-01-24 02:56:51.594687: step: 846/464, loss: 0.20610596239566803 2023-01-24 02:56:52.310393: step: 848/464, loss: 0.41363999247550964 2023-01-24 02:56:53.001638: step: 850/464, loss: 0.10889443010091782 2023-01-24 02:56:53.708151: step: 852/464, loss: 0.1969783902168274 2023-01-24 02:56:54.462559: step: 854/464, loss: 3.389371156692505 2023-01-24 02:56:55.197610: step: 856/464, loss: 0.2419338971376419 2023-01-24 02:56:55.907269: step: 858/464, loss: 0.23793935775756836 2023-01-24 02:56:56.617599: step: 860/464, loss: 0.4282359182834625 2023-01-24 02:56:57.370882: step: 862/464, loss: 0.15168102085590363 2023-01-24 02:56:58.084984: step: 864/464, loss: 0.2177678644657135 2023-01-24 02:56:58.858686: step: 866/464, loss: 0.6482052803039551 2023-01-24 02:56:59.665461: step: 868/464, loss: 0.49208351969718933 2023-01-24 02:57:00.516593: step: 870/464, loss: 0.8154049515724182 2023-01-24 02:57:01.253627: step: 872/464, loss: 0.289344847202301 2023-01-24 02:57:01.996575: step: 874/464, loss: 0.2476513385772705 2023-01-24 02:57:02.750134: step: 876/464, loss: 0.3879658877849579 2023-01-24 02:57:03.496835: step: 878/464, loss: 0.17360030114650726 2023-01-24 02:57:04.256020: step: 880/464, loss: 1.1755492687225342 2023-01-24 02:57:05.071956: step: 882/464, loss: 0.6070190072059631 2023-01-24 02:57:05.869549: step: 884/464, loss: 0.6003977060317993 2023-01-24 02:57:06.637731: step: 886/464, loss: 0.5772039294242859 2023-01-24 02:57:07.304257: step: 888/464, loss: 1.0244700908660889 2023-01-24 02:57:08.090422: step: 890/464, loss: 0.26510658860206604 2023-01-24 02:57:08.801338: step: 892/464, loss: 0.2694578766822815 2023-01-24 02:57:09.467054: step: 894/464, loss: 0.1434503048658371 2023-01-24 02:57:10.203457: step: 896/464, loss: 0.41476795077323914 2023-01-24 02:57:10.886480: step: 898/464, loss: 0.15585322678089142 2023-01-24 02:57:11.569285: step: 900/464, loss: 0.6767136454582214 2023-01-24 02:57:12.365252: step: 902/464, loss: 0.30244138836860657 2023-01-24 02:57:13.028517: step: 904/464, loss: 0.07525203377008438 2023-01-24 02:57:13.724807: step: 906/464, loss: 0.3611765205860138 2023-01-24 02:57:14.451289: step: 908/464, loss: 0.21834854781627655 2023-01-24 02:57:15.108528: step: 910/464, loss: 0.307809442281723 2023-01-24 02:57:15.828887: step: 912/464, loss: 0.48542436957359314 2023-01-24 02:57:16.620215: step: 914/464, loss: 1.1236315965652466 2023-01-24 02:57:17.336806: step: 916/464, loss: 0.6004358530044556 2023-01-24 02:57:18.059923: step: 918/464, loss: 0.5024645328521729 2023-01-24 02:57:18.820854: step: 920/464, loss: 0.8452791571617126 2023-01-24 02:57:19.544874: step: 922/464, loss: 0.24478454887866974 2023-01-24 02:57:20.308305: step: 924/464, loss: 0.14474430680274963 2023-01-24 02:57:21.059185: step: 926/464, loss: 0.27781936526298523 2023-01-24 02:57:21.809587: step: 928/464, loss: 0.12701298296451569 2023-01-24 02:57:22.436377: step: 930/464, loss: 1.2429008483886719 ================================================== Loss: 0.400 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3430680085139636, 'r': 0.3183306568564102, 'f1': 0.330236724730961}, 'combined': 0.24333232348597125, 'epoch': 9} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32349203894432277, 'r': 0.26013751499676996, 'f1': 0.2883761268220772}, 'combined': 0.1790967524473953, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3164792785604226, 'r': 0.300264970171179, 'f1': 0.30815898594004154}, 'combined': 0.22706451595582006, 'epoch': 9} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32335457602159945, 'r': 0.26334405253486626, 'f1': 0.29028022330557074}, 'combined': 0.1802792965792492, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3331713050622994, 'r': 0.30345773516110763, 'f1': 0.3176211051239399}, 'combined': 0.23403660377553462, 'epoch': 9} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.33966819343217763, 'r': 0.26891795768273097, 'f1': 0.3001805962893735}, 'combined': 0.18642794927445303, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36041666666666666, 'r': 0.30892857142857144, 'f1': 0.33269230769230773}, 'combined': 0.2217948717948718, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2734375, 'r': 0.3804347826086957, 'f1': 0.31818181818181823}, 'combined': 0.15909090909090912, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4791666666666667, 'r': 0.2974137931034483, 'f1': 0.36702127659574474}, 'combined': 0.2446808510638298, 'epoch': 9} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3430680085139636, 'r': 0.3183306568564102, 'f1': 0.330236724730961}, 'combined': 0.24333232348597125, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32349203894432277, 'r': 0.26013751499676996, 'f1': 0.2883761268220772}, 'combined': 0.1790967524473953, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36041666666666666, 'r': 0.30892857142857144, 'f1': 0.33269230769230773}, 'combined': 0.2217948717948718, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3331713050622994, 'r': 0.30345773516110763, 'f1': 0.3176211051239399}, 'combined': 0.23403660377553462, 'epoch': 9} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.33966819343217763, 'r': 0.26891795768273097, 'f1': 0.3001805962893735}, 'combined': 0.18642794927445303, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4791666666666667, 'r': 0.2974137931034483, 'f1': 0.36702127659574474}, 'combined': 0.2446808510638298, 'epoch': 9} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:00:19.153755: step: 2/464, loss: 0.21232697367668152 2023-01-24 03:00:19.967637: step: 4/464, loss: 0.5965004563331604 2023-01-24 03:00:20.715288: step: 6/464, loss: 0.24643248319625854 2023-01-24 03:00:21.391432: step: 8/464, loss: 0.17525090277194977 2023-01-24 03:00:22.228386: step: 10/464, loss: 0.2345692366361618 2023-01-24 03:00:22.953522: step: 12/464, loss: 0.18845805525779724 2023-01-24 03:00:23.664001: step: 14/464, loss: 0.09380072355270386 2023-01-24 03:00:24.400180: step: 16/464, loss: 0.1685120165348053 2023-01-24 03:00:25.149398: step: 18/464, loss: 0.15246163308620453 2023-01-24 03:00:25.910990: step: 20/464, loss: 0.2983258068561554 2023-01-24 03:00:26.687986: step: 22/464, loss: 0.10962452739477158 2023-01-24 03:00:27.392049: step: 24/464, loss: 0.19744998216629028 2023-01-24 03:00:28.161144: step: 26/464, loss: 0.18615196645259857 2023-01-24 03:00:28.843232: step: 28/464, loss: 1.7140228748321533 2023-01-24 03:00:29.561983: step: 30/464, loss: 0.06185387820005417 2023-01-24 03:00:30.352395: step: 32/464, loss: 0.3651220500469208 2023-01-24 03:00:31.040454: step: 34/464, loss: 0.12869255244731903 2023-01-24 03:00:31.707021: step: 36/464, loss: 0.09327950328588486 2023-01-24 03:00:32.412471: step: 38/464, loss: 0.2185249775648117 2023-01-24 03:00:33.165441: step: 40/464, loss: 0.12256766110658646 2023-01-24 03:00:33.894145: step: 42/464, loss: 0.09723819047212601 2023-01-24 03:00:34.563049: step: 44/464, loss: 0.26763907074928284 2023-01-24 03:00:35.252402: step: 46/464, loss: 0.21186493337154388 2023-01-24 03:00:35.930549: step: 48/464, loss: 0.09950794279575348 2023-01-24 03:00:36.709765: step: 50/464, loss: 0.8632354736328125 2023-01-24 03:00:37.515597: step: 52/464, loss: 0.5110155940055847 2023-01-24 03:00:38.304786: step: 54/464, loss: 0.32015207409858704 2023-01-24 03:00:39.236325: step: 56/464, loss: 0.19230931997299194 2023-01-24 03:00:39.970360: step: 58/464, loss: 0.22721707820892334 2023-01-24 03:00:40.769922: step: 60/464, loss: 0.11136981099843979 2023-01-24 03:00:41.505663: step: 62/464, loss: 0.24339455366134644 2023-01-24 03:00:42.217869: step: 64/464, loss: 0.3092723786830902 2023-01-24 03:00:42.916381: step: 66/464, loss: 0.2001309096813202 2023-01-24 03:00:43.641118: step: 68/464, loss: 0.21909552812576294 2023-01-24 03:00:44.427676: step: 70/464, loss: 0.10150119662284851 2023-01-24 03:00:45.210738: step: 72/464, loss: 0.20717015862464905 2023-01-24 03:00:46.008130: step: 74/464, loss: 0.08364620059728622 2023-01-24 03:00:46.811773: step: 76/464, loss: 0.9749086499214172 2023-01-24 03:00:47.619413: step: 78/464, loss: 0.8755331039428711 2023-01-24 03:00:48.433386: step: 80/464, loss: 0.18153031170368195 2023-01-24 03:00:49.124050: step: 82/464, loss: 0.15206857025623322 2023-01-24 03:00:49.825208: step: 84/464, loss: 0.12143020331859589 2023-01-24 03:00:50.551654: step: 86/464, loss: 0.23054926097393036 2023-01-24 03:00:51.467438: step: 88/464, loss: 1.0099869966506958 2023-01-24 03:00:52.289180: step: 90/464, loss: 0.21190431714057922 2023-01-24 03:00:53.026669: step: 92/464, loss: 0.177865132689476 2023-01-24 03:00:53.752097: step: 94/464, loss: 0.16128742694854736 2023-01-24 03:00:54.472356: step: 96/464, loss: 0.6147654056549072 2023-01-24 03:00:55.284847: step: 98/464, loss: 0.44657233357429504 2023-01-24 03:00:56.012952: step: 100/464, loss: 0.61492520570755 2023-01-24 03:00:56.668378: step: 102/464, loss: 0.1421038955450058 2023-01-24 03:00:57.367899: step: 104/464, loss: 0.15594199299812317 2023-01-24 03:00:58.088557: step: 106/464, loss: 0.1598719209432602 2023-01-24 03:00:58.827527: step: 108/464, loss: 0.13725920021533966 2023-01-24 03:00:59.514755: step: 110/464, loss: 0.135905459523201 2023-01-24 03:01:00.242337: step: 112/464, loss: 0.23689238727092743 2023-01-24 03:01:01.056300: step: 114/464, loss: 0.19912511110305786 2023-01-24 03:01:01.839602: step: 116/464, loss: 0.1403755396604538 2023-01-24 03:01:02.613191: step: 118/464, loss: 0.1728145331144333 2023-01-24 03:01:03.404532: step: 120/464, loss: 0.3202742338180542 2023-01-24 03:01:04.216770: step: 122/464, loss: 0.13201336562633514 2023-01-24 03:01:04.917181: step: 124/464, loss: 0.11553767323493958 2023-01-24 03:01:05.721959: step: 126/464, loss: 0.3744227886199951 2023-01-24 03:01:06.502623: step: 128/464, loss: 0.6965504288673401 2023-01-24 03:01:07.261923: step: 130/464, loss: 0.4453356862068176 2023-01-24 03:01:08.020356: step: 132/464, loss: 0.08157233148813248 2023-01-24 03:01:08.669617: step: 134/464, loss: 0.2185574620962143 2023-01-24 03:01:09.402460: step: 136/464, loss: 0.2841760218143463 2023-01-24 03:01:10.184929: step: 138/464, loss: 0.5950050354003906 2023-01-24 03:01:10.882332: step: 140/464, loss: 0.9071783423423767 2023-01-24 03:01:11.651631: step: 142/464, loss: 0.20753763616085052 2023-01-24 03:01:12.427378: step: 144/464, loss: 0.39771732687950134 2023-01-24 03:01:13.125773: step: 146/464, loss: 0.11873120069503784 2023-01-24 03:01:13.838769: step: 148/464, loss: 0.7489532828330994 2023-01-24 03:01:14.535135: step: 150/464, loss: 0.25500231981277466 2023-01-24 03:01:15.303875: step: 152/464, loss: 0.49194595217704773 2023-01-24 03:01:16.037744: step: 154/464, loss: 0.08242286741733551 2023-01-24 03:01:16.848908: step: 156/464, loss: 0.05969241261482239 2023-01-24 03:01:17.614064: step: 158/464, loss: 0.25976186990737915 2023-01-24 03:01:18.392486: step: 160/464, loss: 0.46697625517845154 2023-01-24 03:01:19.152317: step: 162/464, loss: 0.5594256520271301 2023-01-24 03:01:19.897842: step: 164/464, loss: 0.20075185596942902 2023-01-24 03:01:20.676887: step: 166/464, loss: 0.3751225471496582 2023-01-24 03:01:21.408811: step: 168/464, loss: 0.5394560098648071 2023-01-24 03:01:22.134881: step: 170/464, loss: 0.4806734323501587 2023-01-24 03:01:22.913861: step: 172/464, loss: 0.22877338528633118 2023-01-24 03:01:23.647032: step: 174/464, loss: 0.04806321859359741 2023-01-24 03:01:24.358892: step: 176/464, loss: 0.20131415128707886 2023-01-24 03:01:25.097784: step: 178/464, loss: 1.2342121601104736 2023-01-24 03:01:25.775753: step: 180/464, loss: 0.19610396027565002 2023-01-24 03:01:26.484588: step: 182/464, loss: 0.32422640919685364 2023-01-24 03:01:27.238437: step: 184/464, loss: 0.1297033131122589 2023-01-24 03:01:28.065869: step: 186/464, loss: 0.17376866936683655 2023-01-24 03:01:28.800207: step: 188/464, loss: 0.19256730377674103 2023-01-24 03:01:29.532011: step: 190/464, loss: 0.1108497604727745 2023-01-24 03:01:30.217869: step: 192/464, loss: 0.24731086194515228 2023-01-24 03:01:31.027121: step: 194/464, loss: 0.14615006744861603 2023-01-24 03:01:31.752474: step: 196/464, loss: 0.0613839365541935 2023-01-24 03:01:32.452919: step: 198/464, loss: 0.17077118158340454 2023-01-24 03:01:33.128537: step: 200/464, loss: 0.18954557180404663 2023-01-24 03:01:33.881160: step: 202/464, loss: 0.11936686933040619 2023-01-24 03:01:34.604990: step: 204/464, loss: 0.05609899386763573 2023-01-24 03:01:35.277422: step: 206/464, loss: 0.18242907524108887 2023-01-24 03:01:35.975781: step: 208/464, loss: 0.15568356215953827 2023-01-24 03:01:36.677736: step: 210/464, loss: 0.25138142704963684 2023-01-24 03:01:37.444462: step: 212/464, loss: 0.41353484988212585 2023-01-24 03:01:38.198387: step: 214/464, loss: 0.12580302357673645 2023-01-24 03:01:38.904097: step: 216/464, loss: 0.05890248343348503 2023-01-24 03:01:39.661053: step: 218/464, loss: 0.4115862548351288 2023-01-24 03:01:40.383534: step: 220/464, loss: 0.16974353790283203 2023-01-24 03:01:41.193786: step: 222/464, loss: 1.10183584690094 2023-01-24 03:01:42.050987: step: 224/464, loss: 0.8364865183830261 2023-01-24 03:01:42.752326: step: 226/464, loss: 0.60638827085495 2023-01-24 03:01:43.500867: step: 228/464, loss: 0.2671571373939514 2023-01-24 03:01:44.191058: step: 230/464, loss: 0.055853791534900665 2023-01-24 03:01:44.894335: step: 232/464, loss: 0.23713281750679016 2023-01-24 03:01:45.676976: step: 234/464, loss: 0.2899948060512543 2023-01-24 03:01:46.394679: step: 236/464, loss: 0.13573069870471954 2023-01-24 03:01:47.089904: step: 238/464, loss: 0.8137500882148743 2023-01-24 03:01:47.851073: step: 240/464, loss: 0.4511808156967163 2023-01-24 03:01:48.572248: step: 242/464, loss: 0.41318273544311523 2023-01-24 03:01:49.278722: step: 244/464, loss: 0.11683430522680283 2023-01-24 03:01:50.032736: step: 246/464, loss: 0.6399244070053101 2023-01-24 03:01:50.840057: step: 248/464, loss: 0.2516360282897949 2023-01-24 03:01:51.572697: step: 250/464, loss: 0.29193392395973206 2023-01-24 03:01:52.280116: step: 252/464, loss: 0.27569180727005005 2023-01-24 03:01:53.073056: step: 254/464, loss: 0.5015103816986084 2023-01-24 03:01:53.781161: step: 256/464, loss: 0.30280640721321106 2023-01-24 03:01:54.535432: step: 258/464, loss: 0.15494994819164276 2023-01-24 03:01:55.240293: step: 260/464, loss: 0.3739899694919586 2023-01-24 03:01:55.983343: step: 262/464, loss: 0.15253880620002747 2023-01-24 03:01:56.742092: step: 264/464, loss: 0.24771569669246674 2023-01-24 03:01:57.515864: step: 266/464, loss: 0.1636432707309723 2023-01-24 03:01:58.236292: step: 268/464, loss: 0.0732090175151825 2023-01-24 03:01:58.962858: step: 270/464, loss: 0.250224232673645 2023-01-24 03:01:59.785104: step: 272/464, loss: 0.09976043552160263 2023-01-24 03:02:00.501668: step: 274/464, loss: 0.14588111639022827 2023-01-24 03:02:01.267598: step: 276/464, loss: 0.08698129653930664 2023-01-24 03:02:01.970022: step: 278/464, loss: 0.4589356780052185 2023-01-24 03:02:02.647375: step: 280/464, loss: 0.15701982378959656 2023-01-24 03:02:03.288185: step: 282/464, loss: 0.19665288925170898 2023-01-24 03:02:03.978466: step: 284/464, loss: 0.2983488142490387 2023-01-24 03:02:04.749141: step: 286/464, loss: 0.5707964301109314 2023-01-24 03:02:05.455576: step: 288/464, loss: 0.31916865706443787 2023-01-24 03:02:06.304310: step: 290/464, loss: 0.1850687861442566 2023-01-24 03:02:07.049101: step: 292/464, loss: 0.5208576917648315 2023-01-24 03:02:07.760912: step: 294/464, loss: 0.0874454528093338 2023-01-24 03:02:08.426513: step: 296/464, loss: 0.5099970102310181 2023-01-24 03:02:09.075772: step: 298/464, loss: 0.15315058827400208 2023-01-24 03:02:09.811929: step: 300/464, loss: 0.08985976129770279 2023-01-24 03:02:10.513534: step: 302/464, loss: 0.26404041051864624 2023-01-24 03:02:11.286316: step: 304/464, loss: 0.07328619062900543 2023-01-24 03:02:12.095702: step: 306/464, loss: 0.17824620008468628 2023-01-24 03:02:12.795843: step: 308/464, loss: 0.5839994549751282 2023-01-24 03:02:13.569274: step: 310/464, loss: 2.2232160568237305 2023-01-24 03:02:14.295593: step: 312/464, loss: 0.32043343782424927 2023-01-24 03:02:15.059860: step: 314/464, loss: 0.5206624269485474 2023-01-24 03:02:15.754695: step: 316/464, loss: 0.1408088058233261 2023-01-24 03:02:16.441584: step: 318/464, loss: 0.5124632716178894 2023-01-24 03:02:17.213095: step: 320/464, loss: 0.1030653789639473 2023-01-24 03:02:17.934901: step: 322/464, loss: 0.1808159351348877 2023-01-24 03:02:18.725112: step: 324/464, loss: 0.1294044405221939 2023-01-24 03:02:19.426580: step: 326/464, loss: 0.3512157201766968 2023-01-24 03:02:20.203119: step: 328/464, loss: 0.5164199471473694 2023-01-24 03:02:20.949227: step: 330/464, loss: 0.4815867245197296 2023-01-24 03:02:21.688556: step: 332/464, loss: 0.6558340787887573 2023-01-24 03:02:22.643849: step: 334/464, loss: 0.6065689325332642 2023-01-24 03:02:23.458160: step: 336/464, loss: 0.10820100456476212 2023-01-24 03:02:24.203158: step: 338/464, loss: 0.18210628628730774 2023-01-24 03:02:24.924157: step: 340/464, loss: 2.3327083587646484 2023-01-24 03:02:25.620999: step: 342/464, loss: 0.3005005419254303 2023-01-24 03:02:26.398088: step: 344/464, loss: 0.7286275625228882 2023-01-24 03:02:27.159093: step: 346/464, loss: 0.288993239402771 2023-01-24 03:02:27.898770: step: 348/464, loss: 0.2670809328556061 2023-01-24 03:02:28.666573: step: 350/464, loss: 0.1438087671995163 2023-01-24 03:02:29.467953: step: 352/464, loss: 0.32805734872817993 2023-01-24 03:02:30.177393: step: 354/464, loss: 0.11649665981531143 2023-01-24 03:02:30.905556: step: 356/464, loss: 0.1538315713405609 2023-01-24 03:02:31.625121: step: 358/464, loss: 0.1075054258108139 2023-01-24 03:02:32.352612: step: 360/464, loss: 0.4133543372154236 2023-01-24 03:02:33.062626: step: 362/464, loss: 1.6680829524993896 2023-01-24 03:02:33.806151: step: 364/464, loss: 0.25736382603645325 2023-01-24 03:02:34.547807: step: 366/464, loss: 0.1185770332813263 2023-01-24 03:02:35.232680: step: 368/464, loss: 0.3984162509441376 2023-01-24 03:02:35.951390: step: 370/464, loss: 0.3035191297531128 2023-01-24 03:02:36.688073: step: 372/464, loss: 0.18461394309997559 2023-01-24 03:02:37.463765: step: 374/464, loss: 0.09824996441602707 2023-01-24 03:02:38.205562: step: 376/464, loss: 0.2347511500120163 2023-01-24 03:02:38.916492: step: 378/464, loss: 0.6326672434806824 2023-01-24 03:02:39.660343: step: 380/464, loss: 0.11433830857276917 2023-01-24 03:02:40.347489: step: 382/464, loss: 0.18060731887817383 2023-01-24 03:02:41.040582: step: 384/464, loss: 0.2918536365032196 2023-01-24 03:02:41.792286: step: 386/464, loss: 0.20384199917316437 2023-01-24 03:02:42.535910: step: 388/464, loss: 0.28546860814094543 2023-01-24 03:02:43.406646: step: 390/464, loss: 0.07320096343755722 2023-01-24 03:02:44.076222: step: 392/464, loss: 4.606675624847412 2023-01-24 03:02:44.824123: step: 394/464, loss: 0.6734826564788818 2023-01-24 03:02:45.555185: step: 396/464, loss: 0.14568394422531128 2023-01-24 03:02:46.311640: step: 398/464, loss: 0.13595956563949585 2023-01-24 03:02:47.041849: step: 400/464, loss: 0.32695436477661133 2023-01-24 03:02:47.705059: step: 402/464, loss: 0.0527927540242672 2023-01-24 03:02:48.485225: step: 404/464, loss: 0.1709793359041214 2023-01-24 03:02:49.236945: step: 406/464, loss: 0.31607356667518616 2023-01-24 03:02:49.955001: step: 408/464, loss: 0.3742814362049103 2023-01-24 03:02:50.665104: step: 410/464, loss: 0.26817089319229126 2023-01-24 03:02:51.327303: step: 412/464, loss: 0.22187118232250214 2023-01-24 03:02:52.059369: step: 414/464, loss: 0.3849436044692993 2023-01-24 03:02:52.728263: step: 416/464, loss: 0.4200716018676758 2023-01-24 03:02:53.430070: step: 418/464, loss: 0.12109020352363586 2023-01-24 03:02:54.215471: step: 420/464, loss: 0.12587374448776245 2023-01-24 03:02:55.030345: step: 422/464, loss: 0.28375929594039917 2023-01-24 03:02:55.752612: step: 424/464, loss: 0.26978546380996704 2023-01-24 03:02:56.420805: step: 426/464, loss: 0.08260051906108856 2023-01-24 03:02:57.180990: step: 428/464, loss: 0.20075982809066772 2023-01-24 03:02:57.926503: step: 430/464, loss: 0.22721540927886963 2023-01-24 03:02:58.704435: step: 432/464, loss: 0.2437964528799057 2023-01-24 03:02:59.441400: step: 434/464, loss: 0.09995950013399124 2023-01-24 03:03:00.182701: step: 436/464, loss: 0.05342341214418411 2023-01-24 03:03:00.893230: step: 438/464, loss: 0.22190716862678528 2023-01-24 03:03:01.710209: step: 440/464, loss: 0.05813509225845337 2023-01-24 03:03:02.451997: step: 442/464, loss: 1.4940779209136963 2023-01-24 03:03:03.137391: step: 444/464, loss: 0.11205273866653442 2023-01-24 03:03:03.955224: step: 446/464, loss: 0.23172278702259064 2023-01-24 03:03:04.687547: step: 448/464, loss: 0.32702767848968506 2023-01-24 03:03:05.568482: step: 450/464, loss: 0.7603186368942261 2023-01-24 03:03:06.316197: step: 452/464, loss: 0.7385392189025879 2023-01-24 03:03:07.149538: step: 454/464, loss: 0.4690871834754944 2023-01-24 03:03:07.855033: step: 456/464, loss: 0.7157934904098511 2023-01-24 03:03:08.676305: step: 458/464, loss: 0.32446351647377014 2023-01-24 03:03:09.484031: step: 460/464, loss: 0.9630855917930603 2023-01-24 03:03:10.262646: step: 462/464, loss: 0.4231507480144501 2023-01-24 03:03:10.988009: step: 464/464, loss: 0.12859512865543365 2023-01-24 03:03:11.712593: step: 466/464, loss: 0.22822855412960052 2023-01-24 03:03:12.404415: step: 468/464, loss: 0.17116977274417877 2023-01-24 03:03:13.237650: step: 470/464, loss: 0.3771575689315796 2023-01-24 03:03:14.067569: step: 472/464, loss: 0.23884013295173645 2023-01-24 03:03:14.763599: step: 474/464, loss: 0.20674486458301544 2023-01-24 03:03:15.532403: step: 476/464, loss: 0.22556279599666595 2023-01-24 03:03:16.247491: step: 478/464, loss: 0.3144245743751526 2023-01-24 03:03:16.959132: step: 480/464, loss: 0.44248825311660767 2023-01-24 03:03:17.798787: step: 482/464, loss: 0.2980799674987793 2023-01-24 03:03:18.505550: step: 484/464, loss: 0.6016811728477478 2023-01-24 03:03:19.226450: step: 486/464, loss: 0.4583924412727356 2023-01-24 03:03:19.976202: step: 488/464, loss: 0.16038653254508972 2023-01-24 03:03:20.719451: step: 490/464, loss: 0.2621766924858093 2023-01-24 03:03:21.382102: step: 492/464, loss: 0.11310744285583496 2023-01-24 03:03:22.099808: step: 494/464, loss: 0.27761155366897583 2023-01-24 03:03:22.827750: step: 496/464, loss: 0.18872065842151642 2023-01-24 03:03:23.600882: step: 498/464, loss: 0.15203912556171417 2023-01-24 03:03:24.364082: step: 500/464, loss: 0.505059003829956 2023-01-24 03:03:25.074938: step: 502/464, loss: 0.1673978865146637 2023-01-24 03:03:25.734990: step: 504/464, loss: 0.11424268037080765 2023-01-24 03:03:26.477590: step: 506/464, loss: 0.503311812877655 2023-01-24 03:03:27.140761: step: 508/464, loss: 1.2228924036026 2023-01-24 03:03:27.927021: step: 510/464, loss: 0.29327163100242615 2023-01-24 03:03:28.602110: step: 512/464, loss: 0.1813446283340454 2023-01-24 03:03:29.335538: step: 514/464, loss: 0.16473616659641266 2023-01-24 03:03:30.060167: step: 516/464, loss: 0.4814782738685608 2023-01-24 03:03:30.840625: step: 518/464, loss: 0.24993804097175598 2023-01-24 03:03:31.552063: step: 520/464, loss: 0.15137940645217896 2023-01-24 03:03:32.316015: step: 522/464, loss: 0.12308914214372635 2023-01-24 03:03:33.047413: step: 524/464, loss: 0.766007661819458 2023-01-24 03:03:33.764189: step: 526/464, loss: 0.42049071192741394 2023-01-24 03:03:34.534472: step: 528/464, loss: 0.11827506870031357 2023-01-24 03:03:35.238574: step: 530/464, loss: 0.06717483699321747 2023-01-24 03:03:35.960041: step: 532/464, loss: 0.2546265721321106 2023-01-24 03:03:36.862749: step: 534/464, loss: 0.23186105489730835 2023-01-24 03:03:37.657757: step: 536/464, loss: 0.17715178430080414 2023-01-24 03:03:38.387685: step: 538/464, loss: 0.37223905324935913 2023-01-24 03:03:39.136372: step: 540/464, loss: 0.1452968567609787 2023-01-24 03:03:39.851544: step: 542/464, loss: 0.1264655441045761 2023-01-24 03:03:40.606441: step: 544/464, loss: 0.240777388215065 2023-01-24 03:03:41.394667: step: 546/464, loss: 0.3481501340866089 2023-01-24 03:03:42.177774: step: 548/464, loss: 0.18340934813022614 2023-01-24 03:03:42.967183: step: 550/464, loss: 0.24937808513641357 2023-01-24 03:03:43.665763: step: 552/464, loss: 0.08746077120304108 2023-01-24 03:03:44.343706: step: 554/464, loss: 0.13055148720741272 2023-01-24 03:03:45.136414: step: 556/464, loss: 0.5113113522529602 2023-01-24 03:03:45.958748: step: 558/464, loss: 0.4550716280937195 2023-01-24 03:03:46.850712: step: 560/464, loss: 0.6709883213043213 2023-01-24 03:03:47.586577: step: 562/464, loss: 0.26181280612945557 2023-01-24 03:03:48.301958: step: 564/464, loss: 0.09431593120098114 2023-01-24 03:03:48.993975: step: 566/464, loss: 0.0892590880393982 2023-01-24 03:03:49.759840: step: 568/464, loss: 0.15265516936779022 2023-01-24 03:03:50.544121: step: 570/464, loss: 0.2754208743572235 2023-01-24 03:03:51.277549: step: 572/464, loss: 0.23584358394145966 2023-01-24 03:03:52.010998: step: 574/464, loss: 0.2605666518211365 2023-01-24 03:03:52.732567: step: 576/464, loss: 0.407966673374176 2023-01-24 03:03:53.409304: step: 578/464, loss: 0.2766265869140625 2023-01-24 03:03:54.132914: step: 580/464, loss: 0.17403101921081543 2023-01-24 03:03:54.871387: step: 582/464, loss: 0.3033926486968994 2023-01-24 03:03:55.639356: step: 584/464, loss: 0.6713306307792664 2023-01-24 03:03:56.343114: step: 586/464, loss: 0.14127404987812042 2023-01-24 03:03:56.991318: step: 588/464, loss: 0.9483509063720703 2023-01-24 03:03:57.788867: step: 590/464, loss: 0.41415512561798096 2023-01-24 03:03:58.554719: step: 592/464, loss: 0.12702539563179016 2023-01-24 03:03:59.335519: step: 594/464, loss: 0.43179914355278015 2023-01-24 03:04:00.088244: step: 596/464, loss: 0.23740844428539276 2023-01-24 03:04:00.857015: step: 598/464, loss: 0.15830759704113007 2023-01-24 03:04:01.666900: step: 600/464, loss: 0.13203541934490204 2023-01-24 03:04:02.416762: step: 602/464, loss: 0.3251971900463104 2023-01-24 03:04:03.111019: step: 604/464, loss: 0.1645713448524475 2023-01-24 03:04:03.811171: step: 606/464, loss: 0.2223944514989853 2023-01-24 03:04:04.572878: step: 608/464, loss: 0.2733703851699829 2023-01-24 03:04:05.314473: step: 610/464, loss: 0.7987146377563477 2023-01-24 03:04:06.028154: step: 612/464, loss: 0.5771629810333252 2023-01-24 03:04:06.708137: step: 614/464, loss: 0.31207334995269775 2023-01-24 03:04:07.420095: step: 616/464, loss: 0.48628270626068115 2023-01-24 03:04:08.140961: step: 618/464, loss: 0.22458508610725403 2023-01-24 03:04:08.855988: step: 620/464, loss: 0.16614551842212677 2023-01-24 03:04:09.551315: step: 622/464, loss: 0.14270026981830597 2023-01-24 03:04:10.293273: step: 624/464, loss: 0.28465527296066284 2023-01-24 03:04:11.092652: step: 626/464, loss: 1.9318636655807495 2023-01-24 03:04:11.795849: step: 628/464, loss: 0.8859139680862427 2023-01-24 03:04:12.594255: step: 630/464, loss: 0.18957673013210297 2023-01-24 03:04:13.253196: step: 632/464, loss: 0.08624786138534546 2023-01-24 03:04:13.951789: step: 634/464, loss: 0.18184931576251984 2023-01-24 03:04:14.736757: step: 636/464, loss: 0.20926685631275177 2023-01-24 03:04:15.542529: step: 638/464, loss: 0.04862609878182411 2023-01-24 03:04:16.235938: step: 640/464, loss: 0.3046737313270569 2023-01-24 03:04:16.965823: step: 642/464, loss: 0.03974409028887749 2023-01-24 03:04:17.769023: step: 644/464, loss: 0.16337545216083527 2023-01-24 03:04:18.453131: step: 646/464, loss: 0.5412200689315796 2023-01-24 03:04:19.136245: step: 648/464, loss: 0.2021602839231491 2023-01-24 03:04:20.522593: step: 650/464, loss: 0.17793788015842438 2023-01-24 03:04:21.355887: step: 652/464, loss: 0.21241311728954315 2023-01-24 03:04:22.074211: step: 654/464, loss: 1.645676851272583 2023-01-24 03:04:22.846494: step: 656/464, loss: 0.188690647482872 2023-01-24 03:04:23.556242: step: 658/464, loss: 0.09510191529989243 2023-01-24 03:04:24.281338: step: 660/464, loss: 0.11184026300907135 2023-01-24 03:04:25.072192: step: 662/464, loss: 0.4701847434043884 2023-01-24 03:04:25.795221: step: 664/464, loss: 0.09061688184738159 2023-01-24 03:04:26.532212: step: 666/464, loss: 0.26259922981262207 2023-01-24 03:04:27.201727: step: 668/464, loss: 0.12384660542011261 2023-01-24 03:04:27.925018: step: 670/464, loss: 0.17336471378803253 2023-01-24 03:04:28.691226: step: 672/464, loss: 0.2143736183643341 2023-01-24 03:04:29.472865: step: 674/464, loss: 0.08235962688922882 2023-01-24 03:04:30.147095: step: 676/464, loss: 0.8620685338973999 2023-01-24 03:04:30.901226: step: 678/464, loss: 0.27369424700737 2023-01-24 03:04:31.637004: step: 680/464, loss: 0.251381516456604 2023-01-24 03:04:32.309581: step: 682/464, loss: 0.3370826244354248 2023-01-24 03:04:33.049413: step: 684/464, loss: 0.22877030074596405 2023-01-24 03:04:33.734010: step: 686/464, loss: 0.11255232989788055 2023-01-24 03:04:34.439950: step: 688/464, loss: 0.13139210641384125 2023-01-24 03:04:35.205025: step: 690/464, loss: 0.32984012365341187 2023-01-24 03:04:35.922471: step: 692/464, loss: 0.11645828187465668 2023-01-24 03:04:36.603374: step: 694/464, loss: 0.315295934677124 2023-01-24 03:04:37.319959: step: 696/464, loss: 0.1865382045507431 2023-01-24 03:04:38.120513: step: 698/464, loss: 0.06216064468026161 2023-01-24 03:04:38.825413: step: 700/464, loss: 0.0807647556066513 2023-01-24 03:04:39.510368: step: 702/464, loss: 0.10790904611349106 2023-01-24 03:04:40.298282: step: 704/464, loss: 0.2581593096256256 2023-01-24 03:04:41.006825: step: 706/464, loss: 0.15664775669574738 2023-01-24 03:04:41.743471: step: 708/464, loss: 3.8456437587738037 2023-01-24 03:04:42.467336: step: 710/464, loss: 0.2697579264640808 2023-01-24 03:04:43.223142: step: 712/464, loss: 0.7548564672470093 2023-01-24 03:04:44.009886: step: 714/464, loss: 0.11732342094182968 2023-01-24 03:04:44.701810: step: 716/464, loss: 0.3719097971916199 2023-01-24 03:04:45.446332: step: 718/464, loss: 0.40137097239494324 2023-01-24 03:04:46.209751: step: 720/464, loss: 0.09929775446653366 2023-01-24 03:04:46.937490: step: 722/464, loss: 1.2608824968338013 2023-01-24 03:04:47.744795: step: 724/464, loss: 0.2232675701379776 2023-01-24 03:04:48.610821: step: 726/464, loss: 0.2738083600997925 2023-01-24 03:04:49.373582: step: 728/464, loss: 0.5997371077537537 2023-01-24 03:04:50.042351: step: 730/464, loss: 0.38297492265701294 2023-01-24 03:04:50.888011: step: 732/464, loss: 0.3900352716445923 2023-01-24 03:04:51.594171: step: 734/464, loss: 0.18780764937400818 2023-01-24 03:04:52.373888: step: 736/464, loss: 0.09367948770523071 2023-01-24 03:04:53.117440: step: 738/464, loss: 0.24276983737945557 2023-01-24 03:04:53.859402: step: 740/464, loss: 0.6175634860992432 2023-01-24 03:04:54.675163: step: 742/464, loss: 0.10526596754789352 2023-01-24 03:04:55.411713: step: 744/464, loss: 0.45263907313346863 2023-01-24 03:04:56.111617: step: 746/464, loss: 0.1915971040725708 2023-01-24 03:04:56.844678: step: 748/464, loss: 0.15978604555130005 2023-01-24 03:04:57.668527: step: 750/464, loss: 0.14335952699184418 2023-01-24 03:04:58.385591: step: 752/464, loss: 0.2819792330265045 2023-01-24 03:04:59.244756: step: 754/464, loss: 0.6500842571258545 2023-01-24 03:05:00.025485: step: 756/464, loss: 0.303840696811676 2023-01-24 03:05:00.749460: step: 758/464, loss: 0.18252825736999512 2023-01-24 03:05:01.526424: step: 760/464, loss: 0.2440166026353836 2023-01-24 03:05:02.203921: step: 762/464, loss: 0.18778173625469208 2023-01-24 03:05:03.012041: step: 764/464, loss: 0.24666345119476318 2023-01-24 03:05:03.791731: step: 766/464, loss: 0.2585979700088501 2023-01-24 03:05:04.539196: step: 768/464, loss: 0.7861073017120361 2023-01-24 03:05:05.288347: step: 770/464, loss: 0.03258257359266281 2023-01-24 03:05:06.109523: step: 772/464, loss: 0.2287474125623703 2023-01-24 03:05:06.877498: step: 774/464, loss: 0.21275871992111206 2023-01-24 03:05:07.596102: step: 776/464, loss: 0.08996638655662537 2023-01-24 03:05:08.293030: step: 778/464, loss: 0.22523269057273865 2023-01-24 03:05:09.004129: step: 780/464, loss: 0.12908609211444855 2023-01-24 03:05:09.768630: step: 782/464, loss: 0.4444272518157959 2023-01-24 03:05:10.531470: step: 784/464, loss: 0.24299582839012146 2023-01-24 03:05:11.249317: step: 786/464, loss: 0.11312457919120789 2023-01-24 03:05:12.031418: step: 788/464, loss: 0.09250324219465256 2023-01-24 03:05:12.863472: step: 790/464, loss: 0.794976532459259 2023-01-24 03:05:13.616071: step: 792/464, loss: 0.3445376753807068 2023-01-24 03:05:14.341528: step: 794/464, loss: 0.23356936872005463 2023-01-24 03:05:15.095772: step: 796/464, loss: 0.6917689442634583 2023-01-24 03:05:15.837252: step: 798/464, loss: 0.13185939192771912 2023-01-24 03:05:16.593360: step: 800/464, loss: 2.712449312210083 2023-01-24 03:05:17.309609: step: 802/464, loss: 0.8467655181884766 2023-01-24 03:05:18.043190: step: 804/464, loss: 0.07459612935781479 2023-01-24 03:05:18.803105: step: 806/464, loss: 0.09105648845434189 2023-01-24 03:05:19.489569: step: 808/464, loss: 0.21356217563152313 2023-01-24 03:05:20.194945: step: 810/464, loss: 0.2339024692773819 2023-01-24 03:05:20.982580: step: 812/464, loss: 0.23405727744102478 2023-01-24 03:05:21.748744: step: 814/464, loss: 0.18663422763347626 2023-01-24 03:05:22.474445: step: 816/464, loss: 0.18019157648086548 2023-01-24 03:05:23.166939: step: 818/464, loss: 0.17258216440677643 2023-01-24 03:05:24.025163: step: 820/464, loss: 0.2562738358974457 2023-01-24 03:05:24.706683: step: 822/464, loss: 0.20118999481201172 2023-01-24 03:05:25.494918: step: 824/464, loss: 0.4466688632965088 2023-01-24 03:05:26.301636: step: 826/464, loss: 0.2582007646560669 2023-01-24 03:05:27.068962: step: 828/464, loss: 0.3365974724292755 2023-01-24 03:05:27.824043: step: 830/464, loss: 0.2806304097175598 2023-01-24 03:05:28.559111: step: 832/464, loss: 1.128015160560608 2023-01-24 03:05:29.234338: step: 834/464, loss: 0.07908018678426743 2023-01-24 03:05:29.953304: step: 836/464, loss: 0.22766578197479248 2023-01-24 03:05:30.727802: step: 838/464, loss: 0.25081801414489746 2023-01-24 03:05:31.531941: step: 840/464, loss: 0.14951854944229126 2023-01-24 03:05:32.309851: step: 842/464, loss: 0.1881357729434967 2023-01-24 03:05:33.055772: step: 844/464, loss: 0.13177481293678284 2023-01-24 03:05:33.821873: step: 846/464, loss: 0.24283462762832642 2023-01-24 03:05:34.583266: step: 848/464, loss: 0.18885497748851776 2023-01-24 03:05:35.385661: step: 850/464, loss: 0.17050006985664368 2023-01-24 03:05:36.085516: step: 852/464, loss: 0.33716070652008057 2023-01-24 03:05:36.973526: step: 854/464, loss: 0.3635439872741699 2023-01-24 03:05:37.755848: step: 856/464, loss: 0.19956035912036896 2023-01-24 03:05:38.431873: step: 858/464, loss: 0.10865310579538345 2023-01-24 03:05:39.135158: step: 860/464, loss: 0.23677769303321838 2023-01-24 03:05:39.865382: step: 862/464, loss: 3.2011382579803467 2023-01-24 03:05:40.679848: step: 864/464, loss: 0.9883920550346375 2023-01-24 03:05:41.414115: step: 866/464, loss: 0.24623115360736847 2023-01-24 03:05:42.117942: step: 868/464, loss: 0.2305896133184433 2023-01-24 03:05:42.852796: step: 870/464, loss: 0.21101327240467072 2023-01-24 03:05:43.640369: step: 872/464, loss: 0.23975980281829834 2023-01-24 03:05:44.428502: step: 874/464, loss: 0.5831571221351624 2023-01-24 03:05:45.208653: step: 876/464, loss: 0.39869871735572815 2023-01-24 03:05:46.000766: step: 878/464, loss: 0.18923348188400269 2023-01-24 03:05:46.682083: step: 880/464, loss: 0.14323951303958893 2023-01-24 03:05:47.485141: step: 882/464, loss: 4.778229713439941 2023-01-24 03:05:48.274744: step: 884/464, loss: 0.16568735241889954 2023-01-24 03:05:49.002143: step: 886/464, loss: 1.808220624923706 2023-01-24 03:05:49.697740: step: 888/464, loss: 0.0770537406206131 2023-01-24 03:05:50.510930: step: 890/464, loss: 0.5956398248672485 2023-01-24 03:05:51.289768: step: 892/464, loss: 0.06159377470612526 2023-01-24 03:05:51.990948: step: 894/464, loss: 0.10025126487016678 2023-01-24 03:05:52.726353: step: 896/464, loss: 0.27834150195121765 2023-01-24 03:05:53.458054: step: 898/464, loss: 0.25632181763648987 2023-01-24 03:05:54.152347: step: 900/464, loss: 0.8219600319862366 2023-01-24 03:05:54.908357: step: 902/464, loss: 0.05821787193417549 2023-01-24 03:05:55.622166: step: 904/464, loss: 0.08082897216081619 2023-01-24 03:05:56.348921: step: 906/464, loss: 0.34297946095466614 2023-01-24 03:05:57.148134: step: 908/464, loss: 0.11477062106132507 2023-01-24 03:05:57.846403: step: 910/464, loss: 0.6422632336616516 2023-01-24 03:05:58.582889: step: 912/464, loss: 1.6362946033477783 2023-01-24 03:05:59.357558: step: 914/464, loss: 0.8962820768356323 2023-01-24 03:06:00.078529: step: 916/464, loss: 0.2309628278017044 2023-01-24 03:06:00.939188: step: 918/464, loss: 0.3708310127258301 2023-01-24 03:06:01.682941: step: 920/464, loss: 0.24511513113975525 2023-01-24 03:06:02.370756: step: 922/464, loss: 0.2654450833797455 2023-01-24 03:06:03.069640: step: 924/464, loss: 0.11861108988523483 2023-01-24 03:06:03.809442: step: 926/464, loss: 0.18497033417224884 2023-01-24 03:06:04.695718: step: 928/464, loss: 0.39454326033592224 2023-01-24 03:06:05.380755: step: 930/464, loss: 0.6818116307258606 ================================================== Loss: 0.360 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3349100693505124, 'r': 0.32476127937019383, 'f1': 0.3297576067451199}, 'combined': 0.24297928918061465, 'epoch': 10} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3011358398612125, 'r': 0.243465205179006, 'f1': 0.2692470009239444}, 'combined': 0.16721655846855493, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.296640344347024, 'r': 0.2949548878450523, 'f1': 0.29579521516085017}, 'combined': 0.21795436906588958, 'epoch': 10} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.30592372711434745, 'r': 0.24854415342063965, 'f1': 0.2742649536112287}, 'combined': 0.17033297119013152, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3167899906109915, 'r': 0.3083902560114576, 'f1': 0.31253369515172674}, 'combined': 0.23028798590127234, 'epoch': 10} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3154232949630762, 'r': 0.2510955885026262, 'f1': 0.2796072653101103}, 'combined': 0.17365082792943695, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2708333333333333, 'r': 0.2785714285714286, 'f1': 0.27464788732394363}, 'combined': 0.18309859154929575, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2631578947368421, 'r': 0.43478260869565216, 'f1': 0.32786885245901637}, 'combined': 0.16393442622950818, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5096153846153846, 'r': 0.22844827586206898, 'f1': 0.31547619047619047}, 'combined': 0.2103174603174603, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3430680085139636, 'r': 0.3183306568564102, 'f1': 0.330236724730961}, 'combined': 0.24333232348597125, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32349203894432277, 'r': 0.26013751499676996, 'f1': 0.2883761268220772}, 'combined': 0.1790967524473953, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36041666666666666, 'r': 0.30892857142857144, 'f1': 0.33269230769230773}, 'combined': 0.2217948717948718, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3331713050622994, 'r': 0.30345773516110763, 'f1': 0.3176211051239399}, 'combined': 0.23403660377553462, 'epoch': 9} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.33966819343217763, 'r': 0.26891795768273097, 'f1': 0.3001805962893735}, 'combined': 0.18642794927445303, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4791666666666667, 'r': 0.2974137931034483, 'f1': 0.36702127659574474}, 'combined': 0.2446808510638298, 'epoch': 9} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:08:45.066209: step: 2/464, loss: 0.43263551592826843 2023-01-24 03:08:45.855999: step: 4/464, loss: 0.5643942356109619 2023-01-24 03:08:46.604997: step: 6/464, loss: 0.11672255396842957 2023-01-24 03:08:47.305184: step: 8/464, loss: 0.06750161945819855 2023-01-24 03:08:48.075272: step: 10/464, loss: 0.20070071518421173 2023-01-24 03:08:48.782326: step: 12/464, loss: 0.41763728857040405 2023-01-24 03:08:49.646581: step: 14/464, loss: 0.11456458270549774 2023-01-24 03:08:50.420334: step: 16/464, loss: 0.38342341780662537 2023-01-24 03:08:51.226047: step: 18/464, loss: 0.35928112268447876 2023-01-24 03:08:52.125912: step: 20/464, loss: 0.2673722207546234 2023-01-24 03:08:52.771381: step: 22/464, loss: 0.7194101810455322 2023-01-24 03:08:53.459459: step: 24/464, loss: 0.27697527408599854 2023-01-24 03:08:54.252364: step: 26/464, loss: 0.15484285354614258 2023-01-24 03:08:54.891638: step: 28/464, loss: 0.35449036955833435 2023-01-24 03:08:55.617417: step: 30/464, loss: 0.11388775706291199 2023-01-24 03:08:56.335140: step: 32/464, loss: 0.23958417773246765 2023-01-24 03:08:57.057381: step: 34/464, loss: 0.07231630384922028 2023-01-24 03:08:57.777627: step: 36/464, loss: 0.14240187406539917 2023-01-24 03:08:58.409402: step: 38/464, loss: 0.8622165322303772 2023-01-24 03:08:59.139955: step: 40/464, loss: 0.19801019132137299 2023-01-24 03:08:59.827345: step: 42/464, loss: 0.22898069024085999 2023-01-24 03:09:00.570216: step: 44/464, loss: 0.30996227264404297 2023-01-24 03:09:01.371358: step: 46/464, loss: 0.11852078139781952 2023-01-24 03:09:02.078438: step: 48/464, loss: 0.127349853515625 2023-01-24 03:09:02.908030: step: 50/464, loss: 0.2572477161884308 2023-01-24 03:09:03.648466: step: 52/464, loss: 0.5002968907356262 2023-01-24 03:09:04.475041: step: 54/464, loss: 0.17681193351745605 2023-01-24 03:09:05.224060: step: 56/464, loss: 0.07735830545425415 2023-01-24 03:09:05.930401: step: 58/464, loss: 0.3611297905445099 2023-01-24 03:09:06.775110: step: 60/464, loss: 0.19696767628192902 2023-01-24 03:09:07.470145: step: 62/464, loss: 0.13411551713943481 2023-01-24 03:09:08.195720: step: 64/464, loss: 0.09733086079359055 2023-01-24 03:09:08.941837: step: 66/464, loss: 0.1393062323331833 2023-01-24 03:09:09.706222: step: 68/464, loss: 0.24694481492042542 2023-01-24 03:09:10.554245: step: 70/464, loss: 3.0466761589050293 2023-01-24 03:09:11.326828: step: 72/464, loss: 0.12388309836387634 2023-01-24 03:09:12.033670: step: 74/464, loss: 0.07793045043945312 2023-01-24 03:09:12.754887: step: 76/464, loss: 1.1562849283218384 2023-01-24 03:09:13.470048: step: 78/464, loss: 0.528100311756134 2023-01-24 03:09:14.157131: step: 80/464, loss: 0.08475414663553238 2023-01-24 03:09:14.931300: step: 82/464, loss: 0.4272237718105316 2023-01-24 03:09:15.718482: step: 84/464, loss: 1.6456173658370972 2023-01-24 03:09:16.512258: step: 86/464, loss: 0.19112280011177063 2023-01-24 03:09:17.177876: step: 88/464, loss: 0.1366272270679474 2023-01-24 03:09:17.926067: step: 90/464, loss: 1.2250635623931885 2023-01-24 03:09:18.589324: step: 92/464, loss: 0.6903375387191772 2023-01-24 03:09:19.316253: step: 94/464, loss: 0.19363798201084137 2023-01-24 03:09:20.123723: step: 96/464, loss: 0.1598142832517624 2023-01-24 03:09:20.849014: step: 98/464, loss: 0.0631617084145546 2023-01-24 03:09:21.606602: step: 100/464, loss: 0.1760246604681015 2023-01-24 03:09:22.280769: step: 102/464, loss: 0.051239561289548874 2023-01-24 03:09:23.028538: step: 104/464, loss: 0.27649593353271484 2023-01-24 03:09:23.794484: step: 106/464, loss: 0.4610246419906616 2023-01-24 03:09:24.595606: step: 108/464, loss: 0.23989155888557434 2023-01-24 03:09:25.360570: step: 110/464, loss: 0.05748572573065758 2023-01-24 03:09:26.181520: step: 112/464, loss: 0.18556129932403564 2023-01-24 03:09:26.944577: step: 114/464, loss: 0.13368819653987885 2023-01-24 03:09:27.623082: step: 116/464, loss: 0.10952291637659073 2023-01-24 03:09:28.334783: step: 118/464, loss: 0.07338271290063858 2023-01-24 03:09:29.125646: step: 120/464, loss: 0.21023805439472198 2023-01-24 03:09:29.891622: step: 122/464, loss: 0.12759849429130554 2023-01-24 03:09:30.655767: step: 124/464, loss: 0.38608530163764954 2023-01-24 03:09:31.532183: step: 126/464, loss: 0.046279970556497574 2023-01-24 03:09:32.424819: step: 128/464, loss: 0.6578817367553711 2023-01-24 03:09:33.129887: step: 130/464, loss: 0.06236157938838005 2023-01-24 03:09:33.832453: step: 132/464, loss: 0.2531399130821228 2023-01-24 03:09:34.592483: step: 134/464, loss: 0.14961302280426025 2023-01-24 03:09:35.294921: step: 136/464, loss: 0.08229457587003708 2023-01-24 03:09:36.035502: step: 138/464, loss: 0.14706575870513916 2023-01-24 03:09:36.763278: step: 140/464, loss: 0.7278061509132385 2023-01-24 03:09:37.529947: step: 142/464, loss: 0.18592041730880737 2023-01-24 03:09:38.226539: step: 144/464, loss: 0.1443493366241455 2023-01-24 03:09:38.972282: step: 146/464, loss: 0.1252126544713974 2023-01-24 03:09:39.787168: step: 148/464, loss: 0.4083552062511444 2023-01-24 03:09:40.550000: step: 150/464, loss: 2.9126224517822266 2023-01-24 03:09:41.264430: step: 152/464, loss: 0.29100507497787476 2023-01-24 03:09:42.015058: step: 154/464, loss: 7.7827019691467285 2023-01-24 03:09:42.824268: step: 156/464, loss: 0.2517273724079132 2023-01-24 03:09:43.568404: step: 158/464, loss: 0.22927714884281158 2023-01-24 03:09:44.302710: step: 160/464, loss: 0.13074521720409393 2023-01-24 03:09:44.944913: step: 162/464, loss: 0.4229300916194916 2023-01-24 03:09:45.695015: step: 164/464, loss: 0.1576210856437683 2023-01-24 03:09:46.409716: step: 166/464, loss: 0.12701904773712158 2023-01-24 03:09:47.127609: step: 168/464, loss: 0.4471745193004608 2023-01-24 03:09:47.856206: step: 170/464, loss: 0.2795443832874298 2023-01-24 03:09:48.508880: step: 172/464, loss: 0.04011191427707672 2023-01-24 03:09:49.203876: step: 174/464, loss: 0.19026388227939606 2023-01-24 03:09:49.843791: step: 176/464, loss: 0.01748960092663765 2023-01-24 03:09:50.559860: step: 178/464, loss: 0.16954189538955688 2023-01-24 03:09:51.252644: step: 180/464, loss: 0.4888118505477905 2023-01-24 03:09:52.002420: step: 182/464, loss: 0.05453702062368393 2023-01-24 03:09:52.839016: step: 184/464, loss: 0.20064681768417358 2023-01-24 03:09:53.532900: step: 186/464, loss: 0.2641415596008301 2023-01-24 03:09:54.275225: step: 188/464, loss: 0.2177356630563736 2023-01-24 03:09:54.967005: step: 190/464, loss: 0.7603049874305725 2023-01-24 03:09:55.729647: step: 192/464, loss: 0.13496249914169312 2023-01-24 03:09:56.397587: step: 194/464, loss: 0.10651998221874237 2023-01-24 03:09:57.151109: step: 196/464, loss: 0.08830710500478745 2023-01-24 03:09:57.879121: step: 198/464, loss: 0.30557310581207275 2023-01-24 03:09:58.645408: step: 200/464, loss: 0.08114877343177795 2023-01-24 03:09:59.442902: step: 202/464, loss: 0.25547945499420166 2023-01-24 03:10:00.154988: step: 204/464, loss: 0.2108420878648758 2023-01-24 03:10:00.891495: step: 206/464, loss: 0.165110781788826 2023-01-24 03:10:01.709172: step: 208/464, loss: 2.033724069595337 2023-01-24 03:10:02.495235: step: 210/464, loss: 0.06678891181945801 2023-01-24 03:10:03.278160: step: 212/464, loss: 0.09556983411312103 2023-01-24 03:10:03.982317: step: 214/464, loss: 0.17456085979938507 2023-01-24 03:10:04.786792: step: 216/464, loss: 0.06433313339948654 2023-01-24 03:10:05.542404: step: 218/464, loss: 0.11081457883119583 2023-01-24 03:10:06.262482: step: 220/464, loss: 0.2911708950996399 2023-01-24 03:10:07.089601: step: 222/464, loss: 0.13677887618541718 2023-01-24 03:10:07.845837: step: 224/464, loss: 0.16569681465625763 2023-01-24 03:10:08.589675: step: 226/464, loss: 0.13031910359859467 2023-01-24 03:10:09.333697: step: 228/464, loss: 0.8445505499839783 2023-01-24 03:10:10.044588: step: 230/464, loss: 0.09451509267091751 2023-01-24 03:10:10.828471: step: 232/464, loss: 0.2014075368642807 2023-01-24 03:10:11.485969: step: 234/464, loss: 0.399213582277298 2023-01-24 03:10:12.227695: step: 236/464, loss: 0.08709046989679337 2023-01-24 03:10:12.970638: step: 238/464, loss: 0.27674511075019836 2023-01-24 03:10:13.734292: step: 240/464, loss: 0.8091335296630859 2023-01-24 03:10:14.477296: step: 242/464, loss: 0.04643746092915535 2023-01-24 03:10:15.179313: step: 244/464, loss: 0.20637056231498718 2023-01-24 03:10:15.909427: step: 246/464, loss: 0.1723395735025406 2023-01-24 03:10:16.622717: step: 248/464, loss: 0.21998530626296997 2023-01-24 03:10:17.421478: step: 250/464, loss: 0.37970659136772156 2023-01-24 03:10:18.112573: step: 252/464, loss: 0.11584115773439407 2023-01-24 03:10:18.913979: step: 254/464, loss: 1.5689878463745117 2023-01-24 03:10:19.628253: step: 256/464, loss: 0.3336993157863617 2023-01-24 03:10:20.374899: step: 258/464, loss: 0.390744686126709 2023-01-24 03:10:21.094709: step: 260/464, loss: 0.19020554423332214 2023-01-24 03:10:21.885061: step: 262/464, loss: 0.1410345435142517 2023-01-24 03:10:22.681912: step: 264/464, loss: 0.3876209259033203 2023-01-24 03:10:23.446455: step: 266/464, loss: 0.14369329810142517 2023-01-24 03:10:24.254277: step: 268/464, loss: 0.1893119215965271 2023-01-24 03:10:24.960802: step: 270/464, loss: 0.17482930421829224 2023-01-24 03:10:25.727747: step: 272/464, loss: 0.24558770656585693 2023-01-24 03:10:26.474523: step: 274/464, loss: 0.42578569054603577 2023-01-24 03:10:27.238787: step: 276/464, loss: 0.14443501830101013 2023-01-24 03:10:28.027692: step: 278/464, loss: 0.1805008202791214 2023-01-24 03:10:28.800227: step: 280/464, loss: 0.8799417018890381 2023-01-24 03:10:29.552326: step: 282/464, loss: 0.3285672664642334 2023-01-24 03:10:30.279856: step: 284/464, loss: 0.34625038504600525 2023-01-24 03:10:30.956230: step: 286/464, loss: 0.10508932918310165 2023-01-24 03:10:31.671424: step: 288/464, loss: 0.15506626665592194 2023-01-24 03:10:32.372236: step: 290/464, loss: 0.9398106932640076 2023-01-24 03:10:33.205560: step: 292/464, loss: 0.21145036816596985 2023-01-24 03:10:33.943570: step: 294/464, loss: 0.08800602704286575 2023-01-24 03:10:34.641272: step: 296/464, loss: 0.9233723878860474 2023-01-24 03:10:35.361386: step: 298/464, loss: 0.08186593651771545 2023-01-24 03:10:36.153266: step: 300/464, loss: 0.1038471981883049 2023-01-24 03:10:36.861867: step: 302/464, loss: 0.18606869876384735 2023-01-24 03:10:37.581276: step: 304/464, loss: 0.20838655531406403 2023-01-24 03:10:38.346804: step: 306/464, loss: 0.5244540572166443 2023-01-24 03:10:39.108112: step: 308/464, loss: 0.47285252809524536 2023-01-24 03:10:39.831256: step: 310/464, loss: 0.18951445817947388 2023-01-24 03:10:40.724276: step: 312/464, loss: 0.36259371042251587 2023-01-24 03:10:41.445155: step: 314/464, loss: 0.18875542283058167 2023-01-24 03:10:42.244587: step: 316/464, loss: 0.1639244109392166 2023-01-24 03:10:43.015951: step: 318/464, loss: 0.2972382605075836 2023-01-24 03:10:43.725244: step: 320/464, loss: 0.1820705235004425 2023-01-24 03:10:44.390883: step: 322/464, loss: 0.13749344646930695 2023-01-24 03:10:45.043236: step: 324/464, loss: 0.22790080308914185 2023-01-24 03:10:45.823144: step: 326/464, loss: 0.2505926191806793 2023-01-24 03:10:46.598914: step: 328/464, loss: 0.16676443815231323 2023-01-24 03:10:47.351942: step: 330/464, loss: 2.2576770782470703 2023-01-24 03:10:48.140438: step: 332/464, loss: 0.2201552540063858 2023-01-24 03:10:48.915565: step: 334/464, loss: 0.1650933176279068 2023-01-24 03:10:49.781505: step: 336/464, loss: 0.22324177622795105 2023-01-24 03:10:50.524382: step: 338/464, loss: 1.0308393239974976 2023-01-24 03:10:51.241957: step: 340/464, loss: 0.4288811981678009 2023-01-24 03:10:51.963439: step: 342/464, loss: 0.38893359899520874 2023-01-24 03:10:52.744940: step: 344/464, loss: 0.42521703243255615 2023-01-24 03:10:53.484023: step: 346/464, loss: 0.3413054943084717 2023-01-24 03:10:54.181200: step: 348/464, loss: 0.3558991849422455 2023-01-24 03:10:54.864682: step: 350/464, loss: 0.370784193277359 2023-01-24 03:10:55.547931: step: 352/464, loss: 0.12938739359378815 2023-01-24 03:10:56.220751: step: 354/464, loss: 0.17673499882221222 2023-01-24 03:10:56.939958: step: 356/464, loss: 0.07088849693536758 2023-01-24 03:10:57.654312: step: 358/464, loss: 0.2451186180114746 2023-01-24 03:10:58.357365: step: 360/464, loss: 0.16760440170764923 2023-01-24 03:10:59.110301: step: 362/464, loss: 0.46949276328086853 2023-01-24 03:10:59.894930: step: 364/464, loss: 0.615884006023407 2023-01-24 03:11:00.630734: step: 366/464, loss: 0.12176218628883362 2023-01-24 03:11:01.358405: step: 368/464, loss: 0.17412437498569489 2023-01-24 03:11:02.084339: step: 370/464, loss: 0.2818848192691803 2023-01-24 03:11:02.783541: step: 372/464, loss: 0.04271039739251137 2023-01-24 03:11:03.474336: step: 374/464, loss: 0.366900771856308 2023-01-24 03:11:04.216290: step: 376/464, loss: 0.6440262198448181 2023-01-24 03:11:05.081979: step: 378/464, loss: 0.08439314365386963 2023-01-24 03:11:05.821114: step: 380/464, loss: 0.7600069642066956 2023-01-24 03:11:06.446426: step: 382/464, loss: 0.40017855167388916 2023-01-24 03:11:07.200905: step: 384/464, loss: 0.11778063327074051 2023-01-24 03:11:07.911458: step: 386/464, loss: 0.021956732496619225 2023-01-24 03:11:08.663483: step: 388/464, loss: 0.12280718237161636 2023-01-24 03:11:09.401145: step: 390/464, loss: 0.4814095199108124 2023-01-24 03:11:10.127134: step: 392/464, loss: 0.26054421067237854 2023-01-24 03:11:10.889678: step: 394/464, loss: 0.11960306018590927 2023-01-24 03:11:11.812857: step: 396/464, loss: 0.5592536330223083 2023-01-24 03:11:12.622128: step: 398/464, loss: 0.14069019258022308 2023-01-24 03:11:13.342768: step: 400/464, loss: 0.3604012727737427 2023-01-24 03:11:14.077032: step: 402/464, loss: 0.09957307577133179 2023-01-24 03:11:14.774301: step: 404/464, loss: 0.3808475434780121 2023-01-24 03:11:15.521665: step: 406/464, loss: 0.03552810475230217 2023-01-24 03:11:16.271087: step: 408/464, loss: 0.1248745247721672 2023-01-24 03:11:17.089613: step: 410/464, loss: 0.07676209509372711 2023-01-24 03:11:17.825289: step: 412/464, loss: 0.3310137689113617 2023-01-24 03:11:18.613023: step: 414/464, loss: 0.26078546047210693 2023-01-24 03:11:19.317154: step: 416/464, loss: 0.11648165434598923 2023-01-24 03:11:20.094190: step: 418/464, loss: 0.11332453042268753 2023-01-24 03:11:20.858777: step: 420/464, loss: 0.27925005555152893 2023-01-24 03:11:21.534060: step: 422/464, loss: 0.22074222564697266 2023-01-24 03:11:22.250281: step: 424/464, loss: 0.16748501360416412 2023-01-24 03:11:22.940801: step: 426/464, loss: 1.1123805046081543 2023-01-24 03:11:23.759769: step: 428/464, loss: 0.37792542576789856 2023-01-24 03:11:24.539538: step: 430/464, loss: 0.1879335194826126 2023-01-24 03:11:25.279438: step: 432/464, loss: 1.1048240661621094 2023-01-24 03:11:25.928756: step: 434/464, loss: 0.17384260892868042 2023-01-24 03:11:26.648060: step: 436/464, loss: 0.058489926159381866 2023-01-24 03:11:27.352471: step: 438/464, loss: 0.10755658894777298 2023-01-24 03:11:28.107935: step: 440/464, loss: 0.154677152633667 2023-01-24 03:11:28.829072: step: 442/464, loss: 0.335865318775177 2023-01-24 03:11:29.549137: step: 444/464, loss: 0.17729879915714264 2023-01-24 03:11:30.269398: step: 446/464, loss: 0.1523967832326889 2023-01-24 03:11:31.066337: step: 448/464, loss: 0.14690501987934113 2023-01-24 03:11:31.841922: step: 450/464, loss: 0.3801630437374115 2023-01-24 03:11:32.816630: step: 452/464, loss: 0.18971210718154907 2023-01-24 03:11:33.568350: step: 454/464, loss: 0.28348278999328613 2023-01-24 03:11:34.264545: step: 456/464, loss: 0.11890676617622375 2023-01-24 03:11:34.950907: step: 458/464, loss: 0.08258378505706787 2023-01-24 03:11:35.651484: step: 460/464, loss: 0.10658489167690277 2023-01-24 03:11:36.389207: step: 462/464, loss: 0.3449063301086426 2023-01-24 03:11:37.116618: step: 464/464, loss: 0.2024800330400467 2023-01-24 03:11:37.848549: step: 466/464, loss: 0.11131348460912704 2023-01-24 03:11:38.604623: step: 468/464, loss: 0.050689972937107086 2023-01-24 03:11:39.268125: step: 470/464, loss: 0.09831853210926056 2023-01-24 03:11:40.061071: step: 472/464, loss: 0.16549623012542725 2023-01-24 03:11:40.742928: step: 474/464, loss: 0.3030500113964081 2023-01-24 03:11:41.493818: step: 476/464, loss: 0.11540064960718155 2023-01-24 03:11:42.226718: step: 478/464, loss: 0.1258566528558731 2023-01-24 03:11:43.061956: step: 480/464, loss: 0.3872591257095337 2023-01-24 03:11:43.829527: step: 482/464, loss: 0.4863373041152954 2023-01-24 03:11:44.520270: step: 484/464, loss: 0.03375108912587166 2023-01-24 03:11:45.257597: step: 486/464, loss: 0.15512655675411224 2023-01-24 03:11:46.003396: step: 488/464, loss: 0.18190723657608032 2023-01-24 03:11:46.715203: step: 490/464, loss: 0.08832676708698273 2023-01-24 03:11:47.484576: step: 492/464, loss: 0.1871776580810547 2023-01-24 03:11:48.219592: step: 494/464, loss: 0.22434929013252258 2023-01-24 03:11:49.002469: step: 496/464, loss: 0.34984269738197327 2023-01-24 03:11:49.710293: step: 498/464, loss: 0.9877106547355652 2023-01-24 03:11:50.433456: step: 500/464, loss: 0.1210198700428009 2023-01-24 03:11:51.268878: step: 502/464, loss: 0.0619652234017849 2023-01-24 03:11:52.002566: step: 504/464, loss: 0.21530750393867493 2023-01-24 03:11:52.758254: step: 506/464, loss: 0.10127289593219757 2023-01-24 03:11:53.426410: step: 508/464, loss: 0.19827330112457275 2023-01-24 03:11:54.133444: step: 510/464, loss: 0.2336958348751068 2023-01-24 03:11:54.810316: step: 512/464, loss: 0.14155313372612 2023-01-24 03:11:55.617550: step: 514/464, loss: 2.5356342792510986 2023-01-24 03:11:56.347024: step: 516/464, loss: 0.1917305439710617 2023-01-24 03:11:57.138401: step: 518/464, loss: 0.32812514901161194 2023-01-24 03:11:57.960248: step: 520/464, loss: 0.7535200119018555 2023-01-24 03:11:58.676003: step: 522/464, loss: 0.0939045324921608 2023-01-24 03:11:59.413928: step: 524/464, loss: 0.08146481961011887 2023-01-24 03:12:00.118363: step: 526/464, loss: 0.8648735284805298 2023-01-24 03:12:00.865659: step: 528/464, loss: 0.22401273250579834 2023-01-24 03:12:01.594815: step: 530/464, loss: 0.10157474130392075 2023-01-24 03:12:02.260677: step: 532/464, loss: 0.29921257495880127 2023-01-24 03:12:03.052213: step: 534/464, loss: 0.24234499037265778 2023-01-24 03:12:03.746064: step: 536/464, loss: 0.23960405588150024 2023-01-24 03:12:04.497845: step: 538/464, loss: 0.09281633049249649 2023-01-24 03:12:05.190619: step: 540/464, loss: 0.11242015659809113 2023-01-24 03:12:05.855037: step: 542/464, loss: 0.2687571048736572 2023-01-24 03:12:06.634046: step: 544/464, loss: 0.07830285280942917 2023-01-24 03:12:07.352194: step: 546/464, loss: 0.40920308232307434 2023-01-24 03:12:08.135850: step: 548/464, loss: 0.16095489263534546 2023-01-24 03:12:08.872952: step: 550/464, loss: 0.12933610379695892 2023-01-24 03:12:09.556466: step: 552/464, loss: 0.21303704380989075 2023-01-24 03:12:10.232163: step: 554/464, loss: 0.17689307034015656 2023-01-24 03:12:10.984588: step: 556/464, loss: 0.45598405599594116 2023-01-24 03:12:11.750369: step: 558/464, loss: 0.22006511688232422 2023-01-24 03:12:12.502688: step: 560/464, loss: 0.09161525964736938 2023-01-24 03:12:13.259678: step: 562/464, loss: 0.07944789528846741 2023-01-24 03:12:14.018632: step: 564/464, loss: 0.21354304254055023 2023-01-24 03:12:14.787883: step: 566/464, loss: 0.22804072499275208 2023-01-24 03:12:15.574070: step: 568/464, loss: 0.6266902685165405 2023-01-24 03:12:16.279570: step: 570/464, loss: 0.6643524169921875 2023-01-24 03:12:17.041939: step: 572/464, loss: 0.11704916507005692 2023-01-24 03:12:17.805836: step: 574/464, loss: 0.1938568651676178 2023-01-24 03:12:18.537743: step: 576/464, loss: 0.04783834144473076 2023-01-24 03:12:19.259235: step: 578/464, loss: 0.08962643146514893 2023-01-24 03:12:19.999164: step: 580/464, loss: 0.1545608639717102 2023-01-24 03:12:20.781679: step: 582/464, loss: 0.37907034158706665 2023-01-24 03:12:21.507964: step: 584/464, loss: 0.442465603351593 2023-01-24 03:12:22.243382: step: 586/464, loss: 0.23899458348751068 2023-01-24 03:12:22.939625: step: 588/464, loss: 0.09116481244564056 2023-01-24 03:12:23.676304: step: 590/464, loss: 0.20019377768039703 2023-01-24 03:12:24.456067: step: 592/464, loss: 0.24498000741004944 2023-01-24 03:12:25.202543: step: 594/464, loss: 0.1740119606256485 2023-01-24 03:12:25.941866: step: 596/464, loss: 0.09426325559616089 2023-01-24 03:12:26.688309: step: 598/464, loss: 0.6598936915397644 2023-01-24 03:12:27.382676: step: 600/464, loss: 0.21235957741737366 2023-01-24 03:12:28.152509: step: 602/464, loss: 0.17811620235443115 2023-01-24 03:12:28.884500: step: 604/464, loss: 0.19934476912021637 2023-01-24 03:12:29.699855: step: 606/464, loss: 0.11637186259031296 2023-01-24 03:12:30.421589: step: 608/464, loss: 0.37004193663597107 2023-01-24 03:12:31.139413: step: 610/464, loss: 0.28444433212280273 2023-01-24 03:12:31.871668: step: 612/464, loss: 0.16172698140144348 2023-01-24 03:12:32.565414: step: 614/464, loss: 0.12194719165563583 2023-01-24 03:12:33.367340: step: 616/464, loss: 0.0865287110209465 2023-01-24 03:12:34.089904: step: 618/464, loss: 0.12266885489225388 2023-01-24 03:12:34.811138: step: 620/464, loss: 0.4905916154384613 2023-01-24 03:12:35.439111: step: 622/464, loss: 0.34407004714012146 2023-01-24 03:12:36.198675: step: 624/464, loss: 0.06899049878120422 2023-01-24 03:12:36.966922: step: 626/464, loss: 0.07489988952875137 2023-01-24 03:12:37.713467: step: 628/464, loss: 0.08575539290904999 2023-01-24 03:12:38.457900: step: 630/464, loss: 0.27297940850257874 2023-01-24 03:12:39.193485: step: 632/464, loss: 0.16481491923332214 2023-01-24 03:12:39.891736: step: 634/464, loss: 0.15143327414989471 2023-01-24 03:12:40.618953: step: 636/464, loss: 0.44829922914505005 2023-01-24 03:12:41.305745: step: 638/464, loss: 0.09175318479537964 2023-01-24 03:12:42.061928: step: 640/464, loss: 0.1762494593858719 2023-01-24 03:12:42.696874: step: 642/464, loss: 0.038562506437301636 2023-01-24 03:12:43.505361: step: 644/464, loss: 0.2596011757850647 2023-01-24 03:12:44.272069: step: 646/464, loss: 0.14710719883441925 2023-01-24 03:12:45.018995: step: 648/464, loss: 0.07652927190065384 2023-01-24 03:12:45.783228: step: 650/464, loss: 0.24959689378738403 2023-01-24 03:12:46.641381: step: 652/464, loss: 0.09222123771905899 2023-01-24 03:12:47.372167: step: 654/464, loss: 0.540006697177887 2023-01-24 03:12:48.036500: step: 656/464, loss: 0.3779156804084778 2023-01-24 03:12:48.712325: step: 658/464, loss: 0.12363962829113007 2023-01-24 03:12:49.482414: step: 660/464, loss: 1.2623299360275269 2023-01-24 03:12:50.205717: step: 662/464, loss: 0.40365299582481384 2023-01-24 03:12:50.916633: step: 664/464, loss: 0.06017347052693367 2023-01-24 03:12:51.685689: step: 666/464, loss: 0.16187569499015808 2023-01-24 03:12:52.455438: step: 668/464, loss: 0.08684320747852325 2023-01-24 03:12:53.213663: step: 670/464, loss: 0.2427690476179123 2023-01-24 03:12:53.989048: step: 672/464, loss: 0.12731128931045532 2023-01-24 03:12:54.822499: step: 674/464, loss: 0.09823483228683472 2023-01-24 03:12:55.550907: step: 676/464, loss: 0.09728223085403442 2023-01-24 03:12:56.303374: step: 678/464, loss: 0.06802794337272644 2023-01-24 03:12:57.072437: step: 680/464, loss: 0.46626871824264526 2023-01-24 03:12:57.743620: step: 682/464, loss: 0.1886630654335022 2023-01-24 03:12:58.495294: step: 684/464, loss: 0.15889041125774384 2023-01-24 03:12:59.345130: step: 686/464, loss: 0.4553874135017395 2023-01-24 03:13:00.006756: step: 688/464, loss: 0.09692560881376266 2023-01-24 03:13:00.696727: step: 690/464, loss: 0.7323739528656006 2023-01-24 03:13:01.490880: step: 692/464, loss: 0.5018308758735657 2023-01-24 03:13:02.213318: step: 694/464, loss: 0.3774145841598511 2023-01-24 03:13:02.998745: step: 696/464, loss: 0.22533495724201202 2023-01-24 03:13:03.715913: step: 698/464, loss: 0.11258202791213989 2023-01-24 03:13:04.431563: step: 700/464, loss: 0.24560454487800598 2023-01-24 03:13:05.189895: step: 702/464, loss: 0.23531009256839752 2023-01-24 03:13:05.973988: step: 704/464, loss: 0.07021478563547134 2023-01-24 03:13:06.731086: step: 706/464, loss: 0.18745137751102448 2023-01-24 03:13:07.411136: step: 708/464, loss: 0.20717504620552063 2023-01-24 03:13:08.182741: step: 710/464, loss: 0.20312827825546265 2023-01-24 03:13:08.920801: step: 712/464, loss: 0.2219710499048233 2023-01-24 03:13:09.670574: step: 714/464, loss: 1.655066967010498 2023-01-24 03:13:10.394270: step: 716/464, loss: 0.6989168524742126 2023-01-24 03:13:11.166227: step: 718/464, loss: 0.23458097875118256 2023-01-24 03:13:11.910745: step: 720/464, loss: 0.27344822883605957 2023-01-24 03:13:12.686183: step: 722/464, loss: 0.12376883625984192 2023-01-24 03:13:13.512823: step: 724/464, loss: 1.085131049156189 2023-01-24 03:13:14.205582: step: 726/464, loss: 0.23603534698486328 2023-01-24 03:13:15.016732: step: 728/464, loss: 0.16410315036773682 2023-01-24 03:13:15.726396: step: 730/464, loss: 0.2255164086818695 2023-01-24 03:13:16.483715: step: 732/464, loss: 0.2828912138938904 2023-01-24 03:13:17.186397: step: 734/464, loss: 0.1097203716635704 2023-01-24 03:13:17.951313: step: 736/464, loss: 0.10253311693668365 2023-01-24 03:13:18.680924: step: 738/464, loss: 0.3227848708629608 2023-01-24 03:13:19.515368: step: 740/464, loss: 0.41590064764022827 2023-01-24 03:13:20.234947: step: 742/464, loss: 0.17311303317546844 2023-01-24 03:13:21.050284: step: 744/464, loss: 0.3792298436164856 2023-01-24 03:13:21.742246: step: 746/464, loss: 0.1337263584136963 2023-01-24 03:13:22.468485: step: 748/464, loss: 0.056437354534864426 2023-01-24 03:13:23.245268: step: 750/464, loss: 0.7661721706390381 2023-01-24 03:13:23.943053: step: 752/464, loss: 0.1342168003320694 2023-01-24 03:13:24.758393: step: 754/464, loss: 2.0010664463043213 2023-01-24 03:13:25.473299: step: 756/464, loss: 0.35609573125839233 2023-01-24 03:13:26.128082: step: 758/464, loss: 0.1894560605287552 2023-01-24 03:13:26.830132: step: 760/464, loss: 0.6358622312545776 2023-01-24 03:13:27.630127: step: 762/464, loss: 0.08535061031579971 2023-01-24 03:13:28.434876: step: 764/464, loss: 0.8524316549301147 2023-01-24 03:13:29.209406: step: 766/464, loss: 0.5615988969802856 2023-01-24 03:13:29.940888: step: 768/464, loss: 0.12083742022514343 2023-01-24 03:13:30.734048: step: 770/464, loss: 0.5996964573860168 2023-01-24 03:13:31.475229: step: 772/464, loss: 0.19729484617710114 2023-01-24 03:13:32.260723: step: 774/464, loss: 0.17660316824913025 2023-01-24 03:13:32.959585: step: 776/464, loss: 0.3362042307853699 2023-01-24 03:13:33.667360: step: 778/464, loss: 0.9083384871482849 2023-01-24 03:13:34.441287: step: 780/464, loss: 0.8565486669540405 2023-01-24 03:13:35.152581: step: 782/464, loss: 0.09819147735834122 2023-01-24 03:13:35.969900: step: 784/464, loss: 0.21145141124725342 2023-01-24 03:13:36.687405: step: 786/464, loss: 0.35320553183555603 2023-01-24 03:13:37.441438: step: 788/464, loss: 0.28306126594543457 2023-01-24 03:13:38.201168: step: 790/464, loss: 0.6834411025047302 2023-01-24 03:13:38.871220: step: 792/464, loss: 0.12468130141496658 2023-01-24 03:13:39.620700: step: 794/464, loss: 0.19863691926002502 2023-01-24 03:13:40.396214: step: 796/464, loss: 0.4513365626335144 2023-01-24 03:13:41.147098: step: 798/464, loss: 0.1359991431236267 2023-01-24 03:13:41.886086: step: 800/464, loss: 0.17588861286640167 2023-01-24 03:13:42.624294: step: 802/464, loss: 0.09235641360282898 2023-01-24 03:13:43.411182: step: 804/464, loss: 0.1508709341287613 2023-01-24 03:13:44.150846: step: 806/464, loss: 0.2306538075208664 2023-01-24 03:13:44.887524: step: 808/464, loss: 0.19599375128746033 2023-01-24 03:13:45.708582: step: 810/464, loss: 0.2671712338924408 2023-01-24 03:13:46.425027: step: 812/464, loss: 0.09009677171707153 2023-01-24 03:13:47.186765: step: 814/464, loss: 0.1923481523990631 2023-01-24 03:13:47.854904: step: 816/464, loss: 0.14651606976985931 2023-01-24 03:13:48.598465: step: 818/464, loss: 0.41739848256111145 2023-01-24 03:13:49.317139: step: 820/464, loss: 0.2460222989320755 2023-01-24 03:13:50.108496: step: 822/464, loss: 0.2720450162887573 2023-01-24 03:13:50.828345: step: 824/464, loss: 0.19671209156513214 2023-01-24 03:13:51.596840: step: 826/464, loss: 0.1822405904531479 2023-01-24 03:13:52.306041: step: 828/464, loss: 0.18423821032047272 2023-01-24 03:13:53.087734: step: 830/464, loss: 0.36167916655540466 2023-01-24 03:13:53.857418: step: 832/464, loss: 0.16588671505451202 2023-01-24 03:13:54.673175: step: 834/464, loss: 0.2735016644001007 2023-01-24 03:13:55.490639: step: 836/464, loss: 0.12201464921236038 2023-01-24 03:13:56.228277: step: 838/464, loss: 0.20774728059768677 2023-01-24 03:13:56.879125: step: 840/464, loss: 0.6711456179618835 2023-01-24 03:13:57.600491: step: 842/464, loss: 0.08495127409696579 2023-01-24 03:13:58.330300: step: 844/464, loss: 0.16191741824150085 2023-01-24 03:13:59.108025: step: 846/464, loss: 0.25720930099487305 2023-01-24 03:13:59.813722: step: 848/464, loss: 0.21723899245262146 2023-01-24 03:14:00.553325: step: 850/464, loss: 0.7831099033355713 2023-01-24 03:14:01.278377: step: 852/464, loss: 0.4225570559501648 2023-01-24 03:14:02.067332: step: 854/464, loss: 0.21615463495254517 2023-01-24 03:14:02.796226: step: 856/464, loss: 0.286521852016449 2023-01-24 03:14:03.540108: step: 858/464, loss: 0.11874765157699585 2023-01-24 03:14:04.303602: step: 860/464, loss: 0.12384872138500214 2023-01-24 03:14:05.051440: step: 862/464, loss: 0.15509487688541412 2023-01-24 03:14:05.731392: step: 864/464, loss: 0.16075512766838074 2023-01-24 03:14:06.476059: step: 866/464, loss: 0.3110632300376892 2023-01-24 03:14:07.181523: step: 868/464, loss: 0.10222408920526505 2023-01-24 03:14:07.920110: step: 870/464, loss: 0.09057188034057617 2023-01-24 03:14:08.660004: step: 872/464, loss: 0.13266919553279877 2023-01-24 03:14:09.393495: step: 874/464, loss: 0.1931861937046051 2023-01-24 03:14:10.124904: step: 876/464, loss: 0.08242028206586838 2023-01-24 03:14:10.948358: step: 878/464, loss: 0.1272575557231903 2023-01-24 03:14:11.719053: step: 880/464, loss: 0.22169773280620575 2023-01-24 03:14:12.506934: step: 882/464, loss: 0.26219919323921204 2023-01-24 03:14:13.220811: step: 884/464, loss: 0.04633217677474022 2023-01-24 03:14:13.945382: step: 886/464, loss: 0.1503944844007492 2023-01-24 03:14:14.717466: step: 888/464, loss: 0.27836212515830994 2023-01-24 03:14:15.527786: step: 890/464, loss: 0.16426533460617065 2023-01-24 03:14:16.275179: step: 892/464, loss: 0.15028542280197144 2023-01-24 03:14:16.990945: step: 894/464, loss: 0.12456360459327698 2023-01-24 03:14:17.758489: step: 896/464, loss: 0.6605837941169739 2023-01-24 03:14:18.479029: step: 898/464, loss: 0.4971863925457001 2023-01-24 03:14:19.272721: step: 900/464, loss: 0.205971360206604 2023-01-24 03:14:20.067044: step: 902/464, loss: 0.36103111505508423 2023-01-24 03:14:20.825942: step: 904/464, loss: 0.14627264440059662 2023-01-24 03:14:21.528222: step: 906/464, loss: 0.09463157504796982 2023-01-24 03:14:22.294204: step: 908/464, loss: 0.32430994510650635 2023-01-24 03:14:23.022460: step: 910/464, loss: 0.2579716742038727 2023-01-24 03:14:23.797206: step: 912/464, loss: 0.4348582923412323 2023-01-24 03:14:24.569724: step: 914/464, loss: 0.08700353652238846 2023-01-24 03:14:25.311692: step: 916/464, loss: 0.35793405771255493 2023-01-24 03:14:26.052572: step: 918/464, loss: 0.19745084643363953 2023-01-24 03:14:26.907217: step: 920/464, loss: 0.3313872814178467 2023-01-24 03:14:27.602122: step: 922/464, loss: 0.16230227053165436 2023-01-24 03:14:28.352796: step: 924/464, loss: 0.5203825831413269 2023-01-24 03:14:29.031384: step: 926/464, loss: 0.40407904982566833 2023-01-24 03:14:29.824281: step: 928/464, loss: 0.2651551365852356 2023-01-24 03:14:30.533789: step: 930/464, loss: 0.24683575332164764 ================================================== Loss: 0.317 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3568852151612702, 'r': 0.300108021840159, 'f1': 0.32604328298683943}, 'combined': 0.2402424190429343, 'epoch': 11} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31206896181465105, 'r': 0.26277870131085623, 'f1': 0.2853106371145738}, 'combined': 0.17719292199747214, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33991790738034344, 'r': 0.2967025377513434, 'f1': 0.316843439503461}, 'combined': 0.2334635870025502, 'epoch': 11} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3108596430235575, 'r': 0.26426135333955286, 'f1': 0.2856727209875337}, 'combined': 0.17741779513962622, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3087121212121212, 'r': 0.2910714285714286, 'f1': 0.2996323529411765}, 'combined': 0.19975490196078433, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29545454545454547, 'r': 0.42391304347826086, 'f1': 0.3482142857142857}, 'combined': 0.17410714285714285, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3430680085139636, 'r': 0.3183306568564102, 'f1': 0.330236724730961}, 'combined': 0.24333232348597125, 'epoch': 9} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32349203894432277, 'r': 0.26013751499676996, 'f1': 0.2883761268220772}, 'combined': 0.1790967524473953, 'epoch': 9} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36041666666666666, 'r': 0.30892857142857144, 'f1': 0.33269230769230773}, 'combined': 0.2217948717948718, 'epoch': 9} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:17:16.749655: step: 2/464, loss: 0.07937145233154297 2023-01-24 03:17:17.467961: step: 4/464, loss: 0.08152173459529877 2023-01-24 03:17:18.222014: step: 6/464, loss: 1.303863525390625 2023-01-24 03:17:18.922573: step: 8/464, loss: 0.13373735547065735 2023-01-24 03:17:19.681226: step: 10/464, loss: 0.7212203145027161 2023-01-24 03:17:20.505497: step: 12/464, loss: 0.30934861302375793 2023-01-24 03:17:21.289195: step: 14/464, loss: 0.5328418612480164 2023-01-24 03:17:22.093760: step: 16/464, loss: 0.09542962163686752 2023-01-24 03:17:22.772785: step: 18/464, loss: 0.14745691418647766 2023-01-24 03:17:23.472537: step: 20/464, loss: 0.2676483988761902 2023-01-24 03:17:24.258827: step: 22/464, loss: 0.09156662225723267 2023-01-24 03:17:24.917064: step: 24/464, loss: 0.03031635843217373 2023-01-24 03:17:25.662673: step: 26/464, loss: 0.09614569693803787 2023-01-24 03:17:26.448082: step: 28/464, loss: 0.18411265313625336 2023-01-24 03:17:27.156978: step: 30/464, loss: 0.11576136201620102 2023-01-24 03:17:27.897889: step: 32/464, loss: 0.12587027251720428 2023-01-24 03:17:28.588953: step: 34/464, loss: 0.2171574831008911 2023-01-24 03:17:29.353888: step: 36/464, loss: 0.1379026621580124 2023-01-24 03:17:30.090159: step: 38/464, loss: 0.038375645875930786 2023-01-24 03:17:30.803339: step: 40/464, loss: 0.6074793338775635 2023-01-24 03:17:31.526691: step: 42/464, loss: 0.12009892612695694 2023-01-24 03:17:32.332040: step: 44/464, loss: 0.28919121623039246 2023-01-24 03:17:33.011408: step: 46/464, loss: 0.09155625849962234 2023-01-24 03:17:33.796465: step: 48/464, loss: 0.16924382746219635 2023-01-24 03:17:34.513641: step: 50/464, loss: 0.33645200729370117 2023-01-24 03:17:35.260765: step: 52/464, loss: 0.5285319089889526 2023-01-24 03:17:35.980913: step: 54/464, loss: 0.5698367357254028 2023-01-24 03:17:36.762469: step: 56/464, loss: 0.09041299670934677 2023-01-24 03:17:37.536051: step: 58/464, loss: 0.04541686549782753 2023-01-24 03:17:38.253415: step: 60/464, loss: 0.35840001702308655 2023-01-24 03:17:38.943290: step: 62/464, loss: 0.6764888167381287 2023-01-24 03:17:39.713399: step: 64/464, loss: 0.03156716749072075 2023-01-24 03:17:40.456641: step: 66/464, loss: 0.6557780504226685 2023-01-24 03:17:41.177079: step: 68/464, loss: 0.10692758858203888 2023-01-24 03:17:41.949448: step: 70/464, loss: 0.09629504382610321 2023-01-24 03:17:42.750650: step: 72/464, loss: 0.11967064440250397 2023-01-24 03:17:43.533950: step: 74/464, loss: 0.07440176606178284 2023-01-24 03:17:44.296195: step: 76/464, loss: 0.395173579454422 2023-01-24 03:17:45.087656: step: 78/464, loss: 0.17125675082206726 2023-01-24 03:17:45.877496: step: 80/464, loss: 0.08940937370061874 2023-01-24 03:17:46.657145: step: 82/464, loss: 0.19092987477779388 2023-01-24 03:17:47.452176: step: 84/464, loss: 0.25753524899482727 2023-01-24 03:17:48.272852: step: 86/464, loss: 0.3011278212070465 2023-01-24 03:17:49.047654: step: 88/464, loss: 0.13556747138500214 2023-01-24 03:17:49.861838: step: 90/464, loss: 0.09217557311058044 2023-01-24 03:17:50.523278: step: 92/464, loss: 0.06008195877075195 2023-01-24 03:17:51.263802: step: 94/464, loss: 1.1136311292648315 2023-01-24 03:17:51.999319: step: 96/464, loss: 0.0928504541516304 2023-01-24 03:17:52.731424: step: 98/464, loss: 0.11634860187768936 2023-01-24 03:17:53.464926: step: 100/464, loss: 0.17749115824699402 2023-01-24 03:17:54.153937: step: 102/464, loss: 0.3903777599334717 2023-01-24 03:17:54.902692: step: 104/464, loss: 0.08186224102973938 2023-01-24 03:17:55.723449: step: 106/464, loss: 0.149544358253479 2023-01-24 03:17:56.462314: step: 108/464, loss: 2.127565383911133 2023-01-24 03:17:57.164878: step: 110/464, loss: 0.20751747488975525 2023-01-24 03:17:57.867509: step: 112/464, loss: 0.6430408954620361 2023-01-24 03:17:58.590566: step: 114/464, loss: 0.31235331296920776 2023-01-24 03:17:59.320653: step: 116/464, loss: 0.22165215015411377 2023-01-24 03:18:00.041096: step: 118/464, loss: 0.28718554973602295 2023-01-24 03:18:00.762837: step: 120/464, loss: 0.16045460104942322 2023-01-24 03:18:01.580592: step: 122/464, loss: 0.15227341651916504 2023-01-24 03:18:02.303690: step: 124/464, loss: 0.17640146613121033 2023-01-24 03:18:03.047649: step: 126/464, loss: 0.12168958783149719 2023-01-24 03:18:03.782898: step: 128/464, loss: 0.14149035513401031 2023-01-24 03:18:04.468715: step: 130/464, loss: 0.14450493454933167 2023-01-24 03:18:05.233338: step: 132/464, loss: 0.13244052231311798 2023-01-24 03:18:05.979395: step: 134/464, loss: 0.09429279714822769 2023-01-24 03:18:06.699737: step: 136/464, loss: 0.11651817709207535 2023-01-24 03:18:07.379306: step: 138/464, loss: 0.08832782506942749 2023-01-24 03:18:08.114103: step: 140/464, loss: 0.08300164341926575 2023-01-24 03:18:08.918192: step: 142/464, loss: 0.2834710478782654 2023-01-24 03:18:09.645601: step: 144/464, loss: 0.1599501669406891 2023-01-24 03:18:10.427111: step: 146/464, loss: 0.313719242811203 2023-01-24 03:18:11.166727: step: 148/464, loss: 0.07726339995861053 2023-01-24 03:18:11.918810: step: 150/464, loss: 0.18269270658493042 2023-01-24 03:18:12.692887: step: 152/464, loss: 0.10299442708492279 2023-01-24 03:18:13.446932: step: 154/464, loss: 0.2939721941947937 2023-01-24 03:18:14.121693: step: 156/464, loss: 0.1128954365849495 2023-01-24 03:18:14.829294: step: 158/464, loss: 0.13372232019901276 2023-01-24 03:18:15.605183: step: 160/464, loss: 0.1052638366818428 2023-01-24 03:18:16.348781: step: 162/464, loss: 0.7225176692008972 2023-01-24 03:18:17.147999: step: 164/464, loss: 0.51224684715271 2023-01-24 03:18:18.093921: step: 166/464, loss: 0.11861090362071991 2023-01-24 03:18:18.821887: step: 168/464, loss: 0.3967590928077698 2023-01-24 03:18:19.566542: step: 170/464, loss: 0.16093403100967407 2023-01-24 03:18:20.311316: step: 172/464, loss: 0.16586624085903168 2023-01-24 03:18:20.991809: step: 174/464, loss: 0.47254830598831177 2023-01-24 03:18:21.642303: step: 176/464, loss: 0.8602539300918579 2023-01-24 03:18:22.513218: step: 178/464, loss: 0.08452915400266647 2023-01-24 03:18:23.276491: step: 180/464, loss: 0.14934788644313812 2023-01-24 03:18:24.008416: step: 182/464, loss: 0.15610671043395996 2023-01-24 03:18:24.798526: step: 184/464, loss: 0.11498009413480759 2023-01-24 03:18:25.450420: step: 186/464, loss: 0.02484835684299469 2023-01-24 03:18:26.128669: step: 188/464, loss: 0.050583284348249435 2023-01-24 03:18:26.862051: step: 190/464, loss: 0.022346261888742447 2023-01-24 03:18:27.567955: step: 192/464, loss: 0.3246009647846222 2023-01-24 03:18:28.408351: step: 194/464, loss: 0.18790866434574127 2023-01-24 03:18:29.152823: step: 196/464, loss: 0.27084141969680786 2023-01-24 03:18:29.866508: step: 198/464, loss: 0.08829298615455627 2023-01-24 03:18:30.645154: step: 200/464, loss: 0.47170162200927734 2023-01-24 03:18:31.320645: step: 202/464, loss: 0.09879495203495026 2023-01-24 03:18:32.113099: step: 204/464, loss: 0.13257142901420593 2023-01-24 03:18:32.880726: step: 206/464, loss: 0.1926821917295456 2023-01-24 03:18:33.701502: step: 208/464, loss: 0.2796195447444916 2023-01-24 03:18:34.399254: step: 210/464, loss: 0.11591766774654388 2023-01-24 03:18:35.215471: step: 212/464, loss: 0.13543307781219482 2023-01-24 03:18:36.000728: step: 214/464, loss: 0.6604641675949097 2023-01-24 03:18:36.759978: step: 216/464, loss: 0.13425329327583313 2023-01-24 03:18:37.488083: step: 218/464, loss: 0.6592523455619812 2023-01-24 03:18:38.154168: step: 220/464, loss: 0.6960573196411133 2023-01-24 03:18:38.887234: step: 222/464, loss: 0.4156104326248169 2023-01-24 03:18:39.622309: step: 224/464, loss: 0.06526309996843338 2023-01-24 03:18:40.393441: step: 226/464, loss: 0.14824193716049194 2023-01-24 03:18:41.114825: step: 228/464, loss: 0.1567968726158142 2023-01-24 03:18:41.921622: step: 230/464, loss: 0.18765020370483398 2023-01-24 03:18:42.619372: step: 232/464, loss: 0.32986509799957275 2023-01-24 03:18:43.332865: step: 234/464, loss: 0.08478040993213654 2023-01-24 03:18:44.090890: step: 236/464, loss: 0.7538495659828186 2023-01-24 03:18:44.806750: step: 238/464, loss: 0.30088624358177185 2023-01-24 03:18:45.601387: step: 240/464, loss: 0.08321912586688995 2023-01-24 03:18:46.275443: step: 242/464, loss: 0.15158098936080933 2023-01-24 03:18:47.037707: step: 244/464, loss: 0.0883256271481514 2023-01-24 03:18:47.828474: step: 246/464, loss: 0.12839487195014954 2023-01-24 03:18:48.587995: step: 248/464, loss: 0.11329327523708344 2023-01-24 03:18:49.350189: step: 250/464, loss: 0.14634855091571808 2023-01-24 03:18:50.088675: step: 252/464, loss: 0.1465548425912857 2023-01-24 03:18:50.769271: step: 254/464, loss: 0.1491321623325348 2023-01-24 03:18:51.560006: step: 256/464, loss: 0.05420532450079918 2023-01-24 03:18:52.312539: step: 258/464, loss: 0.06870382279157639 2023-01-24 03:18:53.072872: step: 260/464, loss: 0.22869589924812317 2023-01-24 03:18:53.857010: step: 262/464, loss: 0.24778543412685394 2023-01-24 03:18:54.567414: step: 264/464, loss: 0.04789621755480766 2023-01-24 03:18:55.323868: step: 266/464, loss: 0.10004598647356033 2023-01-24 03:18:56.080146: step: 268/464, loss: 0.049069881439208984 2023-01-24 03:18:56.859254: step: 270/464, loss: 0.11798962205648422 2023-01-24 03:18:57.595286: step: 272/464, loss: 0.0744778960943222 2023-01-24 03:18:58.347258: step: 274/464, loss: 0.21960477530956268 2023-01-24 03:18:59.026834: step: 276/464, loss: 0.45826032757759094 2023-01-24 03:18:59.837503: step: 278/464, loss: 0.22718052566051483 2023-01-24 03:19:00.575426: step: 280/464, loss: 0.32442647218704224 2023-01-24 03:19:01.387165: step: 282/464, loss: 0.1793290227651596 2023-01-24 03:19:02.256516: step: 284/464, loss: 0.3830242156982422 2023-01-24 03:19:02.984724: step: 286/464, loss: 0.18583126366138458 2023-01-24 03:19:03.762742: step: 288/464, loss: 0.24565443396568298 2023-01-24 03:19:04.444870: step: 290/464, loss: 0.17036233842372894 2023-01-24 03:19:05.186221: step: 292/464, loss: 0.17884781956672668 2023-01-24 03:19:06.026562: step: 294/464, loss: 0.13129177689552307 2023-01-24 03:19:06.799696: step: 296/464, loss: 0.09641414880752563 2023-01-24 03:19:07.616744: step: 298/464, loss: 0.2380957305431366 2023-01-24 03:19:08.286686: step: 300/464, loss: 0.2236909717321396 2023-01-24 03:19:09.063280: step: 302/464, loss: 0.25487086176872253 2023-01-24 03:19:09.794561: step: 304/464, loss: 0.08199506253004074 2023-01-24 03:19:10.495723: step: 306/464, loss: 0.14794254302978516 2023-01-24 03:19:11.285554: step: 308/464, loss: 0.11556050926446915 2023-01-24 03:19:11.941627: step: 310/464, loss: 0.4206964075565338 2023-01-24 03:19:12.691423: step: 312/464, loss: 0.35478389263153076 2023-01-24 03:19:13.474047: step: 314/464, loss: 0.08433805406093597 2023-01-24 03:19:14.209936: step: 316/464, loss: 0.2681663930416107 2023-01-24 03:19:14.987611: step: 318/464, loss: 0.08490245789289474 2023-01-24 03:19:15.673839: step: 320/464, loss: 0.8618941307067871 2023-01-24 03:19:16.457922: step: 322/464, loss: 0.16621652245521545 2023-01-24 03:19:17.204507: step: 324/464, loss: 0.12393920123577118 2023-01-24 03:19:17.935482: step: 326/464, loss: 0.4252515733242035 2023-01-24 03:19:18.655379: step: 328/464, loss: 0.18016891181468964 2023-01-24 03:19:19.465388: step: 330/464, loss: 0.1707686185836792 2023-01-24 03:19:20.211670: step: 332/464, loss: 0.1263190656900406 2023-01-24 03:19:20.972297: step: 334/464, loss: 0.07681165635585785 2023-01-24 03:19:21.699396: step: 336/464, loss: 0.2365681231021881 2023-01-24 03:19:22.419571: step: 338/464, loss: 0.3317764401435852 2023-01-24 03:19:23.198712: step: 340/464, loss: 0.17738425731658936 2023-01-24 03:19:23.954976: step: 342/464, loss: 0.09095247089862823 2023-01-24 03:19:24.637093: step: 344/464, loss: 0.15220151841640472 2023-01-24 03:19:25.377104: step: 346/464, loss: 0.19217680394649506 2023-01-24 03:19:26.072995: step: 348/464, loss: 0.20572134852409363 2023-01-24 03:19:26.862523: step: 350/464, loss: 0.08010413497686386 2023-01-24 03:19:27.576576: step: 352/464, loss: 0.18971148133277893 2023-01-24 03:19:28.267703: step: 354/464, loss: 0.3263251483440399 2023-01-24 03:19:29.093253: step: 356/464, loss: 0.1769842803478241 2023-01-24 03:19:29.756761: step: 358/464, loss: 0.08641387522220612 2023-01-24 03:19:30.400821: step: 360/464, loss: 0.14438386261463165 2023-01-24 03:19:31.068697: step: 362/464, loss: 0.28434932231903076 2023-01-24 03:19:31.822706: step: 364/464, loss: 0.1055278480052948 2023-01-24 03:19:32.587019: step: 366/464, loss: 0.10482140630483627 2023-01-24 03:19:33.382552: step: 368/464, loss: 0.3840358555316925 2023-01-24 03:19:34.116764: step: 370/464, loss: 0.1421050727367401 2023-01-24 03:19:34.882234: step: 372/464, loss: 0.20900005102157593 2023-01-24 03:19:35.581391: step: 374/464, loss: 0.07068168371915817 2023-01-24 03:19:36.339492: step: 376/464, loss: 0.34346091747283936 2023-01-24 03:19:37.093577: step: 378/464, loss: 0.14604853093624115 2023-01-24 03:19:37.832212: step: 380/464, loss: 0.23045358061790466 2023-01-24 03:19:38.541781: step: 382/464, loss: 0.42827993631362915 2023-01-24 03:19:39.237359: step: 384/464, loss: 0.0735638290643692 2023-01-24 03:19:40.008166: step: 386/464, loss: 0.3617570698261261 2023-01-24 03:19:40.760456: step: 388/464, loss: 0.1411140263080597 2023-01-24 03:19:41.579508: step: 390/464, loss: 0.03528972342610359 2023-01-24 03:19:42.298375: step: 392/464, loss: 0.4784262776374817 2023-01-24 03:19:43.000505: step: 394/464, loss: 0.08293693512678146 2023-01-24 03:19:43.789144: step: 396/464, loss: 0.07473158836364746 2023-01-24 03:19:44.511562: step: 398/464, loss: 0.06551724672317505 2023-01-24 03:19:45.247213: step: 400/464, loss: 0.10966359823942184 2023-01-24 03:19:46.012921: step: 402/464, loss: 0.5371749401092529 2023-01-24 03:19:46.719241: step: 404/464, loss: 0.06447066366672516 2023-01-24 03:19:47.453416: step: 406/464, loss: 1.2373099327087402 2023-01-24 03:19:48.255153: step: 408/464, loss: 0.12304025888442993 2023-01-24 03:19:48.956554: step: 410/464, loss: 0.09652536362409592 2023-01-24 03:19:49.688890: step: 412/464, loss: 0.04025071859359741 2023-01-24 03:19:50.451899: step: 414/464, loss: 0.4970055818557739 2023-01-24 03:19:51.180362: step: 416/464, loss: 0.5476180911064148 2023-01-24 03:19:51.952430: step: 418/464, loss: 0.20421092212200165 2023-01-24 03:19:52.721272: step: 420/464, loss: 0.07824033498764038 2023-01-24 03:19:53.426168: step: 422/464, loss: 0.14528338611125946 2023-01-24 03:19:54.089512: step: 424/464, loss: 0.4104193150997162 2023-01-24 03:19:54.845799: step: 426/464, loss: 0.05804063379764557 2023-01-24 03:19:55.528678: step: 428/464, loss: 0.2260199934244156 2023-01-24 03:19:56.189324: step: 430/464, loss: 0.27057549357414246 2023-01-24 03:19:57.041269: step: 432/464, loss: 0.23801268637180328 2023-01-24 03:19:57.808122: step: 434/464, loss: 0.27735766768455505 2023-01-24 03:19:58.513949: step: 436/464, loss: 0.20993174612522125 2023-01-24 03:19:59.253211: step: 438/464, loss: 0.23531346023082733 2023-01-24 03:20:00.105916: step: 440/464, loss: 0.13657617568969727 2023-01-24 03:20:00.808070: step: 442/464, loss: 0.5901662111282349 2023-01-24 03:20:01.539512: step: 444/464, loss: 0.07006267458200455 2023-01-24 03:20:02.397250: step: 446/464, loss: 0.4572935700416565 2023-01-24 03:20:03.063299: step: 448/464, loss: 0.07816348224878311 2023-01-24 03:20:03.823222: step: 450/464, loss: 5.27097225189209 2023-01-24 03:20:04.593396: step: 452/464, loss: 0.19717015326023102 2023-01-24 03:20:05.263984: step: 454/464, loss: 0.06615665555000305 2023-01-24 03:20:05.924837: step: 456/464, loss: 0.12976990640163422 2023-01-24 03:20:06.682599: step: 458/464, loss: 0.5949162840843201 2023-01-24 03:20:07.389252: step: 460/464, loss: 0.45521727204322815 2023-01-24 03:20:08.159654: step: 462/464, loss: 0.10309097915887833 2023-01-24 03:20:08.842023: step: 464/464, loss: 0.3543277680873871 2023-01-24 03:20:09.653989: step: 466/464, loss: 0.44685256481170654 2023-01-24 03:20:10.319103: step: 468/464, loss: 0.2305956780910492 2023-01-24 03:20:11.121355: step: 470/464, loss: 0.2975473403930664 2023-01-24 03:20:11.837189: step: 472/464, loss: 0.36820775270462036 2023-01-24 03:20:12.546071: step: 474/464, loss: 0.1223047524690628 2023-01-24 03:20:13.298067: step: 476/464, loss: 0.22195355594158173 2023-01-24 03:20:14.167546: step: 478/464, loss: 0.1410691738128662 2023-01-24 03:20:14.873353: step: 480/464, loss: 0.1481795310974121 2023-01-24 03:20:15.692094: step: 482/464, loss: 0.08102936297655106 2023-01-24 03:20:16.454030: step: 484/464, loss: 0.15827953815460205 2023-01-24 03:20:17.242335: step: 486/464, loss: 0.08449569344520569 2023-01-24 03:20:18.013741: step: 488/464, loss: 0.09026332944631577 2023-01-24 03:20:18.754145: step: 490/464, loss: 2.810152053833008 2023-01-24 03:20:19.461743: step: 492/464, loss: 0.10736886411905289 2023-01-24 03:20:20.189235: step: 494/464, loss: 0.06060099974274635 2023-01-24 03:20:20.972850: step: 496/464, loss: 0.10546278208494186 2023-01-24 03:20:21.753235: step: 498/464, loss: 1.2901486158370972 2023-01-24 03:20:22.611690: step: 500/464, loss: 0.09726813435554504 2023-01-24 03:20:23.397367: step: 502/464, loss: 0.09726590663194656 2023-01-24 03:20:24.120647: step: 504/464, loss: 0.08936482667922974 2023-01-24 03:20:24.871182: step: 506/464, loss: 0.193405881524086 2023-01-24 03:20:25.636963: step: 508/464, loss: 0.31795811653137207 2023-01-24 03:20:26.367565: step: 510/464, loss: 0.10925085097551346 2023-01-24 03:20:27.131363: step: 512/464, loss: 0.11257359385490417 2023-01-24 03:20:27.954590: step: 514/464, loss: 0.11842934787273407 2023-01-24 03:20:28.639672: step: 516/464, loss: 0.07557530701160431 2023-01-24 03:20:29.359616: step: 518/464, loss: 0.12648552656173706 2023-01-24 03:20:30.135682: step: 520/464, loss: 0.19210243225097656 2023-01-24 03:20:30.873739: step: 522/464, loss: 0.1300201416015625 2023-01-24 03:20:31.687371: step: 524/464, loss: 0.30112624168395996 2023-01-24 03:20:32.514836: step: 526/464, loss: 0.1210969090461731 2023-01-24 03:20:33.334474: step: 528/464, loss: 0.09769611805677414 2023-01-24 03:20:34.142950: step: 530/464, loss: 0.18061119318008423 2023-01-24 03:20:34.917228: step: 532/464, loss: 0.2147839516401291 2023-01-24 03:20:35.663713: step: 534/464, loss: 0.057371918112039566 2023-01-24 03:20:36.416692: step: 536/464, loss: 0.07153794169425964 2023-01-24 03:20:37.193851: step: 538/464, loss: 1.1326093673706055 2023-01-24 03:20:38.053272: step: 540/464, loss: 0.17589691281318665 2023-01-24 03:20:38.756163: step: 542/464, loss: 0.10882126539945602 2023-01-24 03:20:39.431969: step: 544/464, loss: 0.20188839733600616 2023-01-24 03:20:40.199780: step: 546/464, loss: 0.11440496891736984 2023-01-24 03:20:40.946926: step: 548/464, loss: 0.15246495604515076 2023-01-24 03:20:41.612207: step: 550/464, loss: 1.742716670036316 2023-01-24 03:20:42.382582: step: 552/464, loss: 0.15757957100868225 2023-01-24 03:20:43.118523: step: 554/464, loss: 0.14114047586917877 2023-01-24 03:20:43.807812: step: 556/464, loss: 0.2917866110801697 2023-01-24 03:20:44.533884: step: 558/464, loss: 0.21258586645126343 2023-01-24 03:20:45.299482: step: 560/464, loss: 0.3417026698589325 2023-01-24 03:20:46.037733: step: 562/464, loss: 0.09667295962572098 2023-01-24 03:20:46.829215: step: 564/464, loss: 0.34167689085006714 2023-01-24 03:20:47.615486: step: 566/464, loss: 0.0827411338686943 2023-01-24 03:20:48.276203: step: 568/464, loss: 0.5343703627586365 2023-01-24 03:20:49.090676: step: 570/464, loss: 0.24220702052116394 2023-01-24 03:20:49.977780: step: 572/464, loss: 0.42066872119903564 2023-01-24 03:20:50.760845: step: 574/464, loss: 0.2661969065666199 2023-01-24 03:20:51.434968: step: 576/464, loss: 0.42867612838745117 2023-01-24 03:20:52.167322: step: 578/464, loss: 0.20908300578594208 2023-01-24 03:20:52.941173: step: 580/464, loss: 0.2226964831352234 2023-01-24 03:20:53.707293: step: 582/464, loss: 0.18661530315876007 2023-01-24 03:20:54.418901: step: 584/464, loss: 0.46391546726226807 2023-01-24 03:20:55.114517: step: 586/464, loss: 0.05677974224090576 2023-01-24 03:20:55.851690: step: 588/464, loss: 0.09799033403396606 2023-01-24 03:20:56.609145: step: 590/464, loss: 0.14943784475326538 2023-01-24 03:20:57.274377: step: 592/464, loss: 0.11508098244667053 2023-01-24 03:20:57.944929: step: 594/464, loss: 0.2567056119441986 2023-01-24 03:20:58.708278: step: 596/464, loss: 0.10912559926509857 2023-01-24 03:20:59.387856: step: 598/464, loss: 0.1308480203151703 2023-01-24 03:21:00.104558: step: 600/464, loss: 0.1481272131204605 2023-01-24 03:21:00.802190: step: 602/464, loss: 0.38644760847091675 2023-01-24 03:21:01.566712: step: 604/464, loss: 0.37461990118026733 2023-01-24 03:21:02.315792: step: 606/464, loss: 0.11868741363286972 2023-01-24 03:21:03.013199: step: 608/464, loss: 0.20522792637348175 2023-01-24 03:21:03.724626: step: 610/464, loss: 0.08558636158704758 2023-01-24 03:21:04.428613: step: 612/464, loss: 0.11278170347213745 2023-01-24 03:21:05.149409: step: 614/464, loss: 0.3965875506401062 2023-01-24 03:21:05.902836: step: 616/464, loss: 0.7394839525222778 2023-01-24 03:21:06.702833: step: 618/464, loss: 0.4225020408630371 2023-01-24 03:21:07.435001: step: 620/464, loss: 0.36073020100593567 2023-01-24 03:21:08.189553: step: 622/464, loss: 0.07548674196004868 2023-01-24 03:21:08.909258: step: 624/464, loss: 0.06665664911270142 2023-01-24 03:21:09.671352: step: 626/464, loss: 0.3739912509918213 2023-01-24 03:21:10.395238: step: 628/464, loss: 0.117310531437397 2023-01-24 03:21:11.215193: step: 630/464, loss: 0.05616062879562378 2023-01-24 03:21:12.011198: step: 632/464, loss: 0.20686718821525574 2023-01-24 03:21:12.695975: step: 634/464, loss: 0.09264864772558212 2023-01-24 03:21:13.386418: step: 636/464, loss: 0.21587902307510376 2023-01-24 03:21:14.087885: step: 638/464, loss: 0.15633289515972137 2023-01-24 03:21:14.753573: step: 640/464, loss: 0.04739254713058472 2023-01-24 03:21:15.456113: step: 642/464, loss: 0.05121554806828499 2023-01-24 03:21:16.110575: step: 644/464, loss: 0.03187675401568413 2023-01-24 03:21:16.868871: step: 646/464, loss: 0.07687317579984665 2023-01-24 03:21:17.568959: step: 648/464, loss: 0.20603327453136444 2023-01-24 03:21:18.275553: step: 650/464, loss: 0.16405758261680603 2023-01-24 03:21:18.944717: step: 652/464, loss: 0.07752983272075653 2023-01-24 03:21:19.755864: step: 654/464, loss: 0.16881927847862244 2023-01-24 03:21:20.498378: step: 656/464, loss: 1.8332757949829102 2023-01-24 03:21:21.237394: step: 658/464, loss: 0.03292281553149223 2023-01-24 03:21:21.904985: step: 660/464, loss: 0.14211608469486237 2023-01-24 03:21:22.664425: step: 662/464, loss: 0.14775115251541138 2023-01-24 03:21:23.471254: step: 664/464, loss: 0.21745797991752625 2023-01-24 03:21:24.194850: step: 666/464, loss: 0.1632862389087677 2023-01-24 03:21:24.903568: step: 668/464, loss: 0.30970701575279236 2023-01-24 03:21:25.718097: step: 670/464, loss: 0.049705106765031815 2023-01-24 03:21:26.438977: step: 672/464, loss: 0.1696358174085617 2023-01-24 03:21:27.187664: step: 674/464, loss: 0.18233445286750793 2023-01-24 03:21:28.050419: step: 676/464, loss: 0.12525096535682678 2023-01-24 03:21:28.794782: step: 678/464, loss: 0.18021506071090698 2023-01-24 03:21:29.557450: step: 680/464, loss: 0.3737623691558838 2023-01-24 03:21:30.391638: step: 682/464, loss: 0.3178415596485138 2023-01-24 03:21:31.077044: step: 684/464, loss: 0.061164434999227524 2023-01-24 03:21:31.794225: step: 686/464, loss: 0.16735850274562836 2023-01-24 03:21:32.641370: step: 688/464, loss: 0.6186379194259644 2023-01-24 03:21:33.327341: step: 690/464, loss: 0.17663516104221344 2023-01-24 03:21:34.116654: step: 692/464, loss: 0.16589435935020447 2023-01-24 03:21:34.814132: step: 694/464, loss: 0.21240673959255219 2023-01-24 03:21:35.487757: step: 696/464, loss: 0.18954595923423767 2023-01-24 03:21:36.267232: step: 698/464, loss: 0.20750971138477325 2023-01-24 03:21:37.107967: step: 700/464, loss: 0.44897404313087463 2023-01-24 03:21:37.791602: step: 702/464, loss: 2.3649990558624268 2023-01-24 03:21:38.553149: step: 704/464, loss: 0.7894167900085449 2023-01-24 03:21:39.279487: step: 706/464, loss: 0.04682024195790291 2023-01-24 03:21:40.057472: step: 708/464, loss: 0.353426069021225 2023-01-24 03:21:40.834033: step: 710/464, loss: 0.3856732249259949 2023-01-24 03:21:41.576675: step: 712/464, loss: 0.10253427922725677 2023-01-24 03:21:42.372455: step: 714/464, loss: 0.1930823177099228 2023-01-24 03:21:43.118362: step: 716/464, loss: 0.5934716463088989 2023-01-24 03:21:43.883025: step: 718/464, loss: 0.4938408434391022 2023-01-24 03:21:44.698876: step: 720/464, loss: 0.10433968156576157 2023-01-24 03:21:45.418580: step: 722/464, loss: 0.16280996799468994 2023-01-24 03:21:46.209080: step: 724/464, loss: 0.5624796152114868 2023-01-24 03:21:47.003956: step: 726/464, loss: 0.05956294387578964 2023-01-24 03:21:47.693454: step: 728/464, loss: 0.8196101784706116 2023-01-24 03:21:48.443874: step: 730/464, loss: 0.13151977956295013 2023-01-24 03:21:49.209452: step: 732/464, loss: 0.3388148248195648 2023-01-24 03:21:49.938652: step: 734/464, loss: 0.17667055130004883 2023-01-24 03:21:50.705763: step: 736/464, loss: 0.2739188075065613 2023-01-24 03:21:51.337219: step: 738/464, loss: 0.14711619913578033 2023-01-24 03:21:52.034359: step: 740/464, loss: 0.07891000062227249 2023-01-24 03:21:52.760622: step: 742/464, loss: 0.12373429536819458 2023-01-24 03:21:53.554666: step: 744/464, loss: 2.3303115367889404 2023-01-24 03:21:54.368010: step: 746/464, loss: 0.0886702761054039 2023-01-24 03:21:55.194465: step: 748/464, loss: 1.4180445671081543 2023-01-24 03:21:55.873527: step: 750/464, loss: 0.057705819606781006 2023-01-24 03:21:56.660124: step: 752/464, loss: 0.14857631921768188 2023-01-24 03:21:57.412515: step: 754/464, loss: 0.048156969249248505 2023-01-24 03:21:58.089024: step: 756/464, loss: 0.1455095112323761 2023-01-24 03:21:58.772743: step: 758/464, loss: 0.09087461978197098 2023-01-24 03:21:59.497464: step: 760/464, loss: 0.1472252607345581 2023-01-24 03:22:00.254029: step: 762/464, loss: 0.08561497181653976 2023-01-24 03:22:00.958241: step: 764/464, loss: 0.10442690551280975 2023-01-24 03:22:01.653764: step: 766/464, loss: 0.12880218029022217 2023-01-24 03:22:02.401446: step: 768/464, loss: 0.498107373714447 2023-01-24 03:22:03.091606: step: 770/464, loss: 0.14554743468761444 2023-01-24 03:22:03.840645: step: 772/464, loss: 0.1577606350183487 2023-01-24 03:22:04.584262: step: 774/464, loss: 0.19420844316482544 2023-01-24 03:22:05.325654: step: 776/464, loss: 0.07381264120340347 2023-01-24 03:22:06.087635: step: 778/464, loss: 0.1493281126022339 2023-01-24 03:22:06.789793: step: 780/464, loss: 0.03374498337507248 2023-01-24 03:22:07.578522: step: 782/464, loss: 0.10605400055646896 2023-01-24 03:22:08.236848: step: 784/464, loss: 0.28580501675605774 2023-01-24 03:22:09.048578: step: 786/464, loss: 0.26233959197998047 2023-01-24 03:22:09.794981: step: 788/464, loss: 0.05284832417964935 2023-01-24 03:22:10.590488: step: 790/464, loss: 0.12521898746490479 2023-01-24 03:22:11.331884: step: 792/464, loss: 0.20476263761520386 2023-01-24 03:22:12.135225: step: 794/464, loss: 0.18052512407302856 2023-01-24 03:22:12.863552: step: 796/464, loss: 0.309608519077301 2023-01-24 03:22:13.574318: step: 798/464, loss: 0.13055773079395294 2023-01-24 03:22:14.318172: step: 800/464, loss: 0.22315725684165955 2023-01-24 03:22:15.063585: step: 802/464, loss: 0.2699635624885559 2023-01-24 03:22:15.748723: step: 804/464, loss: 0.17608486115932465 2023-01-24 03:22:16.425342: step: 806/464, loss: 0.05220549553632736 2023-01-24 03:22:17.221594: step: 808/464, loss: 0.16342726349830627 2023-01-24 03:22:17.932846: step: 810/464, loss: 0.17474690079689026 2023-01-24 03:22:18.652437: step: 812/464, loss: 0.2082083821296692 2023-01-24 03:22:19.356828: step: 814/464, loss: 0.21456584334373474 2023-01-24 03:22:20.069117: step: 816/464, loss: 0.07093963772058487 2023-01-24 03:22:20.797383: step: 818/464, loss: 0.12586569786071777 2023-01-24 03:22:21.546881: step: 820/464, loss: 0.4578618109226227 2023-01-24 03:22:22.284508: step: 822/464, loss: 0.10049530863761902 2023-01-24 03:22:23.021757: step: 824/464, loss: 0.7494491338729858 2023-01-24 03:22:23.773921: step: 826/464, loss: 0.10575184226036072 2023-01-24 03:22:24.517852: step: 828/464, loss: 0.4233231246471405 2023-01-24 03:22:25.184095: step: 830/464, loss: 0.15735939145088196 2023-01-24 03:22:25.900363: step: 832/464, loss: 0.21875634789466858 2023-01-24 03:22:26.586463: step: 834/464, loss: 0.2116895616054535 2023-01-24 03:22:27.289796: step: 836/464, loss: 0.05463794991374016 2023-01-24 03:22:27.920736: step: 838/464, loss: 0.06432764232158661 2023-01-24 03:22:28.652174: step: 840/464, loss: 0.07017546892166138 2023-01-24 03:22:29.449694: step: 842/464, loss: 0.5909464955329895 2023-01-24 03:22:30.245382: step: 844/464, loss: 0.17316317558288574 2023-01-24 03:22:31.058753: step: 846/464, loss: 0.14773967862129211 2023-01-24 03:22:31.779616: step: 848/464, loss: 0.19056522846221924 2023-01-24 03:22:32.463597: step: 850/464, loss: 0.20791678130626678 2023-01-24 03:22:33.227817: step: 852/464, loss: 0.3895057141780853 2023-01-24 03:22:33.983890: step: 854/464, loss: 0.13461123406887054 2023-01-24 03:22:34.681457: step: 856/464, loss: 0.1486469954252243 2023-01-24 03:22:35.488701: step: 858/464, loss: 0.12327220290899277 2023-01-24 03:22:36.195197: step: 860/464, loss: 0.11395876854658127 2023-01-24 03:22:36.999322: step: 862/464, loss: 0.0672190859913826 2023-01-24 03:22:37.832370: step: 864/464, loss: 0.08600833266973495 2023-01-24 03:22:38.489690: step: 866/464, loss: 0.21235749125480652 2023-01-24 03:22:39.216753: step: 868/464, loss: 0.4932500422000885 2023-01-24 03:22:39.947637: step: 870/464, loss: 0.19560837745666504 2023-01-24 03:22:40.752367: step: 872/464, loss: 0.35193949937820435 2023-01-24 03:22:41.514797: step: 874/464, loss: 0.17032520473003387 2023-01-24 03:22:42.310585: step: 876/464, loss: 0.1449870467185974 2023-01-24 03:22:43.015619: step: 878/464, loss: 0.14561982452869415 2023-01-24 03:22:43.669099: step: 880/464, loss: 0.16295947134494781 2023-01-24 03:22:44.388900: step: 882/464, loss: 0.1561228334903717 2023-01-24 03:22:45.118653: step: 884/464, loss: 0.24844062328338623 2023-01-24 03:22:45.873694: step: 886/464, loss: 0.5855473279953003 2023-01-24 03:22:46.575070: step: 888/464, loss: 0.9291168451309204 2023-01-24 03:22:47.306702: step: 890/464, loss: 0.16777721047401428 2023-01-24 03:22:48.030409: step: 892/464, loss: 0.30441814661026 2023-01-24 03:22:48.784039: step: 894/464, loss: 0.1941244751214981 2023-01-24 03:22:49.553528: step: 896/464, loss: 0.10518061369657516 2023-01-24 03:22:50.362405: step: 898/464, loss: 0.282848984003067 2023-01-24 03:22:51.102056: step: 900/464, loss: 0.4408087134361267 2023-01-24 03:22:51.855920: step: 902/464, loss: 0.07189252972602844 2023-01-24 03:22:52.638263: step: 904/464, loss: 0.06143733859062195 2023-01-24 03:22:53.518542: step: 906/464, loss: 0.09300398826599121 2023-01-24 03:22:54.267420: step: 908/464, loss: 0.07987049967050552 2023-01-24 03:22:54.983459: step: 910/464, loss: 0.16798578202724457 2023-01-24 03:22:55.720562: step: 912/464, loss: 0.1030452772974968 2023-01-24 03:22:56.541499: step: 914/464, loss: 0.36905360221862793 2023-01-24 03:22:57.305119: step: 916/464, loss: 0.1241687461733818 2023-01-24 03:22:58.081640: step: 918/464, loss: 0.20748136937618256 2023-01-24 03:22:58.813971: step: 920/464, loss: 0.16023887693881989 2023-01-24 03:22:59.502801: step: 922/464, loss: 0.22584021091461182 2023-01-24 03:23:00.258010: step: 924/464, loss: 0.19338735938072205 2023-01-24 03:23:01.016247: step: 926/464, loss: 0.16376447677612305 2023-01-24 03:23:01.847699: step: 928/464, loss: 0.41982167959213257 2023-01-24 03:23:02.574401: step: 930/464, loss: 0.07592824101448059 ================================================== Loss: 0.262 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3444427339901478, 'r': 0.34117477636215776, 'f1': 0.34280096690725864}, 'combined': 0.2525901861421906, 'epoch': 12} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32594264866344497, 'r': 0.2669441276887561, 'f1': 0.2935078989595785}, 'combined': 0.18228385303805403, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32655865883638346, 'r': 0.32408003523990236, 'f1': 0.3253146258503401}, 'combined': 0.23970551378446112, 'epoch': 12} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3213659532118916, 'r': 0.2626182246109035, 'f1': 0.28903713247007545}, 'combined': 0.17950727174457318, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34964797184513347, 'r': 0.3377055363741422, 'f1': 0.3435730070833454}, 'combined': 0.2531590578508861, 'epoch': 12} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.33595516660539493, 'r': 0.2728805799897575, 'f1': 0.30115065098106286}, 'combined': 0.1870304042935022, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36160714285714285, 'r': 0.2892857142857143, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29838709677419356, 'r': 0.40217391304347827, 'f1': 0.34259259259259267}, 'combined': 0.17129629629629634, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4326923076923077, 'r': 0.1939655172413793, 'f1': 0.26785714285714285}, 'combined': 0.17857142857142855, 'epoch': 12} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3444427339901478, 'r': 0.34117477636215776, 'f1': 0.34280096690725864}, 'combined': 0.2525901861421906, 'epoch': 12} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32594264866344497, 'r': 0.2669441276887561, 'f1': 0.2935078989595785}, 'combined': 0.18228385303805403, 'epoch': 12} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36160714285714285, 'r': 0.2892857142857143, 'f1': 0.32142857142857145}, 'combined': 0.2142857142857143, 'epoch': 12} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:25:48.221613: step: 2/464, loss: 0.10845023393630981 2023-01-24 03:25:48.932458: step: 4/464, loss: 0.24459651112556458 2023-01-24 03:25:49.598209: step: 6/464, loss: 0.04099079221487045 2023-01-24 03:25:50.356374: step: 8/464, loss: 0.1733992099761963 2023-01-24 03:25:51.146658: step: 10/464, loss: 0.42204564809799194 2023-01-24 03:25:51.847394: step: 12/464, loss: 0.058562155812978745 2023-01-24 03:25:52.621491: step: 14/464, loss: 0.13704180717468262 2023-01-24 03:25:53.381777: step: 16/464, loss: 4.886255741119385 2023-01-24 03:25:54.120787: step: 18/464, loss: 0.04163377359509468 2023-01-24 03:25:54.795052: step: 20/464, loss: 0.1298331320285797 2023-01-24 03:25:55.606685: step: 22/464, loss: 0.11849203705787659 2023-01-24 03:25:56.335942: step: 24/464, loss: 0.12065554410219193 2023-01-24 03:25:57.121255: step: 26/464, loss: 0.32254287600517273 2023-01-24 03:25:57.871740: step: 28/464, loss: 0.10869194567203522 2023-01-24 03:25:58.569468: step: 30/464, loss: 0.1350581794977188 2023-01-24 03:25:59.339453: step: 32/464, loss: 0.34600409865379333 2023-01-24 03:26:00.045302: step: 34/464, loss: 0.14678220450878143 2023-01-24 03:26:00.696823: step: 36/464, loss: 0.10113175958395004 2023-01-24 03:26:01.424264: step: 38/464, loss: 0.08116129785776138 2023-01-24 03:26:02.096549: step: 40/464, loss: 0.22657261788845062 2023-01-24 03:26:02.832656: step: 42/464, loss: 0.06568806618452072 2023-01-24 03:26:03.567971: step: 44/464, loss: 0.046456482261419296 2023-01-24 03:26:04.284118: step: 46/464, loss: 0.08516017347574234 2023-01-24 03:26:05.002849: step: 48/464, loss: 0.1707119345664978 2023-01-24 03:26:05.674561: step: 50/464, loss: 0.44092610478401184 2023-01-24 03:26:06.421016: step: 52/464, loss: 0.13745073974132538 2023-01-24 03:26:07.109040: step: 54/464, loss: 0.0608690120279789 2023-01-24 03:26:07.858600: step: 56/464, loss: 0.21165050566196442 2023-01-24 03:26:08.592142: step: 58/464, loss: 0.09404819458723068 2023-01-24 03:26:09.370642: step: 60/464, loss: 0.09275195747613907 2023-01-24 03:26:10.103757: step: 62/464, loss: 0.2595752477645874 2023-01-24 03:26:10.798982: step: 64/464, loss: 0.0873362123966217 2023-01-24 03:26:11.553225: step: 66/464, loss: 0.35337966680526733 2023-01-24 03:26:12.317378: step: 68/464, loss: 0.19586828351020813 2023-01-24 03:26:13.104671: step: 70/464, loss: 0.10347774624824524 2023-01-24 03:26:13.842617: step: 72/464, loss: 0.1529945284128189 2023-01-24 03:26:14.686129: step: 74/464, loss: 0.17042525112628937 2023-01-24 03:26:15.448982: step: 76/464, loss: 0.06554456055164337 2023-01-24 03:26:16.144296: step: 78/464, loss: 0.03990291431546211 2023-01-24 03:26:16.888610: step: 80/464, loss: 0.06882677972316742 2023-01-24 03:26:17.627379: step: 82/464, loss: 0.2006833404302597 2023-01-24 03:26:18.447882: step: 84/464, loss: 0.15863539278507233 2023-01-24 03:26:19.134938: step: 86/464, loss: 0.05487312376499176 2023-01-24 03:26:19.866081: step: 88/464, loss: 0.061266910284757614 2023-01-24 03:26:20.516051: step: 90/464, loss: 0.10806545615196228 2023-01-24 03:26:21.203388: step: 92/464, loss: 0.12428941577672958 2023-01-24 03:26:21.989008: step: 94/464, loss: 0.5336252450942993 2023-01-24 03:26:22.742461: step: 96/464, loss: 0.9217267036437988 2023-01-24 03:26:23.459829: step: 98/464, loss: 0.12581682205200195 2023-01-24 03:26:24.146581: step: 100/464, loss: 0.18978308141231537 2023-01-24 03:26:24.903153: step: 102/464, loss: 0.1525404453277588 2023-01-24 03:26:25.702118: step: 104/464, loss: 0.12237998098134995 2023-01-24 03:26:26.386926: step: 106/464, loss: 0.12113020569086075 2023-01-24 03:26:27.204160: step: 108/464, loss: 0.05587160959839821 2023-01-24 03:26:27.947110: step: 110/464, loss: 0.20736397802829742 2023-01-24 03:26:28.693531: step: 112/464, loss: 0.2167879045009613 2023-01-24 03:26:29.490001: step: 114/464, loss: 0.028888702392578125 2023-01-24 03:26:30.218523: step: 116/464, loss: 0.1010793000459671 2023-01-24 03:26:30.928707: step: 118/464, loss: 0.24765628576278687 2023-01-24 03:26:31.657781: step: 120/464, loss: 0.4231231212615967 2023-01-24 03:26:32.361117: step: 122/464, loss: 0.061653271317481995 2023-01-24 03:26:33.168282: step: 124/464, loss: 0.516242265701294 2023-01-24 03:26:33.836348: step: 126/464, loss: 0.03295483440160751 2023-01-24 03:26:34.510427: step: 128/464, loss: 0.07076632976531982 2023-01-24 03:26:35.297065: step: 130/464, loss: 0.398122638463974 2023-01-24 03:26:36.020496: step: 132/464, loss: 0.0682402104139328 2023-01-24 03:26:36.774819: step: 134/464, loss: 0.46888625621795654 2023-01-24 03:26:37.504967: step: 136/464, loss: 0.10444146394729614 2023-01-24 03:26:38.293836: step: 138/464, loss: 0.28288185596466064 2023-01-24 03:26:39.041214: step: 140/464, loss: 0.41589614748954773 2023-01-24 03:26:39.758213: step: 142/464, loss: 0.12509053945541382 2023-01-24 03:26:40.455426: step: 144/464, loss: 0.06599561870098114 2023-01-24 03:26:41.213693: step: 146/464, loss: 0.5306773781776428 2023-01-24 03:26:41.958867: step: 148/464, loss: 1.9244468212127686 2023-01-24 03:26:42.689584: step: 150/464, loss: 0.12096443772315979 2023-01-24 03:26:43.525705: step: 152/464, loss: 0.37596365809440613 2023-01-24 03:26:44.266105: step: 154/464, loss: 0.21632319688796997 2023-01-24 03:26:44.988872: step: 156/464, loss: 0.03397297114133835 2023-01-24 03:26:45.788863: step: 158/464, loss: 0.1000724658370018 2023-01-24 03:26:46.583543: step: 160/464, loss: 0.10586489737033844 2023-01-24 03:26:47.312967: step: 162/464, loss: 0.13357606530189514 2023-01-24 03:26:48.085272: step: 164/464, loss: 0.18611209094524384 2023-01-24 03:26:48.873022: step: 166/464, loss: 0.08677546679973602 2023-01-24 03:26:49.662707: step: 168/464, loss: 0.1102922335267067 2023-01-24 03:26:50.418980: step: 170/464, loss: 0.04976590722799301 2023-01-24 03:26:51.132542: step: 172/464, loss: 0.09167968481779099 2023-01-24 03:26:51.991569: step: 174/464, loss: 0.21131104230880737 2023-01-24 03:26:52.742853: step: 176/464, loss: 0.030156195163726807 2023-01-24 03:26:53.478802: step: 178/464, loss: 0.0817871242761612 2023-01-24 03:26:54.178581: step: 180/464, loss: 0.08249973505735397 2023-01-24 03:26:54.857330: step: 182/464, loss: 0.09012583643198013 2023-01-24 03:26:55.578414: step: 184/464, loss: 0.20371857285499573 2023-01-24 03:26:56.372120: step: 186/464, loss: 0.11443076282739639 2023-01-24 03:26:57.150219: step: 188/464, loss: 0.11218014359474182 2023-01-24 03:26:57.949789: step: 190/464, loss: 0.12328223139047623 2023-01-24 03:26:58.722094: step: 192/464, loss: 0.1352798342704773 2023-01-24 03:26:59.486683: step: 194/464, loss: 0.7996044158935547 2023-01-24 03:27:00.194919: step: 196/464, loss: 0.5265755653381348 2023-01-24 03:27:00.953677: step: 198/464, loss: 0.2508794963359833 2023-01-24 03:27:01.670288: step: 200/464, loss: 0.5351430177688599 2023-01-24 03:27:02.523195: step: 202/464, loss: 0.22345688939094543 2023-01-24 03:27:03.232357: step: 204/464, loss: 0.14427419006824493 2023-01-24 03:27:03.968508: step: 206/464, loss: 0.16566066443920135 2023-01-24 03:27:04.676338: step: 208/464, loss: 0.509405255317688 2023-01-24 03:27:05.446789: step: 210/464, loss: 0.20510387420654297 2023-01-24 03:27:06.383761: step: 212/464, loss: 0.10925981402397156 2023-01-24 03:27:07.179989: step: 214/464, loss: 0.05766135826706886 2023-01-24 03:27:07.861492: step: 216/464, loss: 0.2569079101085663 2023-01-24 03:27:08.596006: step: 218/464, loss: 0.16548825800418854 2023-01-24 03:27:09.371062: step: 220/464, loss: 0.038141459226608276 2023-01-24 03:27:10.154839: step: 222/464, loss: 0.08520923554897308 2023-01-24 03:27:10.853853: step: 224/464, loss: 0.5157656073570251 2023-01-24 03:27:11.583304: step: 226/464, loss: 0.04011956602334976 2023-01-24 03:27:12.409623: step: 228/464, loss: 0.1085488423705101 2023-01-24 03:27:13.081427: step: 230/464, loss: 0.20553813874721527 2023-01-24 03:27:13.810362: step: 232/464, loss: 0.1176021471619606 2023-01-24 03:27:14.573902: step: 234/464, loss: 0.17439326643943787 2023-01-24 03:27:15.284797: step: 236/464, loss: 0.6796504259109497 2023-01-24 03:27:15.930135: step: 238/464, loss: 0.05329074710607529 2023-01-24 03:27:16.626820: step: 240/464, loss: 0.14916737377643585 2023-01-24 03:27:17.413546: step: 242/464, loss: 0.6748087406158447 2023-01-24 03:27:18.119418: step: 244/464, loss: 0.4097531735897064 2023-01-24 03:27:18.983511: step: 246/464, loss: 0.07001913338899612 2023-01-24 03:27:19.688950: step: 248/464, loss: 0.1428634524345398 2023-01-24 03:27:20.415084: step: 250/464, loss: 0.8062233924865723 2023-01-24 03:27:21.109023: step: 252/464, loss: 0.13343364000320435 2023-01-24 03:27:21.931696: step: 254/464, loss: 0.21857410669326782 2023-01-24 03:27:22.641107: step: 256/464, loss: 0.11554975062608719 2023-01-24 03:27:23.368037: step: 258/464, loss: 1.3301513195037842 2023-01-24 03:27:24.144732: step: 260/464, loss: 0.1528233289718628 2023-01-24 03:27:24.848648: step: 262/464, loss: 0.15933214128017426 2023-01-24 03:27:25.709156: step: 264/464, loss: 0.11591895669698715 2023-01-24 03:27:26.436195: step: 266/464, loss: 0.08424033224582672 2023-01-24 03:27:27.295481: step: 268/464, loss: 0.2541303038597107 2023-01-24 03:27:28.006420: step: 270/464, loss: 0.07291339337825775 2023-01-24 03:27:28.690891: step: 272/464, loss: 0.06928709894418716 2023-01-24 03:27:29.419601: step: 274/464, loss: 0.6142052412033081 2023-01-24 03:27:30.221811: step: 276/464, loss: 0.45714521408081055 2023-01-24 03:27:30.927188: step: 278/464, loss: 0.024776801466941833 2023-01-24 03:27:31.733392: step: 280/464, loss: 0.240465447306633 2023-01-24 03:27:32.560226: step: 282/464, loss: 0.19866108894348145 2023-01-24 03:27:33.319911: step: 284/464, loss: 0.0858876183629036 2023-01-24 03:27:34.088486: step: 286/464, loss: 0.11999360471963882 2023-01-24 03:27:34.794157: step: 288/464, loss: 0.10557374358177185 2023-01-24 03:27:35.466530: step: 290/464, loss: 0.050895802676677704 2023-01-24 03:27:36.176015: step: 292/464, loss: 0.08306840807199478 2023-01-24 03:27:36.967311: step: 294/464, loss: 1.0396608114242554 2023-01-24 03:27:37.694935: step: 296/464, loss: 0.3304722011089325 2023-01-24 03:27:38.513350: step: 298/464, loss: 0.15357911586761475 2023-01-24 03:27:39.190018: step: 300/464, loss: 0.3373052477836609 2023-01-24 03:27:39.908722: step: 302/464, loss: 0.20188729465007782 2023-01-24 03:27:40.560633: step: 304/464, loss: 0.21684570610523224 2023-01-24 03:27:41.290380: step: 306/464, loss: 0.199372798204422 2023-01-24 03:27:42.018244: step: 308/464, loss: 0.6930966377258301 2023-01-24 03:27:42.708441: step: 310/464, loss: 0.08602418750524521 2023-01-24 03:27:43.459638: step: 312/464, loss: 0.08976003527641296 2023-01-24 03:27:44.186695: step: 314/464, loss: 0.07710489630699158 2023-01-24 03:27:44.908000: step: 316/464, loss: 0.03334254398941994 2023-01-24 03:27:45.629289: step: 318/464, loss: 0.1080012395977974 2023-01-24 03:27:46.363553: step: 320/464, loss: 1.0282444953918457 2023-01-24 03:27:47.123660: step: 322/464, loss: 0.11568324267864227 2023-01-24 03:27:47.873234: step: 324/464, loss: 0.04866556078195572 2023-01-24 03:27:48.669272: step: 326/464, loss: 0.18815982341766357 2023-01-24 03:27:49.379086: step: 328/464, loss: 0.2963554263114929 2023-01-24 03:27:50.124869: step: 330/464, loss: 0.2101053148508072 2023-01-24 03:27:50.792624: step: 332/464, loss: 0.11286016553640366 2023-01-24 03:27:51.549487: step: 334/464, loss: 0.26691845059394836 2023-01-24 03:27:52.272770: step: 336/464, loss: 0.12511800229549408 2023-01-24 03:27:53.020922: step: 338/464, loss: 0.06494363397359848 2023-01-24 03:27:53.819078: step: 340/464, loss: 0.10067279636859894 2023-01-24 03:27:54.545891: step: 342/464, loss: 0.08120899647474289 2023-01-24 03:27:55.238363: step: 344/464, loss: 0.07520805299282074 2023-01-24 03:27:56.011977: step: 346/464, loss: 0.1738840788602829 2023-01-24 03:27:56.755018: step: 348/464, loss: 0.05793759599328041 2023-01-24 03:27:57.465953: step: 350/464, loss: 0.15568017959594727 2023-01-24 03:27:58.281650: step: 352/464, loss: 0.08221323788166046 2023-01-24 03:27:59.034762: step: 354/464, loss: 0.18200474977493286 2023-01-24 03:27:59.743218: step: 356/464, loss: 0.12087699770927429 2023-01-24 03:28:00.516561: step: 358/464, loss: 0.11079593002796173 2023-01-24 03:28:01.273691: step: 360/464, loss: 0.5768294930458069 2023-01-24 03:28:02.026852: step: 362/464, loss: 0.09069310873746872 2023-01-24 03:28:02.789960: step: 364/464, loss: 1.772871732711792 2023-01-24 03:28:03.467593: step: 366/464, loss: 0.13492810726165771 2023-01-24 03:28:04.208886: step: 368/464, loss: 0.3811272978782654 2023-01-24 03:28:04.964193: step: 370/464, loss: 0.153494730591774 2023-01-24 03:28:05.705285: step: 372/464, loss: 0.07832568883895874 2023-01-24 03:28:06.537572: step: 374/464, loss: 0.09619408845901489 2023-01-24 03:28:07.259667: step: 376/464, loss: 0.06830616295337677 2023-01-24 03:28:07.999420: step: 378/464, loss: 0.5078091621398926 2023-01-24 03:28:08.726997: step: 380/464, loss: 0.07470594346523285 2023-01-24 03:28:09.525093: step: 382/464, loss: 0.2070901244878769 2023-01-24 03:28:10.260826: step: 384/464, loss: 0.2671006917953491 2023-01-24 03:28:11.093465: step: 386/464, loss: 0.13977664709091187 2023-01-24 03:28:11.843008: step: 388/464, loss: 2.476294994354248 2023-01-24 03:28:12.614540: step: 390/464, loss: 0.9573323726654053 2023-01-24 03:28:13.355787: step: 392/464, loss: 0.3616604208946228 2023-01-24 03:28:14.138297: step: 394/464, loss: 0.08241743594408035 2023-01-24 03:28:14.895695: step: 396/464, loss: 0.17898499965667725 2023-01-24 03:28:15.651910: step: 398/464, loss: 0.09183957427740097 2023-01-24 03:28:16.366441: step: 400/464, loss: 0.14725667238235474 2023-01-24 03:28:17.056359: step: 402/464, loss: 0.25489723682403564 2023-01-24 03:28:17.796054: step: 404/464, loss: 0.5332356691360474 2023-01-24 03:28:18.455124: step: 406/464, loss: 0.12564606964588165 2023-01-24 03:28:19.159398: step: 408/464, loss: 0.10011353343725204 2023-01-24 03:28:19.859837: step: 410/464, loss: 0.06462383270263672 2023-01-24 03:28:20.610044: step: 412/464, loss: 0.06372683495283127 2023-01-24 03:28:21.412077: step: 414/464, loss: 0.9359332323074341 2023-01-24 03:28:22.146892: step: 416/464, loss: 0.47240427136421204 2023-01-24 03:28:22.879937: step: 418/464, loss: 0.15439915657043457 2023-01-24 03:28:23.627857: step: 420/464, loss: 0.13898834586143494 2023-01-24 03:28:24.389845: step: 422/464, loss: 0.4279281795024872 2023-01-24 03:28:25.106004: step: 424/464, loss: 0.15131518244743347 2023-01-24 03:28:25.826049: step: 426/464, loss: 0.19741450250148773 2023-01-24 03:28:26.589861: step: 428/464, loss: 0.8428908586502075 2023-01-24 03:28:27.291712: step: 430/464, loss: 0.1573386788368225 2023-01-24 03:28:28.051317: step: 432/464, loss: 0.15728452801704407 2023-01-24 03:28:28.807306: step: 434/464, loss: 6.404244422912598 2023-01-24 03:28:29.746119: step: 436/464, loss: 0.15470275282859802 2023-01-24 03:28:30.399870: step: 438/464, loss: 0.39804452657699585 2023-01-24 03:28:31.034868: step: 440/464, loss: 0.0591038353741169 2023-01-24 03:28:31.711907: step: 442/464, loss: 0.53672194480896 2023-01-24 03:28:32.466231: step: 444/464, loss: 0.10315102338790894 2023-01-24 03:28:33.223034: step: 446/464, loss: 0.08972831070423126 2023-01-24 03:28:33.982484: step: 448/464, loss: 0.10898428410291672 2023-01-24 03:28:34.772326: step: 450/464, loss: 0.06059310585260391 2023-01-24 03:28:35.525028: step: 452/464, loss: 0.03156914561986923 2023-01-24 03:28:36.282718: step: 454/464, loss: 0.13455218076705933 2023-01-24 03:28:37.012973: step: 456/464, loss: 0.09226778149604797 2023-01-24 03:28:37.720546: step: 458/464, loss: 0.029771529138088226 2023-01-24 03:28:38.509456: step: 460/464, loss: 0.32640859484672546 2023-01-24 03:28:39.348021: step: 462/464, loss: 0.14315687119960785 2023-01-24 03:28:40.132372: step: 464/464, loss: 0.23702003061771393 2023-01-24 03:28:40.900410: step: 466/464, loss: 0.3839260935783386 2023-01-24 03:28:41.598275: step: 468/464, loss: 0.17654064297676086 2023-01-24 03:28:42.399769: step: 470/464, loss: 0.28439781069755554 2023-01-24 03:28:43.089363: step: 472/464, loss: 0.10062745958566666 2023-01-24 03:28:43.900484: step: 474/464, loss: 0.6959630846977234 2023-01-24 03:28:44.641351: step: 476/464, loss: 0.07147932797670364 2023-01-24 03:28:45.346722: step: 478/464, loss: 1.0545878410339355 2023-01-24 03:28:46.149239: step: 480/464, loss: 0.1763465404510498 2023-01-24 03:28:46.812693: step: 482/464, loss: 0.08560379594564438 2023-01-24 03:28:47.539458: step: 484/464, loss: 0.16833864152431488 2023-01-24 03:28:48.228051: step: 486/464, loss: 0.10443995893001556 2023-01-24 03:28:49.028681: step: 488/464, loss: 0.12339601665735245 2023-01-24 03:28:49.839135: step: 490/464, loss: 0.08989642560482025 2023-01-24 03:28:50.548419: step: 492/464, loss: 0.14790289103984833 2023-01-24 03:28:51.311869: step: 494/464, loss: 0.054193589836359024 2023-01-24 03:28:52.133480: step: 496/464, loss: 0.09965498745441437 2023-01-24 03:28:52.830908: step: 498/464, loss: 0.6218535304069519 2023-01-24 03:28:53.586087: step: 500/464, loss: 0.08032213151454926 2023-01-24 03:28:54.272248: step: 502/464, loss: 0.1574018895626068 2023-01-24 03:28:54.956190: step: 504/464, loss: 0.2863512337207794 2023-01-24 03:28:55.702313: step: 506/464, loss: 0.11170153319835663 2023-01-24 03:28:56.432032: step: 508/464, loss: 0.13133688271045685 2023-01-24 03:28:57.193447: step: 510/464, loss: 0.41238370537757874 2023-01-24 03:28:57.905496: step: 512/464, loss: 0.11662472039461136 2023-01-24 03:28:58.604384: step: 514/464, loss: 0.22386302053928375 2023-01-24 03:28:59.344368: step: 516/464, loss: 0.10225214809179306 2023-01-24 03:29:00.035400: step: 518/464, loss: 0.08818010240793228 2023-01-24 03:29:00.714502: step: 520/464, loss: 0.07073657214641571 2023-01-24 03:29:01.441576: step: 522/464, loss: 0.4500882625579834 2023-01-24 03:29:02.228898: step: 524/464, loss: 0.18536502122879028 2023-01-24 03:29:02.980003: step: 526/464, loss: 0.1312844306230545 2023-01-24 03:29:03.706345: step: 528/464, loss: 0.08108577877283096 2023-01-24 03:29:04.643317: step: 530/464, loss: 0.3510834276676178 2023-01-24 03:29:05.320901: step: 532/464, loss: 0.1260337233543396 2023-01-24 03:29:06.147630: step: 534/464, loss: 0.1317775845527649 2023-01-24 03:29:06.914184: step: 536/464, loss: 0.193727508187294 2023-01-24 03:29:07.649752: step: 538/464, loss: 0.05266972631216049 2023-01-24 03:29:08.401016: step: 540/464, loss: 0.14960551261901855 2023-01-24 03:29:09.136951: step: 542/464, loss: 0.27731019258499146 2023-01-24 03:29:09.839890: step: 544/464, loss: 0.0634918063879013 2023-01-24 03:29:10.545009: step: 546/464, loss: 0.024509821087121964 2023-01-24 03:29:11.279290: step: 548/464, loss: 0.9180271029472351 2023-01-24 03:29:11.995536: step: 550/464, loss: 0.17400743067264557 2023-01-24 03:29:12.699537: step: 552/464, loss: 0.13902823626995087 2023-01-24 03:29:13.380375: step: 554/464, loss: 0.11064955592155457 2023-01-24 03:29:14.172487: step: 556/464, loss: 0.14851465821266174 2023-01-24 03:29:14.943037: step: 558/464, loss: 0.10951215028762817 2023-01-24 03:29:15.660144: step: 560/464, loss: 0.15823350846767426 2023-01-24 03:29:16.411738: step: 562/464, loss: 0.0656096562743187 2023-01-24 03:29:17.252487: step: 564/464, loss: 0.174315944314003 2023-01-24 03:29:18.037602: step: 566/464, loss: 0.2251901626586914 2023-01-24 03:29:18.668948: step: 568/464, loss: 0.3147839605808258 2023-01-24 03:29:19.447250: step: 570/464, loss: 0.17955265939235687 2023-01-24 03:29:20.248483: step: 572/464, loss: 0.1577025055885315 2023-01-24 03:29:21.024530: step: 574/464, loss: 0.11040801554918289 2023-01-24 03:29:21.723773: step: 576/464, loss: 0.08774346113204956 2023-01-24 03:29:22.499189: step: 578/464, loss: 0.14458192884922028 2023-01-24 03:29:23.313758: step: 580/464, loss: 0.17943516373634338 2023-01-24 03:29:24.089695: step: 582/464, loss: 0.4944537878036499 2023-01-24 03:29:24.813235: step: 584/464, loss: 0.08026743680238724 2023-01-24 03:29:25.484190: step: 586/464, loss: 0.3535708785057068 2023-01-24 03:29:26.190221: step: 588/464, loss: 0.1416577249765396 2023-01-24 03:29:26.901009: step: 590/464, loss: 0.15077544748783112 2023-01-24 03:29:27.641642: step: 592/464, loss: 0.21983230113983154 2023-01-24 03:29:28.415079: step: 594/464, loss: 0.0527484267950058 2023-01-24 03:29:29.167060: step: 596/464, loss: 0.5502574443817139 2023-01-24 03:29:29.882872: step: 598/464, loss: 0.10318192839622498 2023-01-24 03:29:30.636710: step: 600/464, loss: 0.7747368812561035 2023-01-24 03:29:31.381711: step: 602/464, loss: 0.19791975617408752 2023-01-24 03:29:32.145652: step: 604/464, loss: 0.03329138085246086 2023-01-24 03:29:32.941057: step: 606/464, loss: 0.10343381017446518 2023-01-24 03:29:33.667383: step: 608/464, loss: 0.051676757633686066 2023-01-24 03:29:34.411410: step: 610/464, loss: 0.2705204486846924 2023-01-24 03:29:35.155888: step: 612/464, loss: 0.1125253215432167 2023-01-24 03:29:35.864615: step: 614/464, loss: 0.545214056968689 2023-01-24 03:29:36.673990: step: 616/464, loss: 0.2631594240665436 2023-01-24 03:29:37.406157: step: 618/464, loss: 0.10287163406610489 2023-01-24 03:29:38.175523: step: 620/464, loss: 0.06313641369342804 2023-01-24 03:29:39.015503: step: 622/464, loss: 0.15470679104328156 2023-01-24 03:29:39.762519: step: 624/464, loss: 0.2945192754268646 2023-01-24 03:29:40.478793: step: 626/464, loss: 0.43829968571662903 2023-01-24 03:29:41.336953: step: 628/464, loss: 0.1536589115858078 2023-01-24 03:29:42.154919: step: 630/464, loss: 0.06712499260902405 2023-01-24 03:29:42.871956: step: 632/464, loss: 0.2594963312149048 2023-01-24 03:29:43.695147: step: 634/464, loss: 0.08554352074861526 2023-01-24 03:29:44.515014: step: 636/464, loss: 0.3566303253173828 2023-01-24 03:29:45.215488: step: 638/464, loss: 0.24919937551021576 2023-01-24 03:29:45.922552: step: 640/464, loss: 0.09062323719263077 2023-01-24 03:29:46.679166: step: 642/464, loss: 0.7201142311096191 2023-01-24 03:29:47.408107: step: 644/464, loss: 0.3398234248161316 2023-01-24 03:29:48.112922: step: 646/464, loss: 0.3669608533382416 2023-01-24 03:29:48.889055: step: 648/464, loss: 0.10835307091474533 2023-01-24 03:29:49.649837: step: 650/464, loss: 0.07052277028560638 2023-01-24 03:29:50.435217: step: 652/464, loss: 0.07522750645875931 2023-01-24 03:29:51.216888: step: 654/464, loss: 0.37385356426239014 2023-01-24 03:29:51.945509: step: 656/464, loss: 0.09178084135055542 2023-01-24 03:29:52.652651: step: 658/464, loss: 0.07646650820970535 2023-01-24 03:29:53.461813: step: 660/464, loss: 0.3632817268371582 2023-01-24 03:29:54.198480: step: 662/464, loss: 1.816754937171936 2023-01-24 03:29:54.911989: step: 664/464, loss: 0.42521968483924866 2023-01-24 03:29:55.694184: step: 666/464, loss: 0.30828657746315 2023-01-24 03:29:56.426796: step: 668/464, loss: 0.10740874707698822 2023-01-24 03:29:57.172902: step: 670/464, loss: 0.12233463674783707 2023-01-24 03:29:57.894543: step: 672/464, loss: 0.05220026522874832 2023-01-24 03:29:58.626709: step: 674/464, loss: 0.07350676506757736 2023-01-24 03:29:59.425452: step: 676/464, loss: 0.8677085638046265 2023-01-24 03:30:00.189491: step: 678/464, loss: 0.12398731708526611 2023-01-24 03:30:00.910923: step: 680/464, loss: 0.4580591917037964 2023-01-24 03:30:01.680742: step: 682/464, loss: 0.30364760756492615 2023-01-24 03:30:02.503043: step: 684/464, loss: 0.0784701257944107 2023-01-24 03:30:03.202514: step: 686/464, loss: 0.02582676336169243 2023-01-24 03:30:03.915197: step: 688/464, loss: 0.2821761667728424 2023-01-24 03:30:04.671822: step: 690/464, loss: 0.12536336481571198 2023-01-24 03:30:05.418729: step: 692/464, loss: 0.05118219181895256 2023-01-24 03:30:06.168034: step: 694/464, loss: 0.07651496678590775 2023-01-24 03:30:06.961144: step: 696/464, loss: 0.16557954251766205 2023-01-24 03:30:07.694363: step: 698/464, loss: 0.2265680879354477 2023-01-24 03:30:08.499774: step: 700/464, loss: 0.37888118624687195 2023-01-24 03:30:09.229624: step: 702/464, loss: 0.13355915248394012 2023-01-24 03:30:09.985548: step: 704/464, loss: 0.2651720643043518 2023-01-24 03:30:10.665119: step: 706/464, loss: 0.4567992687225342 2023-01-24 03:30:11.498241: step: 708/464, loss: 0.24589699506759644 2023-01-24 03:30:12.168049: step: 710/464, loss: 0.5090956687927246 2023-01-24 03:30:12.905459: step: 712/464, loss: 0.1037251204252243 2023-01-24 03:30:13.602273: step: 714/464, loss: 0.1926645189523697 2023-01-24 03:30:14.302800: step: 716/464, loss: 0.0683947280049324 2023-01-24 03:30:15.060725: step: 718/464, loss: 0.11341574788093567 2023-01-24 03:30:15.779428: step: 720/464, loss: 0.1150423139333725 2023-01-24 03:30:16.517409: step: 722/464, loss: 0.09730232506990433 2023-01-24 03:30:17.335021: step: 724/464, loss: 0.0670548602938652 2023-01-24 03:30:18.061946: step: 726/464, loss: 0.0629471018910408 2023-01-24 03:30:18.783082: step: 728/464, loss: 0.0662987157702446 2023-01-24 03:30:19.464721: step: 730/464, loss: 0.05995099991559982 2023-01-24 03:30:20.252811: step: 732/464, loss: 0.15863323211669922 2023-01-24 03:30:20.982758: step: 734/464, loss: 0.10982012748718262 2023-01-24 03:30:21.663346: step: 736/464, loss: 0.11240449547767639 2023-01-24 03:30:22.392394: step: 738/464, loss: 0.22407962381839752 2023-01-24 03:30:23.202413: step: 740/464, loss: 0.10131499916315079 2023-01-24 03:30:24.028405: step: 742/464, loss: 0.31418102979660034 2023-01-24 03:30:24.867105: step: 744/464, loss: 0.11370068043470383 2023-01-24 03:30:25.694178: step: 746/464, loss: 0.41384071111679077 2023-01-24 03:30:26.562571: step: 748/464, loss: 0.08551589399576187 2023-01-24 03:30:27.305752: step: 750/464, loss: 0.0748281255364418 2023-01-24 03:30:28.135122: step: 752/464, loss: 0.07672805339097977 2023-01-24 03:30:28.901076: step: 754/464, loss: 0.09141271561384201 2023-01-24 03:30:29.643509: step: 756/464, loss: 0.11692787706851959 2023-01-24 03:30:30.381026: step: 758/464, loss: 0.05921081081032753 2023-01-24 03:30:31.099552: step: 760/464, loss: 0.2514224648475647 2023-01-24 03:30:31.825859: step: 762/464, loss: 0.509127140045166 2023-01-24 03:30:32.548320: step: 764/464, loss: 0.11973954737186432 2023-01-24 03:30:33.305976: step: 766/464, loss: 0.1569305807352066 2023-01-24 03:30:34.032791: step: 768/464, loss: 0.21292757987976074 2023-01-24 03:30:34.773529: step: 770/464, loss: 0.08074121177196503 2023-01-24 03:30:35.499862: step: 772/464, loss: 0.10858672112226486 2023-01-24 03:30:36.243568: step: 774/464, loss: 0.23681391775608063 2023-01-24 03:30:36.990433: step: 776/464, loss: 0.06215392425656319 2023-01-24 03:30:37.795181: step: 778/464, loss: 0.15121573209762573 2023-01-24 03:30:38.571815: step: 780/464, loss: 0.14246949553489685 2023-01-24 03:30:39.230067: step: 782/464, loss: 0.11438118666410446 2023-01-24 03:30:39.943360: step: 784/464, loss: 0.10496123880147934 2023-01-24 03:30:40.719879: step: 786/464, loss: 0.20628753304481506 2023-01-24 03:30:41.452638: step: 788/464, loss: 0.5667099356651306 2023-01-24 03:30:42.308968: step: 790/464, loss: 0.1638394445180893 2023-01-24 03:30:43.076666: step: 792/464, loss: 0.6485792398452759 2023-01-24 03:30:43.799346: step: 794/464, loss: 0.2431643158197403 2023-01-24 03:30:44.533274: step: 796/464, loss: 0.7835709452629089 2023-01-24 03:30:45.254376: step: 798/464, loss: 0.7168622016906738 2023-01-24 03:30:46.029473: step: 800/464, loss: 0.20961681008338928 2023-01-24 03:30:46.744060: step: 802/464, loss: 0.3085970878601074 2023-01-24 03:30:47.450451: step: 804/464, loss: 0.180080845952034 2023-01-24 03:30:48.246508: step: 806/464, loss: 0.028265029191970825 2023-01-24 03:30:49.018272: step: 808/464, loss: 0.24229370057582855 2023-01-24 03:30:49.759320: step: 810/464, loss: 0.10656850785017014 2023-01-24 03:30:50.549043: step: 812/464, loss: 6.04249382019043 2023-01-24 03:30:51.246434: step: 814/464, loss: 0.2670717239379883 2023-01-24 03:30:52.037843: step: 816/464, loss: 0.4301287531852722 2023-01-24 03:30:52.758461: step: 818/464, loss: 0.09188126027584076 2023-01-24 03:30:53.479761: step: 820/464, loss: 0.05205368250608444 2023-01-24 03:30:54.230971: step: 822/464, loss: 0.06847725808620453 2023-01-24 03:30:55.069081: step: 824/464, loss: 0.1087225005030632 2023-01-24 03:30:55.817495: step: 826/464, loss: 0.03399376943707466 2023-01-24 03:30:56.603795: step: 828/464, loss: 0.19704696536064148 2023-01-24 03:30:57.308788: step: 830/464, loss: 0.1437470018863678 2023-01-24 03:30:58.134388: step: 832/464, loss: 0.0708436593413353 2023-01-24 03:30:58.894556: step: 834/464, loss: 0.4941231906414032 2023-01-24 03:30:59.623577: step: 836/464, loss: 0.35776862502098083 2023-01-24 03:31:00.373752: step: 838/464, loss: 0.6135424375534058 2023-01-24 03:31:01.142680: step: 840/464, loss: 0.21788859367370605 2023-01-24 03:31:01.852077: step: 842/464, loss: 0.10796776413917542 2023-01-24 03:31:02.552129: step: 844/464, loss: 0.08246316015720367 2023-01-24 03:31:03.330964: step: 846/464, loss: 0.7474268674850464 2023-01-24 03:31:03.975337: step: 848/464, loss: 0.1485125720500946 2023-01-24 03:31:04.726116: step: 850/464, loss: 0.12459192425012589 2023-01-24 03:31:05.460568: step: 852/464, loss: 0.16107913851737976 2023-01-24 03:31:06.207051: step: 854/464, loss: 0.15444625914096832 2023-01-24 03:31:07.028085: step: 856/464, loss: 0.10845639556646347 2023-01-24 03:31:07.691847: step: 858/464, loss: 0.33226022124290466 2023-01-24 03:31:08.466253: step: 860/464, loss: 0.6130020618438721 2023-01-24 03:31:09.215602: step: 862/464, loss: 0.1199200227856636 2023-01-24 03:31:09.952636: step: 864/464, loss: 0.1062421128153801 2023-01-24 03:31:10.649423: step: 866/464, loss: 0.02942419983446598 2023-01-24 03:31:11.439359: step: 868/464, loss: 0.11861138045787811 2023-01-24 03:31:12.163177: step: 870/464, loss: 0.41673585772514343 2023-01-24 03:31:12.941213: step: 872/464, loss: 0.2176818698644638 2023-01-24 03:31:13.688512: step: 874/464, loss: 0.06265763193368912 2023-01-24 03:31:14.389591: step: 876/464, loss: 0.33866050839424133 2023-01-24 03:31:15.184606: step: 878/464, loss: 0.09827618300914764 2023-01-24 03:31:15.956871: step: 880/464, loss: 0.6393712759017944 2023-01-24 03:31:16.625072: step: 882/464, loss: 0.12278065085411072 2023-01-24 03:31:17.322551: step: 884/464, loss: 0.034253865480422974 2023-01-24 03:31:18.082805: step: 886/464, loss: 0.12241361290216446 2023-01-24 03:31:18.892135: step: 888/464, loss: 0.18203134834766388 2023-01-24 03:31:19.601887: step: 890/464, loss: 0.4041353762149811 2023-01-24 03:31:20.337572: step: 892/464, loss: 0.06696341186761856 2023-01-24 03:31:21.084137: step: 894/464, loss: 0.14808495342731476 2023-01-24 03:31:21.875526: step: 896/464, loss: 0.06377518177032471 2023-01-24 03:31:22.603206: step: 898/464, loss: 0.34000760316848755 2023-01-24 03:31:23.398440: step: 900/464, loss: 0.19238989055156708 2023-01-24 03:31:24.176381: step: 902/464, loss: 0.22991186380386353 2023-01-24 03:31:24.927114: step: 904/464, loss: 0.16415190696716309 2023-01-24 03:31:25.643144: step: 906/464, loss: 0.06087620556354523 2023-01-24 03:31:26.377011: step: 908/464, loss: 0.04438728466629982 2023-01-24 03:31:27.175399: step: 910/464, loss: 0.18937140703201294 2023-01-24 03:31:27.905707: step: 912/464, loss: 0.1797570437192917 2023-01-24 03:31:28.634763: step: 914/464, loss: 0.11785709112882614 2023-01-24 03:31:29.416131: step: 916/464, loss: 0.3231666684150696 2023-01-24 03:31:30.044107: step: 918/464, loss: 0.11942718923091888 2023-01-24 03:31:30.771806: step: 920/464, loss: 0.5971169471740723 2023-01-24 03:31:31.519287: step: 922/464, loss: 0.1580820530653 2023-01-24 03:31:32.282572: step: 924/464, loss: 0.5575023889541626 2023-01-24 03:31:33.124731: step: 926/464, loss: 0.1856396347284317 2023-01-24 03:31:33.824036: step: 928/464, loss: 0.07226559519767761 2023-01-24 03:31:34.541797: step: 930/464, loss: 0.09687872231006622 ================================================== Loss: 0.264 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3062964477955841, 'r': 0.3318695857519517, 'f1': 0.31857062238848544}, 'combined': 0.2347362480757261, 'epoch': 13} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3090923181073524, 'r': 0.26969221036441915, 'f1': 0.2880512051596751}, 'combined': 0.17889495899390348, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33499418625216076, 'r': 0.3477074381023376, 'f1': 0.3412324392549943}, 'combined': 0.2514344289247326, 'epoch': 13} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32612210795227636, 'r': 0.28132865636594595, 'f1': 0.3020738464109679}, 'combined': 0.1876037572447064, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2426470588235294, 'r': 0.358695652173913, 'f1': 0.2894736842105263}, 'combined': 0.14473684210526316, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46875, 'r': 0.1939655172413793, 'f1': 0.274390243902439}, 'combined': 0.18292682926829265, 'epoch': 13} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:34:21.559699: step: 2/464, loss: 0.12193366140127182 2023-01-24 03:34:22.313552: step: 4/464, loss: 0.07567768543958664 2023-01-24 03:34:22.913446: step: 6/464, loss: 0.08617586642503738 2023-01-24 03:34:23.634441: step: 8/464, loss: 0.06880141794681549 2023-01-24 03:34:24.327264: step: 10/464, loss: 0.2527928054332733 2023-01-24 03:34:25.076088: step: 12/464, loss: 0.13666020333766937 2023-01-24 03:34:25.815485: step: 14/464, loss: 0.9853371381759644 2023-01-24 03:34:26.569383: step: 16/464, loss: 0.16098473966121674 2023-01-24 03:34:27.306805: step: 18/464, loss: 0.05923319235444069 2023-01-24 03:34:28.109162: step: 20/464, loss: 0.2810465097427368 2023-01-24 03:34:28.959630: step: 22/464, loss: 0.1063055694103241 2023-01-24 03:34:29.704326: step: 24/464, loss: 0.08378912508487701 2023-01-24 03:34:30.436930: step: 26/464, loss: 0.13146083056926727 2023-01-24 03:34:31.187156: step: 28/464, loss: 0.0617261677980423 2023-01-24 03:34:31.854207: step: 30/464, loss: 0.0540962852537632 2023-01-24 03:34:32.577166: step: 32/464, loss: 0.10380225628614426 2023-01-24 03:34:33.298391: step: 34/464, loss: 0.04630668833851814 2023-01-24 03:34:34.002697: step: 36/464, loss: 0.06711572408676147 2023-01-24 03:34:34.868725: step: 38/464, loss: 0.1516401767730713 2023-01-24 03:34:35.605836: step: 40/464, loss: 0.04767386242747307 2023-01-24 03:34:36.385344: step: 42/464, loss: 0.05505099147558212 2023-01-24 03:34:37.185681: step: 44/464, loss: 0.6285038590431213 2023-01-24 03:34:37.952789: step: 46/464, loss: 0.4285893142223358 2023-01-24 03:34:38.670821: step: 48/464, loss: 0.2863122522830963 2023-01-24 03:34:39.385001: step: 50/464, loss: 0.17619168758392334 2023-01-24 03:34:40.061106: step: 52/464, loss: 0.10409979522228241 2023-01-24 03:34:40.764587: step: 54/464, loss: 0.11232734471559525 2023-01-24 03:34:41.536639: step: 56/464, loss: 0.0699315294623375 2023-01-24 03:34:42.330203: step: 58/464, loss: 0.12718603014945984 2023-01-24 03:34:43.063991: step: 60/464, loss: 0.1453656256198883 2023-01-24 03:34:43.839527: step: 62/464, loss: 0.06315405666828156 2023-01-24 03:34:44.691966: step: 64/464, loss: 0.19121019542217255 2023-01-24 03:34:45.426959: step: 66/464, loss: 0.11257484555244446 2023-01-24 03:34:46.173888: step: 68/464, loss: 0.27853450179100037 2023-01-24 03:34:46.853961: step: 70/464, loss: 0.45154255628585815 2023-01-24 03:34:47.575268: step: 72/464, loss: 0.16039679944515228 2023-01-24 03:34:48.292098: step: 74/464, loss: 0.1091892197728157 2023-01-24 03:34:48.992969: step: 76/464, loss: 0.17020484805107117 2023-01-24 03:34:49.713319: step: 78/464, loss: 0.08507838845252991 2023-01-24 03:34:50.495360: step: 80/464, loss: 0.07408726215362549 2023-01-24 03:34:51.231333: step: 82/464, loss: 0.06273666769266129 2023-01-24 03:34:51.929939: step: 84/464, loss: 0.06049322336912155 2023-01-24 03:34:52.694037: step: 86/464, loss: 0.0883559063076973 2023-01-24 03:34:53.355385: step: 88/464, loss: 0.21968767046928406 2023-01-24 03:34:54.024482: step: 90/464, loss: 0.1691584289073944 2023-01-24 03:34:54.790437: step: 92/464, loss: 0.14545169472694397 2023-01-24 03:34:55.513076: step: 94/464, loss: 0.08463499695062637 2023-01-24 03:34:56.272180: step: 96/464, loss: 0.211579829454422 2023-01-24 03:34:57.030740: step: 98/464, loss: 0.06251304596662521 2023-01-24 03:34:57.679892: step: 100/464, loss: 0.037433043122291565 2023-01-24 03:34:58.469192: step: 102/464, loss: 0.17153921723365784 2023-01-24 03:34:59.276159: step: 104/464, loss: 0.0873965322971344 2023-01-24 03:35:00.083832: step: 106/464, loss: 0.17969262599945068 2023-01-24 03:35:00.875385: step: 108/464, loss: 0.07596272975206375 2023-01-24 03:35:01.604038: step: 110/464, loss: 0.06037634611129761 2023-01-24 03:35:02.399977: step: 112/464, loss: 0.11389008164405823 2023-01-24 03:35:03.222282: step: 114/464, loss: 0.10771888494491577 2023-01-24 03:35:04.005939: step: 116/464, loss: 0.08732682466506958 2023-01-24 03:35:04.826391: step: 118/464, loss: 0.11964301764965057 2023-01-24 03:35:05.586834: step: 120/464, loss: 0.03714948520064354 2023-01-24 03:35:06.315036: step: 122/464, loss: 0.08723358064889908 2023-01-24 03:35:06.986561: step: 124/464, loss: 0.08652307838201523 2023-01-24 03:35:07.754325: step: 126/464, loss: 0.182054340839386 2023-01-24 03:35:08.461977: step: 128/464, loss: 0.03836066275835037 2023-01-24 03:35:09.204106: step: 130/464, loss: 0.05950037017464638 2023-01-24 03:35:10.053886: step: 132/464, loss: 0.04085146263241768 2023-01-24 03:35:10.739452: step: 134/464, loss: 0.07664566487073898 2023-01-24 03:35:11.532715: step: 136/464, loss: 0.16005469858646393 2023-01-24 03:35:12.306192: step: 138/464, loss: 0.04177999496459961 2023-01-24 03:35:13.000867: step: 140/464, loss: 0.46670863032341003 2023-01-24 03:35:13.707014: step: 142/464, loss: 0.5049713850021362 2023-01-24 03:35:14.455414: step: 144/464, loss: 0.037760179489851 2023-01-24 03:35:15.148394: step: 146/464, loss: 0.18749308586120605 2023-01-24 03:35:15.868402: step: 148/464, loss: 0.06520560383796692 2023-01-24 03:35:16.549668: step: 150/464, loss: 0.08682045340538025 2023-01-24 03:35:17.363383: step: 152/464, loss: 0.28364092111587524 2023-01-24 03:35:18.056841: step: 154/464, loss: 0.2779575288295746 2023-01-24 03:35:18.824130: step: 156/464, loss: 0.21594323217868805 2023-01-24 03:35:19.703498: step: 158/464, loss: 0.2842862010002136 2023-01-24 03:35:20.485633: step: 160/464, loss: 0.13473035395145416 2023-01-24 03:35:21.117210: step: 162/464, loss: 0.10318247228860855 2023-01-24 03:35:21.844276: step: 164/464, loss: 0.1572057455778122 2023-01-24 03:35:22.678489: step: 166/464, loss: 0.12798070907592773 2023-01-24 03:35:23.379826: step: 168/464, loss: 0.07746720314025879 2023-01-24 03:35:24.183796: step: 170/464, loss: 0.06719473749399185 2023-01-24 03:35:24.963668: step: 172/464, loss: 0.5005451440811157 2023-01-24 03:35:25.713936: step: 174/464, loss: 0.11085796356201172 2023-01-24 03:35:26.487830: step: 176/464, loss: 0.08189515769481659 2023-01-24 03:35:27.270027: step: 178/464, loss: 0.22294452786445618 2023-01-24 03:35:28.070599: step: 180/464, loss: 0.14733389019966125 2023-01-24 03:35:28.812721: step: 182/464, loss: 0.9163328409194946 2023-01-24 03:35:29.531756: step: 184/464, loss: 0.18387994170188904 2023-01-24 03:35:30.252686: step: 186/464, loss: 0.10005448013544083 2023-01-24 03:35:30.970301: step: 188/464, loss: 0.07477206736803055 2023-01-24 03:35:31.649010: step: 190/464, loss: 0.6215589046478271 2023-01-24 03:35:32.416982: step: 192/464, loss: 0.443669855594635 2023-01-24 03:35:33.167731: step: 194/464, loss: 0.29367828369140625 2023-01-24 03:35:33.932292: step: 196/464, loss: 0.0811825841665268 2023-01-24 03:35:34.655499: step: 198/464, loss: 0.037247225642204285 2023-01-24 03:35:35.345569: step: 200/464, loss: 0.06594298779964447 2023-01-24 03:35:36.074212: step: 202/464, loss: 0.16184520721435547 2023-01-24 03:35:36.790878: step: 204/464, loss: 0.10985446721315384 2023-01-24 03:35:37.532241: step: 206/464, loss: 0.053390901535749435 2023-01-24 03:35:38.261880: step: 208/464, loss: 1.1080310344696045 2023-01-24 03:35:38.946254: step: 210/464, loss: 0.41309675574302673 2023-01-24 03:35:39.654313: step: 212/464, loss: 0.09760172665119171 2023-01-24 03:35:40.503972: step: 214/464, loss: 0.17236673831939697 2023-01-24 03:35:41.220814: step: 216/464, loss: 0.06706502288579941 2023-01-24 03:35:42.031415: step: 218/464, loss: 0.640880286693573 2023-01-24 03:35:42.813590: step: 220/464, loss: 0.15519407391548157 2023-01-24 03:35:43.579443: step: 222/464, loss: 0.12634916603565216 2023-01-24 03:35:44.324987: step: 224/464, loss: 0.18875771760940552 2023-01-24 03:35:45.018756: step: 226/464, loss: 0.08672748506069183 2023-01-24 03:35:45.757322: step: 228/464, loss: 0.0703166127204895 2023-01-24 03:35:46.458939: step: 230/464, loss: 0.21372142434120178 2023-01-24 03:35:47.214483: step: 232/464, loss: 0.09859161078929901 2023-01-24 03:35:47.972735: step: 234/464, loss: 0.08668128401041031 2023-01-24 03:35:48.630122: step: 236/464, loss: 0.1777801811695099 2023-01-24 03:35:49.433063: step: 238/464, loss: 0.1596376895904541 2023-01-24 03:35:50.129623: step: 240/464, loss: 0.7244187593460083 2023-01-24 03:35:50.952824: step: 242/464, loss: 0.1390981376171112 2023-01-24 03:35:51.673637: step: 244/464, loss: 0.20769082009792328 2023-01-24 03:35:52.486223: step: 246/464, loss: 0.06973709166049957 2023-01-24 03:35:53.246894: step: 248/464, loss: 0.3444404900074005 2023-01-24 03:35:53.894174: step: 250/464, loss: 0.16165480017662048 2023-01-24 03:35:54.619116: step: 252/464, loss: 0.12662559747695923 2023-01-24 03:35:55.402354: step: 254/464, loss: 0.19266805052757263 2023-01-24 03:35:56.242275: step: 256/464, loss: 0.46310684084892273 2023-01-24 03:35:56.933025: step: 258/464, loss: 0.07980255782604218 2023-01-24 03:35:57.666832: step: 260/464, loss: 0.15134549140930176 2023-01-24 03:35:58.364338: step: 262/464, loss: 0.16883066296577454 2023-01-24 03:35:59.164374: step: 264/464, loss: 0.22058843076229095 2023-01-24 03:35:59.795004: step: 266/464, loss: 0.06367085129022598 2023-01-24 03:36:00.549260: step: 268/464, loss: 0.055862151086330414 2023-01-24 03:36:01.248521: step: 270/464, loss: 0.04270695149898529 2023-01-24 03:36:01.968468: step: 272/464, loss: 0.24421437084674835 2023-01-24 03:36:02.710030: step: 274/464, loss: 0.07772643119096756 2023-01-24 03:36:03.559614: step: 276/464, loss: 0.09192586690187454 2023-01-24 03:36:04.329075: step: 278/464, loss: 0.09877754002809525 2023-01-24 03:36:05.047696: step: 280/464, loss: 0.12516112625598907 2023-01-24 03:36:05.753417: step: 282/464, loss: 0.12664923071861267 2023-01-24 03:36:06.582206: step: 284/464, loss: 1.0937851667404175 2023-01-24 03:36:07.382718: step: 286/464, loss: 0.317772775888443 2023-01-24 03:36:08.094095: step: 288/464, loss: 0.0594954788684845 2023-01-24 03:36:08.799554: step: 290/464, loss: 0.07765944302082062 2023-01-24 03:36:09.522419: step: 292/464, loss: 0.20633435249328613 2023-01-24 03:36:10.225319: step: 294/464, loss: 0.4687633514404297 2023-01-24 03:36:11.043119: step: 296/464, loss: 1.9894096851348877 2023-01-24 03:36:11.826334: step: 298/464, loss: 0.1260242909193039 2023-01-24 03:36:12.634804: step: 300/464, loss: 0.056300777941942215 2023-01-24 03:36:13.346153: step: 302/464, loss: 0.19352252781391144 2023-01-24 03:36:14.023697: step: 304/464, loss: 0.08822372555732727 2023-01-24 03:36:14.795546: step: 306/464, loss: 0.14995773136615753 2023-01-24 03:36:15.522464: step: 308/464, loss: 0.05846775323152542 2023-01-24 03:36:16.287796: step: 310/464, loss: 0.18117393553256989 2023-01-24 03:36:16.965404: step: 312/464, loss: 0.034953609108924866 2023-01-24 03:36:17.668979: step: 314/464, loss: 0.024108169600367546 2023-01-24 03:36:18.415953: step: 316/464, loss: 0.0547054260969162 2023-01-24 03:36:19.154480: step: 318/464, loss: 0.03408285230398178 2023-01-24 03:36:19.925372: step: 320/464, loss: 0.028545448556542397 2023-01-24 03:36:20.627214: step: 322/464, loss: 0.2630191743373871 2023-01-24 03:36:21.378919: step: 324/464, loss: 0.19812065362930298 2023-01-24 03:36:22.081411: step: 326/464, loss: 0.16171224415302277 2023-01-24 03:36:22.821814: step: 328/464, loss: 0.22023920714855194 2023-01-24 03:36:23.614440: step: 330/464, loss: 0.11093226820230484 2023-01-24 03:36:24.386723: step: 332/464, loss: 0.14447692036628723 2023-01-24 03:36:25.144518: step: 334/464, loss: 0.35919249057769775 2023-01-24 03:36:25.823773: step: 336/464, loss: 0.09195218980312347 2023-01-24 03:36:26.644047: step: 338/464, loss: 0.0950898677110672 2023-01-24 03:36:27.457356: step: 340/464, loss: 0.017913883551955223 2023-01-24 03:36:28.140459: step: 342/464, loss: 0.09973374009132385 2023-01-24 03:36:28.978889: step: 344/464, loss: 0.140656977891922 2023-01-24 03:36:29.676702: step: 346/464, loss: 0.09039287269115448 2023-01-24 03:36:30.511020: step: 348/464, loss: 0.1490309089422226 2023-01-24 03:36:31.273429: step: 350/464, loss: 0.26216188073158264 2023-01-24 03:36:32.049809: step: 352/464, loss: 0.1854851096868515 2023-01-24 03:36:32.760819: step: 354/464, loss: 0.0873323529958725 2023-01-24 03:36:33.489899: step: 356/464, loss: 0.13864044845104218 2023-01-24 03:36:34.266243: step: 358/464, loss: 0.14705531299114227 2023-01-24 03:36:35.045912: step: 360/464, loss: 0.06606864929199219 2023-01-24 03:36:35.755999: step: 362/464, loss: 0.05874408036470413 2023-01-24 03:36:36.554309: step: 364/464, loss: 0.12137801945209503 2023-01-24 03:36:37.252007: step: 366/464, loss: 0.1096571609377861 2023-01-24 03:36:37.900441: step: 368/464, loss: 0.08940312266349792 2023-01-24 03:36:38.637623: step: 370/464, loss: 1.0309978723526 2023-01-24 03:36:39.309154: step: 372/464, loss: 0.1272679716348648 2023-01-24 03:36:40.003629: step: 374/464, loss: 0.29737213253974915 2023-01-24 03:36:40.731849: step: 376/464, loss: 0.1702267825603485 2023-01-24 03:36:41.458280: step: 378/464, loss: 0.1460234671831131 2023-01-24 03:36:42.176186: step: 380/464, loss: 0.13599790632724762 2023-01-24 03:36:42.962806: step: 382/464, loss: 0.1381097435951233 2023-01-24 03:36:43.647781: step: 384/464, loss: 0.13972963392734528 2023-01-24 03:36:44.397109: step: 386/464, loss: 0.07999303191900253 2023-01-24 03:36:45.119086: step: 388/464, loss: 0.07907330989837646 2023-01-24 03:36:45.887775: step: 390/464, loss: 0.03261999040842056 2023-01-24 03:36:46.629340: step: 392/464, loss: 0.1951265037059784 2023-01-24 03:36:47.384436: step: 394/464, loss: 0.06247112527489662 2023-01-24 03:36:48.143365: step: 396/464, loss: 0.02576691471040249 2023-01-24 03:36:49.069011: step: 398/464, loss: 0.02725120075047016 2023-01-24 03:36:49.793483: step: 400/464, loss: 0.25071337819099426 2023-01-24 03:36:50.495118: step: 402/464, loss: 0.09235862642526627 2023-01-24 03:36:51.274167: step: 404/464, loss: 0.10845604538917542 2023-01-24 03:36:52.013884: step: 406/464, loss: 0.11261343210935593 2023-01-24 03:36:52.747124: step: 408/464, loss: 0.11463362723588943 2023-01-24 03:36:53.563509: step: 410/464, loss: 0.1571776568889618 2023-01-24 03:36:54.308951: step: 412/464, loss: 0.12521491944789886 2023-01-24 03:36:55.039071: step: 414/464, loss: 0.045416172593832016 2023-01-24 03:36:55.749394: step: 416/464, loss: 0.054573748260736465 2023-01-24 03:36:56.438685: step: 418/464, loss: 0.225603848695755 2023-01-24 03:36:57.240164: step: 420/464, loss: 0.055413082242012024 2023-01-24 03:36:58.095161: step: 422/464, loss: 0.39748069643974304 2023-01-24 03:36:58.855270: step: 424/464, loss: 0.26622945070266724 2023-01-24 03:36:59.651715: step: 426/464, loss: 0.12175989151000977 2023-01-24 03:37:00.343351: step: 428/464, loss: 0.09528283774852753 2023-01-24 03:37:01.182960: step: 430/464, loss: 0.22560282051563263 2023-01-24 03:37:01.893746: step: 432/464, loss: 0.20506493747234344 2023-01-24 03:37:02.723360: step: 434/464, loss: 0.08719082176685333 2023-01-24 03:37:03.551977: step: 436/464, loss: 2.499934673309326 2023-01-24 03:37:04.297509: step: 438/464, loss: 0.6153557300567627 2023-01-24 03:37:05.027642: step: 440/464, loss: 0.1747024953365326 2023-01-24 03:37:05.763340: step: 442/464, loss: 0.06936346739530563 2023-01-24 03:37:06.526567: step: 444/464, loss: 0.06344647705554962 2023-01-24 03:37:07.252868: step: 446/464, loss: 0.07927471399307251 2023-01-24 03:37:07.990572: step: 448/464, loss: 0.40280646085739136 2023-01-24 03:37:08.863390: step: 450/464, loss: 0.21933694183826447 2023-01-24 03:37:09.590189: step: 452/464, loss: 0.26208335161209106 2023-01-24 03:37:10.280727: step: 454/464, loss: 0.14688733220100403 2023-01-24 03:37:11.006066: step: 456/464, loss: 1.7487246990203857 2023-01-24 03:37:11.734130: step: 458/464, loss: 0.18525251746177673 2023-01-24 03:37:12.595101: step: 460/464, loss: 0.19878581166267395 2023-01-24 03:37:13.416233: step: 462/464, loss: 0.1086234375834465 2023-01-24 03:37:14.123850: step: 464/464, loss: 0.5612615346908569 2023-01-24 03:37:14.912213: step: 466/464, loss: 0.22032488882541656 2023-01-24 03:37:15.747559: step: 468/464, loss: 0.024769339710474014 2023-01-24 03:37:16.492665: step: 470/464, loss: 0.11801978200674057 2023-01-24 03:37:17.200005: step: 472/464, loss: 0.06288079172372818 2023-01-24 03:37:17.925258: step: 474/464, loss: 0.10719543695449829 2023-01-24 03:37:18.842998: step: 476/464, loss: 0.09371764212846756 2023-01-24 03:37:19.543142: step: 478/464, loss: 0.09058858454227448 2023-01-24 03:37:20.265290: step: 480/464, loss: 1.2796767950057983 2023-01-24 03:37:20.971004: step: 482/464, loss: 0.33729735016822815 2023-01-24 03:37:21.665306: step: 484/464, loss: 0.10503074526786804 2023-01-24 03:37:22.426868: step: 486/464, loss: 0.14708486199378967 2023-01-24 03:37:23.148384: step: 488/464, loss: 0.04026735946536064 2023-01-24 03:37:23.945925: step: 490/464, loss: 0.1619386523962021 2023-01-24 03:37:24.709748: step: 492/464, loss: 0.07265634089708328 2023-01-24 03:37:25.413505: step: 494/464, loss: 0.06399974226951599 2023-01-24 03:37:26.109233: step: 496/464, loss: 0.16489113867282867 2023-01-24 03:37:26.889983: step: 498/464, loss: 0.03770787641406059 2023-01-24 03:37:27.711602: step: 500/464, loss: 0.10955356806516647 2023-01-24 03:37:28.493967: step: 502/464, loss: 0.26145660877227783 2023-01-24 03:37:29.301799: step: 504/464, loss: 0.3547782897949219 2023-01-24 03:37:30.036972: step: 506/464, loss: 0.10551081597805023 2023-01-24 03:37:30.789252: step: 508/464, loss: 0.07227484881877899 2023-01-24 03:37:31.519660: step: 510/464, loss: 0.24487046897411346 2023-01-24 03:37:32.466608: step: 512/464, loss: 0.11837557703256607 2023-01-24 03:37:33.242707: step: 514/464, loss: 0.19440263509750366 2023-01-24 03:37:33.961494: step: 516/464, loss: 0.04968242719769478 2023-01-24 03:37:34.731458: step: 518/464, loss: 0.2314143180847168 2023-01-24 03:37:35.507296: step: 520/464, loss: 0.0630800649523735 2023-01-24 03:37:36.328111: step: 522/464, loss: 0.09166283905506134 2023-01-24 03:37:37.092094: step: 524/464, loss: 0.09149608761072159 2023-01-24 03:37:37.806739: step: 526/464, loss: 0.13420897722244263 2023-01-24 03:37:38.523075: step: 528/464, loss: 0.09974882006645203 2023-01-24 03:37:39.341238: step: 530/464, loss: 0.09064196050167084 2023-01-24 03:37:40.047981: step: 532/464, loss: 0.08575993031263351 2023-01-24 03:37:40.831510: step: 534/464, loss: 0.04460528492927551 2023-01-24 03:37:41.594579: step: 536/464, loss: 0.08206569403409958 2023-01-24 03:37:42.418644: step: 538/464, loss: 0.3511834442615509 2023-01-24 03:37:43.194431: step: 540/464, loss: 0.9145286083221436 2023-01-24 03:37:43.905578: step: 542/464, loss: 0.23256400227546692 2023-01-24 03:37:44.587882: step: 544/464, loss: 0.0949745923280716 2023-01-24 03:37:45.367585: step: 546/464, loss: 0.05418713763356209 2023-01-24 03:37:46.063881: step: 548/464, loss: 1.9522722959518433 2023-01-24 03:37:46.763102: step: 550/464, loss: 0.6723595857620239 2023-01-24 03:37:47.544821: step: 552/464, loss: 0.21266116201877594 2023-01-24 03:37:48.267107: step: 554/464, loss: 0.1409553587436676 2023-01-24 03:37:49.093404: step: 556/464, loss: 0.11059970408678055 2023-01-24 03:37:49.852939: step: 558/464, loss: 0.09071413427591324 2023-01-24 03:37:50.526182: step: 560/464, loss: 0.12655948102474213 2023-01-24 03:37:51.224822: step: 562/464, loss: 0.05802586302161217 2023-01-24 03:37:51.912520: step: 564/464, loss: 0.09048962593078613 2023-01-24 03:37:52.594415: step: 566/464, loss: 0.23743292689323425 2023-01-24 03:37:53.314741: step: 568/464, loss: 0.07267441600561142 2023-01-24 03:37:54.110507: step: 570/464, loss: 0.07767453044652939 2023-01-24 03:37:54.839379: step: 572/464, loss: 0.11691311746835709 2023-01-24 03:37:55.668062: step: 574/464, loss: 0.10471902787685394 2023-01-24 03:37:56.440701: step: 576/464, loss: 0.1262369155883789 2023-01-24 03:37:57.153320: step: 578/464, loss: 0.06593358516693115 2023-01-24 03:37:57.876909: step: 580/464, loss: 0.11966902017593384 2023-01-24 03:37:58.560516: step: 582/464, loss: 0.1588023602962494 2023-01-24 03:37:59.239157: step: 584/464, loss: 0.07839857786893845 2023-01-24 03:37:59.994746: step: 586/464, loss: 0.07492205500602722 2023-01-24 03:38:00.739501: step: 588/464, loss: 0.04727175459265709 2023-01-24 03:38:01.564360: step: 590/464, loss: 0.2579101622104645 2023-01-24 03:38:02.321566: step: 592/464, loss: 0.05314266309142113 2023-01-24 03:38:03.135829: step: 594/464, loss: 0.3269496560096741 2023-01-24 03:38:03.838100: step: 596/464, loss: 0.07443402707576752 2023-01-24 03:38:04.576946: step: 598/464, loss: 0.1413809210062027 2023-01-24 03:38:05.270468: step: 600/464, loss: 0.12987062335014343 2023-01-24 03:38:05.983728: step: 602/464, loss: 0.34331780672073364 2023-01-24 03:38:06.682273: step: 604/464, loss: 0.13412347435951233 2023-01-24 03:38:07.391745: step: 606/464, loss: 0.12206920236349106 2023-01-24 03:38:08.096672: step: 608/464, loss: 0.11294567584991455 2023-01-24 03:38:08.921027: step: 610/464, loss: 0.1791577935218811 2023-01-24 03:38:09.674728: step: 612/464, loss: 0.05504743009805679 2023-01-24 03:38:10.426671: step: 614/464, loss: 0.13075266778469086 2023-01-24 03:38:11.138872: step: 616/464, loss: 0.17414139211177826 2023-01-24 03:38:11.858584: step: 618/464, loss: 0.0166025310754776 2023-01-24 03:38:12.648372: step: 620/464, loss: 0.08685021847486496 2023-01-24 03:38:13.315919: step: 622/464, loss: 0.029836809262633324 2023-01-24 03:38:14.009229: step: 624/464, loss: 0.19079022109508514 2023-01-24 03:38:14.759027: step: 626/464, loss: 0.06109185889363289 2023-01-24 03:38:15.456683: step: 628/464, loss: 0.08585669845342636 2023-01-24 03:38:16.218256: step: 630/464, loss: 0.2822912335395813 2023-01-24 03:38:16.953010: step: 632/464, loss: 0.11809217929840088 2023-01-24 03:38:17.663071: step: 634/464, loss: 0.7315946817398071 2023-01-24 03:38:18.397180: step: 636/464, loss: 0.1486750692129135 2023-01-24 03:38:19.153376: step: 638/464, loss: 0.08701854944229126 2023-01-24 03:38:19.858565: step: 640/464, loss: 0.22685103118419647 2023-01-24 03:38:20.577025: step: 642/464, loss: 0.5486653447151184 2023-01-24 03:38:21.335609: step: 644/464, loss: 0.14657476544380188 2023-01-24 03:38:22.052675: step: 646/464, loss: 0.04882512614130974 2023-01-24 03:38:22.864963: step: 648/464, loss: 0.18402329087257385 2023-01-24 03:38:23.533175: step: 650/464, loss: 0.21440860629081726 2023-01-24 03:38:24.350095: step: 652/464, loss: 0.3449436128139496 2023-01-24 03:38:25.055470: step: 654/464, loss: 0.13148115575313568 2023-01-24 03:38:25.849488: step: 656/464, loss: 0.16635462641716003 2023-01-24 03:38:26.658325: step: 658/464, loss: 0.1267225593328476 2023-01-24 03:38:27.415944: step: 660/464, loss: 0.028304558247327805 2023-01-24 03:38:28.133364: step: 662/464, loss: 0.054090242832899094 2023-01-24 03:38:28.892690: step: 664/464, loss: 0.11156564950942993 2023-01-24 03:38:29.569770: step: 666/464, loss: 0.3686261773109436 2023-01-24 03:38:30.286625: step: 668/464, loss: 0.13700802624225616 2023-01-24 03:38:30.990513: step: 670/464, loss: 0.09640567749738693 2023-01-24 03:38:31.748966: step: 672/464, loss: 0.08035552501678467 2023-01-24 03:38:32.531817: step: 674/464, loss: 0.07916685938835144 2023-01-24 03:38:33.200593: step: 676/464, loss: 0.039851244539022446 2023-01-24 03:38:33.912370: step: 678/464, loss: 0.6721044182777405 2023-01-24 03:38:34.701176: step: 680/464, loss: 0.18033455312252045 2023-01-24 03:38:35.355612: step: 682/464, loss: 0.7768173217773438 2023-01-24 03:38:36.129885: step: 684/464, loss: 2.363861322402954 2023-01-24 03:38:36.907104: step: 686/464, loss: 0.2924078702926636 2023-01-24 03:38:37.631912: step: 688/464, loss: 0.05811668187379837 2023-01-24 03:38:38.374867: step: 690/464, loss: 0.21463051438331604 2023-01-24 03:38:39.172101: step: 692/464, loss: 0.06769423186779022 2023-01-24 03:38:39.853959: step: 694/464, loss: 0.06266149878501892 2023-01-24 03:38:40.581474: step: 696/464, loss: 0.0853852853178978 2023-01-24 03:38:41.341520: step: 698/464, loss: 0.09094809740781784 2023-01-24 03:38:42.102197: step: 700/464, loss: 0.25462332367897034 2023-01-24 03:38:42.852623: step: 702/464, loss: 0.16619449853897095 2023-01-24 03:38:43.635908: step: 704/464, loss: 0.10084399580955505 2023-01-24 03:38:44.337202: step: 706/464, loss: 0.29148754477500916 2023-01-24 03:38:45.031865: step: 708/464, loss: 0.02944398857653141 2023-01-24 03:38:45.786594: step: 710/464, loss: 0.10250351577997208 2023-01-24 03:38:46.514598: step: 712/464, loss: 0.18411992490291595 2023-01-24 03:38:47.225738: step: 714/464, loss: 0.15183106064796448 2023-01-24 03:38:48.011186: step: 716/464, loss: 0.1189422532916069 2023-01-24 03:38:48.703556: step: 718/464, loss: 0.07556652277708054 2023-01-24 03:38:49.590514: step: 720/464, loss: 0.23556427657604218 2023-01-24 03:38:50.240273: step: 722/464, loss: 0.1929933726787567 2023-01-24 03:38:50.982977: step: 724/464, loss: 0.1436728984117508 2023-01-24 03:38:51.702470: step: 726/464, loss: 0.046241115778684616 2023-01-24 03:38:52.396304: step: 728/464, loss: 0.15094700455665588 2023-01-24 03:38:53.026457: step: 730/464, loss: 0.06769252568483353 2023-01-24 03:38:53.794524: step: 732/464, loss: 0.15382608771324158 2023-01-24 03:38:54.549487: step: 734/464, loss: 0.1293579787015915 2023-01-24 03:38:55.294924: step: 736/464, loss: 0.050788234919309616 2023-01-24 03:38:55.972695: step: 738/464, loss: 0.18262383341789246 2023-01-24 03:38:56.706205: step: 740/464, loss: 0.17732545733451843 2023-01-24 03:38:57.381685: step: 742/464, loss: 0.12889239192008972 2023-01-24 03:38:58.090982: step: 744/464, loss: 0.1251494586467743 2023-01-24 03:38:58.851954: step: 746/464, loss: 0.4357171058654785 2023-01-24 03:38:59.581116: step: 748/464, loss: 0.6001867055892944 2023-01-24 03:39:00.347912: step: 750/464, loss: 0.11146920174360275 2023-01-24 03:39:01.069935: step: 752/464, loss: 0.09523777663707733 2023-01-24 03:39:01.758634: step: 754/464, loss: 0.03619766607880592 2023-01-24 03:39:02.458239: step: 756/464, loss: 0.10129847377538681 2023-01-24 03:39:03.145563: step: 758/464, loss: 0.06092946603894234 2023-01-24 03:39:03.851464: step: 760/464, loss: 0.2895306646823883 2023-01-24 03:39:04.618913: step: 762/464, loss: 0.0353437177836895 2023-01-24 03:39:05.419442: step: 764/464, loss: 0.14700661599636078 2023-01-24 03:39:06.220288: step: 766/464, loss: 0.5515168309211731 2023-01-24 03:39:06.911850: step: 768/464, loss: 0.029831836000084877 2023-01-24 03:39:07.678604: step: 770/464, loss: 0.08818569034337997 2023-01-24 03:39:08.458728: step: 772/464, loss: 0.16117626428604126 2023-01-24 03:39:09.172315: step: 774/464, loss: 0.543182909488678 2023-01-24 03:39:09.978172: step: 776/464, loss: 0.07934430986642838 2023-01-24 03:39:10.712335: step: 778/464, loss: 0.21586981415748596 2023-01-24 03:39:11.514447: step: 780/464, loss: 0.22837424278259277 2023-01-24 03:39:12.286529: step: 782/464, loss: 0.05814993754029274 2023-01-24 03:39:13.041277: step: 784/464, loss: 0.06691834330558777 2023-01-24 03:39:13.773826: step: 786/464, loss: 0.12464606761932373 2023-01-24 03:39:14.533468: step: 788/464, loss: 0.0432974137365818 2023-01-24 03:39:15.306639: step: 790/464, loss: 0.19307135045528412 2023-01-24 03:39:16.066213: step: 792/464, loss: 0.17207276821136475 2023-01-24 03:39:16.800517: step: 794/464, loss: 0.08930698037147522 2023-01-24 03:39:17.591401: step: 796/464, loss: 0.25028547644615173 2023-01-24 03:39:18.319166: step: 798/464, loss: 0.11197063326835632 2023-01-24 03:39:19.071306: step: 800/464, loss: 0.08689679205417633 2023-01-24 03:39:19.864929: step: 802/464, loss: 0.05291495844721794 2023-01-24 03:39:20.576088: step: 804/464, loss: 0.049140579998493195 2023-01-24 03:39:21.322398: step: 806/464, loss: 0.07981767505407333 2023-01-24 03:39:22.124304: step: 808/464, loss: 0.22347483038902283 2023-01-24 03:39:22.950017: step: 810/464, loss: 0.7353914976119995 2023-01-24 03:39:23.719906: step: 812/464, loss: 0.3017483949661255 2023-01-24 03:39:24.425111: step: 814/464, loss: 0.46327030658721924 2023-01-24 03:39:25.203591: step: 816/464, loss: 0.28747430443763733 2023-01-24 03:39:25.967159: step: 818/464, loss: 0.2881736755371094 2023-01-24 03:39:26.668905: step: 820/464, loss: 0.15409404039382935 2023-01-24 03:39:27.369225: step: 822/464, loss: 0.08141999691724777 2023-01-24 03:39:28.114754: step: 824/464, loss: 0.07773395627737045 2023-01-24 03:39:28.948026: step: 826/464, loss: 0.18517354130744934 2023-01-24 03:39:29.677655: step: 828/464, loss: 0.09534697234630585 2023-01-24 03:39:30.482204: step: 830/464, loss: 0.022645127028226852 2023-01-24 03:39:31.231851: step: 832/464, loss: 0.5086654424667358 2023-01-24 03:39:32.017264: step: 834/464, loss: 0.08283492177724838 2023-01-24 03:39:32.703476: step: 836/464, loss: 0.3400731384754181 2023-01-24 03:39:33.440367: step: 838/464, loss: 0.13037338852882385 2023-01-24 03:39:34.167670: step: 840/464, loss: 0.06704337894916534 2023-01-24 03:39:34.962890: step: 842/464, loss: 0.11626792699098587 2023-01-24 03:39:35.705467: step: 844/464, loss: 0.16931547224521637 2023-01-24 03:39:36.461812: step: 846/464, loss: 0.07658080011606216 2023-01-24 03:39:37.257036: step: 848/464, loss: 0.39117011427879333 2023-01-24 03:39:38.058773: step: 850/464, loss: 0.1561334729194641 2023-01-24 03:39:38.766351: step: 852/464, loss: 0.11885397136211395 2023-01-24 03:39:39.526548: step: 854/464, loss: 0.09748605638742447 2023-01-24 03:39:40.275955: step: 856/464, loss: 0.14782683551311493 2023-01-24 03:39:41.002584: step: 858/464, loss: 0.05930705741047859 2023-01-24 03:39:41.697837: step: 860/464, loss: 0.19739149510860443 2023-01-24 03:39:42.462168: step: 862/464, loss: 0.23783403635025024 2023-01-24 03:39:43.221766: step: 864/464, loss: 0.1938062161207199 2023-01-24 03:39:43.988557: step: 866/464, loss: 0.1528290957212448 2023-01-24 03:39:44.671292: step: 868/464, loss: 0.27736181020736694 2023-01-24 03:39:45.434210: step: 870/464, loss: 0.24971850216388702 2023-01-24 03:39:46.130668: step: 872/464, loss: 0.29064181447029114 2023-01-24 03:39:46.856229: step: 874/464, loss: 0.29505786299705505 2023-01-24 03:39:47.658013: step: 876/464, loss: 0.13866356015205383 2023-01-24 03:39:48.490474: step: 878/464, loss: 0.12224125117063522 2023-01-24 03:39:49.254978: step: 880/464, loss: 0.07472982257604599 2023-01-24 03:39:49.979778: step: 882/464, loss: 0.572647750377655 2023-01-24 03:39:50.783926: step: 884/464, loss: 0.47569239139556885 2023-01-24 03:39:51.526445: step: 886/464, loss: 0.05924500524997711 2023-01-24 03:39:52.241912: step: 888/464, loss: 0.3303375840187073 2023-01-24 03:39:52.919222: step: 890/464, loss: 0.01358190830796957 2023-01-24 03:39:53.672656: step: 892/464, loss: 0.06630312651395798 2023-01-24 03:39:54.414410: step: 894/464, loss: 0.1577807366847992 2023-01-24 03:39:55.192795: step: 896/464, loss: 0.16048268973827362 2023-01-24 03:39:55.992943: step: 898/464, loss: 0.6700915694236755 2023-01-24 03:39:56.719169: step: 900/464, loss: 0.15095430612564087 2023-01-24 03:39:57.438767: step: 902/464, loss: 0.052950140088796616 2023-01-24 03:39:58.120103: step: 904/464, loss: 0.04643837735056877 2023-01-24 03:39:58.848402: step: 906/464, loss: 0.15152107179164886 2023-01-24 03:39:59.525252: step: 908/464, loss: 0.11968246847391129 2023-01-24 03:40:00.255029: step: 910/464, loss: 0.05407499521970749 2023-01-24 03:40:01.072841: step: 912/464, loss: 0.26440638303756714 2023-01-24 03:40:01.804852: step: 914/464, loss: 0.2121550738811493 2023-01-24 03:40:02.551593: step: 916/464, loss: 0.15850675106048584 2023-01-24 03:40:03.361339: step: 918/464, loss: 0.1400802731513977 2023-01-24 03:40:04.110183: step: 920/464, loss: 0.09546446800231934 2023-01-24 03:40:04.863633: step: 922/464, loss: 0.32698971033096313 2023-01-24 03:40:05.612375: step: 924/464, loss: 0.16159042716026306 2023-01-24 03:40:06.483484: step: 926/464, loss: 0.1334841400384903 2023-01-24 03:40:07.217393: step: 928/464, loss: 0.18339157104492188 2023-01-24 03:40:07.865638: step: 930/464, loss: 0.2129916399717331 ================================================== Loss: 0.197 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31840268032556834, 'r': 0.30752744646245594, 'f1': 0.31287058742415885}, 'combined': 0.2305362223125381, 'epoch': 14} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3163273123615729, 'r': 0.2688157002282141, 'f1': 0.29064261605871017}, 'combined': 0.18050436155225158, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.302534354619063, 'r': 0.302534354619063, 'f1': 0.302534354619063}, 'combined': 0.22292005077194113, 'epoch': 14} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3128463371610251, 'r': 0.26590394501050607, 'f1': 0.2874713941255525}, 'combined': 0.17853486582534314, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32473529089714576, 'r': 0.3173409389222582, 'f1': 0.3209955370672362}, 'combined': 0.23652302731270036, 'epoch': 14} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3238551611783678, 'r': 0.2743018048282721, 'f1': 0.29702589876113267}, 'combined': 0.18446871607270346, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.32857142857142857, 'f1': 0.28395061728395066}, 'combined': 0.18930041152263377, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.33695652173913043, 'f1': 0.2870370370370371}, 'combined': 0.14351851851851855, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5083333333333333, 'r': 0.2629310344827586, 'f1': 0.34659090909090906}, 'combined': 0.23106060606060602, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:42:48.345047: step: 2/464, loss: 0.09386724978685379 2023-01-24 03:42:49.117324: step: 4/464, loss: 0.12544168531894684 2023-01-24 03:42:49.808242: step: 6/464, loss: 0.3711128830909729 2023-01-24 03:42:50.641110: step: 8/464, loss: 0.14079999923706055 2023-01-24 03:42:51.338708: step: 10/464, loss: 0.13299894332885742 2023-01-24 03:42:52.060462: step: 12/464, loss: 0.08605366945266724 2023-01-24 03:42:52.803945: step: 14/464, loss: 0.08199825882911682 2023-01-24 03:42:53.495553: step: 16/464, loss: 0.0731671154499054 2023-01-24 03:42:54.217162: step: 18/464, loss: 0.38960978388786316 2023-01-24 03:42:54.938335: step: 20/464, loss: 0.0806836411356926 2023-01-24 03:42:55.630347: step: 22/464, loss: 0.6706730723381042 2023-01-24 03:42:56.378078: step: 24/464, loss: 0.11350344866514206 2023-01-24 03:42:57.074542: step: 26/464, loss: 0.13751348853111267 2023-01-24 03:42:57.820633: step: 28/464, loss: 0.07605013996362686 2023-01-24 03:42:58.577865: step: 30/464, loss: 0.08604296296834946 2023-01-24 03:42:59.388426: step: 32/464, loss: 0.11444265395402908 2023-01-24 03:43:00.055109: step: 34/464, loss: 0.14891904592514038 2023-01-24 03:43:00.803328: step: 36/464, loss: 0.08350467681884766 2023-01-24 03:43:01.603033: step: 38/464, loss: 0.09013022482395172 2023-01-24 03:43:02.318380: step: 40/464, loss: 0.6425589323043823 2023-01-24 03:43:03.112400: step: 42/464, loss: 0.45394182205200195 2023-01-24 03:43:03.780598: step: 44/464, loss: 0.07432316243648529 2023-01-24 03:43:04.554188: step: 46/464, loss: 0.09558834880590439 2023-01-24 03:43:05.308960: step: 48/464, loss: 0.05760970339179039 2023-01-24 03:43:06.079238: step: 50/464, loss: 0.06453204900026321 2023-01-24 03:43:06.817286: step: 52/464, loss: 0.10807399451732635 2023-01-24 03:43:07.542055: step: 54/464, loss: 0.10865321755409241 2023-01-24 03:43:08.287450: step: 56/464, loss: 0.0828612744808197 2023-01-24 03:43:09.024731: step: 58/464, loss: 0.6269303560256958 2023-01-24 03:43:09.716271: step: 60/464, loss: 0.044964175671339035 2023-01-24 03:43:10.408258: step: 62/464, loss: 0.07399984449148178 2023-01-24 03:43:11.159439: step: 64/464, loss: 0.08610676974058151 2023-01-24 03:43:11.877722: step: 66/464, loss: 0.40302664041519165 2023-01-24 03:43:12.578153: step: 68/464, loss: 0.10739628970623016 2023-01-24 03:43:13.283479: step: 70/464, loss: 0.06525947153568268 2023-01-24 03:43:14.076916: step: 72/464, loss: 0.04451471567153931 2023-01-24 03:43:14.854350: step: 74/464, loss: 0.14145416021347046 2023-01-24 03:43:15.557021: step: 76/464, loss: 0.11191177368164062 2023-01-24 03:43:16.316282: step: 78/464, loss: 0.06259164214134216 2023-01-24 03:43:17.156554: step: 80/464, loss: 0.1686648726463318 2023-01-24 03:43:17.884657: step: 82/464, loss: 0.11285480856895447 2023-01-24 03:43:18.657534: step: 84/464, loss: 0.20290781557559967 2023-01-24 03:43:19.452774: step: 86/464, loss: 4.417758941650391 2023-01-24 03:43:20.181003: step: 88/464, loss: 0.06766335666179657 2023-01-24 03:43:20.873979: step: 90/464, loss: 0.09443461894989014 2023-01-24 03:43:21.569837: step: 92/464, loss: 0.07344210147857666 2023-01-24 03:43:22.248411: step: 94/464, loss: 0.07837525755167007 2023-01-24 03:43:23.052783: step: 96/464, loss: 0.2997106611728668 2023-01-24 03:43:23.791449: step: 98/464, loss: 0.2080036699771881 2023-01-24 03:43:24.531034: step: 100/464, loss: 0.049382418394088745 2023-01-24 03:43:25.279925: step: 102/464, loss: 0.44997745752334595 2023-01-24 03:43:25.962593: step: 104/464, loss: 0.03246447443962097 2023-01-24 03:43:26.718563: step: 106/464, loss: 0.19689927995204926 2023-01-24 03:43:27.468579: step: 108/464, loss: 0.0539761483669281 2023-01-24 03:43:28.183906: step: 110/464, loss: 0.3616011142730713 2023-01-24 03:43:28.995617: step: 112/464, loss: 0.050215672701597214 2023-01-24 03:43:29.721991: step: 114/464, loss: 0.03970296308398247 2023-01-24 03:43:30.476413: step: 116/464, loss: 0.8578179478645325 2023-01-24 03:43:31.280213: step: 118/464, loss: 0.0712665542960167 2023-01-24 03:43:32.014393: step: 120/464, loss: 0.03966226801276207 2023-01-24 03:43:32.801305: step: 122/464, loss: 0.05801122263073921 2023-01-24 03:43:33.557069: step: 124/464, loss: 0.14632351696491241 2023-01-24 03:43:34.259141: step: 126/464, loss: 0.13727235794067383 2023-01-24 03:43:35.089520: step: 128/464, loss: 0.11179246753454208 2023-01-24 03:43:35.828181: step: 130/464, loss: 0.052726734429597855 2023-01-24 03:43:36.526978: step: 132/464, loss: 0.605263888835907 2023-01-24 03:43:37.319380: step: 134/464, loss: 0.1118796169757843 2023-01-24 03:43:38.115252: step: 136/464, loss: 0.060034509748220444 2023-01-24 03:43:38.809514: step: 138/464, loss: 0.10273944586515427 2023-01-24 03:43:39.543256: step: 140/464, loss: 0.00863298773765564 2023-01-24 03:43:40.279040: step: 142/464, loss: 0.0825846940279007 2023-01-24 03:43:41.024675: step: 144/464, loss: 0.1935960054397583 2023-01-24 03:43:41.687620: step: 146/464, loss: 0.18002963066101074 2023-01-24 03:43:42.445232: step: 148/464, loss: 0.1243882030248642 2023-01-24 03:43:43.270042: step: 150/464, loss: 2.4855635166168213 2023-01-24 03:43:43.972209: step: 152/464, loss: 0.049686331301927567 2023-01-24 03:43:44.653635: step: 154/464, loss: 0.15782882273197174 2023-01-24 03:43:45.433168: step: 156/464, loss: 0.38794490694999695 2023-01-24 03:43:46.193337: step: 158/464, loss: 0.13704653084278107 2023-01-24 03:43:46.947550: step: 160/464, loss: 0.04243621975183487 2023-01-24 03:43:47.689337: step: 162/464, loss: 0.04862409457564354 2023-01-24 03:43:48.546365: step: 164/464, loss: 0.04644959047436714 2023-01-24 03:43:49.327079: step: 166/464, loss: 0.10157779604196548 2023-01-24 03:43:50.118017: step: 168/464, loss: 0.08971478044986725 2023-01-24 03:43:50.855726: step: 170/464, loss: 0.17621955275535583 2023-01-24 03:43:51.608481: step: 172/464, loss: 0.32910415530204773 2023-01-24 03:43:52.249355: step: 174/464, loss: 0.09267746657133102 2023-01-24 03:43:52.980365: step: 176/464, loss: 0.3283587396144867 2023-01-24 03:43:53.774250: step: 178/464, loss: 0.0949794203042984 2023-01-24 03:43:54.433508: step: 180/464, loss: 0.26500043272972107 2023-01-24 03:43:55.334911: step: 182/464, loss: 0.08976930379867554 2023-01-24 03:43:56.010301: step: 184/464, loss: 0.19251510500907898 2023-01-24 03:43:56.751678: step: 186/464, loss: 0.2029496431350708 2023-01-24 03:43:57.414243: step: 188/464, loss: 0.018383512273430824 2023-01-24 03:43:58.226026: step: 190/464, loss: 0.8381131291389465 2023-01-24 03:43:58.966533: step: 192/464, loss: 0.0751001387834549 2023-01-24 03:43:59.631380: step: 194/464, loss: 0.05229335278272629 2023-01-24 03:44:00.373888: step: 196/464, loss: 0.1134602278470993 2023-01-24 03:44:01.113744: step: 198/464, loss: 0.10519812256097794 2023-01-24 03:44:01.756127: step: 200/464, loss: 0.18681646883487701 2023-01-24 03:44:02.473895: step: 202/464, loss: 0.20296519994735718 2023-01-24 03:44:03.249820: step: 204/464, loss: 0.14943040907382965 2023-01-24 03:44:03.992399: step: 206/464, loss: 0.2886558473110199 2023-01-24 03:44:04.782287: step: 208/464, loss: 0.12967827916145325 2023-01-24 03:44:05.509224: step: 210/464, loss: 0.2362452745437622 2023-01-24 03:44:06.183407: step: 212/464, loss: 0.09625612944364548 2023-01-24 03:44:06.936839: step: 214/464, loss: 0.20666609704494476 2023-01-24 03:44:07.687061: step: 216/464, loss: 0.09178787469863892 2023-01-24 03:44:08.442360: step: 218/464, loss: 0.06775946170091629 2023-01-24 03:44:09.134510: step: 220/464, loss: 0.06090725213289261 2023-01-24 03:44:09.817057: step: 222/464, loss: 0.08698819577693939 2023-01-24 03:44:10.574899: step: 224/464, loss: 0.05898406729102135 2023-01-24 03:44:11.370244: step: 226/464, loss: 0.07399263978004456 2023-01-24 03:44:12.139029: step: 228/464, loss: 0.3723280131816864 2023-01-24 03:44:12.917642: step: 230/464, loss: 0.11726538836956024 2023-01-24 03:44:13.596595: step: 232/464, loss: 0.008238730020821095 2023-01-24 03:44:14.389878: step: 234/464, loss: 0.0894642323255539 2023-01-24 03:44:15.180167: step: 236/464, loss: 0.7798489928245544 2023-01-24 03:44:15.993550: step: 238/464, loss: 4.281653881072998 2023-01-24 03:44:16.714686: step: 240/464, loss: 0.06438206881284714 2023-01-24 03:44:17.459073: step: 242/464, loss: 0.036539752036333084 2023-01-24 03:44:18.275659: step: 244/464, loss: 0.0853443443775177 2023-01-24 03:44:19.040447: step: 246/464, loss: 0.1012922152876854 2023-01-24 03:44:19.747547: step: 248/464, loss: 0.535499095916748 2023-01-24 03:44:20.529179: step: 250/464, loss: 0.11578743159770966 2023-01-24 03:44:21.339968: step: 252/464, loss: 0.02177901193499565 2023-01-24 03:44:22.066886: step: 254/464, loss: 0.148213192820549 2023-01-24 03:44:22.821179: step: 256/464, loss: 0.1408969908952713 2023-01-24 03:44:23.508602: step: 258/464, loss: 0.09203886240720749 2023-01-24 03:44:24.275549: step: 260/464, loss: 0.08816751092672348 2023-01-24 03:44:25.095404: step: 262/464, loss: 0.2151239514350891 2023-01-24 03:44:25.838884: step: 264/464, loss: 0.23791560530662537 2023-01-24 03:44:26.609641: step: 266/464, loss: 0.060156311839818954 2023-01-24 03:44:27.290598: step: 268/464, loss: 1.4113879203796387 2023-01-24 03:44:28.000900: step: 270/464, loss: 0.05631784349679947 2023-01-24 03:44:28.740748: step: 272/464, loss: 0.04653649777173996 2023-01-24 03:44:29.479878: step: 274/464, loss: 0.038742709904909134 2023-01-24 03:44:30.222171: step: 276/464, loss: 0.36380136013031006 2023-01-24 03:44:30.929705: step: 278/464, loss: 0.18330629169940948 2023-01-24 03:44:31.634191: step: 280/464, loss: 1.0259242057800293 2023-01-24 03:44:32.407562: step: 282/464, loss: 0.35638394951820374 2023-01-24 03:44:33.142041: step: 284/464, loss: 0.08629385381937027 2023-01-24 03:44:33.903804: step: 286/464, loss: 0.8568986654281616 2023-01-24 03:44:34.587085: step: 288/464, loss: 0.10635808855295181 2023-01-24 03:44:35.308737: step: 290/464, loss: 0.16357223689556122 2023-01-24 03:44:36.155053: step: 292/464, loss: 0.13970738649368286 2023-01-24 03:44:36.936031: step: 294/464, loss: 0.18330936133861542 2023-01-24 03:44:37.686724: step: 296/464, loss: 0.13619013130664825 2023-01-24 03:44:38.368412: step: 298/464, loss: 0.032106779515743256 2023-01-24 03:44:39.045880: step: 300/464, loss: 0.31436678767204285 2023-01-24 03:44:39.841466: step: 302/464, loss: 0.4427456259727478 2023-01-24 03:44:40.647034: step: 304/464, loss: 0.08416996151208878 2023-01-24 03:44:41.416744: step: 306/464, loss: 0.12893402576446533 2023-01-24 03:44:42.301817: step: 308/464, loss: 0.15583616495132446 2023-01-24 03:44:43.064907: step: 310/464, loss: 0.1951410174369812 2023-01-24 03:44:43.813419: step: 312/464, loss: 0.040011730045080185 2023-01-24 03:44:44.601206: step: 314/464, loss: 0.09809692949056625 2023-01-24 03:44:45.319442: step: 316/464, loss: 0.1280430257320404 2023-01-24 03:44:46.064616: step: 318/464, loss: 0.33851298689842224 2023-01-24 03:44:46.771397: step: 320/464, loss: 0.32978272438049316 2023-01-24 03:44:47.533583: step: 322/464, loss: 0.06723512709140778 2023-01-24 03:44:48.311797: step: 324/464, loss: 0.934802770614624 2023-01-24 03:44:49.115548: step: 326/464, loss: 1.1091173887252808 2023-01-24 03:44:49.870607: step: 328/464, loss: 0.08063969761133194 2023-01-24 03:44:50.644391: step: 330/464, loss: 0.07467617839574814 2023-01-24 03:44:51.388254: step: 332/464, loss: 0.03026079200208187 2023-01-24 03:44:52.139333: step: 334/464, loss: 0.039279479533433914 2023-01-24 03:44:52.865588: step: 336/464, loss: 0.04296695813536644 2023-01-24 03:44:53.616099: step: 338/464, loss: 0.15277546644210815 2023-01-24 03:44:54.302732: step: 340/464, loss: 0.1530921310186386 2023-01-24 03:44:55.038111: step: 342/464, loss: 0.1040443629026413 2023-01-24 03:44:55.866736: step: 344/464, loss: 0.11835810542106628 2023-01-24 03:44:56.637164: step: 346/464, loss: 0.6051956415176392 2023-01-24 03:44:57.359127: step: 348/464, loss: 0.4107264280319214 2023-01-24 03:44:58.119643: step: 350/464, loss: 0.1013975739479065 2023-01-24 03:44:58.875631: step: 352/464, loss: 0.976349413394928 2023-01-24 03:44:59.691104: step: 354/464, loss: 0.3468686640262604 2023-01-24 03:45:00.430774: step: 356/464, loss: 0.0867522656917572 2023-01-24 03:45:01.158128: step: 358/464, loss: 0.672160267829895 2023-01-24 03:45:01.984457: step: 360/464, loss: 0.1508776992559433 2023-01-24 03:45:02.808908: step: 362/464, loss: 0.08101523667573929 2023-01-24 03:45:03.521714: step: 364/464, loss: 0.19883856177330017 2023-01-24 03:45:04.330158: step: 366/464, loss: 0.14487911760807037 2023-01-24 03:45:05.095730: step: 368/464, loss: 0.11524210125207901 2023-01-24 03:45:05.835880: step: 370/464, loss: 0.1271398514509201 2023-01-24 03:45:06.449860: step: 372/464, loss: 0.049729228019714355 2023-01-24 03:45:07.141464: step: 374/464, loss: 0.030202293768525124 2023-01-24 03:45:07.926562: step: 376/464, loss: 0.2078704684972763 2023-01-24 03:45:08.577428: step: 378/464, loss: 0.16754186153411865 2023-01-24 03:45:09.336779: step: 380/464, loss: 0.14381146430969238 2023-01-24 03:45:10.038210: step: 382/464, loss: 0.05142787843942642 2023-01-24 03:45:10.932176: step: 384/464, loss: 0.6397085785865784 2023-01-24 03:45:11.628232: step: 386/464, loss: 1.1454335451126099 2023-01-24 03:45:12.393675: step: 388/464, loss: 0.9766638278961182 2023-01-24 03:45:13.138909: step: 390/464, loss: 0.01772836036980152 2023-01-24 03:45:13.889750: step: 392/464, loss: 0.11965905874967575 2023-01-24 03:45:14.624427: step: 394/464, loss: 0.4042905569076538 2023-01-24 03:45:15.487378: step: 396/464, loss: 0.06917060911655426 2023-01-24 03:45:16.149776: step: 398/464, loss: 0.14034123718738556 2023-01-24 03:45:16.948769: step: 400/464, loss: 0.03209187090396881 2023-01-24 03:45:17.757289: step: 402/464, loss: 0.14110608398914337 2023-01-24 03:45:18.462772: step: 404/464, loss: 0.12455212324857712 2023-01-24 03:45:19.187963: step: 406/464, loss: 0.0245119147002697 2023-01-24 03:45:19.932321: step: 408/464, loss: 0.12244775146245956 2023-01-24 03:45:20.677045: step: 410/464, loss: 0.11255189776420593 2023-01-24 03:45:21.437000: step: 412/464, loss: 0.25171393156051636 2023-01-24 03:45:22.137240: step: 414/464, loss: 0.24863357841968536 2023-01-24 03:45:22.878886: step: 416/464, loss: 0.07378604263067245 2023-01-24 03:45:23.586082: step: 418/464, loss: 0.32955202460289 2023-01-24 03:45:24.311783: step: 420/464, loss: 0.16634100675582886 2023-01-24 03:45:25.071519: step: 422/464, loss: 0.09777677059173584 2023-01-24 03:45:25.736832: step: 424/464, loss: 1.3568098545074463 2023-01-24 03:45:26.360969: step: 426/464, loss: 0.1650019735097885 2023-01-24 03:45:27.108770: step: 428/464, loss: 7.026379108428955 2023-01-24 03:45:27.930975: step: 430/464, loss: 0.09765823185443878 2023-01-24 03:45:28.711094: step: 432/464, loss: 0.07757440954446793 2023-01-24 03:45:29.385365: step: 434/464, loss: 0.08847343176603317 2023-01-24 03:45:30.138894: step: 436/464, loss: 0.07123363018035889 2023-01-24 03:45:30.804222: step: 438/464, loss: 0.04508393257856369 2023-01-24 03:45:31.531013: step: 440/464, loss: 0.04964699596166611 2023-01-24 03:45:32.215636: step: 442/464, loss: 0.01695246435701847 2023-01-24 03:45:32.976321: step: 444/464, loss: 0.17230522632598877 2023-01-24 03:45:33.676429: step: 446/464, loss: 0.1555473655462265 2023-01-24 03:45:34.495437: step: 448/464, loss: 0.19200685620307922 2023-01-24 03:45:35.257900: step: 450/464, loss: 0.32288166880607605 2023-01-24 03:45:36.060221: step: 452/464, loss: 0.3152397572994232 2023-01-24 03:45:36.855110: step: 454/464, loss: 0.11890127509832382 2023-01-24 03:45:37.577719: step: 456/464, loss: 0.10466277599334717 2023-01-24 03:45:38.284646: step: 458/464, loss: 0.05214134231209755 2023-01-24 03:45:39.024175: step: 460/464, loss: 0.06556827574968338 2023-01-24 03:45:39.752358: step: 462/464, loss: 0.16239741444587708 2023-01-24 03:45:40.474741: step: 464/464, loss: 0.08355816453695297 2023-01-24 03:45:41.215283: step: 466/464, loss: 0.13289578258991241 2023-01-24 03:45:41.933998: step: 468/464, loss: 0.06553588062524796 2023-01-24 03:45:42.767249: step: 470/464, loss: 0.09732535481452942 2023-01-24 03:45:43.499839: step: 472/464, loss: 1.0334306955337524 2023-01-24 03:45:44.247950: step: 474/464, loss: 0.8623963594436646 2023-01-24 03:45:45.089386: step: 476/464, loss: 0.098114974796772 2023-01-24 03:45:45.843477: step: 478/464, loss: 0.4096452593803406 2023-01-24 03:45:46.560064: step: 480/464, loss: 0.5679343938827515 2023-01-24 03:45:47.272721: step: 482/464, loss: 0.0800742655992508 2023-01-24 03:45:47.945304: step: 484/464, loss: 0.07798271626234055 2023-01-24 03:45:48.642388: step: 486/464, loss: 0.15702295303344727 2023-01-24 03:45:49.379069: step: 488/464, loss: 0.11062658578157425 2023-01-24 03:45:50.098729: step: 490/464, loss: 0.3952050805091858 2023-01-24 03:45:50.810389: step: 492/464, loss: 0.03058406338095665 2023-01-24 03:45:51.504786: step: 494/464, loss: 0.08915737271308899 2023-01-24 03:45:52.242971: step: 496/464, loss: 0.11728838831186295 2023-01-24 03:45:53.019356: step: 498/464, loss: 0.21412897109985352 2023-01-24 03:45:53.788285: step: 500/464, loss: 0.09385736286640167 2023-01-24 03:45:54.547288: step: 502/464, loss: 0.0986119955778122 2023-01-24 03:45:55.292730: step: 504/464, loss: 0.1011638268828392 2023-01-24 03:45:55.987468: step: 506/464, loss: 0.050703346729278564 2023-01-24 03:45:56.706302: step: 508/464, loss: 0.8299161195755005 2023-01-24 03:45:57.348077: step: 510/464, loss: 0.10197915881872177 2023-01-24 03:45:58.050346: step: 512/464, loss: 0.050886351615190506 2023-01-24 03:45:58.765956: step: 514/464, loss: 0.0736493468284607 2023-01-24 03:45:59.492808: step: 516/464, loss: 0.32507458329200745 2023-01-24 03:46:00.273146: step: 518/464, loss: 0.1292927861213684 2023-01-24 03:46:01.014547: step: 520/464, loss: 0.11475580185651779 2023-01-24 03:46:01.696030: step: 522/464, loss: 0.07191069424152374 2023-01-24 03:46:02.434565: step: 524/464, loss: 0.21141907572746277 2023-01-24 03:46:03.088040: step: 526/464, loss: 0.12864668667316437 2023-01-24 03:46:03.809636: step: 528/464, loss: 0.0564606748521328 2023-01-24 03:46:04.548443: step: 530/464, loss: 0.10558000206947327 2023-01-24 03:46:05.323323: step: 532/464, loss: 0.10238667577505112 2023-01-24 03:46:05.989648: step: 534/464, loss: 0.13729903101921082 2023-01-24 03:46:06.695380: step: 536/464, loss: 0.5416274070739746 2023-01-24 03:46:07.463682: step: 538/464, loss: 0.11077231168746948 2023-01-24 03:46:08.257900: step: 540/464, loss: 0.10182888060808182 2023-01-24 03:46:09.043839: step: 542/464, loss: 0.08829410374164581 2023-01-24 03:46:09.749821: step: 544/464, loss: 0.20814473927021027 2023-01-24 03:46:10.535069: step: 546/464, loss: 0.10945319384336472 2023-01-24 03:46:11.265695: step: 548/464, loss: 0.10658914595842361 2023-01-24 03:46:11.921564: step: 550/464, loss: 0.05725671350955963 2023-01-24 03:46:12.611455: step: 552/464, loss: 0.05665695294737816 2023-01-24 03:46:13.314306: step: 554/464, loss: 0.07857471704483032 2023-01-24 03:46:14.038849: step: 556/464, loss: 0.0755433514714241 2023-01-24 03:46:14.848529: step: 558/464, loss: 0.21889066696166992 2023-01-24 03:46:15.667971: step: 560/464, loss: 0.05423108488321304 2023-01-24 03:46:16.436333: step: 562/464, loss: 0.4585532248020172 2023-01-24 03:46:17.254564: step: 564/464, loss: 0.11373139917850494 2023-01-24 03:46:18.007590: step: 566/464, loss: 0.14729614555835724 2023-01-24 03:46:18.659621: step: 568/464, loss: 0.1796552836894989 2023-01-24 03:46:19.456542: step: 570/464, loss: 0.10746961086988449 2023-01-24 03:46:20.180115: step: 572/464, loss: 0.08348584920167923 2023-01-24 03:46:20.939921: step: 574/464, loss: 0.226358100771904 2023-01-24 03:46:21.715272: step: 576/464, loss: 0.08186423778533936 2023-01-24 03:46:22.417585: step: 578/464, loss: 0.3481625020503998 2023-01-24 03:46:23.157219: step: 580/464, loss: 0.15354043245315552 2023-01-24 03:46:23.802602: step: 582/464, loss: 0.07363526523113251 2023-01-24 03:46:24.474764: step: 584/464, loss: 0.15959565341472626 2023-01-24 03:46:25.227733: step: 586/464, loss: 0.11872921884059906 2023-01-24 03:46:25.976752: step: 588/464, loss: 0.24006997048854828 2023-01-24 03:46:26.680861: step: 590/464, loss: 0.08303096145391464 2023-01-24 03:46:27.453179: step: 592/464, loss: 0.07172245532274246 2023-01-24 03:46:28.260775: step: 594/464, loss: 0.06550987809896469 2023-01-24 03:46:28.988971: step: 596/464, loss: 0.15820249915122986 2023-01-24 03:46:29.725102: step: 598/464, loss: 0.1852339804172516 2023-01-24 03:46:30.407572: step: 600/464, loss: 0.10605773329734802 2023-01-24 03:46:31.199569: step: 602/464, loss: 0.12317892909049988 2023-01-24 03:46:31.822139: step: 604/464, loss: 0.4308888912200928 2023-01-24 03:46:32.586712: step: 606/464, loss: 0.22137056291103363 2023-01-24 03:46:33.392193: step: 608/464, loss: 0.06388486176729202 2023-01-24 03:46:34.165887: step: 610/464, loss: 0.15812082588672638 2023-01-24 03:46:35.047403: step: 612/464, loss: 0.1343749761581421 2023-01-24 03:46:35.758190: step: 614/464, loss: 0.16867481172084808 2023-01-24 03:46:36.502421: step: 616/464, loss: 0.34840089082717896 2023-01-24 03:46:37.262625: step: 618/464, loss: 0.08506720513105392 2023-01-24 03:46:37.930623: step: 620/464, loss: 0.12645494937896729 2023-01-24 03:46:38.621907: step: 622/464, loss: 0.03901791200041771 2023-01-24 03:46:39.409441: step: 624/464, loss: 0.036129169166088104 2023-01-24 03:46:40.125131: step: 626/464, loss: 0.05090763792395592 2023-01-24 03:46:40.927179: step: 628/464, loss: 0.1342857927083969 2023-01-24 03:46:41.619013: step: 630/464, loss: 0.1088811606168747 2023-01-24 03:46:42.313670: step: 632/464, loss: 0.023789554834365845 2023-01-24 03:46:42.997570: step: 634/464, loss: 0.12072187662124634 2023-01-24 03:46:43.845977: step: 636/464, loss: 0.0999072790145874 2023-01-24 03:46:44.579598: step: 638/464, loss: 0.0696912482380867 2023-01-24 03:46:45.296583: step: 640/464, loss: 0.06875769048929214 2023-01-24 03:46:46.103838: step: 642/464, loss: 0.10171730816364288 2023-01-24 03:46:46.809139: step: 644/464, loss: 0.03848240152001381 2023-01-24 03:46:47.568972: step: 646/464, loss: 0.08445584028959274 2023-01-24 03:46:48.252518: step: 648/464, loss: 2.2473201751708984 2023-01-24 03:46:48.986654: step: 650/464, loss: 0.08144298195838928 2023-01-24 03:46:49.655623: step: 652/464, loss: 0.06221764162182808 2023-01-24 03:46:50.391530: step: 654/464, loss: 0.12438201904296875 2023-01-24 03:46:51.146772: step: 656/464, loss: 0.03517334535717964 2023-01-24 03:46:51.877085: step: 658/464, loss: 0.023519620299339294 2023-01-24 03:46:52.657608: step: 660/464, loss: 0.16124774515628815 2023-01-24 03:46:53.461743: step: 662/464, loss: 0.15201835334300995 2023-01-24 03:46:54.259248: step: 664/464, loss: 0.9340047240257263 2023-01-24 03:46:54.993853: step: 666/464, loss: 0.20893046259880066 2023-01-24 03:46:55.703182: step: 668/464, loss: 0.03892991691827774 2023-01-24 03:46:56.438275: step: 670/464, loss: 0.09852541983127594 2023-01-24 03:46:57.154799: step: 672/464, loss: 0.14694903790950775 2023-01-24 03:46:57.845425: step: 674/464, loss: 0.03230362758040428 2023-01-24 03:46:58.505599: step: 676/464, loss: 0.0965629294514656 2023-01-24 03:46:59.225420: step: 678/464, loss: 0.11053648591041565 2023-01-24 03:47:00.043183: step: 680/464, loss: 0.0767754390835762 2023-01-24 03:47:00.756215: step: 682/464, loss: 0.08364859968423843 2023-01-24 03:47:01.539388: step: 684/464, loss: 0.6616068482398987 2023-01-24 03:47:02.196394: step: 686/464, loss: 0.13472962379455566 2023-01-24 03:47:02.952847: step: 688/464, loss: 0.07598034292459488 2023-01-24 03:47:03.792372: step: 690/464, loss: 0.030683374032378197 2023-01-24 03:47:04.516914: step: 692/464, loss: 0.09763442724943161 2023-01-24 03:47:05.238617: step: 694/464, loss: 0.0856194719672203 2023-01-24 03:47:06.019241: step: 696/464, loss: 0.10150092095136642 2023-01-24 03:47:06.846983: step: 698/464, loss: 0.04385127127170563 2023-01-24 03:47:07.742926: step: 700/464, loss: 0.07088147103786469 2023-01-24 03:47:08.430054: step: 702/464, loss: 0.0784793421626091 2023-01-24 03:47:09.260949: step: 704/464, loss: 0.07931099832057953 2023-01-24 03:47:10.072580: step: 706/464, loss: 0.4231050908565521 2023-01-24 03:47:10.775958: step: 708/464, loss: 0.2891331911087036 2023-01-24 03:47:11.518201: step: 710/464, loss: 0.25972074270248413 2023-01-24 03:47:12.295371: step: 712/464, loss: 0.0433255136013031 2023-01-24 03:47:13.053136: step: 714/464, loss: 0.18018367886543274 2023-01-24 03:47:13.762167: step: 716/464, loss: 0.08854905515909195 2023-01-24 03:47:14.540955: step: 718/464, loss: 0.08068551868200302 2023-01-24 03:47:15.283172: step: 720/464, loss: 0.040290337055921555 2023-01-24 03:47:16.017450: step: 722/464, loss: 0.09700462222099304 2023-01-24 03:47:16.776480: step: 724/464, loss: 0.07090365886688232 2023-01-24 03:47:17.579227: step: 726/464, loss: 0.27016595005989075 2023-01-24 03:47:18.227936: step: 728/464, loss: 0.20455826818943024 2023-01-24 03:47:18.941768: step: 730/464, loss: 0.09726991504430771 2023-01-24 03:47:19.639914: step: 732/464, loss: 0.23708124458789825 2023-01-24 03:47:20.342611: step: 734/464, loss: 0.2544391453266144 2023-01-24 03:47:21.037546: step: 736/464, loss: 0.059123240411281586 2023-01-24 03:47:21.748470: step: 738/464, loss: 0.25635281205177307 2023-01-24 03:47:22.444217: step: 740/464, loss: 0.07739058881998062 2023-01-24 03:47:23.168624: step: 742/464, loss: 0.0810338705778122 2023-01-24 03:47:23.813054: step: 744/464, loss: 0.061010394245386124 2023-01-24 03:47:24.522365: step: 746/464, loss: 0.05705961585044861 2023-01-24 03:47:25.276284: step: 748/464, loss: 0.22858786582946777 2023-01-24 03:47:26.050756: step: 750/464, loss: 0.09986155480146408 2023-01-24 03:47:26.775092: step: 752/464, loss: 0.1608353704214096 2023-01-24 03:47:27.405915: step: 754/464, loss: 0.019120080396533012 2023-01-24 03:47:28.153286: step: 756/464, loss: 0.06734953075647354 2023-01-24 03:47:28.850220: step: 758/464, loss: 0.03486610949039459 2023-01-24 03:47:29.576478: step: 760/464, loss: 0.13575465977191925 2023-01-24 03:47:30.356653: step: 762/464, loss: 0.1357676386833191 2023-01-24 03:47:31.116142: step: 764/464, loss: 0.051089994609355927 2023-01-24 03:47:31.877136: step: 766/464, loss: 0.04774364084005356 2023-01-24 03:47:32.572507: step: 768/464, loss: 0.06380462646484375 2023-01-24 03:47:33.351794: step: 770/464, loss: 0.373801589012146 2023-01-24 03:47:34.133685: step: 772/464, loss: 0.17129270732402802 2023-01-24 03:47:34.918734: step: 774/464, loss: 0.1697038859128952 2023-01-24 03:47:35.700473: step: 776/464, loss: 0.1404598206281662 2023-01-24 03:47:36.449957: step: 778/464, loss: 0.03730373829603195 2023-01-24 03:47:37.272460: step: 780/464, loss: 0.05492500960826874 2023-01-24 03:47:38.078836: step: 782/464, loss: 0.04647694155573845 2023-01-24 03:47:38.796239: step: 784/464, loss: 0.046614717692136765 2023-01-24 03:47:39.516296: step: 786/464, loss: 0.11796533316373825 2023-01-24 03:47:40.356621: step: 788/464, loss: 0.27363863587379456 2023-01-24 03:47:41.063397: step: 790/464, loss: 0.0794137716293335 2023-01-24 03:47:41.832733: step: 792/464, loss: 0.314433753490448 2023-01-24 03:47:42.523346: step: 794/464, loss: 0.2909509837627411 2023-01-24 03:47:43.343639: step: 796/464, loss: 0.7796887159347534 2023-01-24 03:47:44.064162: step: 798/464, loss: 0.06901483237743378 2023-01-24 03:47:44.787468: step: 800/464, loss: 0.05117961764335632 2023-01-24 03:47:45.553493: step: 802/464, loss: 0.07418478280305862 2023-01-24 03:47:46.356223: step: 804/464, loss: 0.21300861239433289 2023-01-24 03:47:47.106956: step: 806/464, loss: 1.5926798582077026 2023-01-24 03:47:47.822792: step: 808/464, loss: 0.04421302676200867 2023-01-24 03:47:48.502514: step: 810/464, loss: 0.037948526442050934 2023-01-24 03:47:49.200954: step: 812/464, loss: 0.07859799265861511 2023-01-24 03:47:49.950870: step: 814/464, loss: 0.07585307210683823 2023-01-24 03:47:50.758584: step: 816/464, loss: 0.1594003438949585 2023-01-24 03:47:51.469474: step: 818/464, loss: 0.13735361397266388 2023-01-24 03:47:52.221117: step: 820/464, loss: 0.10319264233112335 2023-01-24 03:47:52.992606: step: 822/464, loss: 0.07057742774486542 2023-01-24 03:47:53.766888: step: 824/464, loss: 0.3316095173358917 2023-01-24 03:47:54.472670: step: 826/464, loss: 0.24340875446796417 2023-01-24 03:47:55.169766: step: 828/464, loss: 0.21709150075912476 2023-01-24 03:47:55.927637: step: 830/464, loss: 0.2157144844532013 2023-01-24 03:47:56.674258: step: 832/464, loss: 0.06577938795089722 2023-01-24 03:47:57.463112: step: 834/464, loss: 0.10622964054346085 2023-01-24 03:47:58.173204: step: 836/464, loss: 0.10273239016532898 2023-01-24 03:47:58.889294: step: 838/464, loss: 0.5213263630867004 2023-01-24 03:47:59.736593: step: 840/464, loss: 0.11066462844610214 2023-01-24 03:48:00.565586: step: 842/464, loss: 0.06470531970262527 2023-01-24 03:48:01.369241: step: 844/464, loss: 0.29106324911117554 2023-01-24 03:48:02.054752: step: 846/464, loss: 0.12173037230968475 2023-01-24 03:48:02.790366: step: 848/464, loss: 0.04491506889462471 2023-01-24 03:48:03.501204: step: 850/464, loss: 0.08093959838151932 2023-01-24 03:48:04.169726: step: 852/464, loss: 0.4785907566547394 2023-01-24 03:48:04.886097: step: 854/464, loss: 0.39316803216934204 2023-01-24 03:48:05.592271: step: 856/464, loss: 0.10662375390529633 2023-01-24 03:48:06.339961: step: 858/464, loss: 0.1058470830321312 2023-01-24 03:48:07.003853: step: 860/464, loss: 0.1215873584151268 2023-01-24 03:48:07.752527: step: 862/464, loss: 0.1317547708749771 2023-01-24 03:48:08.713523: step: 864/464, loss: 0.0965326651930809 2023-01-24 03:48:09.557301: step: 866/464, loss: 0.2264188826084137 2023-01-24 03:48:10.424240: step: 868/464, loss: 0.17923258244991302 2023-01-24 03:48:11.133569: step: 870/464, loss: 0.08647506684064865 2023-01-24 03:48:11.915689: step: 872/464, loss: 0.03950796648859978 2023-01-24 03:48:12.676514: step: 874/464, loss: 0.3292936086654663 2023-01-24 03:48:13.491222: step: 876/464, loss: 0.15255765616893768 2023-01-24 03:48:14.166256: step: 878/464, loss: 0.08292162418365479 2023-01-24 03:48:14.925382: step: 880/464, loss: 0.15378907322883606 2023-01-24 03:48:15.708531: step: 882/464, loss: 0.07309963554143906 2023-01-24 03:48:16.451062: step: 884/464, loss: 0.028615491464734077 2023-01-24 03:48:17.211607: step: 886/464, loss: 0.09923788160085678 2023-01-24 03:48:17.971034: step: 888/464, loss: 0.19777190685272217 2023-01-24 03:48:18.702867: step: 890/464, loss: 0.14272283017635345 2023-01-24 03:48:19.532044: step: 892/464, loss: 0.06808196753263474 2023-01-24 03:48:20.248495: step: 894/464, loss: 0.08321517705917358 2023-01-24 03:48:20.942452: step: 896/464, loss: 0.059523243457078934 2023-01-24 03:48:21.691265: step: 898/464, loss: 0.18098331987857819 2023-01-24 03:48:22.416657: step: 900/464, loss: 0.07278680801391602 2023-01-24 03:48:23.149537: step: 902/464, loss: 0.1367041915655136 2023-01-24 03:48:23.909253: step: 904/464, loss: 0.1275164633989334 2023-01-24 03:48:24.600670: step: 906/464, loss: 0.05483197793364525 2023-01-24 03:48:25.341432: step: 908/464, loss: 0.22534726560115814 2023-01-24 03:48:26.094529: step: 910/464, loss: 0.1497402787208557 2023-01-24 03:48:26.853711: step: 912/464, loss: 0.11753479391336441 2023-01-24 03:48:27.558467: step: 914/464, loss: 0.4801797866821289 2023-01-24 03:48:28.375390: step: 916/464, loss: 0.1518942415714264 2023-01-24 03:48:29.073310: step: 918/464, loss: 0.1326410174369812 2023-01-24 03:48:29.825001: step: 920/464, loss: 0.5608806014060974 2023-01-24 03:48:30.596147: step: 922/464, loss: 4.899883270263672 2023-01-24 03:48:31.325110: step: 924/464, loss: 0.1643269956111908 2023-01-24 03:48:32.068956: step: 926/464, loss: 0.07229573279619217 2023-01-24 03:48:32.888735: step: 928/464, loss: 0.0629381537437439 2023-01-24 03:48:33.643620: step: 930/464, loss: 0.11444586515426636 ================================================== Loss: 0.233 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3087595602294455, 'r': 0.3064160341555977, 'f1': 0.3075833333333333}, 'combined': 0.22664035087719295, 'epoch': 15} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31303560473581105, 'r': 0.26323448580056835, 'f1': 0.2859831450672842}, 'combined': 0.17761058483126071, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29509512683578104, 'r': 0.29957474925454053, 'f1': 0.2973180656443369}, 'combined': 0.21907646942214296, 'epoch': 15} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.30351488227981505, 'r': 0.25587533017469105, 'f1': 0.2776665339763921}, 'combined': 0.17244553162744355, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30761300082619875, 'r': 0.30761300082619875, 'f1': 0.30761300082619875}, 'combined': 0.2266622111350938, 'epoch': 15} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3198619064385172, 'r': 0.2640280233619713, 'f1': 0.2892754356445585}, 'combined': 0.1796552705581995, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29276315789473684, 'r': 0.31785714285714284, 'f1': 0.3047945205479452}, 'combined': 0.20319634703196346, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24324324324324326, 'r': 0.391304347826087, 'f1': 0.30000000000000004}, 'combined': 0.15000000000000002, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.44166666666666665, 'r': 0.22844827586206898, 'f1': 0.30113636363636365}, 'combined': 0.20075757575757575, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:51:11.871081: step: 2/464, loss: 0.14209577441215515 2023-01-24 03:51:12.591843: step: 4/464, loss: 0.08315715938806534 2023-01-24 03:51:13.279078: step: 6/464, loss: 0.1677294224500656 2023-01-24 03:51:13.996080: step: 8/464, loss: 0.06222666800022125 2023-01-24 03:51:14.813201: step: 10/464, loss: 0.12703558802604675 2023-01-24 03:51:15.623107: step: 12/464, loss: 0.14261215925216675 2023-01-24 03:51:16.368836: step: 14/464, loss: 0.18909241259098053 2023-01-24 03:51:17.224970: step: 16/464, loss: 0.10527751594781876 2023-01-24 03:51:17.980146: step: 18/464, loss: 0.09355314821004868 2023-01-24 03:51:18.737872: step: 20/464, loss: 0.10597775876522064 2023-01-24 03:51:19.475435: step: 22/464, loss: 0.027014456689357758 2023-01-24 03:51:20.213811: step: 24/464, loss: 0.18352515995502472 2023-01-24 03:51:21.036914: step: 26/464, loss: 0.12560158967971802 2023-01-24 03:51:21.759794: step: 28/464, loss: 0.029618391767144203 2023-01-24 03:51:22.518787: step: 30/464, loss: 0.04836886003613472 2023-01-24 03:51:23.244682: step: 32/464, loss: 0.10874973237514496 2023-01-24 03:51:23.947782: step: 34/464, loss: 0.05077312886714935 2023-01-24 03:51:24.659262: step: 36/464, loss: 0.2322484850883484 2023-01-24 03:51:25.335321: step: 38/464, loss: 0.06784657388925552 2023-01-24 03:51:26.068454: step: 40/464, loss: 0.09843548387289047 2023-01-24 03:51:26.812032: step: 42/464, loss: 0.07803135365247726 2023-01-24 03:51:27.608967: step: 44/464, loss: 0.06171514838933945 2023-01-24 03:51:28.328524: step: 46/464, loss: 0.046159062534570694 2023-01-24 03:51:29.122983: step: 48/464, loss: 0.05216488987207413 2023-01-24 03:51:29.844864: step: 50/464, loss: 0.02905622124671936 2023-01-24 03:51:30.609952: step: 52/464, loss: 0.06213120371103287 2023-01-24 03:51:31.385096: step: 54/464, loss: 0.12204937636852264 2023-01-24 03:51:32.217846: step: 56/464, loss: 0.06039648875594139 2023-01-24 03:51:32.982476: step: 58/464, loss: 0.04854537174105644 2023-01-24 03:51:33.699171: step: 60/464, loss: 0.0071175843477249146 2023-01-24 03:51:34.385822: step: 62/464, loss: 0.09326845407485962 2023-01-24 03:51:35.098541: step: 64/464, loss: 0.0703241154551506 2023-01-24 03:51:35.850858: step: 66/464, loss: 0.0811472162604332 2023-01-24 03:51:36.599625: step: 68/464, loss: 0.5887887477874756 2023-01-24 03:51:37.402120: step: 70/464, loss: 0.19554060697555542 2023-01-24 03:51:38.203966: step: 72/464, loss: 0.13443249464035034 2023-01-24 03:51:38.982450: step: 74/464, loss: 0.054412368685007095 2023-01-24 03:51:39.788400: step: 76/464, loss: 0.4430738389492035 2023-01-24 03:51:40.666795: step: 78/464, loss: 0.07522349804639816 2023-01-24 03:51:41.428758: step: 80/464, loss: 0.07084693759679794 2023-01-24 03:51:42.247199: step: 82/464, loss: 0.06538707762956619 2023-01-24 03:51:42.980801: step: 84/464, loss: 0.0802653431892395 2023-01-24 03:51:43.698887: step: 86/464, loss: 0.008309608325362206 2023-01-24 03:51:44.530269: step: 88/464, loss: 0.14886318147182465 2023-01-24 03:51:45.229231: step: 90/464, loss: 0.070876345038414 2023-01-24 03:51:45.928652: step: 92/464, loss: 0.054328061640262604 2023-01-24 03:51:46.722728: step: 94/464, loss: 0.05771628022193909 2023-01-24 03:51:47.420087: step: 96/464, loss: 0.05725085735321045 2023-01-24 03:51:48.144002: step: 98/464, loss: 0.12023055553436279 2023-01-24 03:51:48.751792: step: 100/464, loss: 0.07223106175661087 2023-01-24 03:51:49.523063: step: 102/464, loss: 0.7687522768974304 2023-01-24 03:51:50.237613: step: 104/464, loss: 0.11119861900806427 2023-01-24 03:51:51.056306: step: 106/464, loss: 0.4024055302143097 2023-01-24 03:51:51.817888: step: 108/464, loss: 0.06760146468877792 2023-01-24 03:51:52.565895: step: 110/464, loss: 0.31549784541130066 2023-01-24 03:51:53.273709: step: 112/464, loss: 0.0992489904165268 2023-01-24 03:51:54.048362: step: 114/464, loss: 0.0670260563492775 2023-01-24 03:51:54.786708: step: 116/464, loss: 0.0990232601761818 2023-01-24 03:51:55.562100: step: 118/464, loss: 0.104999840259552 2023-01-24 03:51:56.250093: step: 120/464, loss: 0.08298064768314362 2023-01-24 03:51:56.991825: step: 122/464, loss: 0.0959516391158104 2023-01-24 03:51:57.756287: step: 124/464, loss: 0.05884644761681557 2023-01-24 03:51:58.476095: step: 126/464, loss: 0.13606123626232147 2023-01-24 03:51:59.223360: step: 128/464, loss: 0.10948154330253601 2023-01-24 03:51:59.977324: step: 130/464, loss: 0.31888434290885925 2023-01-24 03:52:00.758141: step: 132/464, loss: 0.059614237397909164 2023-01-24 03:52:01.470865: step: 134/464, loss: 0.2272975742816925 2023-01-24 03:52:02.279833: step: 136/464, loss: 0.05067453533411026 2023-01-24 03:52:03.042829: step: 138/464, loss: 0.0519007109105587 2023-01-24 03:52:03.763808: step: 140/464, loss: 0.08054037392139435 2023-01-24 03:52:04.473841: step: 142/464, loss: 0.067112997174263 2023-01-24 03:52:05.283213: step: 144/464, loss: 0.02732996642589569 2023-01-24 03:52:06.112617: step: 146/464, loss: 0.18856127560138702 2023-01-24 03:52:06.768264: step: 148/464, loss: 0.026846928521990776 2023-01-24 03:52:07.462978: step: 150/464, loss: 0.056829262524843216 2023-01-24 03:52:08.216197: step: 152/464, loss: 0.09267004579305649 2023-01-24 03:52:09.046594: step: 154/464, loss: 0.02071995846927166 2023-01-24 03:52:09.794779: step: 156/464, loss: 0.04269138723611832 2023-01-24 03:52:10.609751: step: 158/464, loss: 0.11235304921865463 2023-01-24 03:52:11.319253: step: 160/464, loss: 0.09503141790628433 2023-01-24 03:52:12.064138: step: 162/464, loss: 0.12102089822292328 2023-01-24 03:52:12.750376: step: 164/464, loss: 0.11403682827949524 2023-01-24 03:52:13.491203: step: 166/464, loss: 0.25968801975250244 2023-01-24 03:52:14.149556: step: 168/464, loss: 0.1058613508939743 2023-01-24 03:52:14.965366: step: 170/464, loss: 0.22510240972042084 2023-01-24 03:52:15.813820: step: 172/464, loss: 0.05563351884484291 2023-01-24 03:52:16.536556: step: 174/464, loss: 0.03602204844355583 2023-01-24 03:52:17.356683: step: 176/464, loss: 0.03502077981829643 2023-01-24 03:52:18.145167: step: 178/464, loss: 0.025843489915132523 2023-01-24 03:52:18.918999: step: 180/464, loss: 0.0642140656709671 2023-01-24 03:52:19.768312: step: 182/464, loss: 0.8990870714187622 2023-01-24 03:52:20.561207: step: 184/464, loss: 0.04531247913837433 2023-01-24 03:52:21.284281: step: 186/464, loss: 0.0421457402408123 2023-01-24 03:52:21.992738: step: 188/464, loss: 0.05311638116836548 2023-01-24 03:52:22.739835: step: 190/464, loss: 0.08210153877735138 2023-01-24 03:52:23.464891: step: 192/464, loss: 0.07027444243431091 2023-01-24 03:52:24.181580: step: 194/464, loss: 0.0335502065718174 2023-01-24 03:52:24.899326: step: 196/464, loss: 0.06261256337165833 2023-01-24 03:52:25.638803: step: 198/464, loss: 0.12928935885429382 2023-01-24 03:52:26.373999: step: 200/464, loss: 0.03679617494344711 2023-01-24 03:52:27.122628: step: 202/464, loss: 1.5623284578323364 2023-01-24 03:52:27.830400: step: 204/464, loss: 0.04236576333642006 2023-01-24 03:52:28.549218: step: 206/464, loss: 2.797375440597534 2023-01-24 03:52:29.257339: step: 208/464, loss: 0.15503378212451935 2023-01-24 03:52:29.943436: step: 210/464, loss: 0.46465569734573364 2023-01-24 03:52:30.674299: step: 212/464, loss: 0.16911257803440094 2023-01-24 03:52:31.482418: step: 214/464, loss: 0.05166523531079292 2023-01-24 03:52:32.158360: step: 216/464, loss: 0.02335522696375847 2023-01-24 03:52:32.810021: step: 218/464, loss: 0.05577556788921356 2023-01-24 03:52:33.565050: step: 220/464, loss: 0.05349784716963768 2023-01-24 03:52:34.347920: step: 222/464, loss: 0.7577762603759766 2023-01-24 03:52:35.080317: step: 224/464, loss: 0.067133329808712 2023-01-24 03:52:35.871038: step: 226/464, loss: 0.0397614948451519 2023-01-24 03:52:36.654883: step: 228/464, loss: 0.0500374361872673 2023-01-24 03:52:37.393361: step: 230/464, loss: 0.12101941555738449 2023-01-24 03:52:38.084941: step: 232/464, loss: 0.03364633768796921 2023-01-24 03:52:38.788799: step: 234/464, loss: 0.029257718473672867 2023-01-24 03:52:39.536471: step: 236/464, loss: 0.09055133163928986 2023-01-24 03:52:40.240574: step: 238/464, loss: 0.7606881260871887 2023-01-24 03:52:41.050727: step: 240/464, loss: 0.05093425512313843 2023-01-24 03:52:41.820333: step: 242/464, loss: 0.09483727812767029 2023-01-24 03:52:42.490545: step: 244/464, loss: 0.09020978957414627 2023-01-24 03:52:43.171943: step: 246/464, loss: 0.07504907995462418 2023-01-24 03:52:43.926711: step: 248/464, loss: 0.1254086196422577 2023-01-24 03:52:44.682975: step: 250/464, loss: 0.1057504266500473 2023-01-24 03:52:45.462234: step: 252/464, loss: 0.16019676625728607 2023-01-24 03:52:46.194379: step: 254/464, loss: 0.033384401351213455 2023-01-24 03:52:47.028847: step: 256/464, loss: 0.022989513352513313 2023-01-24 03:52:47.790786: step: 258/464, loss: 0.03476962074637413 2023-01-24 03:52:48.549332: step: 260/464, loss: 0.032892998307943344 2023-01-24 03:52:49.251979: step: 262/464, loss: 0.03973241522908211 2023-01-24 03:52:50.051473: step: 264/464, loss: 0.0409555658698082 2023-01-24 03:52:50.781962: step: 266/464, loss: 0.6431903839111328 2023-01-24 03:52:51.518574: step: 268/464, loss: 0.10075043886899948 2023-01-24 03:52:52.248194: step: 270/464, loss: 0.12035145610570908 2023-01-24 03:52:52.995399: step: 272/464, loss: 0.008856400847434998 2023-01-24 03:52:53.791158: step: 274/464, loss: 0.1363389790058136 2023-01-24 03:52:54.510212: step: 276/464, loss: 0.01673283614218235 2023-01-24 03:52:55.200096: step: 278/464, loss: 0.15913592278957367 2023-01-24 03:52:55.936706: step: 280/464, loss: 0.05854547768831253 2023-01-24 03:52:56.794857: step: 282/464, loss: 0.37333014607429504 2023-01-24 03:52:57.601407: step: 284/464, loss: 0.10043874382972717 2023-01-24 03:52:58.312872: step: 286/464, loss: 0.043367329984903336 2023-01-24 03:52:59.037244: step: 288/464, loss: 0.08529473096132278 2023-01-24 03:52:59.783726: step: 290/464, loss: 0.10794027149677277 2023-01-24 03:53:00.558473: step: 292/464, loss: 0.1581147313117981 2023-01-24 03:53:01.456668: step: 294/464, loss: 0.06559644639492035 2023-01-24 03:53:02.262321: step: 296/464, loss: 0.2656407952308655 2023-01-24 03:53:03.111782: step: 298/464, loss: 0.12226749956607819 2023-01-24 03:53:03.865536: step: 300/464, loss: 0.03235700726509094 2023-01-24 03:53:04.574710: step: 302/464, loss: 0.07469668239355087 2023-01-24 03:53:05.315387: step: 304/464, loss: 0.019965143874287605 2023-01-24 03:53:05.970112: step: 306/464, loss: 0.05402126908302307 2023-01-24 03:53:06.723330: step: 308/464, loss: 0.11711370199918747 2023-01-24 03:53:07.472456: step: 310/464, loss: 0.06864065676927567 2023-01-24 03:53:08.178904: step: 312/464, loss: 0.2924725115299225 2023-01-24 03:53:08.928246: step: 314/464, loss: 0.14239171147346497 2023-01-24 03:53:09.659181: step: 316/464, loss: 0.20074528455734253 2023-01-24 03:53:10.348676: step: 318/464, loss: 0.05544080212712288 2023-01-24 03:53:11.062966: step: 320/464, loss: 0.11986785382032394 2023-01-24 03:53:11.826916: step: 322/464, loss: 0.26871606707572937 2023-01-24 03:53:12.531726: step: 324/464, loss: 0.183811217546463 2023-01-24 03:53:13.389959: step: 326/464, loss: 0.1335066854953766 2023-01-24 03:53:14.112459: step: 328/464, loss: 0.14025689661502838 2023-01-24 03:53:14.828293: step: 330/464, loss: 0.07599826157093048 2023-01-24 03:53:15.521096: step: 332/464, loss: 0.10522612184286118 2023-01-24 03:53:16.250064: step: 334/464, loss: 0.08387448638677597 2023-01-24 03:53:16.952373: step: 336/464, loss: 0.06999139487743378 2023-01-24 03:53:17.676067: step: 338/464, loss: 0.12560762465000153 2023-01-24 03:53:18.459787: step: 340/464, loss: 0.5550030469894409 2023-01-24 03:53:19.214240: step: 342/464, loss: 0.1845470666885376 2023-01-24 03:53:19.952581: step: 344/464, loss: 0.19275884330272675 2023-01-24 03:53:20.716925: step: 346/464, loss: 0.14432045817375183 2023-01-24 03:53:21.493240: step: 348/464, loss: 0.09600774198770523 2023-01-24 03:53:22.160426: step: 350/464, loss: 0.33380627632141113 2023-01-24 03:53:22.942596: step: 352/464, loss: 0.12676355242729187 2023-01-24 03:53:23.617366: step: 354/464, loss: 0.11150521785020828 2023-01-24 03:53:24.404153: step: 356/464, loss: 0.1399673968553543 2023-01-24 03:53:25.254174: step: 358/464, loss: 0.103044293820858 2023-01-24 03:53:25.913550: step: 360/464, loss: 0.5707974433898926 2023-01-24 03:53:26.732293: step: 362/464, loss: 0.0869031548500061 2023-01-24 03:53:27.395697: step: 364/464, loss: 0.08653386682271957 2023-01-24 03:53:28.039525: step: 366/464, loss: 0.0813024491071701 2023-01-24 03:53:28.767030: step: 368/464, loss: 0.023970788344740868 2023-01-24 03:53:29.507007: step: 370/464, loss: 0.13009853661060333 2023-01-24 03:53:30.247083: step: 372/464, loss: 0.03696196526288986 2023-01-24 03:53:30.962135: step: 374/464, loss: 0.02843949757516384 2023-01-24 03:53:31.729795: step: 376/464, loss: 0.0992714986205101 2023-01-24 03:53:32.453937: step: 378/464, loss: 0.1639476865530014 2023-01-24 03:53:33.233754: step: 380/464, loss: 0.10682159662246704 2023-01-24 03:53:33.988001: step: 382/464, loss: 0.11330337077379227 2023-01-24 03:53:34.684615: step: 384/464, loss: 0.052299533039331436 2023-01-24 03:53:35.394323: step: 386/464, loss: 0.16930243372917175 2023-01-24 03:53:36.132088: step: 388/464, loss: 0.06845500320196152 2023-01-24 03:53:36.877829: step: 390/464, loss: 0.17916421592235565 2023-01-24 03:53:37.599456: step: 392/464, loss: 0.17624793946743011 2023-01-24 03:53:38.436935: step: 394/464, loss: 0.09612559527158737 2023-01-24 03:53:39.204386: step: 396/464, loss: 0.1727244257926941 2023-01-24 03:53:39.957251: step: 398/464, loss: 0.010704328306019306 2023-01-24 03:53:40.655614: step: 400/464, loss: 0.13919922709465027 2023-01-24 03:53:41.442611: step: 402/464, loss: 0.062354229390621185 2023-01-24 03:53:42.197703: step: 404/464, loss: 0.1840866208076477 2023-01-24 03:53:43.006476: step: 406/464, loss: 0.019472267478704453 2023-01-24 03:53:43.700022: step: 408/464, loss: 0.09749460965394974 2023-01-24 03:53:44.377844: step: 410/464, loss: 0.05314226448535919 2023-01-24 03:53:45.142819: step: 412/464, loss: 0.04876469820737839 2023-01-24 03:53:45.812081: step: 414/464, loss: 0.0391797199845314 2023-01-24 03:53:46.538430: step: 416/464, loss: 0.0012564189964905381 2023-01-24 03:53:47.225279: step: 418/464, loss: 0.04942185804247856 2023-01-24 03:53:47.946064: step: 420/464, loss: 0.07895748317241669 2023-01-24 03:53:48.651771: step: 422/464, loss: 0.07031918317079544 2023-01-24 03:53:49.417580: step: 424/464, loss: 0.03398099169135094 2023-01-24 03:53:50.174752: step: 426/464, loss: 0.17639093101024628 2023-01-24 03:53:50.991320: step: 428/464, loss: 0.040956635028123856 2023-01-24 03:53:51.758209: step: 430/464, loss: 0.15845489501953125 2023-01-24 03:53:52.431407: step: 432/464, loss: 0.06485290080308914 2023-01-24 03:53:53.266135: step: 434/464, loss: 0.10166499018669128 2023-01-24 03:53:54.022752: step: 436/464, loss: 0.01902008429169655 2023-01-24 03:53:54.737202: step: 438/464, loss: 0.05797155201435089 2023-01-24 03:53:55.492713: step: 440/464, loss: 0.048199351876974106 2023-01-24 03:53:56.214770: step: 442/464, loss: 0.07311541587114334 2023-01-24 03:53:56.921990: step: 444/464, loss: 0.07391253858804703 2023-01-24 03:53:57.755997: step: 446/464, loss: 0.16486096382141113 2023-01-24 03:53:58.531902: step: 448/464, loss: 0.11881289631128311 2023-01-24 03:53:59.243080: step: 450/464, loss: 0.030278276652097702 2023-01-24 03:53:59.958567: step: 452/464, loss: 0.2739794850349426 2023-01-24 03:54:00.689396: step: 454/464, loss: 0.06841745972633362 2023-01-24 03:54:01.372770: step: 456/464, loss: 0.19904053211212158 2023-01-24 03:54:02.084514: step: 458/464, loss: 0.02035653218626976 2023-01-24 03:54:02.872188: step: 460/464, loss: 0.05959150567650795 2023-01-24 03:54:03.632999: step: 462/464, loss: 0.03956807032227516 2023-01-24 03:54:04.413552: step: 464/464, loss: 0.13315275311470032 2023-01-24 03:54:05.144678: step: 466/464, loss: 0.2876358926296234 2023-01-24 03:54:05.850112: step: 468/464, loss: 0.04231845214962959 2023-01-24 03:54:06.553497: step: 470/464, loss: 0.7945632338523865 2023-01-24 03:54:07.394522: step: 472/464, loss: 0.09187690913677216 2023-01-24 03:54:08.134899: step: 474/464, loss: 0.10587914288043976 2023-01-24 03:54:08.876640: step: 476/464, loss: 0.2761503756046295 2023-01-24 03:54:09.614904: step: 478/464, loss: 0.07797357439994812 2023-01-24 03:54:10.389523: step: 480/464, loss: 0.05710531771183014 2023-01-24 03:54:11.130231: step: 482/464, loss: 0.18004289269447327 2023-01-24 03:54:11.861100: step: 484/464, loss: 0.08591750264167786 2023-01-24 03:54:12.564690: step: 486/464, loss: 0.09022261202335358 2023-01-24 03:54:13.256885: step: 488/464, loss: 0.10273952037096024 2023-01-24 03:54:13.962768: step: 490/464, loss: 0.0322842039167881 2023-01-24 03:54:14.707618: step: 492/464, loss: 0.0460544228553772 2023-01-24 03:54:15.440461: step: 494/464, loss: 0.04815031960606575 2023-01-24 03:54:16.278156: step: 496/464, loss: 0.18230149149894714 2023-01-24 03:54:17.018265: step: 498/464, loss: 0.055478718131780624 2023-01-24 03:54:17.690318: step: 500/464, loss: 0.035472407937049866 2023-01-24 03:54:18.461743: step: 502/464, loss: 0.054278500378131866 2023-01-24 03:54:19.193481: step: 504/464, loss: 0.9115884900093079 2023-01-24 03:54:19.936325: step: 506/464, loss: 0.14384281635284424 2023-01-24 03:54:20.724263: step: 508/464, loss: 0.1132027804851532 2023-01-24 03:54:21.467313: step: 510/464, loss: 0.03535445034503937 2023-01-24 03:54:22.249837: step: 512/464, loss: 0.10807029157876968 2023-01-24 03:54:22.937438: step: 514/464, loss: 0.03422875702381134 2023-01-24 03:54:23.688498: step: 516/464, loss: 0.3180900514125824 2023-01-24 03:54:24.457923: step: 518/464, loss: 0.19532497227191925 2023-01-24 03:54:25.220798: step: 520/464, loss: 0.1341649740934372 2023-01-24 03:54:25.914366: step: 522/464, loss: 0.4564950168132782 2023-01-24 03:54:26.717792: step: 524/464, loss: 0.13296714425086975 2023-01-24 03:54:27.501269: step: 526/464, loss: 0.13749663531780243 2023-01-24 03:54:28.210807: step: 528/464, loss: 0.2917221784591675 2023-01-24 03:54:28.912282: step: 530/464, loss: 0.06330596655607224 2023-01-24 03:54:29.663152: step: 532/464, loss: 0.32829830050468445 2023-01-24 03:54:30.409013: step: 534/464, loss: 0.07539442181587219 2023-01-24 03:54:31.118478: step: 536/464, loss: 0.05646499991416931 2023-01-24 03:54:31.920543: step: 538/464, loss: 0.07317481189966202 2023-01-24 03:54:32.652319: step: 540/464, loss: 0.14793869853019714 2023-01-24 03:54:33.426952: step: 542/464, loss: 0.220576211810112 2023-01-24 03:54:34.199790: step: 544/464, loss: 0.09299348294734955 2023-01-24 03:54:35.022189: step: 546/464, loss: 0.13680137693881989 2023-01-24 03:54:35.697620: step: 548/464, loss: 0.11256013065576553 2023-01-24 03:54:36.385133: step: 550/464, loss: 0.06750702112913132 2023-01-24 03:54:37.183555: step: 552/464, loss: 0.08740395307540894 2023-01-24 03:54:37.901030: step: 554/464, loss: 0.02208864875137806 2023-01-24 03:54:38.703932: step: 556/464, loss: 0.14167027175426483 2023-01-24 03:54:39.411966: step: 558/464, loss: 0.039901308715343475 2023-01-24 03:54:40.088334: step: 560/464, loss: 0.18916989862918854 2023-01-24 03:54:40.925096: step: 562/464, loss: 0.1889684796333313 2023-01-24 03:54:41.703749: step: 564/464, loss: 0.019529862329363823 2023-01-24 03:54:42.447325: step: 566/464, loss: 0.07504037022590637 2023-01-24 03:54:43.185749: step: 568/464, loss: 0.02826717123389244 2023-01-24 03:54:43.911097: step: 570/464, loss: 0.15966065227985382 2023-01-24 03:54:44.619434: step: 572/464, loss: 0.052043791860342026 2023-01-24 03:54:45.385930: step: 574/464, loss: 0.2825374901294708 2023-01-24 03:54:46.093220: step: 576/464, loss: 0.17329668998718262 2023-01-24 03:54:46.828745: step: 578/464, loss: 0.15861612558364868 2023-01-24 03:54:47.622853: step: 580/464, loss: 0.1683676838874817 2023-01-24 03:54:48.408468: step: 582/464, loss: 0.1867954432964325 2023-01-24 03:54:49.105478: step: 584/464, loss: 0.04729166254401207 2023-01-24 03:54:49.815289: step: 586/464, loss: 0.049370501190423965 2023-01-24 03:54:50.552046: step: 588/464, loss: 0.12045831978321075 2023-01-24 03:54:51.261745: step: 590/464, loss: 0.029787318781018257 2023-01-24 03:54:52.078286: step: 592/464, loss: 0.605231761932373 2023-01-24 03:54:52.872041: step: 594/464, loss: 0.09484227001667023 2023-01-24 03:54:53.578579: step: 596/464, loss: 1.07864248752594 2023-01-24 03:54:54.380528: step: 598/464, loss: 0.5804974436759949 2023-01-24 03:54:55.102852: step: 600/464, loss: 0.10189292579889297 2023-01-24 03:54:55.881232: step: 602/464, loss: 0.13452737033367157 2023-01-24 03:54:56.662857: step: 604/464, loss: 0.40827855467796326 2023-01-24 03:54:57.363556: step: 606/464, loss: 0.25222280621528625 2023-01-24 03:54:58.042019: step: 608/464, loss: 0.09318625926971436 2023-01-24 03:54:58.820169: step: 610/464, loss: 0.3048606514930725 2023-01-24 03:54:59.562364: step: 612/464, loss: 0.04482343792915344 2023-01-24 03:55:00.340420: step: 614/464, loss: 0.09011220932006836 2023-01-24 03:55:01.043105: step: 616/464, loss: 0.056642841547727585 2023-01-24 03:55:01.755803: step: 618/464, loss: 0.05928850173950195 2023-01-24 03:55:02.629144: step: 620/464, loss: 0.3149532079696655 2023-01-24 03:55:03.497266: step: 622/464, loss: 0.13324689865112305 2023-01-24 03:55:04.206675: step: 624/464, loss: 0.3803398311138153 2023-01-24 03:55:04.958807: step: 626/464, loss: 0.0937994047999382 2023-01-24 03:55:05.692464: step: 628/464, loss: 0.4643716812133789 2023-01-24 03:55:06.396826: step: 630/464, loss: 0.06096263229846954 2023-01-24 03:55:07.125468: step: 632/464, loss: 0.1206122562289238 2023-01-24 03:55:07.925692: step: 634/464, loss: 0.056166987866163254 2023-01-24 03:55:08.636889: step: 636/464, loss: 0.07867898792028427 2023-01-24 03:55:09.397968: step: 638/464, loss: 0.07757259160280228 2023-01-24 03:55:10.042660: step: 640/464, loss: 0.29400986433029175 2023-01-24 03:55:10.835919: step: 642/464, loss: 0.042422376573085785 2023-01-24 03:55:11.557928: step: 644/464, loss: 0.11885477602481842 2023-01-24 03:55:12.331359: step: 646/464, loss: 0.05155384540557861 2023-01-24 03:55:13.048177: step: 648/464, loss: 0.045021940022706985 2023-01-24 03:55:13.800229: step: 650/464, loss: 0.1434958279132843 2023-01-24 03:55:14.594124: step: 652/464, loss: 0.10911697894334793 2023-01-24 03:55:15.272033: step: 654/464, loss: 0.13170289993286133 2023-01-24 03:55:16.054455: step: 656/464, loss: 0.10425911843776703 2023-01-24 03:55:16.826801: step: 658/464, loss: 0.11268703639507294 2023-01-24 03:55:17.534203: step: 660/464, loss: 0.16174089908599854 2023-01-24 03:55:18.313679: step: 662/464, loss: 0.12235087156295776 2023-01-24 03:55:19.070191: step: 664/464, loss: 0.0665929839015007 2023-01-24 03:55:19.798526: step: 666/464, loss: 0.0692693442106247 2023-01-24 03:55:20.461326: step: 668/464, loss: 0.04867265373468399 2023-01-24 03:55:21.273223: step: 670/464, loss: 0.055253803730010986 2023-01-24 03:55:21.929624: step: 672/464, loss: 0.03663313016295433 2023-01-24 03:55:22.695790: step: 674/464, loss: 0.08477963507175446 2023-01-24 03:55:23.448740: step: 676/464, loss: 0.1519874483346939 2023-01-24 03:55:24.146203: step: 678/464, loss: 0.09137319773435593 2023-01-24 03:55:24.888958: step: 680/464, loss: 0.14208124577999115 2023-01-24 03:55:25.770200: step: 682/464, loss: 0.05060447007417679 2023-01-24 03:55:26.589389: step: 684/464, loss: 0.12298265844583511 2023-01-24 03:55:27.415131: step: 686/464, loss: 0.05008777230978012 2023-01-24 03:55:28.129551: step: 688/464, loss: 0.07089913636445999 2023-01-24 03:55:28.865166: step: 690/464, loss: 0.03305942565202713 2023-01-24 03:55:29.527527: step: 692/464, loss: 0.03266128525137901 2023-01-24 03:55:30.223273: step: 694/464, loss: 0.045487675815820694 2023-01-24 03:55:30.955499: step: 696/464, loss: 0.1335483193397522 2023-01-24 03:55:31.636287: step: 698/464, loss: 0.15396928787231445 2023-01-24 03:55:32.370498: step: 700/464, loss: 0.6332160234451294 2023-01-24 03:55:33.153890: step: 702/464, loss: 0.03149405121803284 2023-01-24 03:55:33.948008: step: 704/464, loss: 0.2502283453941345 2023-01-24 03:55:34.642486: step: 706/464, loss: 0.0986698642373085 2023-01-24 03:55:35.414222: step: 708/464, loss: 0.09098249673843384 2023-01-24 03:55:36.164517: step: 710/464, loss: 0.11281674355268478 2023-01-24 03:55:36.915739: step: 712/464, loss: 0.4118633270263672 2023-01-24 03:55:37.564960: step: 714/464, loss: 0.2575540840625763 2023-01-24 03:55:38.321896: step: 716/464, loss: 0.10508500039577484 2023-01-24 03:55:39.079270: step: 718/464, loss: 0.06450307369232178 2023-01-24 03:55:39.794135: step: 720/464, loss: 0.08441021293401718 2023-01-24 03:55:40.524512: step: 722/464, loss: 0.27181103825569153 2023-01-24 03:55:41.258387: step: 724/464, loss: 0.20128807425498962 2023-01-24 03:55:42.082756: step: 726/464, loss: 0.02161598950624466 2023-01-24 03:55:42.877586: step: 728/464, loss: 0.05868678167462349 2023-01-24 03:55:43.750275: step: 730/464, loss: 0.04888615012168884 2023-01-24 03:55:44.518607: step: 732/464, loss: 0.1488361805677414 2023-01-24 03:55:45.213639: step: 734/464, loss: 0.06514663249254227 2023-01-24 03:55:46.001077: step: 736/464, loss: 0.0822744145989418 2023-01-24 03:55:46.726249: step: 738/464, loss: 0.13314495980739594 2023-01-24 03:55:47.396619: step: 740/464, loss: 0.018895376473665237 2023-01-24 03:55:48.185273: step: 742/464, loss: 0.12033816426992416 2023-01-24 03:55:48.949647: step: 744/464, loss: 0.09358536452054977 2023-01-24 03:55:49.868587: step: 746/464, loss: 0.07539544254541397 2023-01-24 03:55:50.627949: step: 748/464, loss: 0.07023809105157852 2023-01-24 03:55:51.408721: step: 750/464, loss: 0.18539175391197205 2023-01-24 03:55:52.090221: step: 752/464, loss: 0.17972595989704132 2023-01-24 03:55:52.892941: step: 754/464, loss: 0.038272675126791 2023-01-24 03:55:53.721750: step: 756/464, loss: 0.0807376578450203 2023-01-24 03:55:54.413176: step: 758/464, loss: 0.04930340126156807 2023-01-24 03:55:55.137592: step: 760/464, loss: 0.02274372987449169 2023-01-24 03:55:55.817753: step: 762/464, loss: 0.0977998822927475 2023-01-24 03:55:56.589686: step: 764/464, loss: 0.9066311120986938 2023-01-24 03:55:57.263066: step: 766/464, loss: 0.2617078125476837 2023-01-24 03:55:58.078018: step: 768/464, loss: 0.13507243990898132 2023-01-24 03:55:58.809846: step: 770/464, loss: 0.1343207210302353 2023-01-24 03:55:59.567008: step: 772/464, loss: 0.029202204197645187 2023-01-24 03:56:00.264947: step: 774/464, loss: 0.030908869579434395 2023-01-24 03:56:00.917736: step: 776/464, loss: 0.03610406443476677 2023-01-24 03:56:01.668937: step: 778/464, loss: 0.04053062945604324 2023-01-24 03:56:02.451000: step: 780/464, loss: 0.32335996627807617 2023-01-24 03:56:03.158491: step: 782/464, loss: 0.1535036861896515 2023-01-24 03:56:03.877047: step: 784/464, loss: 0.2088811993598938 2023-01-24 03:56:04.656951: step: 786/464, loss: 0.05972848832607269 2023-01-24 03:56:05.457317: step: 788/464, loss: 0.05589446425437927 2023-01-24 03:56:06.200820: step: 790/464, loss: 0.31635093688964844 2023-01-24 03:56:06.953885: step: 792/464, loss: 0.06617505848407745 2023-01-24 03:56:07.699923: step: 794/464, loss: 0.06160557270050049 2023-01-24 03:56:08.395819: step: 796/464, loss: 0.18889334797859192 2023-01-24 03:56:09.113010: step: 798/464, loss: 0.2467067390680313 2023-01-24 03:56:09.867721: step: 800/464, loss: 0.04508241266012192 2023-01-24 03:56:10.589808: step: 802/464, loss: 0.06874433159828186 2023-01-24 03:56:11.339170: step: 804/464, loss: 0.1067737489938736 2023-01-24 03:56:12.188815: step: 806/464, loss: 0.9908697605133057 2023-01-24 03:56:13.037884: step: 808/464, loss: 0.0754680186510086 2023-01-24 03:56:13.702774: step: 810/464, loss: 0.16445475816726685 2023-01-24 03:56:14.362956: step: 812/464, loss: 0.20411959290504456 2023-01-24 03:56:15.108693: step: 814/464, loss: 0.01780773140490055 2023-01-24 03:56:15.857651: step: 816/464, loss: 0.1358191967010498 2023-01-24 03:56:16.635595: step: 818/464, loss: 0.17191946506500244 2023-01-24 03:56:17.337217: step: 820/464, loss: 0.03713075816631317 2023-01-24 03:56:18.152996: step: 822/464, loss: 0.026605524122714996 2023-01-24 03:56:18.869936: step: 824/464, loss: 0.12163020670413971 2023-01-24 03:56:19.586374: step: 826/464, loss: 0.3582850396633148 2023-01-24 03:56:20.281343: step: 828/464, loss: 0.13930034637451172 2023-01-24 03:56:21.015167: step: 830/464, loss: 1.0212076902389526 2023-01-24 03:56:21.855332: step: 832/464, loss: 0.07644716650247574 2023-01-24 03:56:22.585040: step: 834/464, loss: 0.4002009630203247 2023-01-24 03:56:23.345697: step: 836/464, loss: 0.11011244356632233 2023-01-24 03:56:24.100666: step: 838/464, loss: 0.03478245809674263 2023-01-24 03:56:24.829648: step: 840/464, loss: 0.12680082023143768 2023-01-24 03:56:25.552896: step: 842/464, loss: 0.069913849234581 2023-01-24 03:56:26.371723: step: 844/464, loss: 0.10052835196256638 2023-01-24 03:56:27.148835: step: 846/464, loss: 0.15281496942043304 2023-01-24 03:56:27.886221: step: 848/464, loss: 0.08110205829143524 2023-01-24 03:56:28.617444: step: 850/464, loss: 0.08946767449378967 2023-01-24 03:56:29.379439: step: 852/464, loss: 0.0644698217511177 2023-01-24 03:56:30.144644: step: 854/464, loss: 0.10672362148761749 2023-01-24 03:56:30.835406: step: 856/464, loss: 0.33436694741249084 2023-01-24 03:56:31.641023: step: 858/464, loss: 0.0706905946135521 2023-01-24 03:56:32.326836: step: 860/464, loss: 0.14880642294883728 2023-01-24 03:56:32.979321: step: 862/464, loss: 0.03052304871380329 2023-01-24 03:56:33.677715: step: 864/464, loss: 0.0998193621635437 2023-01-24 03:56:34.439828: step: 866/464, loss: 0.26408851146698 2023-01-24 03:56:35.205428: step: 868/464, loss: 0.3294277787208557 2023-01-24 03:56:35.941319: step: 870/464, loss: 0.06395434588193893 2023-01-24 03:56:36.696567: step: 872/464, loss: 0.0582747757434845 2023-01-24 03:56:37.509475: step: 874/464, loss: 0.17045952379703522 2023-01-24 03:56:38.255176: step: 876/464, loss: 0.28945544362068176 2023-01-24 03:56:38.955126: step: 878/464, loss: 0.05567263439297676 2023-01-24 03:56:39.727622: step: 880/464, loss: 0.4171179533004761 2023-01-24 03:56:40.465895: step: 882/464, loss: 0.05881846696138382 2023-01-24 03:56:41.153217: step: 884/464, loss: 0.06386110186576843 2023-01-24 03:56:41.885220: step: 886/464, loss: 0.17204733192920685 2023-01-24 03:56:42.614987: step: 888/464, loss: 0.10010688006877899 2023-01-24 03:56:43.294204: step: 890/464, loss: 0.20999249815940857 2023-01-24 03:56:44.005032: step: 892/464, loss: 0.23485256731510162 2023-01-24 03:56:44.746497: step: 894/464, loss: 0.06420190632343292 2023-01-24 03:56:45.467160: step: 896/464, loss: 0.09333354979753494 2023-01-24 03:56:46.161708: step: 898/464, loss: 0.07883160561323166 2023-01-24 03:56:46.997098: step: 900/464, loss: 0.053643010556697845 2023-01-24 03:56:47.759354: step: 902/464, loss: 0.13103336095809937 2023-01-24 03:56:48.515326: step: 904/464, loss: 0.007707820273935795 2023-01-24 03:56:49.260232: step: 906/464, loss: 0.4912790060043335 2023-01-24 03:56:50.054384: step: 908/464, loss: 0.049322012811899185 2023-01-24 03:56:50.937649: step: 910/464, loss: 0.18857868015766144 2023-01-24 03:56:51.679112: step: 912/464, loss: 0.017859352752566338 2023-01-24 03:56:52.407086: step: 914/464, loss: 0.02454872988164425 2023-01-24 03:56:53.102124: step: 916/464, loss: 0.06313516944646835 2023-01-24 03:56:53.797385: step: 918/464, loss: 0.025236960500478745 2023-01-24 03:56:54.517893: step: 920/464, loss: 0.06392581760883331 2023-01-24 03:56:55.319584: step: 922/464, loss: 0.059632401913404465 2023-01-24 03:56:56.109673: step: 924/464, loss: 0.9361553192138672 2023-01-24 03:56:56.891324: step: 926/464, loss: 0.061501532793045044 2023-01-24 03:56:57.616382: step: 928/464, loss: 0.06427504122257233 2023-01-24 03:56:58.224988: step: 930/464, loss: 0.0780082419514656 ================================================== Loss: 0.147 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3174167587731418, 'r': 0.3113936703713744, 'f1': 0.31437636836343735}, 'combined': 0.23164574510990119, 'epoch': 16} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31536928997118413, 'r': 0.27018297866108365, 'f1': 0.29103264971263076}, 'combined': 0.18074659297942333, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30395622334754796, 'r': 0.3091471265925725, 'f1': 0.30652970030909826}, 'combined': 0.2258639897014408, 'epoch': 16} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3030224850615022, 'r': 0.2611023784324406, 'f1': 0.2805048906301803}, 'combined': 0.1742083004966383, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3208658548226034, 'r': 0.3117330506056412, 'f1': 0.31623352775586705}, 'combined': 0.23301417834642835, 'epoch': 16} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3216192093754258, 'r': 0.27245650971744656, 'f1': 0.29500363145060415}, 'combined': 0.18321278163774365, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3705357142857143, 'r': 0.29642857142857143, 'f1': 0.32936507936507936}, 'combined': 0.21957671957671956, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25892857142857145, 'r': 0.31521739130434784, 'f1': 0.2843137254901961}, 'combined': 0.14215686274509806, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4326923076923077, 'r': 0.1939655172413793, 'f1': 0.26785714285714285}, 'combined': 0.17857142857142855, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:59:38.048559: step: 2/464, loss: 0.053664859384298325 2023-01-24 03:59:38.723938: step: 4/464, loss: 0.0166542399674654 2023-01-24 03:59:39.496774: step: 6/464, loss: 0.16694216430187225 2023-01-24 03:59:40.277284: step: 8/464, loss: 0.07075951993465424 2023-01-24 03:59:41.052903: step: 10/464, loss: 0.07365650683641434 2023-01-24 03:59:41.818616: step: 12/464, loss: 0.13355489075183868 2023-01-24 03:59:42.561668: step: 14/464, loss: 0.5236701965332031 2023-01-24 03:59:43.294546: step: 16/464, loss: 0.0357673317193985 2023-01-24 03:59:43.999864: step: 18/464, loss: 0.40205228328704834 2023-01-24 03:59:44.739744: step: 20/464, loss: 0.09290299564599991 2023-01-24 03:59:45.483320: step: 22/464, loss: 0.031853266060352325 2023-01-24 03:59:46.391826: step: 24/464, loss: 0.060423143208026886 2023-01-24 03:59:47.049783: step: 26/464, loss: 0.04558515176177025 2023-01-24 03:59:47.762022: step: 28/464, loss: 0.0652964860200882 2023-01-24 03:59:48.477000: step: 30/464, loss: 0.0554376095533371 2023-01-24 03:59:49.167166: step: 32/464, loss: 0.04711688682436943 2023-01-24 03:59:49.953995: step: 34/464, loss: 0.10371864587068558 2023-01-24 03:59:50.691499: step: 36/464, loss: 0.07498136907815933 2023-01-24 03:59:51.579191: step: 38/464, loss: 0.10975520312786102 2023-01-24 03:59:52.292460: step: 40/464, loss: 0.027045963332057 2023-01-24 03:59:53.043779: step: 42/464, loss: 0.14196310937404633 2023-01-24 03:59:53.830292: step: 44/464, loss: 0.06612968444824219 2023-01-24 03:59:54.573133: step: 46/464, loss: 0.41418808698654175 2023-01-24 03:59:55.331139: step: 48/464, loss: 0.9328832626342773 2023-01-24 03:59:56.060927: step: 50/464, loss: 0.3546507656574249 2023-01-24 03:59:56.860563: step: 52/464, loss: 0.06665902584791183 2023-01-24 03:59:57.569618: step: 54/464, loss: 0.09104351699352264 2023-01-24 03:59:58.302081: step: 56/464, loss: 0.1324085146188736 2023-01-24 03:59:59.089342: step: 58/464, loss: 0.14064887166023254 2023-01-24 03:59:59.891831: step: 60/464, loss: 0.027966858819127083 2023-01-24 04:00:00.758653: step: 62/464, loss: 0.09557768702507019 2023-01-24 04:00:01.515670: step: 64/464, loss: 0.03680592030286789 2023-01-24 04:00:02.235047: step: 66/464, loss: 0.11538437753915787 2023-01-24 04:00:03.039431: step: 68/464, loss: 0.17318518459796906 2023-01-24 04:00:03.740390: step: 70/464, loss: 0.05451393499970436 2023-01-24 04:00:04.469000: step: 72/464, loss: 0.11980122327804565 2023-01-24 04:00:05.210293: step: 74/464, loss: 0.04295728728175163 2023-01-24 04:00:05.948692: step: 76/464, loss: 0.090221107006073 2023-01-24 04:00:06.657340: step: 78/464, loss: 0.15044856071472168 2023-01-24 04:00:07.511180: step: 80/464, loss: 0.17875273525714874 2023-01-24 04:00:08.197268: step: 82/464, loss: 0.0368921123445034 2023-01-24 04:00:08.964430: step: 84/464, loss: 0.07362562417984009 2023-01-24 04:00:09.715136: step: 86/464, loss: 0.34966787695884705 2023-01-24 04:00:10.506003: step: 88/464, loss: 0.061326391994953156 2023-01-24 04:00:11.229762: step: 90/464, loss: 0.10193252563476562 2023-01-24 04:00:12.028838: step: 92/464, loss: 0.032848015427589417 2023-01-24 04:00:12.819920: step: 94/464, loss: 0.08947242051362991 2023-01-24 04:00:13.530716: step: 96/464, loss: 0.0824618861079216 2023-01-24 04:00:14.323778: step: 98/464, loss: 0.10630713403224945 2023-01-24 04:00:15.024640: step: 100/464, loss: 0.032170526683330536 2023-01-24 04:00:15.748590: step: 102/464, loss: 0.04691094160079956 2023-01-24 04:00:16.584052: step: 104/464, loss: 0.06669402122497559 2023-01-24 04:00:17.245635: step: 106/464, loss: 0.33576953411102295 2023-01-24 04:00:17.938063: step: 108/464, loss: 0.061419643461704254 2023-01-24 04:00:18.651302: step: 110/464, loss: 0.11293346434831619 2023-01-24 04:00:19.481208: step: 112/464, loss: 0.0694413036108017 2023-01-24 04:00:20.163366: step: 114/464, loss: 0.030548907816410065 2023-01-24 04:00:20.981673: step: 116/464, loss: 0.12947489321231842 2023-01-24 04:00:21.689414: step: 118/464, loss: 0.12525877356529236 2023-01-24 04:00:22.393115: step: 120/464, loss: 0.18001887202262878 2023-01-24 04:00:23.110728: step: 122/464, loss: 0.04662255942821503 2023-01-24 04:00:23.903614: step: 124/464, loss: 0.08843176811933517 2023-01-24 04:00:24.647399: step: 126/464, loss: 0.07850769907236099 2023-01-24 04:00:25.493130: step: 128/464, loss: 0.10110843926668167 2023-01-24 04:00:26.279486: step: 130/464, loss: 0.0414954349398613 2023-01-24 04:00:27.064301: step: 132/464, loss: 0.09983392059803009 2023-01-24 04:00:27.742565: step: 134/464, loss: 0.23782214522361755 2023-01-24 04:00:28.483609: step: 136/464, loss: 0.04706363379955292 2023-01-24 04:00:29.215881: step: 138/464, loss: 0.02044433355331421 2023-01-24 04:00:30.006378: step: 140/464, loss: 0.1627044975757599 2023-01-24 04:00:30.742563: step: 142/464, loss: 0.39245784282684326 2023-01-24 04:00:31.471914: step: 144/464, loss: 0.053587671369314194 2023-01-24 04:00:32.106982: step: 146/464, loss: 0.03694664314389229 2023-01-24 04:00:32.831480: step: 148/464, loss: 0.05931307375431061 2023-01-24 04:00:33.568995: step: 150/464, loss: 0.2365327775478363 2023-01-24 04:00:34.265217: step: 152/464, loss: 0.016334207728505135 2023-01-24 04:00:34.945487: step: 154/464, loss: 0.020577138289809227 2023-01-24 04:00:35.870220: step: 156/464, loss: 0.7687644958496094 2023-01-24 04:00:36.684928: step: 158/464, loss: 0.11480724066495895 2023-01-24 04:00:37.332240: step: 160/464, loss: 0.04226589575409889 2023-01-24 04:00:38.097112: step: 162/464, loss: 0.0029008612036705017 2023-01-24 04:00:38.844130: step: 164/464, loss: 0.040296342223882675 2023-01-24 04:00:39.569755: step: 166/464, loss: 0.0675327405333519 2023-01-24 04:00:40.350316: step: 168/464, loss: 0.05431724339723587 2023-01-24 04:00:41.067551: step: 170/464, loss: 0.027871621772646904 2023-01-24 04:00:41.841617: step: 172/464, loss: 0.11166887730360031 2023-01-24 04:00:42.589314: step: 174/464, loss: 0.10647082328796387 2023-01-24 04:00:43.348473: step: 176/464, loss: 0.0702541172504425 2023-01-24 04:00:44.138375: step: 178/464, loss: 0.11109448224306107 2023-01-24 04:00:44.843822: step: 180/464, loss: 0.12108645588159561 2023-01-24 04:00:45.545318: step: 182/464, loss: 0.0694524422287941 2023-01-24 04:00:46.273439: step: 184/464, loss: 0.462844580411911 2023-01-24 04:00:47.030872: step: 186/464, loss: 0.030476845800876617 2023-01-24 04:00:47.831213: step: 188/464, loss: 0.0202813558280468 2023-01-24 04:00:48.536088: step: 190/464, loss: 0.1016131266951561 2023-01-24 04:00:49.340078: step: 192/464, loss: 0.048376649618148804 2023-01-24 04:00:50.064911: step: 194/464, loss: 0.18338793516159058 2023-01-24 04:00:50.866866: step: 196/464, loss: 0.01997218281030655 2023-01-24 04:00:51.603511: step: 198/464, loss: 0.05872989073395729 2023-01-24 04:00:52.273891: step: 200/464, loss: 0.07014588266611099 2023-01-24 04:00:52.945963: step: 202/464, loss: 0.03125053271651268 2023-01-24 04:00:53.689997: step: 204/464, loss: 0.03680029883980751 2023-01-24 04:00:54.371865: step: 206/464, loss: 0.007591854315251112 2023-01-24 04:00:55.170296: step: 208/464, loss: 0.016236577183008194 2023-01-24 04:00:55.836725: step: 210/464, loss: 0.06709948182106018 2023-01-24 04:00:56.644044: step: 212/464, loss: 0.28125205636024475 2023-01-24 04:00:57.433507: step: 214/464, loss: 0.11939781904220581 2023-01-24 04:00:58.119393: step: 216/464, loss: 0.04702724516391754 2023-01-24 04:00:58.906586: step: 218/464, loss: 0.05703671649098396 2023-01-24 04:00:59.596460: step: 220/464, loss: 0.5108763575553894 2023-01-24 04:01:00.346863: step: 222/464, loss: 0.08599857240915298 2023-01-24 04:01:01.129826: step: 224/464, loss: 0.12017587572336197 2023-01-24 04:01:01.795417: step: 226/464, loss: 0.07810725271701813 2023-01-24 04:01:02.501640: step: 228/464, loss: 0.13311772048473358 2023-01-24 04:01:03.248563: step: 230/464, loss: 0.10150721669197083 2023-01-24 04:01:03.927713: step: 232/464, loss: 0.41148698329925537 2023-01-24 04:01:04.731832: step: 234/464, loss: 0.08408664166927338 2023-01-24 04:01:05.465340: step: 236/464, loss: 0.06322506070137024 2023-01-24 04:01:06.148807: step: 238/464, loss: 0.0461643822491169 2023-01-24 04:01:06.840709: step: 240/464, loss: 0.10806065052747726 2023-01-24 04:01:07.630929: step: 242/464, loss: 0.09661133587360382 2023-01-24 04:01:08.386027: step: 244/464, loss: 0.1098666563630104 2023-01-24 04:01:09.153955: step: 246/464, loss: 0.23182234168052673 2023-01-24 04:01:09.892279: step: 248/464, loss: 0.033908504992723465 2023-01-24 04:01:10.609030: step: 250/464, loss: 10.392996788024902 2023-01-24 04:01:11.348867: step: 252/464, loss: 0.0998709425330162 2023-01-24 04:01:12.171126: step: 254/464, loss: 0.13033442199230194 2023-01-24 04:01:12.908587: step: 256/464, loss: 0.0023037393111735582 2023-01-24 04:01:13.769646: step: 258/464, loss: 0.08774647116661072 2023-01-24 04:01:14.497149: step: 260/464, loss: 0.052995506674051285 2023-01-24 04:01:15.207117: step: 262/464, loss: 0.05491722375154495 2023-01-24 04:01:15.911519: step: 264/464, loss: 0.07200101763010025 2023-01-24 04:01:16.656596: step: 266/464, loss: 0.06661888211965561 2023-01-24 04:01:17.423642: step: 268/464, loss: 0.09004511684179306 2023-01-24 04:01:18.151774: step: 270/464, loss: 0.05332894250750542 2023-01-24 04:01:18.895912: step: 272/464, loss: 0.048882585018873215 2023-01-24 04:01:19.646232: step: 274/464, loss: 0.1596928983926773 2023-01-24 04:01:20.296379: step: 276/464, loss: 0.09304609894752502 2023-01-24 04:01:21.090113: step: 278/464, loss: 0.1633264273405075 2023-01-24 04:01:21.888258: step: 280/464, loss: 0.16213783621788025 2023-01-24 04:01:22.572488: step: 282/464, loss: 0.11811444163322449 2023-01-24 04:01:23.331865: step: 284/464, loss: 0.055999331176280975 2023-01-24 04:01:24.132964: step: 286/464, loss: 0.06834820657968521 2023-01-24 04:01:24.881145: step: 288/464, loss: 0.08850032836198807 2023-01-24 04:01:25.635703: step: 290/464, loss: 0.31907227635383606 2023-01-24 04:01:26.363656: step: 292/464, loss: 0.22951997816562653 2023-01-24 04:01:27.138331: step: 294/464, loss: 0.1545470803976059 2023-01-24 04:01:27.853657: step: 296/464, loss: 0.05630459636449814 2023-01-24 04:01:28.562519: step: 298/464, loss: 0.06210784241557121 2023-01-24 04:01:29.344677: step: 300/464, loss: 0.14896251261234283 2023-01-24 04:01:30.082968: step: 302/464, loss: 0.06157911568880081 2023-01-24 04:01:30.812593: step: 304/464, loss: 0.12659184634685516 2023-01-24 04:01:31.502294: step: 306/464, loss: 0.024008898064494133 2023-01-24 04:01:32.249748: step: 308/464, loss: 0.025252996012568474 2023-01-24 04:01:33.078935: step: 310/464, loss: 0.11464542150497437 2023-01-24 04:01:33.772468: step: 312/464, loss: 0.04284798353910446 2023-01-24 04:01:34.563946: step: 314/464, loss: 0.16082020103931427 2023-01-24 04:01:35.262433: step: 316/464, loss: 0.8475959897041321 2023-01-24 04:01:36.052143: step: 318/464, loss: 0.08074049651622772 2023-01-24 04:01:36.784558: step: 320/464, loss: 0.04344986751675606 2023-01-24 04:01:37.556691: step: 322/464, loss: 0.13628548383712769 2023-01-24 04:01:38.306967: step: 324/464, loss: 0.06325775384902954 2023-01-24 04:01:39.025243: step: 326/464, loss: 0.03645012527704239 2023-01-24 04:01:39.760733: step: 328/464, loss: 0.07878056168556213 2023-01-24 04:01:40.548714: step: 330/464, loss: 0.19644658267498016 2023-01-24 04:01:41.247867: step: 332/464, loss: 0.10182038694620132 2023-01-24 04:01:41.972197: step: 334/464, loss: 0.10113890469074249 2023-01-24 04:01:42.932777: step: 336/464, loss: 0.11203952878713608 2023-01-24 04:01:43.670752: step: 338/464, loss: 0.05734609067440033 2023-01-24 04:01:44.410697: step: 340/464, loss: 0.04465271905064583 2023-01-24 04:01:45.063339: step: 342/464, loss: 0.004280386958271265 2023-01-24 04:01:45.796255: step: 344/464, loss: 0.19001714885234833 2023-01-24 04:01:46.565478: step: 346/464, loss: 0.14562982320785522 2023-01-24 04:01:47.423210: step: 348/464, loss: 0.14720419049263 2023-01-24 04:01:48.147551: step: 350/464, loss: 0.12624314427375793 2023-01-24 04:01:48.908960: step: 352/464, loss: 0.13830460608005524 2023-01-24 04:01:49.673599: step: 354/464, loss: 0.05004170536994934 2023-01-24 04:01:50.393355: step: 356/464, loss: 0.12028498202562332 2023-01-24 04:01:51.114192: step: 358/464, loss: 0.2403750866651535 2023-01-24 04:01:51.830321: step: 360/464, loss: 0.08636949211359024 2023-01-24 04:01:52.531135: step: 362/464, loss: 0.09380398690700531 2023-01-24 04:01:53.288672: step: 364/464, loss: 0.06335175037384033 2023-01-24 04:01:53.994317: step: 366/464, loss: 0.3888946771621704 2023-01-24 04:01:54.757822: step: 368/464, loss: 0.03396635875105858 2023-01-24 04:01:55.471235: step: 370/464, loss: 0.0674733966588974 2023-01-24 04:01:56.134949: step: 372/464, loss: 0.03991612046957016 2023-01-24 04:01:56.804379: step: 374/464, loss: 0.09819450229406357 2023-01-24 04:01:57.448636: step: 376/464, loss: 0.08212851732969284 2023-01-24 04:01:58.137414: step: 378/464, loss: 0.04800790548324585 2023-01-24 04:01:58.871224: step: 380/464, loss: 0.07307100296020508 2023-01-24 04:01:59.574883: step: 382/464, loss: 0.04123353213071823 2023-01-24 04:02:00.306944: step: 384/464, loss: 0.3220861554145813 2023-01-24 04:02:01.038306: step: 386/464, loss: 0.054907046258449554 2023-01-24 04:02:01.783140: step: 388/464, loss: 0.17442238330841064 2023-01-24 04:02:02.598497: step: 390/464, loss: 0.04887682572007179 2023-01-24 04:02:03.313084: step: 392/464, loss: 0.041350577026605606 2023-01-24 04:02:04.014030: step: 394/464, loss: 0.15166236460208893 2023-01-24 04:02:04.737376: step: 396/464, loss: 0.14650413393974304 2023-01-24 04:02:05.507847: step: 398/464, loss: 0.09053334593772888 2023-01-24 04:02:06.318442: step: 400/464, loss: 0.01918404921889305 2023-01-24 04:02:07.070138: step: 402/464, loss: 0.14491277933120728 2023-01-24 04:02:07.729652: step: 404/464, loss: 0.22183500230312347 2023-01-24 04:02:08.455326: step: 406/464, loss: 0.22847139835357666 2023-01-24 04:02:09.252640: step: 408/464, loss: 0.0427161380648613 2023-01-24 04:02:09.986744: step: 410/464, loss: 0.04364943131804466 2023-01-24 04:02:10.734354: step: 412/464, loss: 0.14266836643218994 2023-01-24 04:02:11.523060: step: 414/464, loss: 0.022271789610385895 2023-01-24 04:02:12.298944: step: 416/464, loss: 0.36364445090293884 2023-01-24 04:02:13.004018: step: 418/464, loss: 0.11318691074848175 2023-01-24 04:02:13.785137: step: 420/464, loss: 0.030474815517663956 2023-01-24 04:02:14.529489: step: 422/464, loss: 0.09793737530708313 2023-01-24 04:02:15.406569: step: 424/464, loss: 0.09572654217481613 2023-01-24 04:02:16.189478: step: 426/464, loss: 0.051216285675764084 2023-01-24 04:02:16.910557: step: 428/464, loss: 0.11468888074159622 2023-01-24 04:02:17.621446: step: 430/464, loss: 0.16534972190856934 2023-01-24 04:02:18.317539: step: 432/464, loss: 0.04753812029957771 2023-01-24 04:02:19.045700: step: 434/464, loss: 0.08464813232421875 2023-01-24 04:02:19.877217: step: 436/464, loss: 0.049439672380685806 2023-01-24 04:02:20.622134: step: 438/464, loss: 0.05753437429666519 2023-01-24 04:02:21.418970: step: 440/464, loss: 0.036289017647504807 2023-01-24 04:02:22.172113: step: 442/464, loss: 0.10135679692029953 2023-01-24 04:02:22.920636: step: 444/464, loss: 0.05048893392086029 2023-01-24 04:02:23.713602: step: 446/464, loss: 0.7747906446456909 2023-01-24 04:02:24.479928: step: 448/464, loss: 0.03790482506155968 2023-01-24 04:02:25.153439: step: 450/464, loss: 0.05181882530450821 2023-01-24 04:02:25.836984: step: 452/464, loss: 0.09344540536403656 2023-01-24 04:02:26.539223: step: 454/464, loss: 0.11064407229423523 2023-01-24 04:02:27.258397: step: 456/464, loss: 1.7301889657974243 2023-01-24 04:02:28.000160: step: 458/464, loss: 0.05675554648041725 2023-01-24 04:02:28.695262: step: 460/464, loss: 0.13673949241638184 2023-01-24 04:02:29.450612: step: 462/464, loss: 0.26604804396629333 2023-01-24 04:02:30.146249: step: 464/464, loss: 0.04592286795377731 2023-01-24 04:02:30.885793: step: 466/464, loss: 0.10509765148162842 2023-01-24 04:02:31.638417: step: 468/464, loss: 1.0288773775100708 2023-01-24 04:02:32.354315: step: 470/464, loss: 0.037989452481269836 2023-01-24 04:02:33.109187: step: 472/464, loss: 0.057457275688648224 2023-01-24 04:02:33.851893: step: 474/464, loss: 0.12352025508880615 2023-01-24 04:02:34.604823: step: 476/464, loss: 0.06553643941879272 2023-01-24 04:02:35.340473: step: 478/464, loss: 0.03871558606624603 2023-01-24 04:02:36.179512: step: 480/464, loss: 0.03770974278450012 2023-01-24 04:02:36.901023: step: 482/464, loss: 0.1302376687526703 2023-01-24 04:02:37.682567: step: 484/464, loss: 0.11569955945014954 2023-01-24 04:02:38.455173: step: 486/464, loss: 0.15845021605491638 2023-01-24 04:02:39.199586: step: 488/464, loss: 0.0666755884885788 2023-01-24 04:02:39.921524: step: 490/464, loss: 0.14751411974430084 2023-01-24 04:02:40.668486: step: 492/464, loss: 0.05031317099928856 2023-01-24 04:02:41.396508: step: 494/464, loss: 0.05618094280362129 2023-01-24 04:02:42.281183: step: 496/464, loss: 0.25684428215026855 2023-01-24 04:02:43.041508: step: 498/464, loss: 0.11302121728658676 2023-01-24 04:02:43.803679: step: 500/464, loss: 0.8344253897666931 2023-01-24 04:02:44.484770: step: 502/464, loss: 0.183640256524086 2023-01-24 04:02:45.283211: step: 504/464, loss: 0.05752811208367348 2023-01-24 04:02:46.155349: step: 506/464, loss: 0.016010737046599388 2023-01-24 04:02:46.817793: step: 508/464, loss: 0.018036268651485443 2023-01-24 04:02:47.670368: step: 510/464, loss: 0.0805363729596138 2023-01-24 04:02:48.447058: step: 512/464, loss: 0.0920647606253624 2023-01-24 04:02:49.167131: step: 514/464, loss: 0.08872433006763458 2023-01-24 04:02:49.870995: step: 516/464, loss: 0.1249750405550003 2023-01-24 04:02:50.688309: step: 518/464, loss: 0.2511100769042969 2023-01-24 04:02:51.394867: step: 520/464, loss: 0.012199416756629944 2023-01-24 04:02:52.100053: step: 522/464, loss: 0.2406979352235794 2023-01-24 04:02:52.857650: step: 524/464, loss: 0.11704836785793304 2023-01-24 04:02:53.606616: step: 526/464, loss: 0.013306746259331703 2023-01-24 04:02:54.269149: step: 528/464, loss: 0.014066663570702076 2023-01-24 04:02:54.946924: step: 530/464, loss: 0.060074660927057266 2023-01-24 04:02:55.704207: step: 532/464, loss: 0.034528978168964386 2023-01-24 04:02:56.427491: step: 534/464, loss: 0.04230083152651787 2023-01-24 04:02:57.224465: step: 536/464, loss: 0.12458521872758865 2023-01-24 04:02:57.902958: step: 538/464, loss: 0.05736164376139641 2023-01-24 04:02:58.606061: step: 540/464, loss: 0.20850035548210144 2023-01-24 04:02:59.392499: step: 542/464, loss: 0.0998268872499466 2023-01-24 04:03:00.173595: step: 544/464, loss: 0.09749908000230789 2023-01-24 04:03:00.897187: step: 546/464, loss: 1.4653208255767822 2023-01-24 04:03:01.743921: step: 548/464, loss: 0.05344126373529434 2023-01-24 04:03:02.457726: step: 550/464, loss: 0.07183989882469177 2023-01-24 04:03:03.192493: step: 552/464, loss: 0.26208972930908203 2023-01-24 04:03:03.968280: step: 554/464, loss: 0.06099194660782814 2023-01-24 04:03:04.726639: step: 556/464, loss: 0.13452257215976715 2023-01-24 04:03:05.476391: step: 558/464, loss: 0.0388176366686821 2023-01-24 04:03:06.253760: step: 560/464, loss: 0.13151054084300995 2023-01-24 04:03:06.976366: step: 562/464, loss: 0.11742176860570908 2023-01-24 04:03:07.656910: step: 564/464, loss: 0.03709885850548744 2023-01-24 04:03:08.353032: step: 566/464, loss: 0.20520006120204926 2023-01-24 04:03:09.135142: step: 568/464, loss: 0.1609494388103485 2023-01-24 04:03:09.846663: step: 570/464, loss: 0.0556962713599205 2023-01-24 04:03:10.550924: step: 572/464, loss: 0.21883781254291534 2023-01-24 04:03:11.324979: step: 574/464, loss: 0.07949330657720566 2023-01-24 04:03:12.121201: step: 576/464, loss: 0.3774619698524475 2023-01-24 04:03:12.806217: step: 578/464, loss: 0.18832574784755707 2023-01-24 04:03:13.568577: step: 580/464, loss: 0.11123646795749664 2023-01-24 04:03:14.324613: step: 582/464, loss: 0.051139943301677704 2023-01-24 04:03:15.057097: step: 584/464, loss: 0.043388500809669495 2023-01-24 04:03:15.752521: step: 586/464, loss: 0.037496041506528854 2023-01-24 04:03:16.436586: step: 588/464, loss: 0.03766334429383278 2023-01-24 04:03:17.051390: step: 590/464, loss: 0.04886811599135399 2023-01-24 04:03:17.777960: step: 592/464, loss: 0.06284377723932266 2023-01-24 04:03:18.509378: step: 594/464, loss: 0.0439876988530159 2023-01-24 04:03:19.179632: step: 596/464, loss: 0.04255954176187515 2023-01-24 04:03:19.889629: step: 598/464, loss: 0.24127870798110962 2023-01-24 04:03:20.515743: step: 600/464, loss: 0.07364135980606079 2023-01-24 04:03:21.235134: step: 602/464, loss: 0.07154645770788193 2023-01-24 04:03:22.031709: step: 604/464, loss: 0.3457305133342743 2023-01-24 04:03:22.720006: step: 606/464, loss: 0.0533280149102211 2023-01-24 04:03:23.456198: step: 608/464, loss: 0.3225669264793396 2023-01-24 04:03:24.123857: step: 610/464, loss: 0.05882871150970459 2023-01-24 04:03:24.867244: step: 612/464, loss: 0.03248715400695801 2023-01-24 04:03:25.612193: step: 614/464, loss: 0.64145427942276 2023-01-24 04:03:26.341320: step: 616/464, loss: 0.07119648158550262 2023-01-24 04:03:27.061467: step: 618/464, loss: 0.04753183200955391 2023-01-24 04:03:27.750895: step: 620/464, loss: 0.20422199368476868 2023-01-24 04:03:28.461285: step: 622/464, loss: 0.08181479573249817 2023-01-24 04:03:29.163541: step: 624/464, loss: 0.02665702998638153 2023-01-24 04:03:29.867866: step: 626/464, loss: 0.09652336686849594 2023-01-24 04:03:30.636371: step: 628/464, loss: 0.11436853557825089 2023-01-24 04:03:31.402178: step: 630/464, loss: 0.13034532964229584 2023-01-24 04:03:32.097889: step: 632/464, loss: 0.127781942486763 2023-01-24 04:03:32.852119: step: 634/464, loss: 0.09202573448419571 2023-01-24 04:03:33.551393: step: 636/464, loss: 0.03257625922560692 2023-01-24 04:03:34.294651: step: 638/464, loss: 0.15791824460029602 2023-01-24 04:03:35.017220: step: 640/464, loss: 0.018911080434918404 2023-01-24 04:03:35.760426: step: 642/464, loss: 0.040161360055208206 2023-01-24 04:03:36.476081: step: 644/464, loss: 0.21853290498256683 2023-01-24 04:03:37.226049: step: 646/464, loss: 0.06965027749538422 2023-01-24 04:03:37.981219: step: 648/464, loss: 0.1252724528312683 2023-01-24 04:03:38.681911: step: 650/464, loss: 0.17936329543590546 2023-01-24 04:03:39.468362: step: 652/464, loss: 0.1071472018957138 2023-01-24 04:03:40.195316: step: 654/464, loss: 0.024697106331586838 2023-01-24 04:03:40.954791: step: 656/464, loss: 0.1417209357023239 2023-01-24 04:03:41.681261: step: 658/464, loss: 0.05779734253883362 2023-01-24 04:03:42.372011: step: 660/464, loss: 0.07815540581941605 2023-01-24 04:03:43.162475: step: 662/464, loss: 0.15495631098747253 2023-01-24 04:03:43.906675: step: 664/464, loss: 0.186952605843544 2023-01-24 04:03:44.761717: step: 666/464, loss: 0.08128277957439423 2023-01-24 04:03:45.480363: step: 668/464, loss: 0.16984660923480988 2023-01-24 04:03:46.248740: step: 670/464, loss: 0.026893116533756256 2023-01-24 04:03:46.992752: step: 672/464, loss: 0.043474629521369934 2023-01-24 04:03:47.763850: step: 674/464, loss: 0.13293564319610596 2023-01-24 04:03:48.488966: step: 676/464, loss: 0.030793726444244385 2023-01-24 04:03:49.171127: step: 678/464, loss: 0.06726095080375671 2023-01-24 04:03:49.898797: step: 680/464, loss: 0.15387548506259918 2023-01-24 04:03:50.653174: step: 682/464, loss: 0.09032996743917465 2023-01-24 04:03:51.396673: step: 684/464, loss: 0.06952432543039322 2023-01-24 04:03:52.144189: step: 686/464, loss: 0.03810926526784897 2023-01-24 04:03:52.945028: step: 688/464, loss: 0.07896184921264648 2023-01-24 04:03:53.747190: step: 690/464, loss: 0.4305756688117981 2023-01-24 04:03:54.480282: step: 692/464, loss: 0.23241256177425385 2023-01-24 04:03:55.245718: step: 694/464, loss: 0.06421250104904175 2023-01-24 04:03:55.968394: step: 696/464, loss: 0.03044717386364937 2023-01-24 04:03:56.718041: step: 698/464, loss: 0.1370568573474884 2023-01-24 04:03:57.480689: step: 700/464, loss: 0.07529357820749283 2023-01-24 04:03:58.123492: step: 702/464, loss: 0.04130513593554497 2023-01-24 04:03:58.833054: step: 704/464, loss: 0.02765693888068199 2023-01-24 04:03:59.539896: step: 706/464, loss: 0.19176216423511505 2023-01-24 04:04:00.261317: step: 708/464, loss: 0.15551027655601501 2023-01-24 04:04:00.939440: step: 710/464, loss: 0.06480307877063751 2023-01-24 04:04:01.693644: step: 712/464, loss: 0.023103512823581696 2023-01-24 04:04:02.391158: step: 714/464, loss: 0.03260474652051926 2023-01-24 04:04:03.081983: step: 716/464, loss: 0.0389396958053112 2023-01-24 04:04:03.825310: step: 718/464, loss: 0.020739024505019188 2023-01-24 04:04:04.518236: step: 720/464, loss: 0.1336267739534378 2023-01-24 04:04:05.220813: step: 722/464, loss: 0.08211637288331985 2023-01-24 04:04:05.960625: step: 724/464, loss: 0.04815949127078056 2023-01-24 04:04:06.662787: step: 726/464, loss: 0.16563375294208527 2023-01-24 04:04:07.464037: step: 728/464, loss: 0.13736072182655334 2023-01-24 04:04:08.220611: step: 730/464, loss: 0.06849687546491623 2023-01-24 04:04:08.951404: step: 732/464, loss: 0.08200888335704803 2023-01-24 04:04:09.714242: step: 734/464, loss: 0.15218576788902283 2023-01-24 04:04:10.493623: step: 736/464, loss: 0.09333252906799316 2023-01-24 04:04:11.189730: step: 738/464, loss: 0.04118682071566582 2023-01-24 04:04:11.971851: step: 740/464, loss: 0.08373872935771942 2023-01-24 04:04:12.727278: step: 742/464, loss: 0.08737599104642868 2023-01-24 04:04:13.435586: step: 744/464, loss: 0.1110134944319725 2023-01-24 04:04:14.162154: step: 746/464, loss: 0.09193912148475647 2023-01-24 04:04:14.897477: step: 748/464, loss: 0.09227243065834045 2023-01-24 04:04:15.587190: step: 750/464, loss: 0.1252930611371994 2023-01-24 04:04:16.342016: step: 752/464, loss: 0.08073785156011581 2023-01-24 04:04:17.089322: step: 754/464, loss: 0.17625649273395538 2023-01-24 04:04:17.889586: step: 756/464, loss: 0.2500367760658264 2023-01-24 04:04:18.534781: step: 758/464, loss: 0.05067160353064537 2023-01-24 04:04:19.286571: step: 760/464, loss: 0.02755684033036232 2023-01-24 04:04:20.027705: step: 762/464, loss: 0.1689205765724182 2023-01-24 04:04:20.737222: step: 764/464, loss: 0.05219363793730736 2023-01-24 04:04:21.439608: step: 766/464, loss: 0.07540833950042725 2023-01-24 04:04:22.190525: step: 768/464, loss: 0.11556242406368256 2023-01-24 04:04:22.895825: step: 770/464, loss: 0.059864141047000885 2023-01-24 04:04:23.652987: step: 772/464, loss: 0.23969833552837372 2023-01-24 04:04:24.326832: step: 774/464, loss: 0.036458227783441544 2023-01-24 04:04:25.103193: step: 776/464, loss: 0.08701157569885254 2023-01-24 04:04:25.840973: step: 778/464, loss: 0.028931396082043648 2023-01-24 04:04:26.551450: step: 780/464, loss: 0.02051432803273201 2023-01-24 04:04:27.368913: step: 782/464, loss: 0.13937965035438538 2023-01-24 04:04:28.137900: step: 784/464, loss: 0.11918213218450546 2023-01-24 04:04:28.939048: step: 786/464, loss: 0.024718215689063072 2023-01-24 04:04:29.655447: step: 788/464, loss: 0.0715637132525444 2023-01-24 04:04:30.403888: step: 790/464, loss: 0.1728140264749527 2023-01-24 04:04:31.137255: step: 792/464, loss: 0.0436103418469429 2023-01-24 04:04:31.864850: step: 794/464, loss: 0.05633265897631645 2023-01-24 04:04:32.673808: step: 796/464, loss: 0.13844357430934906 2023-01-24 04:04:33.430193: step: 798/464, loss: 0.13182282447814941 2023-01-24 04:04:34.191468: step: 800/464, loss: 0.04310326278209686 2023-01-24 04:04:34.953167: step: 802/464, loss: 0.6118875741958618 2023-01-24 04:04:35.736708: step: 804/464, loss: 0.06036046892404556 2023-01-24 04:04:36.477440: step: 806/464, loss: 0.08748368918895721 2023-01-24 04:04:37.215826: step: 808/464, loss: 0.08084870874881744 2023-01-24 04:04:37.957149: step: 810/464, loss: 0.07438945770263672 2023-01-24 04:04:38.816885: step: 812/464, loss: 0.04066689312458038 2023-01-24 04:04:39.532168: step: 814/464, loss: 0.034743160009384155 2023-01-24 04:04:40.281822: step: 816/464, loss: 0.09501675516366959 2023-01-24 04:04:41.010843: step: 818/464, loss: 0.04034503549337387 2023-01-24 04:04:41.709087: step: 820/464, loss: 0.08743909001350403 2023-01-24 04:04:42.509216: step: 822/464, loss: 0.18358755111694336 2023-01-24 04:04:43.221581: step: 824/464, loss: 0.08890260010957718 2023-01-24 04:04:43.961599: step: 826/464, loss: 0.022742677479982376 2023-01-24 04:04:44.745516: step: 828/464, loss: 0.4063442051410675 2023-01-24 04:04:45.507329: step: 830/464, loss: 0.10741689056158066 2023-01-24 04:04:46.287520: step: 832/464, loss: 0.051975641399621964 2023-01-24 04:04:46.991466: step: 834/464, loss: 0.014520746655762196 2023-01-24 04:04:47.779778: step: 836/464, loss: 0.13959254324436188 2023-01-24 04:04:48.475388: step: 838/464, loss: 0.24080374836921692 2023-01-24 04:04:49.104749: step: 840/464, loss: 0.02364250086247921 2023-01-24 04:04:49.779409: step: 842/464, loss: 0.04288835823535919 2023-01-24 04:04:50.476107: step: 844/464, loss: 0.11207117140293121 2023-01-24 04:04:51.228549: step: 846/464, loss: 0.12408368289470673 2023-01-24 04:04:52.005608: step: 848/464, loss: 0.05987241491675377 2023-01-24 04:04:52.788881: step: 850/464, loss: 0.11004193872213364 2023-01-24 04:04:53.585235: step: 852/464, loss: 0.07125671207904816 2023-01-24 04:04:54.284833: step: 854/464, loss: 0.09247303009033203 2023-01-24 04:04:54.997101: step: 856/464, loss: 0.07699891924858093 2023-01-24 04:04:55.651575: step: 858/464, loss: 0.0834435373544693 2023-01-24 04:04:56.397896: step: 860/464, loss: 0.15045170485973358 2023-01-24 04:04:57.178029: step: 862/464, loss: 0.03895842283964157 2023-01-24 04:04:57.881748: step: 864/464, loss: 0.07983269542455673 2023-01-24 04:04:58.755199: step: 866/464, loss: 0.09023448079824448 2023-01-24 04:04:59.496201: step: 868/464, loss: 0.05701834335923195 2023-01-24 04:05:00.203352: step: 870/464, loss: 0.03738541156053543 2023-01-24 04:05:00.925859: step: 872/464, loss: 0.022355815395712852 2023-01-24 04:05:01.653223: step: 874/464, loss: 0.011521057225763798 2023-01-24 04:05:02.588746: step: 876/464, loss: 0.038567617535591125 2023-01-24 04:05:03.435328: step: 878/464, loss: 0.2596600353717804 2023-01-24 04:05:04.120767: step: 880/464, loss: 0.04852532595396042 2023-01-24 04:05:04.825626: step: 882/464, loss: 0.008914230391383171 2023-01-24 04:05:05.596579: step: 884/464, loss: 0.12365903705358505 2023-01-24 04:05:06.392351: step: 886/464, loss: 0.07245268672704697 2023-01-24 04:05:07.162135: step: 888/464, loss: 0.13257306814193726 2023-01-24 04:05:07.990480: step: 890/464, loss: 0.1250031590461731 2023-01-24 04:05:08.681679: step: 892/464, loss: 0.409727543592453 2023-01-24 04:05:09.473006: step: 894/464, loss: 0.16821452975273132 2023-01-24 04:05:10.209936: step: 896/464, loss: 0.5855254530906677 2023-01-24 04:05:11.002030: step: 898/464, loss: 0.09568691998720169 2023-01-24 04:05:11.850775: step: 900/464, loss: 0.028606195002794266 2023-01-24 04:05:12.554464: step: 902/464, loss: 0.06103532388806343 2023-01-24 04:05:13.313680: step: 904/464, loss: 0.18515242636203766 2023-01-24 04:05:14.247271: step: 906/464, loss: 0.03543923795223236 2023-01-24 04:05:14.984712: step: 908/464, loss: 0.4824013411998749 2023-01-24 04:05:15.794824: step: 910/464, loss: 0.059209562838077545 2023-01-24 04:05:16.539616: step: 912/464, loss: 0.7797068953514099 2023-01-24 04:05:17.225428: step: 914/464, loss: 0.0929434671998024 2023-01-24 04:05:18.119464: step: 916/464, loss: 0.3149912655353546 2023-01-24 04:05:18.863898: step: 918/464, loss: 0.03383219987154007 2023-01-24 04:05:19.557489: step: 920/464, loss: 0.029487041756510735 2023-01-24 04:05:20.378138: step: 922/464, loss: 0.0898323506116867 2023-01-24 04:05:21.124034: step: 924/464, loss: 0.19288434088230133 2023-01-24 04:05:21.928094: step: 926/464, loss: 0.13376876711845398 2023-01-24 04:05:22.674847: step: 928/464, loss: 0.06767135858535767 2023-01-24 04:05:23.270106: step: 930/464, loss: 0.021101508289575577 ================================================== Loss: 0.147 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36262044967880086, 'r': 0.3213353889943074, 'f1': 0.3407318913480885}, 'combined': 0.2510656041512231, 'epoch': 17} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31504138495439127, 'r': 0.270879611347754, 'f1': 0.29129622748967604}, 'combined': 0.18091028865148304, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3340191511387164, 'r': 0.3061314041745731, 'f1': 0.31946782178217825}, 'combined': 0.23539734236581555, 'epoch': 17} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3082228732658302, 'r': 0.2653211702742388, 'f1': 0.28516747531862485}, 'combined': 0.1771040109873565, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34670865056548933, 'r': 0.3131562005107646, 'f1': 0.32907939714690515}, 'combined': 0.24247955579245642, 'epoch': 17} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3292407480308367, 'r': 0.2795133695029808, 'f1': 0.3023460152765825}, 'combined': 0.1877727884349302, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.26785714285714285, 'f1': 0.31250000000000006}, 'combined': 0.20833333333333337, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32575757575757575, 'r': 0.4673913043478261, 'f1': 0.3839285714285714}, 'combined': 0.1919642857142857, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4220647773279352, 'r': 0.18920145190562612, 'f1': 0.2612781954887218}, 'combined': 0.17418546365914783, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:08:02.564815: step: 2/464, loss: 0.02255629189312458 2023-01-24 04:08:03.301766: step: 4/464, loss: 0.10551592707633972 2023-01-24 04:08:04.035230: step: 6/464, loss: 0.03464389219880104 2023-01-24 04:08:04.816683: step: 8/464, loss: 0.10338738560676575 2023-01-24 04:08:05.517284: step: 10/464, loss: 0.08868830651044846 2023-01-24 04:08:06.312199: step: 12/464, loss: 0.02098909579217434 2023-01-24 04:08:07.020250: step: 14/464, loss: 0.043363526463508606 2023-01-24 04:08:07.726987: step: 16/464, loss: 0.04640447720885277 2023-01-24 04:08:08.469754: step: 18/464, loss: 0.12110107392072678 2023-01-24 04:08:09.138138: step: 20/464, loss: 0.03413453325629234 2023-01-24 04:08:10.041112: step: 22/464, loss: 0.012522303499281406 2023-01-24 04:08:10.710139: step: 24/464, loss: 0.131692036986351 2023-01-24 04:08:11.478585: step: 26/464, loss: 0.02454289421439171 2023-01-24 04:08:12.191073: step: 28/464, loss: 0.07694856822490692 2023-01-24 04:08:12.883418: step: 30/464, loss: 0.02013799548149109 2023-01-24 04:08:13.591419: step: 32/464, loss: 1.6733767986297607 2023-01-24 04:08:14.309058: step: 34/464, loss: 0.1602635681629181 2023-01-24 04:08:14.982778: step: 36/464, loss: 0.05486570671200752 2023-01-24 04:08:15.759651: step: 38/464, loss: 0.06268543750047684 2023-01-24 04:08:16.491377: step: 40/464, loss: 0.10344596207141876 2023-01-24 04:08:17.224832: step: 42/464, loss: 0.22582951188087463 2023-01-24 04:08:17.952471: step: 44/464, loss: 0.06255070865154266 2023-01-24 04:08:18.777874: step: 46/464, loss: 0.06916432827711105 2023-01-24 04:08:19.616964: step: 48/464, loss: 0.048753079026937485 2023-01-24 04:08:20.387899: step: 50/464, loss: 0.19315694272518158 2023-01-24 04:08:21.141357: step: 52/464, loss: 0.06600367277860641 2023-01-24 04:08:21.834951: step: 54/464, loss: 0.885759711265564 2023-01-24 04:08:22.595170: step: 56/464, loss: 0.06824956089258194 2023-01-24 04:08:23.346766: step: 58/464, loss: 0.10237880796194077 2023-01-24 04:08:24.130030: step: 60/464, loss: 0.0079014478251338 2023-01-24 04:08:24.824685: step: 62/464, loss: 0.08703933656215668 2023-01-24 04:08:25.520531: step: 64/464, loss: 0.20505230128765106 2023-01-24 04:08:26.197442: step: 66/464, loss: 0.08236251026391983 2023-01-24 04:08:26.988620: step: 68/464, loss: 0.13015301525592804 2023-01-24 04:08:27.719072: step: 70/464, loss: 0.06615529209375381 2023-01-24 04:08:28.416474: step: 72/464, loss: 0.22916918992996216 2023-01-24 04:08:29.193177: step: 74/464, loss: 0.07430724054574966 2023-01-24 04:08:29.931801: step: 76/464, loss: 0.0898202508687973 2023-01-24 04:08:30.710144: step: 78/464, loss: 0.08509740978479385 2023-01-24 04:08:31.420950: step: 80/464, loss: 0.02653665468096733 2023-01-24 04:08:32.107139: step: 82/464, loss: 0.1384216696023941 2023-01-24 04:08:32.844243: step: 84/464, loss: 0.1956842988729477 2023-01-24 04:08:33.604663: step: 86/464, loss: 0.047416504472494125 2023-01-24 04:08:34.298616: step: 88/464, loss: 0.19975516200065613 2023-01-24 04:08:35.004890: step: 90/464, loss: 0.1394379884004593 2023-01-24 04:08:35.739462: step: 92/464, loss: 0.11064222455024719 2023-01-24 04:08:36.412124: step: 94/464, loss: 0.011957090348005295 2023-01-24 04:08:37.148016: step: 96/464, loss: 0.0212737824767828 2023-01-24 04:08:37.900735: step: 98/464, loss: 0.11996756494045258 2023-01-24 04:08:38.599650: step: 100/464, loss: 0.08332812041044235 2023-01-24 04:08:39.443973: step: 102/464, loss: 0.04559627175331116 2023-01-24 04:08:40.183108: step: 104/464, loss: 0.11228682845830917 2023-01-24 04:08:40.916494: step: 106/464, loss: 0.057501498609781265 2023-01-24 04:08:41.598546: step: 108/464, loss: 0.02210753969848156 2023-01-24 04:08:42.348036: step: 110/464, loss: 0.021231796592473984 2023-01-24 04:08:43.122163: step: 112/464, loss: 0.22066837549209595 2023-01-24 04:08:43.797299: step: 114/464, loss: 0.04990150406956673 2023-01-24 04:08:44.661439: step: 116/464, loss: 0.05642591789364815 2023-01-24 04:08:45.469101: step: 118/464, loss: 0.010035468265414238 2023-01-24 04:08:46.251522: step: 120/464, loss: 0.15385819971561432 2023-01-24 04:08:46.980482: step: 122/464, loss: 0.12485004961490631 2023-01-24 04:08:47.662491: step: 124/464, loss: 0.01280268281698227 2023-01-24 04:08:48.416729: step: 126/464, loss: 0.2483009397983551 2023-01-24 04:08:49.167466: step: 128/464, loss: 0.05752422288060188 2023-01-24 04:08:49.957708: step: 130/464, loss: 0.036647357046604156 2023-01-24 04:08:50.671366: step: 132/464, loss: 0.034497249871492386 2023-01-24 04:08:51.403583: step: 134/464, loss: 0.03933246433734894 2023-01-24 04:08:52.088869: step: 136/464, loss: 0.029888764023780823 2023-01-24 04:08:52.811410: step: 138/464, loss: 0.06552189588546753 2023-01-24 04:08:53.512465: step: 140/464, loss: 0.38262122869491577 2023-01-24 04:08:54.219412: step: 142/464, loss: 0.015346909873187542 2023-01-24 04:08:54.952999: step: 144/464, loss: 0.07747817039489746 2023-01-24 04:08:55.653924: step: 146/464, loss: 0.0321495346724987 2023-01-24 04:08:56.403389: step: 148/464, loss: 0.04494288191199303 2023-01-24 04:08:57.170783: step: 150/464, loss: 0.05675000697374344 2023-01-24 04:08:57.826022: step: 152/464, loss: 0.10795766115188599 2023-01-24 04:08:58.564025: step: 154/464, loss: 0.2547047436237335 2023-01-24 04:08:59.204306: step: 156/464, loss: 0.018213415518403053 2023-01-24 04:08:59.914383: step: 158/464, loss: 0.027684977278113365 2023-01-24 04:09:00.727292: step: 160/464, loss: 0.04280570521950722 2023-01-24 04:09:01.529706: step: 162/464, loss: 0.2729480266571045 2023-01-24 04:09:02.256824: step: 164/464, loss: 0.06972559541463852 2023-01-24 04:09:02.986242: step: 166/464, loss: 0.049423202872276306 2023-01-24 04:09:03.731280: step: 168/464, loss: 0.5567521452903748 2023-01-24 04:09:04.487014: step: 170/464, loss: 0.3044826090335846 2023-01-24 04:09:05.188329: step: 172/464, loss: 0.06705247610807419 2023-01-24 04:09:05.904244: step: 174/464, loss: 0.012513558380305767 2023-01-24 04:09:06.629046: step: 176/464, loss: 0.13715636730194092 2023-01-24 04:09:07.368006: step: 178/464, loss: 0.014747089706361294 2023-01-24 04:09:08.091736: step: 180/464, loss: 0.01857588067650795 2023-01-24 04:09:08.847243: step: 182/464, loss: 0.05693085864186287 2023-01-24 04:09:09.648936: step: 184/464, loss: 0.09271720796823502 2023-01-24 04:09:10.414439: step: 186/464, loss: 0.04319971427321434 2023-01-24 04:09:11.120737: step: 188/464, loss: 0.009966270998120308 2023-01-24 04:09:11.841673: step: 190/464, loss: 0.060091424733400345 2023-01-24 04:09:12.560402: step: 192/464, loss: 0.09363576769828796 2023-01-24 04:09:13.319276: step: 194/464, loss: 0.03685108199715614 2023-01-24 04:09:14.078333: step: 196/464, loss: 0.10837969183921814 2023-01-24 04:09:14.859585: step: 198/464, loss: 0.0591653436422348 2023-01-24 04:09:15.621690: step: 200/464, loss: 0.034615155309438705 2023-01-24 04:09:16.324861: step: 202/464, loss: 0.1155146136879921 2023-01-24 04:09:17.115813: step: 204/464, loss: 0.05499124899506569 2023-01-24 04:09:17.837657: step: 206/464, loss: 0.06080270931124687 2023-01-24 04:09:18.555229: step: 208/464, loss: 0.060843829065561295 2023-01-24 04:09:19.379145: step: 210/464, loss: 0.06305276602506638 2023-01-24 04:09:20.086347: step: 212/464, loss: 0.07687051594257355 2023-01-24 04:09:20.842474: step: 214/464, loss: 0.08562051504850388 2023-01-24 04:09:21.567611: step: 216/464, loss: 0.11981651932001114 2023-01-24 04:09:22.301071: step: 218/464, loss: 0.08380502462387085 2023-01-24 04:09:23.062732: step: 220/464, loss: 0.11919360607862473 2023-01-24 04:09:23.842159: step: 222/464, loss: 0.08396501839160919 2023-01-24 04:09:24.559115: step: 224/464, loss: 0.05867454409599304 2023-01-24 04:09:25.268596: step: 226/464, loss: 0.018938075751066208 2023-01-24 04:09:26.030598: step: 228/464, loss: 0.21620342135429382 2023-01-24 04:09:26.830988: step: 230/464, loss: 0.05132962390780449 2023-01-24 04:09:27.579898: step: 232/464, loss: 0.09675484895706177 2023-01-24 04:09:28.299726: step: 234/464, loss: 0.03961890563368797 2023-01-24 04:09:29.045274: step: 236/464, loss: 0.027667952701449394 2023-01-24 04:09:29.774245: step: 238/464, loss: 0.14270053803920746 2023-01-24 04:09:30.434744: step: 240/464, loss: 0.031896259635686874 2023-01-24 04:09:31.263063: step: 242/464, loss: 0.05330142378807068 2023-01-24 04:09:32.007358: step: 244/464, loss: 0.06403126567602158 2023-01-24 04:09:32.721685: step: 246/464, loss: 0.03436197713017464 2023-01-24 04:09:33.461767: step: 248/464, loss: 0.06037990003824234 2023-01-24 04:09:34.197214: step: 250/464, loss: 0.04214172810316086 2023-01-24 04:09:34.905573: step: 252/464, loss: 0.03639375790953636 2023-01-24 04:09:35.660085: step: 254/464, loss: 0.13453000783920288 2023-01-24 04:09:36.460404: step: 256/464, loss: 0.05805106833577156 2023-01-24 04:09:37.289518: step: 258/464, loss: 0.16511701047420502 2023-01-24 04:09:37.996117: step: 260/464, loss: 0.025146618485450745 2023-01-24 04:09:38.744958: step: 262/464, loss: 0.04297715798020363 2023-01-24 04:09:39.451340: step: 264/464, loss: 0.08677572011947632 2023-01-24 04:09:40.168296: step: 266/464, loss: 0.18877673149108887 2023-01-24 04:09:40.963912: step: 268/464, loss: 0.029620453715324402 2023-01-24 04:09:41.686289: step: 270/464, loss: 0.021564066410064697 2023-01-24 04:09:42.502026: step: 272/464, loss: 0.03365343436598778 2023-01-24 04:09:43.333792: step: 274/464, loss: 0.06616527587175369 2023-01-24 04:09:44.180768: step: 276/464, loss: 0.05282546579837799 2023-01-24 04:09:44.980630: step: 278/464, loss: 0.09444158524274826 2023-01-24 04:09:45.671842: step: 280/464, loss: 0.048270583152770996 2023-01-24 04:09:46.416892: step: 282/464, loss: 0.035702046006917953 2023-01-24 04:09:47.166080: step: 284/464, loss: 0.15187041461467743 2023-01-24 04:09:47.939837: step: 286/464, loss: 0.07321696728467941 2023-01-24 04:09:48.711970: step: 288/464, loss: 0.061114389449357986 2023-01-24 04:09:49.384335: step: 290/464, loss: 0.07513292133808136 2023-01-24 04:09:50.077312: step: 292/464, loss: 0.02930048480629921 2023-01-24 04:09:50.778425: step: 294/464, loss: 0.06662975996732712 2023-01-24 04:09:51.507534: step: 296/464, loss: 0.01861395500600338 2023-01-24 04:09:52.259043: step: 298/464, loss: 0.07548941671848297 2023-01-24 04:09:53.009996: step: 300/464, loss: 0.06447035074234009 2023-01-24 04:09:53.820529: step: 302/464, loss: 0.032488834112882614 2023-01-24 04:09:54.536070: step: 304/464, loss: 0.11268679797649384 2023-01-24 04:09:55.249131: step: 306/464, loss: 0.023737547919154167 2023-01-24 04:09:56.066373: step: 308/464, loss: 0.05261879414319992 2023-01-24 04:09:56.744394: step: 310/464, loss: 0.02802574262022972 2023-01-24 04:09:57.496641: step: 312/464, loss: 0.057686544954776764 2023-01-24 04:09:58.242528: step: 314/464, loss: 0.015248388051986694 2023-01-24 04:09:58.951342: step: 316/464, loss: 0.09674489498138428 2023-01-24 04:09:59.676183: step: 318/464, loss: 0.4312308132648468 2023-01-24 04:10:00.428647: step: 320/464, loss: 0.03816446289420128 2023-01-24 04:10:01.195282: step: 322/464, loss: 0.1555195152759552 2023-01-24 04:10:01.921071: step: 324/464, loss: 0.09488389641046524 2023-01-24 04:10:02.743492: step: 326/464, loss: 0.059349559247493744 2023-01-24 04:10:03.503454: step: 328/464, loss: 0.04325365275144577 2023-01-24 04:10:04.188741: step: 330/464, loss: 0.08058194071054459 2023-01-24 04:10:04.990893: step: 332/464, loss: 0.08156687766313553 2023-01-24 04:10:05.739945: step: 334/464, loss: 0.055490389466285706 2023-01-24 04:10:06.429779: step: 336/464, loss: 0.03358432650566101 2023-01-24 04:10:07.172966: step: 338/464, loss: 0.09756134450435638 2023-01-24 04:10:07.917315: step: 340/464, loss: 0.2598879635334015 2023-01-24 04:10:08.665422: step: 342/464, loss: 0.09739559888839722 2023-01-24 04:10:09.381924: step: 344/464, loss: 0.0758894681930542 2023-01-24 04:10:10.081367: step: 346/464, loss: 0.016688646748661995 2023-01-24 04:10:10.720782: step: 348/464, loss: 0.07276943325996399 2023-01-24 04:10:11.517737: step: 350/464, loss: 0.4115365147590637 2023-01-24 04:10:12.340424: step: 352/464, loss: 0.6110472083091736 2023-01-24 04:10:13.040622: step: 354/464, loss: 0.02503076381981373 2023-01-24 04:10:13.759400: step: 356/464, loss: 0.04955613613128662 2023-01-24 04:10:14.529690: step: 358/464, loss: 0.036105427891016006 2023-01-24 04:10:15.236104: step: 360/464, loss: 0.0953097864985466 2023-01-24 04:10:15.852056: step: 362/464, loss: 0.05211088806390762 2023-01-24 04:10:16.645056: step: 364/464, loss: 0.06627248227596283 2023-01-24 04:10:17.369732: step: 366/464, loss: 0.29732802510261536 2023-01-24 04:10:18.093729: step: 368/464, loss: 0.07204879820346832 2023-01-24 04:10:18.801759: step: 370/464, loss: 0.08538369089365005 2023-01-24 04:10:19.507506: step: 372/464, loss: 0.0855962336063385 2023-01-24 04:10:20.222265: step: 374/464, loss: 0.34332185983657837 2023-01-24 04:10:20.939621: step: 376/464, loss: 0.11024769395589828 2023-01-24 04:10:21.674488: step: 378/464, loss: 0.022629186511039734 2023-01-24 04:10:22.397723: step: 380/464, loss: 0.0872216522693634 2023-01-24 04:10:23.207987: step: 382/464, loss: 0.07278859615325928 2023-01-24 04:10:24.099202: step: 384/464, loss: 0.0784880667924881 2023-01-24 04:10:24.755044: step: 386/464, loss: 0.06884322315454483 2023-01-24 04:10:25.489504: step: 388/464, loss: 0.07317520678043365 2023-01-24 04:10:26.186260: step: 390/464, loss: 0.07874592393636703 2023-01-24 04:10:26.937285: step: 392/464, loss: 0.025225400924682617 2023-01-24 04:10:27.673889: step: 394/464, loss: 0.01785973832011223 2023-01-24 04:10:28.393176: step: 396/464, loss: 0.0650726780295372 2023-01-24 04:10:29.142124: step: 398/464, loss: 0.0992753878235817 2023-01-24 04:10:29.817798: step: 400/464, loss: 0.03817930445075035 2023-01-24 04:10:30.571088: step: 402/464, loss: 0.15180909633636475 2023-01-24 04:10:31.366186: step: 404/464, loss: 0.09004110097885132 2023-01-24 04:10:32.145798: step: 406/464, loss: 0.09567401558160782 2023-01-24 04:10:32.859957: step: 408/464, loss: 0.027423489838838577 2023-01-24 04:10:33.573413: step: 410/464, loss: 0.02999250404536724 2023-01-24 04:10:34.315827: step: 412/464, loss: 0.023866957053542137 2023-01-24 04:10:35.028819: step: 414/464, loss: 0.039055559784173965 2023-01-24 04:10:35.892239: step: 416/464, loss: 0.13388648629188538 2023-01-24 04:10:36.710988: step: 418/464, loss: 0.07439249008893967 2023-01-24 04:10:37.463043: step: 420/464, loss: 0.04521423205733299 2023-01-24 04:10:38.222776: step: 422/464, loss: 0.07073336094617844 2023-01-24 04:10:38.929000: step: 424/464, loss: 0.21017691493034363 2023-01-24 04:10:39.676606: step: 426/464, loss: 0.2631050646305084 2023-01-24 04:10:40.357528: step: 428/464, loss: 0.060145895928144455 2023-01-24 04:10:41.119793: step: 430/464, loss: 0.08439886569976807 2023-01-24 04:10:41.923930: step: 432/464, loss: 0.09918252378702164 2023-01-24 04:10:42.686909: step: 434/464, loss: 0.14070436358451843 2023-01-24 04:10:43.330763: step: 436/464, loss: 0.005157202482223511 2023-01-24 04:10:44.197278: step: 438/464, loss: 0.05908423289656639 2023-01-24 04:10:44.935068: step: 440/464, loss: 0.19316250085830688 2023-01-24 04:10:45.615274: step: 442/464, loss: 0.26289963722229004 2023-01-24 04:10:46.449591: step: 444/464, loss: 0.3197251558303833 2023-01-24 04:10:47.186025: step: 446/464, loss: 0.03284204378724098 2023-01-24 04:10:47.851429: step: 448/464, loss: 0.09915520995855331 2023-01-24 04:10:48.574572: step: 450/464, loss: 0.2551385462284088 2023-01-24 04:10:49.247031: step: 452/464, loss: 0.1864646077156067 2023-01-24 04:10:49.944065: step: 454/464, loss: 0.0436815470457077 2023-01-24 04:10:50.602328: step: 456/464, loss: 0.10498335212469101 2023-01-24 04:10:51.353772: step: 458/464, loss: 0.033282823860645294 2023-01-24 04:10:52.076935: step: 460/464, loss: 0.010245820507407188 2023-01-24 04:10:52.901290: step: 462/464, loss: 0.06738439947366714 2023-01-24 04:10:53.616284: step: 464/464, loss: 0.050078701227903366 2023-01-24 04:10:54.393180: step: 466/464, loss: 0.06606537848711014 2023-01-24 04:10:55.143798: step: 468/464, loss: 0.025787794962525368 2023-01-24 04:10:55.857738: step: 470/464, loss: 0.07703365385532379 2023-01-24 04:10:56.589738: step: 472/464, loss: 0.01791863888502121 2023-01-24 04:10:57.281435: step: 474/464, loss: 0.3191978633403778 2023-01-24 04:10:58.032566: step: 476/464, loss: 1.24513578414917 2023-01-24 04:10:58.787854: step: 478/464, loss: 0.09245769679546356 2023-01-24 04:10:59.536515: step: 480/464, loss: 0.08820255845785141 2023-01-24 04:11:00.269832: step: 482/464, loss: 0.10357934981584549 2023-01-24 04:11:00.992945: step: 484/464, loss: 0.3993353843688965 2023-01-24 04:11:01.634053: step: 486/464, loss: 0.24043171107769012 2023-01-24 04:11:02.416655: step: 488/464, loss: 0.04684942960739136 2023-01-24 04:11:03.149288: step: 490/464, loss: 0.07265922427177429 2023-01-24 04:11:03.907845: step: 492/464, loss: 0.1522282212972641 2023-01-24 04:11:04.727598: step: 494/464, loss: 0.05145607516169548 2023-01-24 04:11:05.475067: step: 496/464, loss: 0.07494250684976578 2023-01-24 04:11:06.194246: step: 498/464, loss: 0.07337863743305206 2023-01-24 04:11:06.903943: step: 500/464, loss: 0.10319215059280396 2023-01-24 04:11:07.615231: step: 502/464, loss: 0.3263223171234131 2023-01-24 04:11:08.297151: step: 504/464, loss: 0.06274055689573288 2023-01-24 04:11:09.021632: step: 506/464, loss: 0.04613077640533447 2023-01-24 04:11:09.751535: step: 508/464, loss: 0.06665505468845367 2023-01-24 04:11:10.553717: step: 510/464, loss: 0.09452710300683975 2023-01-24 04:11:11.285519: step: 512/464, loss: 0.026932209730148315 2023-01-24 04:11:12.105664: step: 514/464, loss: 0.1044914722442627 2023-01-24 04:11:12.817000: step: 516/464, loss: 0.07837115973234177 2023-01-24 04:11:13.678980: step: 518/464, loss: 0.07439406961202621 2023-01-24 04:11:14.472569: step: 520/464, loss: 0.060525111854076385 2023-01-24 04:11:15.229846: step: 522/464, loss: 0.03752945363521576 2023-01-24 04:11:15.978212: step: 524/464, loss: 0.03834892064332962 2023-01-24 04:11:16.660658: step: 526/464, loss: 0.36184078454971313 2023-01-24 04:11:17.395585: step: 528/464, loss: 0.35955408215522766 2023-01-24 04:11:18.147708: step: 530/464, loss: 0.1059010699391365 2023-01-24 04:11:18.839907: step: 532/464, loss: 0.012662368826568127 2023-01-24 04:11:19.576816: step: 534/464, loss: 0.021875306963920593 2023-01-24 04:11:20.324761: step: 536/464, loss: 0.06533759832382202 2023-01-24 04:11:21.081651: step: 538/464, loss: 0.020436787977814674 2023-01-24 04:11:21.792984: step: 540/464, loss: 0.1005106270313263 2023-01-24 04:11:22.522194: step: 542/464, loss: 0.037786103785037994 2023-01-24 04:11:23.334975: step: 544/464, loss: 0.0772073045372963 2023-01-24 04:11:24.105055: step: 546/464, loss: 0.19233551621437073 2023-01-24 04:11:24.811732: step: 548/464, loss: 0.04664510488510132 2023-01-24 04:11:25.641539: step: 550/464, loss: 0.09136329591274261 2023-01-24 04:11:26.360712: step: 552/464, loss: 0.03763750195503235 2023-01-24 04:11:27.156823: step: 554/464, loss: 0.08128439635038376 2023-01-24 04:11:28.045138: step: 556/464, loss: 0.10550633072853088 2023-01-24 04:11:28.769140: step: 558/464, loss: 0.019246075302362442 2023-01-24 04:11:29.526578: step: 560/464, loss: 0.0624995157122612 2023-01-24 04:11:30.278242: step: 562/464, loss: 1.1483707427978516 2023-01-24 04:11:30.942126: step: 564/464, loss: 0.12313096970319748 2023-01-24 04:11:31.659060: step: 566/464, loss: 0.08051449060440063 2023-01-24 04:11:32.371096: step: 568/464, loss: 0.16188694536685944 2023-01-24 04:11:33.214974: step: 570/464, loss: 0.036759935319423676 2023-01-24 04:11:33.997825: step: 572/464, loss: 0.01996193267405033 2023-01-24 04:11:34.726049: step: 574/464, loss: 0.020887991413474083 2023-01-24 04:11:35.584562: step: 576/464, loss: 0.08122891932725906 2023-01-24 04:11:36.298292: step: 578/464, loss: 0.03574630990624428 2023-01-24 04:11:37.001287: step: 580/464, loss: 0.09584583342075348 2023-01-24 04:11:37.729654: step: 582/464, loss: 0.11320322006940842 2023-01-24 04:11:38.539749: step: 584/464, loss: 0.11420974135398865 2023-01-24 04:11:39.217025: step: 586/464, loss: 0.07629022002220154 2023-01-24 04:11:39.954183: step: 588/464, loss: 0.07765085250139236 2023-01-24 04:11:40.746026: step: 590/464, loss: 0.04749465361237526 2023-01-24 04:11:41.508179: step: 592/464, loss: 0.0955575630068779 2023-01-24 04:11:42.346355: step: 594/464, loss: 0.02726142108440399 2023-01-24 04:11:43.045807: step: 596/464, loss: 0.7329921126365662 2023-01-24 04:11:43.727957: step: 598/464, loss: 0.006696996279060841 2023-01-24 04:11:44.477606: step: 600/464, loss: 0.06922253221273422 2023-01-24 04:11:45.224720: step: 602/464, loss: 0.05629788339138031 2023-01-24 04:11:46.002612: step: 604/464, loss: 0.06038188934326172 2023-01-24 04:11:46.792032: step: 606/464, loss: 0.03784613311290741 2023-01-24 04:11:47.592288: step: 608/464, loss: 0.1631857305765152 2023-01-24 04:11:48.287527: step: 610/464, loss: 0.08317292481660843 2023-01-24 04:11:48.961546: step: 612/464, loss: 0.0531904511153698 2023-01-24 04:11:49.697747: step: 614/464, loss: 0.07334493845701218 2023-01-24 04:11:50.405192: step: 616/464, loss: 0.0606456883251667 2023-01-24 04:11:51.138826: step: 618/464, loss: 0.03547906503081322 2023-01-24 04:11:51.932687: step: 620/464, loss: 0.03521246835589409 2023-01-24 04:11:52.715774: step: 622/464, loss: 0.05137834697961807 2023-01-24 04:11:53.526392: step: 624/464, loss: 0.03159233555197716 2023-01-24 04:11:54.217707: step: 626/464, loss: 0.015035794116556644 2023-01-24 04:11:55.090637: step: 628/464, loss: 0.071849025785923 2023-01-24 04:11:55.845837: step: 630/464, loss: 0.16488297283649445 2023-01-24 04:11:56.618694: step: 632/464, loss: 0.03442539647221565 2023-01-24 04:11:57.339528: step: 634/464, loss: 0.03260069712996483 2023-01-24 04:11:58.058548: step: 636/464, loss: 0.056695397943258286 2023-01-24 04:11:58.819473: step: 638/464, loss: 2.0955114364624023 2023-01-24 04:11:59.526016: step: 640/464, loss: 0.045452218502759933 2023-01-24 04:12:00.266105: step: 642/464, loss: 0.12092519551515579 2023-01-24 04:12:00.984245: step: 644/464, loss: 0.02838761731982231 2023-01-24 04:12:01.691810: step: 646/464, loss: 0.019557658582925797 2023-01-24 04:12:02.393463: step: 648/464, loss: 0.03526949882507324 2023-01-24 04:12:03.063068: step: 650/464, loss: 0.07882774621248245 2023-01-24 04:12:03.792087: step: 652/464, loss: 0.1663265824317932 2023-01-24 04:12:04.542212: step: 654/464, loss: 0.09100101888179779 2023-01-24 04:12:05.253127: step: 656/464, loss: 0.08623427897691727 2023-01-24 04:12:05.942819: step: 658/464, loss: 0.04138614237308502 2023-01-24 04:12:06.616680: step: 660/464, loss: 0.015468517318367958 2023-01-24 04:12:07.407632: step: 662/464, loss: 0.060640037059783936 2023-01-24 04:12:08.122366: step: 664/464, loss: 0.1918199211359024 2023-01-24 04:12:08.840932: step: 666/464, loss: 0.009935087524354458 2023-01-24 04:12:09.608983: step: 668/464, loss: 0.03264904394745827 2023-01-24 04:12:10.407257: step: 670/464, loss: 0.06861291825771332 2023-01-24 04:12:11.157542: step: 672/464, loss: 0.04679633677005768 2023-01-24 04:12:11.947304: step: 674/464, loss: 0.10642059892416 2023-01-24 04:12:12.683362: step: 676/464, loss: 0.08146971464157104 2023-01-24 04:12:13.364045: step: 678/464, loss: 0.026526009663939476 2023-01-24 04:12:14.034147: step: 680/464, loss: 0.13253115117549896 2023-01-24 04:12:14.673528: step: 682/464, loss: 0.15299640595912933 2023-01-24 04:12:15.487324: step: 684/464, loss: 0.16351103782653809 2023-01-24 04:12:16.238118: step: 686/464, loss: 0.11877278983592987 2023-01-24 04:12:16.996599: step: 688/464, loss: 0.08799266070127487 2023-01-24 04:12:17.692512: step: 690/464, loss: 0.01087766420096159 2023-01-24 04:12:18.499508: step: 692/464, loss: 0.17610910534858704 2023-01-24 04:12:19.250242: step: 694/464, loss: 0.0128702437505126 2023-01-24 04:12:19.982346: step: 696/464, loss: 4.251014709472656 2023-01-24 04:12:20.832904: step: 698/464, loss: 0.12636759877204895 2023-01-24 04:12:21.624549: step: 700/464, loss: 0.48409542441368103 2023-01-24 04:12:22.467095: step: 702/464, loss: 0.03458774834871292 2023-01-24 04:12:23.289075: step: 704/464, loss: 0.34720274806022644 2023-01-24 04:12:24.006127: step: 706/464, loss: 0.12886154651641846 2023-01-24 04:12:24.791026: step: 708/464, loss: 0.015398476272821426 2023-01-24 04:12:25.494275: step: 710/464, loss: 0.1452849805355072 2023-01-24 04:12:26.271314: step: 712/464, loss: 0.05062844231724739 2023-01-24 04:12:27.007104: step: 714/464, loss: 0.052139606326818466 2023-01-24 04:12:27.820801: step: 716/464, loss: 0.07707355916500092 2023-01-24 04:12:28.599628: step: 718/464, loss: 0.01662560924887657 2023-01-24 04:12:29.323943: step: 720/464, loss: 0.27249106764793396 2023-01-24 04:12:30.020576: step: 722/464, loss: 0.22377265989780426 2023-01-24 04:12:30.888652: step: 724/464, loss: 0.11260569095611572 2023-01-24 04:12:31.657262: step: 726/464, loss: 0.06264045834541321 2023-01-24 04:12:32.536877: step: 728/464, loss: 0.013300295919179916 2023-01-24 04:12:33.313062: step: 730/464, loss: 0.007667948491871357 2023-01-24 04:12:34.081530: step: 732/464, loss: 0.04861823841929436 2023-01-24 04:12:34.813391: step: 734/464, loss: 0.24045132100582123 2023-01-24 04:12:35.688188: step: 736/464, loss: 0.08063305169343948 2023-01-24 04:12:36.460349: step: 738/464, loss: 0.07206561416387558 2023-01-24 04:12:37.196392: step: 740/464, loss: 1.408238172531128 2023-01-24 04:12:37.836009: step: 742/464, loss: 0.10014645755290985 2023-01-24 04:12:38.570339: step: 744/464, loss: 0.15454338490962982 2023-01-24 04:12:39.326390: step: 746/464, loss: 0.05530770123004913 2023-01-24 04:12:40.065030: step: 748/464, loss: 0.15269611775875092 2023-01-24 04:12:40.827051: step: 750/464, loss: 0.10810256749391556 2023-01-24 04:12:41.657847: step: 752/464, loss: 0.18739116191864014 2023-01-24 04:12:42.477208: step: 754/464, loss: 0.029451007023453712 2023-01-24 04:12:43.214578: step: 756/464, loss: 0.25769859552383423 2023-01-24 04:12:43.969201: step: 758/464, loss: 0.07550712674856186 2023-01-24 04:12:44.793453: step: 760/464, loss: 0.07921797782182693 2023-01-24 04:12:45.578542: step: 762/464, loss: 0.3808189928531647 2023-01-24 04:12:46.283370: step: 764/464, loss: 0.07425136119127274 2023-01-24 04:12:47.110899: step: 766/464, loss: 0.08345261216163635 2023-01-24 04:12:47.876888: step: 768/464, loss: 0.017589038237929344 2023-01-24 04:12:48.613612: step: 770/464, loss: 0.06161960959434509 2023-01-24 04:12:49.369008: step: 772/464, loss: 0.033091556280851364 2023-01-24 04:12:50.066776: step: 774/464, loss: 0.058344125747680664 2023-01-24 04:12:50.789927: step: 776/464, loss: 0.0743429884314537 2023-01-24 04:12:51.530932: step: 778/464, loss: 0.07232428342103958 2023-01-24 04:12:52.264332: step: 780/464, loss: 0.09190531075000763 2023-01-24 04:12:52.989248: step: 782/464, loss: 0.05084923282265663 2023-01-24 04:12:53.678874: step: 784/464, loss: 0.2766363322734833 2023-01-24 04:12:54.366124: step: 786/464, loss: 1.6356395483016968 2023-01-24 04:12:55.081283: step: 788/464, loss: 0.003092309460043907 2023-01-24 04:12:55.840932: step: 790/464, loss: 0.04528704658150673 2023-01-24 04:12:56.591548: step: 792/464, loss: 0.030916161835193634 2023-01-24 04:12:57.372774: step: 794/464, loss: 0.02986675500869751 2023-01-24 04:12:58.150906: step: 796/464, loss: 0.1350688338279724 2023-01-24 04:12:58.807935: step: 798/464, loss: 1.9431557655334473 2023-01-24 04:12:59.541540: step: 800/464, loss: 0.057894084602594376 2023-01-24 04:13:00.281866: step: 802/464, loss: 0.11123964190483093 2023-01-24 04:13:01.014535: step: 804/464, loss: 0.021497314795851707 2023-01-24 04:13:01.820088: step: 806/464, loss: 0.3729839622974396 2023-01-24 04:13:02.520326: step: 808/464, loss: 0.05687344819307327 2023-01-24 04:13:03.167471: step: 810/464, loss: 0.0007358321454375982 2023-01-24 04:13:03.933981: step: 812/464, loss: 0.5136083960533142 2023-01-24 04:13:04.689277: step: 814/464, loss: 0.07494036853313446 2023-01-24 04:13:05.462180: step: 816/464, loss: 0.027758443728089333 2023-01-24 04:13:06.237777: step: 818/464, loss: 0.12550386786460876 2023-01-24 04:13:07.037892: step: 820/464, loss: 0.04769080877304077 2023-01-24 04:13:07.715647: step: 822/464, loss: 0.02603987790644169 2023-01-24 04:13:08.478636: step: 824/464, loss: 0.3562179505825043 2023-01-24 04:13:09.142280: step: 826/464, loss: 0.021887611597776413 2023-01-24 04:13:09.981133: step: 828/464, loss: 0.07408808916807175 2023-01-24 04:13:10.656077: step: 830/464, loss: 1.2311229705810547 2023-01-24 04:13:11.358995: step: 832/464, loss: 0.46964800357818604 2023-01-24 04:13:12.084349: step: 834/464, loss: 0.0071813007816672325 2023-01-24 04:13:12.806670: step: 836/464, loss: 0.03797230124473572 2023-01-24 04:13:13.518178: step: 838/464, loss: 0.11793620139360428 2023-01-24 04:13:14.249605: step: 840/464, loss: 0.04512689262628555 2023-01-24 04:13:14.965720: step: 842/464, loss: 0.1048913225531578 2023-01-24 04:13:15.677259: step: 844/464, loss: 0.05974305421113968 2023-01-24 04:13:16.412410: step: 846/464, loss: 0.17225582897663116 2023-01-24 04:13:17.210651: step: 848/464, loss: 0.11195321381092072 2023-01-24 04:13:17.985512: step: 850/464, loss: 0.07324235886335373 2023-01-24 04:13:18.734890: step: 852/464, loss: 0.05328064784407616 2023-01-24 04:13:19.542560: step: 854/464, loss: 0.06797124445438385 2023-01-24 04:13:20.312723: step: 856/464, loss: 0.1463581919670105 2023-01-24 04:13:21.115932: step: 858/464, loss: 0.1293702870607376 2023-01-24 04:13:21.844142: step: 860/464, loss: 0.05080826207995415 2023-01-24 04:13:22.600984: step: 862/464, loss: 0.04502733424305916 2023-01-24 04:13:23.385531: step: 864/464, loss: 0.8059793710708618 2023-01-24 04:13:24.125053: step: 866/464, loss: 0.2736210227012634 2023-01-24 04:13:24.855220: step: 868/464, loss: 0.04146193712949753 2023-01-24 04:13:25.598982: step: 870/464, loss: 0.035493213683366776 2023-01-24 04:13:26.406059: step: 872/464, loss: 0.13061979413032532 2023-01-24 04:13:27.072503: step: 874/464, loss: 0.04180055856704712 2023-01-24 04:13:27.806849: step: 876/464, loss: 0.9490019083023071 2023-01-24 04:13:28.656749: step: 878/464, loss: 0.14072301983833313 2023-01-24 04:13:29.519121: step: 880/464, loss: 0.051200225949287415 2023-01-24 04:13:30.326214: step: 882/464, loss: 0.06961522996425629 2023-01-24 04:13:31.072710: step: 884/464, loss: 0.11146531999111176 2023-01-24 04:13:31.869892: step: 886/464, loss: 0.17565035820007324 2023-01-24 04:13:32.646749: step: 888/464, loss: 0.11682313680648804 2023-01-24 04:13:33.490240: step: 890/464, loss: 0.07494233548641205 2023-01-24 04:13:34.261035: step: 892/464, loss: 0.06947507709264755 2023-01-24 04:13:34.977315: step: 894/464, loss: 0.041834089905023575 2023-01-24 04:13:35.676247: step: 896/464, loss: 0.07336413115262985 2023-01-24 04:13:36.436582: step: 898/464, loss: 0.010007267817854881 2023-01-24 04:13:37.154763: step: 900/464, loss: 0.07785910367965698 2023-01-24 04:13:37.908913: step: 902/464, loss: 0.14214085042476654 2023-01-24 04:13:38.663416: step: 904/464, loss: 0.7422015070915222 2023-01-24 04:13:39.330775: step: 906/464, loss: 0.08649776130914688 2023-01-24 04:13:40.097581: step: 908/464, loss: 0.017394889146089554 2023-01-24 04:13:40.804566: step: 910/464, loss: 0.19215063750743866 2023-01-24 04:13:41.540590: step: 912/464, loss: 0.14460989832878113 2023-01-24 04:13:42.273543: step: 914/464, loss: 0.037630844861269 2023-01-24 04:13:43.151177: step: 916/464, loss: 0.09516682475805283 2023-01-24 04:13:43.973630: step: 918/464, loss: 0.03875623270869255 2023-01-24 04:13:44.665048: step: 920/464, loss: 0.07534055411815643 2023-01-24 04:13:45.334107: step: 922/464, loss: 0.010584483854472637 2023-01-24 04:13:46.068171: step: 924/464, loss: 0.12951800227165222 2023-01-24 04:13:46.730971: step: 926/464, loss: 0.09220469743013382 2023-01-24 04:13:47.485050: step: 928/464, loss: 0.06358341872692108 2023-01-24 04:13:48.071096: step: 930/464, loss: 0.0035241262521594763 ================================================== Loss: 0.135 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34615152949919686, 'r': 0.3317011810191545, 'f1': 0.3387723302269272}, 'combined': 0.24962171700931476, 'epoch': 18} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3123390677752571, 'r': 0.27129055392732315, 'f1': 0.29037127506552196}, 'combined': 0.18033584451437681, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.321106382844723, 'r': 0.3131853525278702, 'f1': 0.3170964088034344}, 'combined': 0.23364998543410956, 'epoch': 18} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.2997063280530514, 'r': 0.2632795707896865, 'f1': 0.28031449304488454}, 'combined': 0.1740900535752441, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33704188581519046, 'r': 0.32553001874749893, 'f1': 0.3311859457141543}, 'combined': 0.24403174947358736, 'epoch': 18} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32080712397544486, 'r': 0.2798732772960854, 'f1': 0.29894546576475683}, 'combined': 0.18566086821179636, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3671875, 'r': 0.2517857142857143, 'f1': 0.298728813559322}, 'combined': 0.19915254237288132, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3046875, 'r': 0.42391304347826086, 'f1': 0.3545454545454545}, 'combined': 0.17727272727272725, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.49038461538461536, 'r': 0.21982758620689655, 'f1': 0.30357142857142855}, 'combined': 0.20238095238095236, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:16:28.165921: step: 2/464, loss: 0.12920217216014862 2023-01-24 04:16:28.940026: step: 4/464, loss: 0.04563036561012268 2023-01-24 04:16:29.736298: step: 6/464, loss: 0.09706927090883255 2023-01-24 04:16:30.431328: step: 8/464, loss: 0.03109694831073284 2023-01-24 04:16:31.178408: step: 10/464, loss: 0.036775220185518265 2023-01-24 04:16:31.925472: step: 12/464, loss: 0.18778790533542633 2023-01-24 04:16:32.676983: step: 14/464, loss: 0.0676003098487854 2023-01-24 04:16:33.378557: step: 16/464, loss: 0.8434752821922302 2023-01-24 04:16:34.073037: step: 18/464, loss: 0.11235176026821136 2023-01-24 04:16:34.887643: step: 20/464, loss: 0.06825807690620422 2023-01-24 04:16:35.654209: step: 22/464, loss: 0.006161467172205448 2023-01-24 04:16:36.397650: step: 24/464, loss: 0.04186315834522247 2023-01-24 04:16:37.089829: step: 26/464, loss: 0.03112836368381977 2023-01-24 04:16:37.818542: step: 28/464, loss: 0.03736530616879463 2023-01-24 04:16:38.559749: step: 30/464, loss: 0.07068456709384918 2023-01-24 04:16:39.301898: step: 32/464, loss: 0.06576775014400482 2023-01-24 04:16:40.014123: step: 34/464, loss: 0.06414441019296646 2023-01-24 04:16:40.779899: step: 36/464, loss: 0.08074159920215607 2023-01-24 04:16:41.600003: step: 38/464, loss: 0.03453665226697922 2023-01-24 04:16:42.377544: step: 40/464, loss: 0.09841134399175644 2023-01-24 04:16:43.271008: step: 42/464, loss: 0.10741052031517029 2023-01-24 04:16:43.940212: step: 44/464, loss: 0.05406144633889198 2023-01-24 04:16:44.794516: step: 46/464, loss: 0.05802828073501587 2023-01-24 04:16:45.530916: step: 48/464, loss: 0.015924572944641113 2023-01-24 04:16:46.300162: step: 50/464, loss: 0.036363400518894196 2023-01-24 04:16:46.975662: step: 52/464, loss: 0.024387864395976067 2023-01-24 04:16:47.785569: step: 54/464, loss: 0.030658980831503868 2023-01-24 04:16:48.447453: step: 56/464, loss: 0.020193232223391533 2023-01-24 04:16:49.182110: step: 58/464, loss: 0.34004831314086914 2023-01-24 04:16:49.959334: step: 60/464, loss: 0.05436702072620392 2023-01-24 04:16:50.742846: step: 62/464, loss: 0.9230442047119141 2023-01-24 04:16:51.417797: step: 64/464, loss: 0.021562401205301285 2023-01-24 04:16:52.160093: step: 66/464, loss: 0.030617645010352135 2023-01-24 04:16:52.981191: step: 68/464, loss: 0.2545686960220337 2023-01-24 04:16:53.664552: step: 70/464, loss: 0.05021323636174202 2023-01-24 04:16:54.390958: step: 72/464, loss: 0.03269173204898834 2023-01-24 04:16:55.097380: step: 74/464, loss: 0.07474524527788162 2023-01-24 04:16:55.812796: step: 76/464, loss: 0.05859186127781868 2023-01-24 04:16:56.543398: step: 78/464, loss: 0.022472452372312546 2023-01-24 04:16:57.283290: step: 80/464, loss: 0.03345894068479538 2023-01-24 04:16:57.996275: step: 82/464, loss: 0.002976879244670272 2023-01-24 04:16:58.771809: step: 84/464, loss: 0.5960641503334045 2023-01-24 04:16:59.503947: step: 86/464, loss: 0.0958690419793129 2023-01-24 04:17:00.283501: step: 88/464, loss: 0.026807444170117378 2023-01-24 04:17:01.011344: step: 90/464, loss: 0.11869814991950989 2023-01-24 04:17:01.698303: step: 92/464, loss: 0.031810395419597626 2023-01-24 04:17:02.341529: step: 94/464, loss: 0.05304112657904625 2023-01-24 04:17:03.084134: step: 96/464, loss: 0.19493931531906128 2023-01-24 04:17:03.835856: step: 98/464, loss: 0.03594636544585228 2023-01-24 04:17:04.454043: step: 100/464, loss: 0.0064991544932127 2023-01-24 04:17:05.154590: step: 102/464, loss: 0.1808507740497589 2023-01-24 04:17:05.931654: step: 104/464, loss: 0.006921856198459864 2023-01-24 04:17:06.708517: step: 106/464, loss: 0.21227648854255676 2023-01-24 04:17:07.372898: step: 108/464, loss: 0.027723848819732666 2023-01-24 04:17:08.087215: step: 110/464, loss: 0.031780317425727844 2023-01-24 04:17:08.829902: step: 112/464, loss: 0.14285974204540253 2023-01-24 04:17:09.452845: step: 114/464, loss: 0.11264093220233917 2023-01-24 04:17:10.294454: step: 116/464, loss: 0.05362096428871155 2023-01-24 04:17:10.957357: step: 118/464, loss: 0.03317618370056152 2023-01-24 04:17:11.662964: step: 120/464, loss: 0.767920196056366 2023-01-24 04:17:12.438381: step: 122/464, loss: 0.030632128939032555 2023-01-24 04:17:13.122576: step: 124/464, loss: 0.19124922156333923 2023-01-24 04:17:13.873317: step: 126/464, loss: 0.013469697907567024 2023-01-24 04:17:14.586581: step: 128/464, loss: 0.016623545438051224 2023-01-24 04:17:15.316885: step: 130/464, loss: 0.058871425688266754 2023-01-24 04:17:16.077382: step: 132/464, loss: 0.029303649440407753 2023-01-24 04:17:16.797131: step: 134/464, loss: 0.057812321931123734 2023-01-24 04:17:17.554285: step: 136/464, loss: 0.07526414096355438 2023-01-24 04:17:18.262850: step: 138/464, loss: 0.05507267266511917 2023-01-24 04:17:18.912913: step: 140/464, loss: 0.06511414051055908 2023-01-24 04:17:19.700004: step: 142/464, loss: 0.0870545282959938 2023-01-24 04:17:20.469926: step: 144/464, loss: 0.03300241008400917 2023-01-24 04:17:21.141757: step: 146/464, loss: 0.10764817148447037 2023-01-24 04:17:21.881066: step: 148/464, loss: 0.12006808072328568 2023-01-24 04:17:22.620851: step: 150/464, loss: 0.007246334571391344 2023-01-24 04:17:23.495372: step: 152/464, loss: 0.043186988681554794 2023-01-24 04:17:24.266124: step: 154/464, loss: 0.09337171167135239 2023-01-24 04:17:24.969013: step: 156/464, loss: 1.414793610572815 2023-01-24 04:17:25.671694: step: 158/464, loss: 0.051513053476810455 2023-01-24 04:17:26.453495: step: 160/464, loss: 0.015428408049046993 2023-01-24 04:17:27.389314: step: 162/464, loss: 0.08127868920564651 2023-01-24 04:17:28.096895: step: 164/464, loss: 0.007645327132195234 2023-01-24 04:17:28.918964: step: 166/464, loss: 0.16297253966331482 2023-01-24 04:17:29.629396: step: 168/464, loss: 0.0968436524271965 2023-01-24 04:17:30.326796: step: 170/464, loss: 0.18944884836673737 2023-01-24 04:17:31.122548: step: 172/464, loss: 0.032020069658756256 2023-01-24 04:17:31.850244: step: 174/464, loss: 0.053174279630184174 2023-01-24 04:17:32.628885: step: 176/464, loss: 0.11757547408342361 2023-01-24 04:17:33.347989: step: 178/464, loss: 0.028553200885653496 2023-01-24 04:17:34.032947: step: 180/464, loss: 0.09267770498991013 2023-01-24 04:17:34.664638: step: 182/464, loss: 0.02398275025188923 2023-01-24 04:17:35.379749: step: 184/464, loss: 0.07906018197536469 2023-01-24 04:17:36.109037: step: 186/464, loss: 0.02486865594983101 2023-01-24 04:17:36.842077: step: 188/464, loss: 0.06919976323843002 2023-01-24 04:17:37.645892: step: 190/464, loss: 0.048569340258836746 2023-01-24 04:17:38.408493: step: 192/464, loss: 0.08920535445213318 2023-01-24 04:17:39.149045: step: 194/464, loss: 0.03628360852599144 2023-01-24 04:17:39.971328: step: 196/464, loss: 0.6250218749046326 2023-01-24 04:17:40.705132: step: 198/464, loss: 0.17623533308506012 2023-01-24 04:17:41.389633: step: 200/464, loss: 0.08829116821289062 2023-01-24 04:17:42.189273: step: 202/464, loss: 0.1636122614145279 2023-01-24 04:17:42.925360: step: 204/464, loss: 0.027989499270915985 2023-01-24 04:17:43.621606: step: 206/464, loss: 0.0427122488617897 2023-01-24 04:17:44.356072: step: 208/464, loss: 0.034912075847387314 2023-01-24 04:17:45.110479: step: 210/464, loss: 0.14765290915966034 2023-01-24 04:17:45.899845: step: 212/464, loss: 0.0036442021373659372 2023-01-24 04:17:46.683158: step: 214/464, loss: 0.09502021223306656 2023-01-24 04:17:47.425075: step: 216/464, loss: 0.06033490598201752 2023-01-24 04:17:48.091551: step: 218/464, loss: 0.01887083612382412 2023-01-24 04:17:48.867667: step: 220/464, loss: 0.08002995699644089 2023-01-24 04:17:49.727076: step: 222/464, loss: 0.09107064455747604 2023-01-24 04:17:50.493490: step: 224/464, loss: 0.05492367967963219 2023-01-24 04:17:51.262988: step: 226/464, loss: 0.03423614054918289 2023-01-24 04:17:52.087144: step: 228/464, loss: 0.3338893949985504 2023-01-24 04:17:52.783388: step: 230/464, loss: 0.09567377716302872 2023-01-24 04:17:53.485240: step: 232/464, loss: 0.05865306407213211 2023-01-24 04:17:54.207101: step: 234/464, loss: 0.07105015218257904 2023-01-24 04:17:55.011928: step: 236/464, loss: 0.05263880640268326 2023-01-24 04:17:55.766894: step: 238/464, loss: 0.0064240251667797565 2023-01-24 04:17:56.472991: step: 240/464, loss: 0.01830356940627098 2023-01-24 04:17:57.210328: step: 242/464, loss: 0.010274741798639297 2023-01-24 04:17:57.992213: step: 244/464, loss: 0.07704408466815948 2023-01-24 04:17:58.703362: step: 246/464, loss: 0.08474908024072647 2023-01-24 04:17:59.460821: step: 248/464, loss: 0.018210938200354576 2023-01-24 04:18:00.147598: step: 250/464, loss: 0.03733988106250763 2023-01-24 04:18:00.873269: step: 252/464, loss: 0.0747935101389885 2023-01-24 04:18:01.538682: step: 254/464, loss: 0.031007833778858185 2023-01-24 04:18:02.185490: step: 256/464, loss: 0.005993698723614216 2023-01-24 04:18:02.932398: step: 258/464, loss: 0.03846803680062294 2023-01-24 04:18:03.622866: step: 260/464, loss: 0.014938733540475368 2023-01-24 04:18:04.429772: step: 262/464, loss: 0.030163099989295006 2023-01-24 04:18:05.128738: step: 264/464, loss: 0.02831958793103695 2023-01-24 04:18:05.895141: step: 266/464, loss: 0.08256911486387253 2023-01-24 04:18:06.623240: step: 268/464, loss: 0.0718783363699913 2023-01-24 04:18:07.355853: step: 270/464, loss: 0.31167250871658325 2023-01-24 04:18:08.131371: step: 272/464, loss: 0.05279042571783066 2023-01-24 04:18:08.905358: step: 274/464, loss: 0.02307545766234398 2023-01-24 04:18:09.637048: step: 276/464, loss: 0.011180113069713116 2023-01-24 04:18:10.451184: step: 278/464, loss: 0.02118949219584465 2023-01-24 04:18:11.262774: step: 280/464, loss: 0.11446698009967804 2023-01-24 04:18:12.031207: step: 282/464, loss: 0.09866126626729965 2023-01-24 04:18:12.780038: step: 284/464, loss: 0.472391277551651 2023-01-24 04:18:13.466775: step: 286/464, loss: 0.11861933022737503 2023-01-24 04:18:14.296365: step: 288/464, loss: 0.024260375648736954 2023-01-24 04:18:15.042009: step: 290/464, loss: 0.16927288472652435 2023-01-24 04:18:15.732294: step: 292/464, loss: 0.10281242430210114 2023-01-24 04:18:16.472403: step: 294/464, loss: 0.026213889941573143 2023-01-24 04:18:17.301647: step: 296/464, loss: 0.6735707521438599 2023-01-24 04:18:17.972526: step: 298/464, loss: 0.05733761563897133 2023-01-24 04:18:18.679314: step: 300/464, loss: 0.02001768723130226 2023-01-24 04:18:19.400510: step: 302/464, loss: 0.1101067066192627 2023-01-24 04:18:20.052101: step: 304/464, loss: 0.06691374629735947 2023-01-24 04:18:20.770421: step: 306/464, loss: 0.06036174297332764 2023-01-24 04:18:21.488927: step: 308/464, loss: 0.085693359375 2023-01-24 04:18:22.229783: step: 310/464, loss: 0.017201263457536697 2023-01-24 04:18:22.929717: step: 312/464, loss: 0.05184159800410271 2023-01-24 04:18:23.688242: step: 314/464, loss: 0.0776776447892189 2023-01-24 04:18:24.384074: step: 316/464, loss: 0.07553061097860336 2023-01-24 04:18:25.135452: step: 318/464, loss: 0.3421646058559418 2023-01-24 04:18:25.878966: step: 320/464, loss: 0.9886565804481506 2023-01-24 04:18:26.624693: step: 322/464, loss: 0.13509303331375122 2023-01-24 04:18:27.476081: step: 324/464, loss: 0.02174947038292885 2023-01-24 04:18:28.280919: step: 326/464, loss: 0.055290013551712036 2023-01-24 04:18:28.994661: step: 328/464, loss: 0.0537884496152401 2023-01-24 04:18:29.692544: step: 330/464, loss: 0.04658864066004753 2023-01-24 04:18:30.483648: step: 332/464, loss: 0.26379671692848206 2023-01-24 04:18:31.272129: step: 334/464, loss: 0.2708098590373993 2023-01-24 04:18:31.992789: step: 336/464, loss: 0.02883169986307621 2023-01-24 04:18:32.674064: step: 338/464, loss: 0.045695751905441284 2023-01-24 04:18:33.368010: step: 340/464, loss: 0.019763052463531494 2023-01-24 04:18:34.081808: step: 342/464, loss: 0.07616054266691208 2023-01-24 04:18:34.805684: step: 344/464, loss: 0.06221643462777138 2023-01-24 04:18:35.548964: step: 346/464, loss: 0.07993372529745102 2023-01-24 04:18:36.269908: step: 348/464, loss: 0.02037712186574936 2023-01-24 04:18:37.130485: step: 350/464, loss: 0.17848798632621765 2023-01-24 04:18:37.821409: step: 352/464, loss: 0.06046653166413307 2023-01-24 04:18:38.540128: step: 354/464, loss: 0.05236731842160225 2023-01-24 04:18:39.288828: step: 356/464, loss: 0.019864128902554512 2023-01-24 04:18:40.089580: step: 358/464, loss: 0.025152064859867096 2023-01-24 04:18:40.836365: step: 360/464, loss: 0.18172380328178406 2023-01-24 04:18:41.563857: step: 362/464, loss: 0.06978324800729752 2023-01-24 04:18:42.338410: step: 364/464, loss: 0.09853015094995499 2023-01-24 04:18:43.056152: step: 366/464, loss: 0.051949791610240936 2023-01-24 04:18:43.746179: step: 368/464, loss: 0.0738070160150528 2023-01-24 04:18:44.438157: step: 370/464, loss: 0.017090322449803352 2023-01-24 04:18:45.121430: step: 372/464, loss: 0.04485044628381729 2023-01-24 04:18:45.933923: step: 374/464, loss: 0.09104947000741959 2023-01-24 04:18:46.674398: step: 376/464, loss: 0.021099206060171127 2023-01-24 04:18:47.390383: step: 378/464, loss: 0.40562692284584045 2023-01-24 04:18:48.149434: step: 380/464, loss: 0.10820455104112625 2023-01-24 04:18:48.893753: step: 382/464, loss: 0.06618161499500275 2023-01-24 04:18:49.679841: step: 384/464, loss: 0.1598353087902069 2023-01-24 04:18:50.473945: step: 386/464, loss: 0.049808088690042496 2023-01-24 04:18:51.177931: step: 388/464, loss: 0.07933302223682404 2023-01-24 04:18:51.879200: step: 390/464, loss: 0.02542533352971077 2023-01-24 04:18:52.632142: step: 392/464, loss: 0.04747779667377472 2023-01-24 04:18:53.317697: step: 394/464, loss: 0.02016112394630909 2023-01-24 04:18:54.019641: step: 396/464, loss: 0.025817180052399635 2023-01-24 04:18:54.742846: step: 398/464, loss: 0.06619580835103989 2023-01-24 04:18:55.579921: step: 400/464, loss: 0.20073483884334564 2023-01-24 04:18:56.323157: step: 402/464, loss: 0.17219051718711853 2023-01-24 04:18:57.085262: step: 404/464, loss: 0.021845456212759018 2023-01-24 04:18:57.817197: step: 406/464, loss: 0.06755460798740387 2023-01-24 04:18:58.632749: step: 408/464, loss: 0.037240903824567795 2023-01-24 04:18:59.335101: step: 410/464, loss: 0.07010459899902344 2023-01-24 04:18:59.959083: step: 412/464, loss: 0.11949370056390762 2023-01-24 04:19:00.775755: step: 414/464, loss: 0.10700497776269913 2023-01-24 04:19:01.568229: step: 416/464, loss: 0.1285242736339569 2023-01-24 04:19:02.372537: step: 418/464, loss: 0.09551849216222763 2023-01-24 04:19:03.061303: step: 420/464, loss: 0.05062438175082207 2023-01-24 04:19:03.752984: step: 422/464, loss: 0.1664879471063614 2023-01-24 04:19:04.506160: step: 424/464, loss: 0.037926722317934036 2023-01-24 04:19:05.441009: step: 426/464, loss: 0.2571927011013031 2023-01-24 04:19:06.137941: step: 428/464, loss: 0.036315422505140305 2023-01-24 04:19:06.843315: step: 430/464, loss: 0.024037066847085953 2023-01-24 04:19:07.571459: step: 432/464, loss: 13.031647682189941 2023-01-24 04:19:08.346264: step: 434/464, loss: 0.03742430731654167 2023-01-24 04:19:09.016908: step: 436/464, loss: 0.10749435424804688 2023-01-24 04:19:09.782637: step: 438/464, loss: 0.06732063740491867 2023-01-24 04:19:10.521939: step: 440/464, loss: 0.15107282996177673 2023-01-24 04:19:11.271128: step: 442/464, loss: 0.11087916791439056 2023-01-24 04:19:11.936868: step: 444/464, loss: 0.19436655938625336 2023-01-24 04:19:12.687588: step: 446/464, loss: 0.15645796060562134 2023-01-24 04:19:13.490220: step: 448/464, loss: 0.08497386425733566 2023-01-24 04:19:14.199381: step: 450/464, loss: 0.07939328998327255 2023-01-24 04:19:14.908524: step: 452/464, loss: 0.026771867647767067 2023-01-24 04:19:15.639332: step: 454/464, loss: 0.09241552650928497 2023-01-24 04:19:16.460293: step: 456/464, loss: 0.03661811724305153 2023-01-24 04:19:17.192243: step: 458/464, loss: 0.016207559034228325 2023-01-24 04:19:17.952332: step: 460/464, loss: 0.01745665818452835 2023-01-24 04:19:18.715617: step: 462/464, loss: 0.04709574952721596 2023-01-24 04:19:19.432921: step: 464/464, loss: 0.12045050412416458 2023-01-24 04:19:20.236608: step: 466/464, loss: 0.048567306250333786 2023-01-24 04:19:20.916804: step: 468/464, loss: 0.20824171602725983 2023-01-24 04:19:21.644699: step: 470/464, loss: 0.1283740997314453 2023-01-24 04:19:22.382500: step: 472/464, loss: 0.052708856761455536 2023-01-24 04:19:23.104375: step: 474/464, loss: 0.021493423730134964 2023-01-24 04:19:23.855240: step: 476/464, loss: 0.03765876218676567 2023-01-24 04:19:24.608895: step: 478/464, loss: 0.06767209619283676 2023-01-24 04:19:25.371733: step: 480/464, loss: 0.0993877649307251 2023-01-24 04:19:26.105815: step: 482/464, loss: 0.011112421751022339 2023-01-24 04:19:26.865366: step: 484/464, loss: 0.10992801189422607 2023-01-24 04:19:27.654303: step: 486/464, loss: 0.044933851808309555 2023-01-24 04:19:28.414183: step: 488/464, loss: 0.042952749878168106 2023-01-24 04:19:29.133159: step: 490/464, loss: 0.07052365690469742 2023-01-24 04:19:29.900321: step: 492/464, loss: 0.012943526729941368 2023-01-24 04:19:30.661161: step: 494/464, loss: 0.09424760937690735 2023-01-24 04:19:31.413300: step: 496/464, loss: 0.014014906249940395 2023-01-24 04:19:32.157006: step: 498/464, loss: 0.018368808552622795 2023-01-24 04:19:32.887053: step: 500/464, loss: 0.06157062575221062 2023-01-24 04:19:33.693820: step: 502/464, loss: 0.10935144871473312 2023-01-24 04:19:34.411756: step: 504/464, loss: 0.04085175320506096 2023-01-24 04:19:35.109872: step: 506/464, loss: 0.04039693623781204 2023-01-24 04:19:35.889821: step: 508/464, loss: 0.0954434722661972 2023-01-24 04:19:36.675970: step: 510/464, loss: 0.054345130920410156 2023-01-24 04:19:37.429774: step: 512/464, loss: 0.06101298704743385 2023-01-24 04:19:38.135516: step: 514/464, loss: 0.09147502481937408 2023-01-24 04:19:38.862507: step: 516/464, loss: 0.36184293031692505 2023-01-24 04:19:39.637564: step: 518/464, loss: 0.019469410181045532 2023-01-24 04:19:40.433299: step: 520/464, loss: 0.20030872523784637 2023-01-24 04:19:41.222241: step: 522/464, loss: 0.8680578470230103 2023-01-24 04:19:41.936115: step: 524/464, loss: 0.6503241062164307 2023-01-24 04:19:42.650993: step: 526/464, loss: 0.1460971087217331 2023-01-24 04:19:43.389292: step: 528/464, loss: 0.009627019986510277 2023-01-24 04:19:44.145119: step: 530/464, loss: 0.06993068754673004 2023-01-24 04:19:44.898309: step: 532/464, loss: 0.14353105425834656 2023-01-24 04:19:45.628808: step: 534/464, loss: 0.058041051030159 2023-01-24 04:19:46.416849: step: 536/464, loss: 0.2680377960205078 2023-01-24 04:19:47.168169: step: 538/464, loss: 0.036180466413497925 2023-01-24 04:19:47.935673: step: 540/464, loss: 0.07975853234529495 2023-01-24 04:19:48.696830: step: 542/464, loss: 0.06441237032413483 2023-01-24 04:19:49.431860: step: 544/464, loss: 0.02542603202164173 2023-01-24 04:19:50.093953: step: 546/464, loss: 0.19588123261928558 2023-01-24 04:19:50.880977: step: 548/464, loss: 0.05832467973232269 2023-01-24 04:19:51.678235: step: 550/464, loss: 0.057443540543317795 2023-01-24 04:19:52.498763: step: 552/464, loss: 0.35340365767478943 2023-01-24 04:19:53.187395: step: 554/464, loss: 0.1496967375278473 2023-01-24 04:19:53.913837: step: 556/464, loss: 0.08306215703487396 2023-01-24 04:19:54.644741: step: 558/464, loss: 0.05035701021552086 2023-01-24 04:19:55.387158: step: 560/464, loss: 0.14342284202575684 2023-01-24 04:19:56.068583: step: 562/464, loss: 0.18788276612758636 2023-01-24 04:19:56.830004: step: 564/464, loss: 0.07673000544309616 2023-01-24 04:19:57.558825: step: 566/464, loss: 0.015498715452849865 2023-01-24 04:19:58.229768: step: 568/464, loss: 0.04238492622971535 2023-01-24 04:19:58.998837: step: 570/464, loss: 0.046905532479286194 2023-01-24 04:19:59.727796: step: 572/464, loss: 0.03854568675160408 2023-01-24 04:20:00.544415: step: 574/464, loss: 0.20472976565361023 2023-01-24 04:20:01.295491: step: 576/464, loss: 0.05854792892932892 2023-01-24 04:20:02.052536: step: 578/464, loss: 0.05327191203832626 2023-01-24 04:20:02.855231: step: 580/464, loss: 0.049676697701215744 2023-01-24 04:20:03.703468: step: 582/464, loss: 0.047542087733745575 2023-01-24 04:20:04.469087: step: 584/464, loss: 0.07915417104959488 2023-01-24 04:20:05.198764: step: 586/464, loss: 0.05390486866235733 2023-01-24 04:20:05.949347: step: 588/464, loss: 0.06481099873781204 2023-01-24 04:20:06.661530: step: 590/464, loss: 0.10352101922035217 2023-01-24 04:20:07.400800: step: 592/464, loss: 0.10283131152391434 2023-01-24 04:20:08.097748: step: 594/464, loss: 0.06312693655490875 2023-01-24 04:20:08.825732: step: 596/464, loss: 0.032400377094745636 2023-01-24 04:20:09.557321: step: 598/464, loss: 0.04646240547299385 2023-01-24 04:20:10.289074: step: 600/464, loss: 0.08965610712766647 2023-01-24 04:20:11.013689: step: 602/464, loss: 0.12899154424667358 2023-01-24 04:20:11.849041: step: 604/464, loss: 0.05910657346248627 2023-01-24 04:20:12.648702: step: 606/464, loss: 0.029358960688114166 2023-01-24 04:20:13.365120: step: 608/464, loss: 0.053024690598249435 2023-01-24 04:20:14.139383: step: 610/464, loss: 0.1587500274181366 2023-01-24 04:20:14.881450: step: 612/464, loss: 0.03578795865178108 2023-01-24 04:20:15.563176: step: 614/464, loss: 0.0607665553689003 2023-01-24 04:20:16.242455: step: 616/464, loss: 0.023167381063103676 2023-01-24 04:20:16.980462: step: 618/464, loss: 0.19250163435935974 2023-01-24 04:20:17.675489: step: 620/464, loss: 0.03690149635076523 2023-01-24 04:20:18.443324: step: 622/464, loss: 0.07546142488718033 2023-01-24 04:20:19.188296: step: 624/464, loss: 0.04564463347196579 2023-01-24 04:20:19.959529: step: 626/464, loss: 0.045563727617263794 2023-01-24 04:20:20.716056: step: 628/464, loss: 0.12343955785036087 2023-01-24 04:20:21.524352: step: 630/464, loss: 0.057827290147542953 2023-01-24 04:20:22.303400: step: 632/464, loss: 0.02587360516190529 2023-01-24 04:20:23.064182: step: 634/464, loss: 0.2215512990951538 2023-01-24 04:20:23.864075: step: 636/464, loss: 0.020450718700885773 2023-01-24 04:20:24.551532: step: 638/464, loss: 0.08267145603895187 2023-01-24 04:20:25.316921: step: 640/464, loss: 0.14544984698295593 2023-01-24 04:20:26.153198: step: 642/464, loss: 0.02726486511528492 2023-01-24 04:20:26.913984: step: 644/464, loss: 0.058721382170915604 2023-01-24 04:20:27.704632: step: 646/464, loss: 0.11252403259277344 2023-01-24 04:20:28.474163: step: 648/464, loss: 0.05572717636823654 2023-01-24 04:20:29.241302: step: 650/464, loss: 0.018893899396061897 2023-01-24 04:20:30.008887: step: 652/464, loss: 0.09553354978561401 2023-01-24 04:20:30.705774: step: 654/464, loss: 0.06263506412506104 2023-01-24 04:20:31.443816: step: 656/464, loss: 0.010931789875030518 2023-01-24 04:20:32.135933: step: 658/464, loss: 0.004481497686356306 2023-01-24 04:20:32.873502: step: 660/464, loss: 0.0381837897002697 2023-01-24 04:20:33.639750: step: 662/464, loss: 0.25678524374961853 2023-01-24 04:20:34.374391: step: 664/464, loss: 0.031600549817085266 2023-01-24 04:20:35.100418: step: 666/464, loss: 0.17737647891044617 2023-01-24 04:20:35.792592: step: 668/464, loss: 0.08674076944589615 2023-01-24 04:20:36.504266: step: 670/464, loss: 0.025893433019518852 2023-01-24 04:20:37.296649: step: 672/464, loss: 0.04026762768626213 2023-01-24 04:20:38.059583: step: 674/464, loss: 0.048054732382297516 2023-01-24 04:20:38.775523: step: 676/464, loss: 0.04531051591038704 2023-01-24 04:20:39.592767: step: 678/464, loss: 0.15182489156723022 2023-01-24 04:20:40.379908: step: 680/464, loss: 0.05073189735412598 2023-01-24 04:20:41.196891: step: 682/464, loss: 0.10138265043497086 2023-01-24 04:20:41.930272: step: 684/464, loss: 0.01679018884897232 2023-01-24 04:20:42.703858: step: 686/464, loss: 0.2979933023452759 2023-01-24 04:20:43.432870: step: 688/464, loss: 0.05901891365647316 2023-01-24 04:20:44.122645: step: 690/464, loss: 0.03939371928572655 2023-01-24 04:20:44.919376: step: 692/464, loss: 0.05838843062520027 2023-01-24 04:20:45.819297: step: 694/464, loss: 0.038581814616918564 2023-01-24 04:20:46.640859: step: 696/464, loss: 1.0980371236801147 2023-01-24 04:20:47.368439: step: 698/464, loss: 0.0928528904914856 2023-01-24 04:20:48.062895: step: 700/464, loss: 0.18491260707378387 2023-01-24 04:20:48.760232: step: 702/464, loss: 0.03720412403345108 2023-01-24 04:20:49.491919: step: 704/464, loss: 0.025067999958992004 2023-01-24 04:20:50.192289: step: 706/464, loss: 0.07839228212833405 2023-01-24 04:20:50.932929: step: 708/464, loss: 0.07519470900297165 2023-01-24 04:20:51.697908: step: 710/464, loss: 0.11080680787563324 2023-01-24 04:20:52.495073: step: 712/464, loss: 0.0394955612719059 2023-01-24 04:20:53.229355: step: 714/464, loss: 0.020085155963897705 2023-01-24 04:20:53.960886: step: 716/464, loss: 0.13045024871826172 2023-01-24 04:20:54.806183: step: 718/464, loss: 0.027165353298187256 2023-01-24 04:20:55.558922: step: 720/464, loss: 0.08542001247406006 2023-01-24 04:20:56.218892: step: 722/464, loss: 0.2847355604171753 2023-01-24 04:20:57.000972: step: 724/464, loss: 0.03878088667988777 2023-01-24 04:20:57.765980: step: 726/464, loss: 0.49133923649787903 2023-01-24 04:20:58.462011: step: 728/464, loss: 0.046279340982437134 2023-01-24 04:20:59.186593: step: 730/464, loss: 0.030544577166438103 2023-01-24 04:20:59.998848: step: 732/464, loss: 0.03781994804739952 2023-01-24 04:21:00.745024: step: 734/464, loss: 0.057049937546253204 2023-01-24 04:21:01.511021: step: 736/464, loss: 0.03740621358156204 2023-01-24 04:21:02.316949: step: 738/464, loss: 0.10029034316539764 2023-01-24 04:21:02.958544: step: 740/464, loss: 0.012559827417135239 2023-01-24 04:21:03.755554: step: 742/464, loss: 0.016750726848840714 2023-01-24 04:21:04.448126: step: 744/464, loss: 0.2108759582042694 2023-01-24 04:21:05.180574: step: 746/464, loss: 0.035853851586580276 2023-01-24 04:21:05.858693: step: 748/464, loss: 0.43498602509498596 2023-01-24 04:21:06.577023: step: 750/464, loss: 0.13186833262443542 2023-01-24 04:21:07.299435: step: 752/464, loss: 0.06522515416145325 2023-01-24 04:21:08.006078: step: 754/464, loss: 0.030804447829723358 2023-01-24 04:21:08.694699: step: 756/464, loss: 0.16361655294895172 2023-01-24 04:21:09.434779: step: 758/464, loss: 0.20067808032035828 2023-01-24 04:21:10.188383: step: 760/464, loss: 0.8925706148147583 2023-01-24 04:21:10.903170: step: 762/464, loss: 0.08398979157209396 2023-01-24 04:21:11.671357: step: 764/464, loss: 0.08976228535175323 2023-01-24 04:21:12.424665: step: 766/464, loss: 0.024204669520258904 2023-01-24 04:21:13.111470: step: 768/464, loss: 0.04503363370895386 2023-01-24 04:21:13.779398: step: 770/464, loss: 0.08009073883295059 2023-01-24 04:21:14.614202: step: 772/464, loss: 0.10919658839702606 2023-01-24 04:21:15.340979: step: 774/464, loss: 0.09016293287277222 2023-01-24 04:21:16.053980: step: 776/464, loss: 0.13644663989543915 2023-01-24 04:21:16.844996: step: 778/464, loss: 0.07259364426136017 2023-01-24 04:21:17.573218: step: 780/464, loss: 0.17036207020282745 2023-01-24 04:21:18.303712: step: 782/464, loss: 0.13564035296440125 2023-01-24 04:21:19.075020: step: 784/464, loss: 0.08406843990087509 2023-01-24 04:21:19.850787: step: 786/464, loss: 0.136801615357399 2023-01-24 04:21:20.645051: step: 788/464, loss: 0.13680937886238098 2023-01-24 04:21:21.540959: step: 790/464, loss: 0.09460506588220596 2023-01-24 04:21:22.319919: step: 792/464, loss: 0.13996107876300812 2023-01-24 04:21:23.069592: step: 794/464, loss: 0.06452802568674088 2023-01-24 04:21:23.875062: step: 796/464, loss: 0.1253369301557541 2023-01-24 04:21:24.596377: step: 798/464, loss: 0.04280988872051239 2023-01-24 04:21:25.322472: step: 800/464, loss: 0.022860821336507797 2023-01-24 04:21:26.035345: step: 802/464, loss: 0.05299188196659088 2023-01-24 04:21:26.852963: step: 804/464, loss: 0.13382039964199066 2023-01-24 04:21:27.568800: step: 806/464, loss: 0.14443878829479218 2023-01-24 04:21:28.345606: step: 808/464, loss: 0.033110111951828 2023-01-24 04:21:29.165657: step: 810/464, loss: 0.05904128775000572 2023-01-24 04:21:29.867967: step: 812/464, loss: 0.044445887207984924 2023-01-24 04:21:30.594300: step: 814/464, loss: 0.16334925591945648 2023-01-24 04:21:31.449897: step: 816/464, loss: 0.034357622265815735 2023-01-24 04:21:32.127864: step: 818/464, loss: 0.021517088636755943 2023-01-24 04:21:32.858549: step: 820/464, loss: 0.0479215607047081 2023-01-24 04:21:33.640921: step: 822/464, loss: 0.057476677000522614 2023-01-24 04:21:34.384768: step: 824/464, loss: 0.03518066182732582 2023-01-24 04:21:35.068531: step: 826/464, loss: 0.03787121921777725 2023-01-24 04:21:35.790442: step: 828/464, loss: 0.315551221370697 2023-01-24 04:21:36.505992: step: 830/464, loss: 0.28821465373039246 2023-01-24 04:21:37.201599: step: 832/464, loss: 0.018016284331679344 2023-01-24 04:21:37.927461: step: 834/464, loss: 0.04078349843621254 2023-01-24 04:21:38.640221: step: 836/464, loss: 0.6406852006912231 2023-01-24 04:21:39.395906: step: 838/464, loss: 0.08561952412128448 2023-01-24 04:21:40.158892: step: 840/464, loss: 0.04118037223815918 2023-01-24 04:21:40.938224: step: 842/464, loss: 0.06859776377677917 2023-01-24 04:21:41.720000: step: 844/464, loss: 0.06416252255439758 2023-01-24 04:21:42.526818: step: 846/464, loss: 0.0812632292509079 2023-01-24 04:21:43.254502: step: 848/464, loss: 0.4164171516895294 2023-01-24 04:21:43.952695: step: 850/464, loss: 0.15695109963417053 2023-01-24 04:21:44.651476: step: 852/464, loss: 0.2507461905479431 2023-01-24 04:21:45.363040: step: 854/464, loss: 0.14173288643360138 2023-01-24 04:21:46.120190: step: 856/464, loss: 0.07978172600269318 2023-01-24 04:21:46.799361: step: 858/464, loss: 0.036435868591070175 2023-01-24 04:21:47.606926: step: 860/464, loss: 0.026326339691877365 2023-01-24 04:21:48.404709: step: 862/464, loss: 0.08801314234733582 2023-01-24 04:21:49.172070: step: 864/464, loss: 0.17660625278949738 2023-01-24 04:21:49.909049: step: 866/464, loss: 0.06663139164447784 2023-01-24 04:21:50.650272: step: 868/464, loss: 0.041380446404218674 2023-01-24 04:21:51.403398: step: 870/464, loss: 0.10867740958929062 2023-01-24 04:21:52.081504: step: 872/464, loss: 0.06529152393341064 2023-01-24 04:21:52.776109: step: 874/464, loss: 0.12430823594331741 2023-01-24 04:21:53.507911: step: 876/464, loss: 0.057288605719804764 2023-01-24 04:21:54.347349: step: 878/464, loss: 0.06806528568267822 2023-01-24 04:21:54.970582: step: 880/464, loss: 0.013468866236507893 2023-01-24 04:21:55.691042: step: 882/464, loss: 0.03978579863905907 2023-01-24 04:21:56.513486: step: 884/464, loss: 0.436234712600708 2023-01-24 04:21:57.311833: step: 886/464, loss: 0.011038696393370628 2023-01-24 04:21:58.028676: step: 888/464, loss: 0.18119382858276367 2023-01-24 04:21:58.801875: step: 890/464, loss: 0.1493879109621048 2023-01-24 04:21:59.567556: step: 892/464, loss: 0.16955973207950592 2023-01-24 04:22:00.295031: step: 894/464, loss: 0.08893486857414246 2023-01-24 04:22:00.997728: step: 896/464, loss: 0.04885321855545044 2023-01-24 04:22:01.801443: step: 898/464, loss: 0.03767932206392288 2023-01-24 04:22:02.613237: step: 900/464, loss: 0.22809790074825287 2023-01-24 04:22:03.505421: step: 902/464, loss: 0.07466711103916168 2023-01-24 04:22:04.285592: step: 904/464, loss: 0.08488618582487106 2023-01-24 04:22:05.123416: step: 906/464, loss: 0.1496511995792389 2023-01-24 04:22:05.980910: step: 908/464, loss: 0.03573276102542877 2023-01-24 04:22:06.652482: step: 910/464, loss: 0.05416325107216835 2023-01-24 04:22:07.312806: step: 912/464, loss: 0.04344616085290909 2023-01-24 04:22:08.107044: step: 914/464, loss: 0.07182371616363525 2023-01-24 04:22:08.830314: step: 916/464, loss: 0.021106265485286713 2023-01-24 04:22:09.521928: step: 918/464, loss: 0.2376040667295456 2023-01-24 04:22:10.264428: step: 920/464, loss: 0.08329570293426514 2023-01-24 04:22:10.920075: step: 922/464, loss: 0.030238067731261253 2023-01-24 04:22:11.570593: step: 924/464, loss: 0.05236712470650673 2023-01-24 04:22:12.278857: step: 926/464, loss: 0.05165240168571472 2023-01-24 04:22:13.012795: step: 928/464, loss: 0.05135872960090637 2023-01-24 04:22:13.698103: step: 930/464, loss: 0.03845001384615898 ================================================== Loss: 0.134 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3406595089959061, 'r': 0.33613461988210847, 'f1': 0.3383819382576335}, 'combined': 0.2493340597687826, 'epoch': 19} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31892723655107225, 'r': 0.27354628589558366, 'f1': 0.2944987673684369}, 'combined': 0.18289923447092396, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31307230128893665, 'r': 0.3160426267281106, 'f1': 0.31455045190880887}, 'combined': 0.2317740171959644, 'epoch': 19} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.320975394273592, 'r': 0.26806039837656354, 'f1': 0.29214113346472176}, 'combined': 0.18143501973072196, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33704892682287113, 'r': 0.33001374998216604, 'f1': 0.33349424015455703}, 'combined': 0.24573259800862096, 'epoch': 19} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3269889263479404, 'r': 0.2741552476079776, 'f1': 0.2982503497557313}, 'combined': 0.18522916458513838, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.33482142857142855, 'r': 0.26785714285714285, 'f1': 0.2976190476190476}, 'combined': 0.1984126984126984, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26666666666666666, 'r': 0.34782608695652173, 'f1': 0.30188679245283023}, 'combined': 0.15094339622641512, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47014170040485825, 'r': 0.21075317604355714, 'f1': 0.29104010025062654}, 'combined': 0.1940267335004177, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:24:52.907995: step: 2/464, loss: 0.01712164096534252 2023-01-24 04:24:53.626986: step: 4/464, loss: 0.05320145562291145 2023-01-24 04:24:54.402996: step: 6/464, loss: 0.003598960814997554 2023-01-24 04:24:55.092017: step: 8/464, loss: 0.05244593694806099 2023-01-24 04:24:55.832463: step: 10/464, loss: 0.130269393324852 2023-01-24 04:24:56.638497: step: 12/464, loss: 0.02787611447274685 2023-01-24 04:24:57.356088: step: 14/464, loss: 0.0313459113240242 2023-01-24 04:24:58.068748: step: 16/464, loss: 0.018669242039322853 2023-01-24 04:24:58.833413: step: 18/464, loss: 0.03460734337568283 2023-01-24 04:24:59.660011: step: 20/464, loss: 0.05031322315335274 2023-01-24 04:25:00.389050: step: 22/464, loss: 0.10367558896541595 2023-01-24 04:25:01.218810: step: 24/464, loss: 0.045394621789455414 2023-01-24 04:25:01.998712: step: 26/464, loss: 0.013776100240647793 2023-01-24 04:25:02.701168: step: 28/464, loss: 0.05621929094195366 2023-01-24 04:25:03.462010: step: 30/464, loss: 0.017570164054632187 2023-01-24 04:25:04.188553: step: 32/464, loss: 0.06579134613275528 2023-01-24 04:25:04.936569: step: 34/464, loss: 0.01885702833533287 2023-01-24 04:25:05.620633: step: 36/464, loss: 0.005795876029878855 2023-01-24 04:25:06.315722: step: 38/464, loss: 0.3700122833251953 2023-01-24 04:25:07.038583: step: 40/464, loss: 0.060654789209365845 2023-01-24 04:25:07.858429: step: 42/464, loss: 0.09823274612426758 2023-01-24 04:25:08.556127: step: 44/464, loss: 0.05419113114476204 2023-01-24 04:25:09.227707: step: 46/464, loss: 0.05794157832860947 2023-01-24 04:25:09.916541: step: 48/464, loss: 0.053394392132759094 2023-01-24 04:25:10.657012: step: 50/464, loss: 0.06811627000570297 2023-01-24 04:25:11.430810: step: 52/464, loss: 0.006924196146428585 2023-01-24 04:25:12.196904: step: 54/464, loss: 0.011313280090689659 2023-01-24 04:25:12.962018: step: 56/464, loss: 0.02086627669632435 2023-01-24 04:25:13.796929: step: 58/464, loss: 0.06859908252954483 2023-01-24 04:25:14.519358: step: 60/464, loss: 0.048081107437610626 2023-01-24 04:25:15.277327: step: 62/464, loss: 0.2862105071544647 2023-01-24 04:25:16.021830: step: 64/464, loss: 0.04191207513213158 2023-01-24 04:25:16.775526: step: 66/464, loss: 0.48177024722099304 2023-01-24 04:25:17.465173: step: 68/464, loss: 0.14845621585845947 2023-01-24 04:25:18.119703: step: 70/464, loss: 0.04261395335197449 2023-01-24 04:25:18.796066: step: 72/464, loss: 0.1031220480799675 2023-01-24 04:25:19.703962: step: 74/464, loss: 0.03773185983300209 2023-01-24 04:25:20.407836: step: 76/464, loss: 0.0502183698117733 2023-01-24 04:25:21.172199: step: 78/464, loss: 0.047639064490795135 2023-01-24 04:25:21.979014: step: 80/464, loss: 0.054580919444561005 2023-01-24 04:25:22.752878: step: 82/464, loss: 0.05993421748280525 2023-01-24 04:25:23.518273: step: 84/464, loss: 0.06883340328931808 2023-01-24 04:25:24.200686: step: 86/464, loss: 0.15197524428367615 2023-01-24 04:25:24.977273: step: 88/464, loss: 0.11038285493850708 2023-01-24 04:25:25.657658: step: 90/464, loss: 0.00881672278046608 2023-01-24 04:25:26.394356: step: 92/464, loss: 0.0875900387763977 2023-01-24 04:25:27.138766: step: 94/464, loss: 0.3431815803050995 2023-01-24 04:25:27.830521: step: 96/464, loss: 0.017735961824655533 2023-01-24 04:25:28.594286: step: 98/464, loss: 0.05344702675938606 2023-01-24 04:25:29.308432: step: 100/464, loss: 0.05173359811306 2023-01-24 04:25:30.023503: step: 102/464, loss: 0.033794596791267395 2023-01-24 04:25:30.720277: step: 104/464, loss: 0.40232419967651367 2023-01-24 04:25:31.425447: step: 106/464, loss: 0.026458468288183212 2023-01-24 04:25:32.132548: step: 108/464, loss: 0.02863013930618763 2023-01-24 04:25:32.916876: step: 110/464, loss: 0.2328375279903412 2023-01-24 04:25:33.611389: step: 112/464, loss: 0.05186981335282326 2023-01-24 04:25:34.314300: step: 114/464, loss: 0.1496034860610962 2023-01-24 04:25:35.068755: step: 116/464, loss: 0.05353805795311928 2023-01-24 04:25:35.746641: step: 118/464, loss: 0.02698923647403717 2023-01-24 04:25:36.533246: step: 120/464, loss: 0.07385917752981186 2023-01-24 04:25:37.204421: step: 122/464, loss: 0.043923210352659225 2023-01-24 04:25:37.987640: step: 124/464, loss: 0.08073197305202484 2023-01-24 04:25:38.699809: step: 126/464, loss: 0.008128171786665916 2023-01-24 04:25:39.418002: step: 128/464, loss: 0.08548290282487869 2023-01-24 04:25:40.139967: step: 130/464, loss: 0.00616619223728776 2023-01-24 04:25:40.968955: step: 132/464, loss: 0.13567934930324554 2023-01-24 04:25:41.656018: step: 134/464, loss: 0.004059022758156061 2023-01-24 04:25:42.498901: step: 136/464, loss: 0.052950870245695114 2023-01-24 04:25:43.275024: step: 138/464, loss: 0.0192416962236166 2023-01-24 04:25:44.025251: step: 140/464, loss: 0.052168261259794235 2023-01-24 04:25:44.812885: step: 142/464, loss: 0.0380428284406662 2023-01-24 04:25:45.579922: step: 144/464, loss: 0.034303802996873856 2023-01-24 04:25:46.339853: step: 146/464, loss: 0.13178157806396484 2023-01-24 04:25:47.095104: step: 148/464, loss: 0.0370471328496933 2023-01-24 04:25:47.859748: step: 150/464, loss: 0.07469688355922699 2023-01-24 04:25:48.595632: step: 152/464, loss: 0.02738485112786293 2023-01-24 04:25:49.293142: step: 154/464, loss: 0.03580975532531738 2023-01-24 04:25:50.044368: step: 156/464, loss: 0.07651842385530472 2023-01-24 04:25:50.801696: step: 158/464, loss: 0.22850660979747772 2023-01-24 04:25:51.543942: step: 160/464, loss: 0.12235674262046814 2023-01-24 04:25:52.272518: step: 162/464, loss: 0.013990739360451698 2023-01-24 04:25:53.011155: step: 164/464, loss: 0.0508531779050827 2023-01-24 04:25:53.695100: step: 166/464, loss: 0.005535896867513657 2023-01-24 04:25:54.435589: step: 168/464, loss: 0.05309152603149414 2023-01-24 04:25:55.198853: step: 170/464, loss: 0.023079494014382362 2023-01-24 04:25:55.924620: step: 172/464, loss: 0.14203958213329315 2023-01-24 04:25:56.641808: step: 174/464, loss: 0.020587345585227013 2023-01-24 04:25:57.360850: step: 176/464, loss: 0.39844274520874023 2023-01-24 04:25:58.049323: step: 178/464, loss: 0.03402119129896164 2023-01-24 04:25:58.793844: step: 180/464, loss: 0.16260108351707458 2023-01-24 04:25:59.550560: step: 182/464, loss: 0.03452138602733612 2023-01-24 04:26:00.368272: step: 184/464, loss: 0.16593848168849945 2023-01-24 04:26:01.172224: step: 186/464, loss: 0.08963475376367569 2023-01-24 04:26:01.937849: step: 188/464, loss: 0.023618275299668312 2023-01-24 04:26:02.659023: step: 190/464, loss: 0.024404142051935196 2023-01-24 04:26:03.354443: step: 192/464, loss: 0.005555496551096439 2023-01-24 04:26:04.131302: step: 194/464, loss: 0.06363875418901443 2023-01-24 04:26:04.856116: step: 196/464, loss: 0.05725649371743202 2023-01-24 04:26:05.661517: step: 198/464, loss: 0.32381248474121094 2023-01-24 04:26:06.346560: step: 200/464, loss: 0.014517308212816715 2023-01-24 04:26:07.051994: step: 202/464, loss: 0.050813108682632446 2023-01-24 04:26:07.736241: step: 204/464, loss: 0.10932115465402603 2023-01-24 04:26:08.623139: step: 206/464, loss: 0.04808484762907028 2023-01-24 04:26:09.332665: step: 208/464, loss: 0.046366311609745026 2023-01-24 04:26:10.053401: step: 210/464, loss: 0.12464817613363266 2023-01-24 04:26:10.774281: step: 212/464, loss: 0.033209338784217834 2023-01-24 04:26:11.491303: step: 214/464, loss: 0.044971611350774765 2023-01-24 04:26:12.300102: step: 216/464, loss: 0.11272618174552917 2023-01-24 04:26:13.026308: step: 218/464, loss: 0.10156702995300293 2023-01-24 04:26:13.835006: step: 220/464, loss: 0.04471761733293533 2023-01-24 04:26:14.595887: step: 222/464, loss: 0.026782341301441193 2023-01-24 04:26:15.316784: step: 224/464, loss: 0.01698833890259266 2023-01-24 04:26:16.100713: step: 226/464, loss: 0.322632372379303 2023-01-24 04:26:16.927040: step: 228/464, loss: 0.09721288830041885 2023-01-24 04:26:17.697818: step: 230/464, loss: 0.059638142585754395 2023-01-24 04:26:18.419465: step: 232/464, loss: 0.018348954617977142 2023-01-24 04:26:19.284699: step: 234/464, loss: 0.028666546568274498 2023-01-24 04:26:19.970277: step: 236/464, loss: 0.03871145844459534 2023-01-24 04:26:20.677243: step: 238/464, loss: 0.011665408499538898 2023-01-24 04:26:21.358880: step: 240/464, loss: 0.04983703792095184 2023-01-24 04:26:22.075640: step: 242/464, loss: 0.05028875172138214 2023-01-24 04:26:22.751203: step: 244/464, loss: 0.003809570102021098 2023-01-24 04:26:23.552058: step: 246/464, loss: 0.09055406600236893 2023-01-24 04:26:24.265383: step: 248/464, loss: 0.028566665947437286 2023-01-24 04:26:25.072972: step: 250/464, loss: 0.10694783926010132 2023-01-24 04:26:25.833749: step: 252/464, loss: 0.6403794288635254 2023-01-24 04:26:26.506247: step: 254/464, loss: 0.02813796140253544 2023-01-24 04:26:27.301707: step: 256/464, loss: 0.044382158666849136 2023-01-24 04:26:28.108269: step: 258/464, loss: 0.021823404356837273 2023-01-24 04:26:28.754935: step: 260/464, loss: 0.0053736069239676 2023-01-24 04:26:29.524434: step: 262/464, loss: 1.5771262645721436 2023-01-24 04:26:30.205885: step: 264/464, loss: 0.01660967990756035 2023-01-24 04:26:30.969420: step: 266/464, loss: 0.0449100024998188 2023-01-24 04:26:31.786795: step: 268/464, loss: 0.056982915848493576 2023-01-24 04:26:32.522188: step: 270/464, loss: 0.17105650901794434 2023-01-24 04:26:33.212670: step: 272/464, loss: 0.004213482141494751 2023-01-24 04:26:33.918993: step: 274/464, loss: 0.1180211529135704 2023-01-24 04:26:34.757799: step: 276/464, loss: 0.1284855455160141 2023-01-24 04:26:35.542685: step: 278/464, loss: 0.09054433554410934 2023-01-24 04:26:36.268876: step: 280/464, loss: 0.07391920685768127 2023-01-24 04:26:37.013638: step: 282/464, loss: 0.09057587385177612 2023-01-24 04:26:37.776784: step: 284/464, loss: 0.006217170972377062 2023-01-24 04:26:38.550605: step: 286/464, loss: 0.06684119254350662 2023-01-24 04:26:39.263263: step: 288/464, loss: 0.11914774030447006 2023-01-24 04:26:40.045499: step: 290/464, loss: 0.02932037226855755 2023-01-24 04:26:40.754006: step: 292/464, loss: 0.04236677289009094 2023-01-24 04:26:41.500138: step: 294/464, loss: 0.05178089812397957 2023-01-24 04:26:42.249965: step: 296/464, loss: 0.06522136926651001 2023-01-24 04:26:43.020510: step: 298/464, loss: 0.13979460299015045 2023-01-24 04:26:43.761511: step: 300/464, loss: 0.017481815069913864 2023-01-24 04:26:44.522911: step: 302/464, loss: 0.03961453214287758 2023-01-24 04:26:45.251575: step: 304/464, loss: 0.31653302907943726 2023-01-24 04:26:46.058489: step: 306/464, loss: 0.05984179303050041 2023-01-24 04:26:46.839395: step: 308/464, loss: 0.039903104305267334 2023-01-24 04:26:47.556011: step: 310/464, loss: 0.27464759349823 2023-01-24 04:26:48.257374: step: 312/464, loss: 0.11150558292865753 2023-01-24 04:26:48.992543: step: 314/464, loss: 0.046920839697122574 2023-01-24 04:26:49.754342: step: 316/464, loss: 0.011616310104727745 2023-01-24 04:26:50.595517: step: 318/464, loss: 0.06396178901195526 2023-01-24 04:26:51.298391: step: 320/464, loss: 0.22413629293441772 2023-01-24 04:26:51.989733: step: 322/464, loss: 0.057452570647001266 2023-01-24 04:26:52.716927: step: 324/464, loss: 0.06499453634023666 2023-01-24 04:26:53.535738: step: 326/464, loss: 0.027838967740535736 2023-01-24 04:26:54.284650: step: 328/464, loss: 0.4065115451812744 2023-01-24 04:26:54.983288: step: 330/464, loss: 0.03231246396899223 2023-01-24 04:26:55.785985: step: 332/464, loss: 0.2747350335121155 2023-01-24 04:26:56.485293: step: 334/464, loss: 0.753449559211731 2023-01-24 04:26:57.279842: step: 336/464, loss: 0.2993898391723633 2023-01-24 04:26:57.998396: step: 338/464, loss: 0.20569021999835968 2023-01-24 04:26:58.786244: step: 340/464, loss: 0.5489237904548645 2023-01-24 04:26:59.467338: step: 342/464, loss: 0.09590140730142593 2023-01-24 04:27:00.252611: step: 344/464, loss: 0.04386964812874794 2023-01-24 04:27:00.955220: step: 346/464, loss: 0.05596918985247612 2023-01-24 04:27:01.708465: step: 348/464, loss: 0.08769191056489944 2023-01-24 04:27:02.463785: step: 350/464, loss: 0.05492006987333298 2023-01-24 04:27:03.128960: step: 352/464, loss: 0.055251482874155045 2023-01-24 04:27:03.882572: step: 354/464, loss: 0.02360299788415432 2023-01-24 04:27:04.628874: step: 356/464, loss: 0.059911515563726425 2023-01-24 04:27:05.331258: step: 358/464, loss: 0.017682574689388275 2023-01-24 04:27:06.066997: step: 360/464, loss: 0.013219114392995834 2023-01-24 04:27:06.884150: step: 362/464, loss: 0.04908164218068123 2023-01-24 04:27:07.643771: step: 364/464, loss: 0.17003220319747925 2023-01-24 04:27:08.345255: step: 366/464, loss: 0.021513281390070915 2023-01-24 04:27:09.037275: step: 368/464, loss: 0.02743382751941681 2023-01-24 04:27:09.831390: step: 370/464, loss: 0.02264592982828617 2023-01-24 04:27:10.609116: step: 372/464, loss: 0.015140403062105179 2023-01-24 04:27:11.356670: step: 374/464, loss: 0.06058161333203316 2023-01-24 04:27:12.144635: step: 376/464, loss: 0.4133065938949585 2023-01-24 04:27:12.862864: step: 378/464, loss: 0.1243203729391098 2023-01-24 04:27:13.553987: step: 380/464, loss: 0.14328400790691376 2023-01-24 04:27:14.336636: step: 382/464, loss: 0.02986525557935238 2023-01-24 04:27:15.136884: step: 384/464, loss: 0.15979009866714478 2023-01-24 04:27:15.996905: step: 386/464, loss: 0.020061299204826355 2023-01-24 04:27:16.769460: step: 388/464, loss: 0.0972837433218956 2023-01-24 04:27:17.508080: step: 390/464, loss: 0.040774136781692505 2023-01-24 04:27:18.189392: step: 392/464, loss: 0.03275227174162865 2023-01-24 04:27:18.878350: step: 394/464, loss: 0.035346053540706635 2023-01-24 04:27:19.661441: step: 396/464, loss: 0.05919338017702103 2023-01-24 04:27:20.438585: step: 398/464, loss: 0.06316707283258438 2023-01-24 04:27:21.189118: step: 400/464, loss: 0.007029821164906025 2023-01-24 04:27:22.047491: step: 402/464, loss: 0.033194344490766525 2023-01-24 04:27:22.873964: step: 404/464, loss: 0.08716045320034027 2023-01-24 04:27:23.704568: step: 406/464, loss: 0.14590072631835938 2023-01-24 04:27:24.405221: step: 408/464, loss: 0.07667405158281326 2023-01-24 04:27:25.087792: step: 410/464, loss: 0.9854905009269714 2023-01-24 04:27:25.928727: step: 412/464, loss: 0.04124729707837105 2023-01-24 04:27:26.705850: step: 414/464, loss: 0.4641401171684265 2023-01-24 04:27:27.404919: step: 416/464, loss: 0.05213777348399162 2023-01-24 04:27:28.144873: step: 418/464, loss: 0.041891228407621384 2023-01-24 04:27:28.871385: step: 420/464, loss: 0.10602536052465439 2023-01-24 04:27:29.569617: step: 422/464, loss: 0.031055880710482597 2023-01-24 04:27:30.330453: step: 424/464, loss: 0.046828266233205795 2023-01-24 04:27:30.987412: step: 426/464, loss: 0.016272274777293205 2023-01-24 04:27:31.721439: step: 428/464, loss: 0.041486937552690506 2023-01-24 04:27:32.435464: step: 430/464, loss: 0.11460515856742859 2023-01-24 04:27:33.146549: step: 432/464, loss: 0.04825626313686371 2023-01-24 04:27:33.878968: step: 434/464, loss: 0.016949543729424477 2023-01-24 04:27:34.662113: step: 436/464, loss: 0.11145293712615967 2023-01-24 04:27:35.371054: step: 438/464, loss: 0.025824761018157005 2023-01-24 04:27:36.090591: step: 440/464, loss: 0.2621608078479767 2023-01-24 04:27:36.780323: step: 442/464, loss: 0.058782532811164856 2023-01-24 04:27:37.622603: step: 444/464, loss: 0.09873618185520172 2023-01-24 04:27:38.354933: step: 446/464, loss: 0.03907536715269089 2023-01-24 04:27:39.057200: step: 448/464, loss: 0.11768066138029099 2023-01-24 04:27:39.856348: step: 450/464, loss: 0.016299933195114136 2023-01-24 04:27:40.567547: step: 452/464, loss: 0.07312924414873123 2023-01-24 04:27:41.350362: step: 454/464, loss: 0.013142102397978306 2023-01-24 04:27:42.121609: step: 456/464, loss: 0.015804991126060486 2023-01-24 04:27:42.947262: step: 458/464, loss: 0.07611986994743347 2023-01-24 04:27:43.794084: step: 460/464, loss: 0.027487069368362427 2023-01-24 04:27:44.490033: step: 462/464, loss: 0.013240874744951725 2023-01-24 04:27:45.294286: step: 464/464, loss: 0.10774283111095428 2023-01-24 04:27:46.022392: step: 466/464, loss: 0.026716547086834908 2023-01-24 04:27:46.756326: step: 468/464, loss: 0.12575234472751617 2023-01-24 04:27:47.521591: step: 470/464, loss: 0.015290974639356136 2023-01-24 04:27:48.271585: step: 472/464, loss: 0.017766600474715233 2023-01-24 04:27:49.048572: step: 474/464, loss: 0.07921173423528671 2023-01-24 04:27:49.803713: step: 476/464, loss: 0.017183203250169754 2023-01-24 04:27:50.504178: step: 478/464, loss: 0.06559363752603531 2023-01-24 04:27:51.295876: step: 480/464, loss: 0.06318671256303787 2023-01-24 04:27:52.016493: step: 482/464, loss: 0.008761835284531116 2023-01-24 04:27:52.750040: step: 484/464, loss: 0.23408980667591095 2023-01-24 04:27:53.559295: step: 486/464, loss: 0.05240677669644356 2023-01-24 04:27:54.409507: step: 488/464, loss: 0.11231359094381332 2023-01-24 04:27:55.218444: step: 490/464, loss: 0.029335487633943558 2023-01-24 04:27:55.869292: step: 492/464, loss: 0.002755326684564352 2023-01-24 04:27:56.600147: step: 494/464, loss: 0.017630210146307945 2023-01-24 04:27:57.320935: step: 496/464, loss: 0.09911082684993744 2023-01-24 04:27:58.022039: step: 498/464, loss: 0.028314009308815002 2023-01-24 04:27:58.776928: step: 500/464, loss: 0.0582076795399189 2023-01-24 04:27:59.588322: step: 502/464, loss: 0.011484694667160511 2023-01-24 04:28:00.283655: step: 504/464, loss: 0.026740385219454765 2023-01-24 04:28:00.982709: step: 506/464, loss: 0.031355928629636765 2023-01-24 04:28:01.787924: step: 508/464, loss: 0.02741563692688942 2023-01-24 04:28:02.590026: step: 510/464, loss: 0.03371531143784523 2023-01-24 04:28:03.404404: step: 512/464, loss: 0.02765187807381153 2023-01-24 04:28:04.136459: step: 514/464, loss: 0.01636318862438202 2023-01-24 04:28:04.991881: step: 516/464, loss: 0.058873556554317474 2023-01-24 04:28:05.731118: step: 518/464, loss: 0.06401578336954117 2023-01-24 04:28:06.516566: step: 520/464, loss: 0.03372131660580635 2023-01-24 04:28:07.311994: step: 522/464, loss: 0.05860723555088043 2023-01-24 04:28:08.044975: step: 524/464, loss: 0.014996029436588287 2023-01-24 04:28:08.811646: step: 526/464, loss: 0.07337082922458649 2023-01-24 04:28:09.559557: step: 528/464, loss: 0.29840704798698425 2023-01-24 04:28:10.255188: step: 530/464, loss: 0.1524370163679123 2023-01-24 04:28:11.009714: step: 532/464, loss: 0.03650538623332977 2023-01-24 04:28:11.748330: step: 534/464, loss: 0.2502961754798889 2023-01-24 04:28:12.505546: step: 536/464, loss: 0.24290591478347778 2023-01-24 04:28:13.297331: step: 538/464, loss: 0.08531565964221954 2023-01-24 04:28:14.060822: step: 540/464, loss: 0.047006767243146896 2023-01-24 04:28:14.763067: step: 542/464, loss: 0.07336611300706863 2023-01-24 04:28:15.491973: step: 544/464, loss: 0.053565382957458496 2023-01-24 04:28:16.261755: step: 546/464, loss: 0.08777978271245956 2023-01-24 04:28:16.994320: step: 548/464, loss: 0.16474194824695587 2023-01-24 04:28:17.744359: step: 550/464, loss: 0.08033395558595657 2023-01-24 04:28:18.516941: step: 552/464, loss: 0.10461867600679398 2023-01-24 04:28:19.322845: step: 554/464, loss: 0.07356631755828857 2023-01-24 04:28:20.055369: step: 556/464, loss: 0.004075208678841591 2023-01-24 04:28:20.894369: step: 558/464, loss: 0.050737012177705765 2023-01-24 04:28:21.692714: step: 560/464, loss: 0.0372830405831337 2023-01-24 04:28:22.445691: step: 562/464, loss: 0.17407050728797913 2023-01-24 04:28:23.266684: step: 564/464, loss: 0.017158811911940575 2023-01-24 04:28:24.007748: step: 566/464, loss: 0.05447883531451225 2023-01-24 04:28:24.742126: step: 568/464, loss: 0.026156453415751457 2023-01-24 04:28:25.470071: step: 570/464, loss: 0.1320725530385971 2023-01-24 04:28:26.159373: step: 572/464, loss: 0.06671235710382462 2023-01-24 04:28:26.927973: step: 574/464, loss: 0.06426624208688736 2023-01-24 04:28:27.733375: step: 576/464, loss: 0.11161121726036072 2023-01-24 04:28:28.555151: step: 578/464, loss: 0.0519336573779583 2023-01-24 04:28:29.395528: step: 580/464, loss: 0.12640352547168732 2023-01-24 04:28:30.213876: step: 582/464, loss: 0.06665212661027908 2023-01-24 04:28:30.966153: step: 584/464, loss: 0.05036737024784088 2023-01-24 04:28:31.752663: step: 586/464, loss: 0.04287680611014366 2023-01-24 04:28:32.511258: step: 588/464, loss: 0.06328330934047699 2023-01-24 04:28:33.324660: step: 590/464, loss: 0.019768400117754936 2023-01-24 04:28:33.999834: step: 592/464, loss: 0.035888101905584335 2023-01-24 04:28:34.748956: step: 594/464, loss: 7.311809539794922 2023-01-24 04:28:35.517394: step: 596/464, loss: 1.0828490257263184 2023-01-24 04:28:36.406421: step: 598/464, loss: 0.02576698176562786 2023-01-24 04:28:37.156814: step: 600/464, loss: 0.028343813493847847 2023-01-24 04:28:37.984619: step: 602/464, loss: 0.04987442120909691 2023-01-24 04:28:38.723394: step: 604/464, loss: 0.03227812796831131 2023-01-24 04:28:39.434503: step: 606/464, loss: 0.06706904619932175 2023-01-24 04:28:40.154014: step: 608/464, loss: 0.11491075903177261 2023-01-24 04:28:40.922964: step: 610/464, loss: 0.01785075105726719 2023-01-24 04:28:41.683882: step: 612/464, loss: 0.3231183588504791 2023-01-24 04:28:42.478794: step: 614/464, loss: 0.061851970851421356 2023-01-24 04:28:43.245366: step: 616/464, loss: 0.05380820855498314 2023-01-24 04:28:44.000256: step: 618/464, loss: 0.12674058973789215 2023-01-24 04:28:44.763687: step: 620/464, loss: 0.08594769239425659 2023-01-24 04:28:45.521481: step: 622/464, loss: 0.007985075004398823 2023-01-24 04:28:46.278724: step: 624/464, loss: 0.07399801164865494 2023-01-24 04:28:46.991576: step: 626/464, loss: 0.20825162529945374 2023-01-24 04:28:47.733035: step: 628/464, loss: 0.019077161327004433 2023-01-24 04:28:48.503741: step: 630/464, loss: 0.0259280726313591 2023-01-24 04:28:49.200492: step: 632/464, loss: 0.18837212026119232 2023-01-24 04:28:49.993960: step: 634/464, loss: 0.17273612320423126 2023-01-24 04:28:50.744589: step: 636/464, loss: 0.03467350825667381 2023-01-24 04:28:51.378814: step: 638/464, loss: 0.07087120413780212 2023-01-24 04:28:52.128518: step: 640/464, loss: 0.027575084939599037 2023-01-24 04:28:52.801285: step: 642/464, loss: 0.01259750034660101 2023-01-24 04:28:53.601618: step: 644/464, loss: 0.06660866737365723 2023-01-24 04:28:54.329243: step: 646/464, loss: 0.03903059661388397 2023-01-24 04:28:55.100101: step: 648/464, loss: 0.24814175069332123 2023-01-24 04:28:55.916481: step: 650/464, loss: 0.0645311027765274 2023-01-24 04:28:56.648717: step: 652/464, loss: 0.018755216151475906 2023-01-24 04:28:57.377210: step: 654/464, loss: 0.04339805245399475 2023-01-24 04:28:58.158483: step: 656/464, loss: 0.09537842124700546 2023-01-24 04:28:58.881234: step: 658/464, loss: 0.76046222448349 2023-01-24 04:28:59.601763: step: 660/464, loss: 0.037168506532907486 2023-01-24 04:29:00.293278: step: 662/464, loss: 0.06753918528556824 2023-01-24 04:29:01.030396: step: 664/464, loss: 0.036744534969329834 2023-01-24 04:29:01.881580: step: 666/464, loss: 0.13704568147659302 2023-01-24 04:29:02.639268: step: 668/464, loss: 0.05090853571891785 2023-01-24 04:29:03.378997: step: 670/464, loss: 0.04945719987154007 2023-01-24 04:29:04.110265: step: 672/464, loss: 0.44987764954566956 2023-01-24 04:29:04.840405: step: 674/464, loss: 0.10461324453353882 2023-01-24 04:29:05.605219: step: 676/464, loss: 0.08109944313764572 2023-01-24 04:29:06.267182: step: 678/464, loss: 0.04540938511490822 2023-01-24 04:29:06.992287: step: 680/464, loss: 0.019449058920145035 2023-01-24 04:29:07.759870: step: 682/464, loss: 0.052725568413734436 2023-01-24 04:29:08.453172: step: 684/464, loss: 0.0179046131670475 2023-01-24 04:29:09.296899: step: 686/464, loss: 0.01967434585094452 2023-01-24 04:29:09.951740: step: 688/464, loss: 0.0413089245557785 2023-01-24 04:29:10.599953: step: 690/464, loss: 0.06626380234956741 2023-01-24 04:29:11.336709: step: 692/464, loss: 1.3306009769439697 2023-01-24 04:29:12.106563: step: 694/464, loss: 0.01851009391248226 2023-01-24 04:29:12.826569: step: 696/464, loss: 0.00975774321705103 2023-01-24 04:29:13.608164: step: 698/464, loss: 0.04369215667247772 2023-01-24 04:29:14.333731: step: 700/464, loss: 0.03224438801407814 2023-01-24 04:29:15.036132: step: 702/464, loss: 0.03615008294582367 2023-01-24 04:29:15.728465: step: 704/464, loss: 0.9618604183197021 2023-01-24 04:29:16.395441: step: 706/464, loss: 0.030223244801163673 2023-01-24 04:29:17.089186: step: 708/464, loss: 0.052093807607889175 2023-01-24 04:29:17.791392: step: 710/464, loss: 0.1564226597547531 2023-01-24 04:29:18.543087: step: 712/464, loss: 0.038040641695261 2023-01-24 04:29:19.270113: step: 714/464, loss: 0.06683960556983948 2023-01-24 04:29:20.005740: step: 716/464, loss: 0.03439417853951454 2023-01-24 04:29:20.778354: step: 718/464, loss: 0.012578189373016357 2023-01-24 04:29:21.565665: step: 720/464, loss: 0.011915381997823715 2023-01-24 04:29:22.353828: step: 722/464, loss: 0.11274097114801407 2023-01-24 04:29:23.072625: step: 724/464, loss: 0.01812215894460678 2023-01-24 04:29:23.778839: step: 726/464, loss: 0.05313228443264961 2023-01-24 04:29:24.494699: step: 728/464, loss: 0.0890948623418808 2023-01-24 04:29:25.179984: step: 730/464, loss: 0.3860137462615967 2023-01-24 04:29:25.906396: step: 732/464, loss: 0.024147091433405876 2023-01-24 04:29:26.635470: step: 734/464, loss: 0.0798778086900711 2023-01-24 04:29:27.393759: step: 736/464, loss: 0.191580131649971 2023-01-24 04:29:28.179376: step: 738/464, loss: 0.07451540231704712 2023-01-24 04:29:28.920544: step: 740/464, loss: 0.044656287878751755 2023-01-24 04:29:29.613776: step: 742/464, loss: 0.26471590995788574 2023-01-24 04:29:30.355334: step: 744/464, loss: 0.08491283655166626 2023-01-24 04:29:31.163832: step: 746/464, loss: 0.04643037170171738 2023-01-24 04:29:31.975461: step: 748/464, loss: 0.24509261548519135 2023-01-24 04:29:32.622613: step: 750/464, loss: 0.04010670259594917 2023-01-24 04:29:33.364078: step: 752/464, loss: 0.04671388119459152 2023-01-24 04:29:34.156793: step: 754/464, loss: 0.10081122815608978 2023-01-24 04:29:34.918675: step: 756/464, loss: 0.17207136750221252 2023-01-24 04:29:35.730038: step: 758/464, loss: 0.055953122675418854 2023-01-24 04:29:36.508208: step: 760/464, loss: 0.056002210825681686 2023-01-24 04:29:37.196511: step: 762/464, loss: 0.038005270063877106 2023-01-24 04:29:37.918486: step: 764/464, loss: 0.26270192861557007 2023-01-24 04:29:38.619214: step: 766/464, loss: 0.09764519333839417 2023-01-24 04:29:39.433236: step: 768/464, loss: 0.06416065245866776 2023-01-24 04:29:40.218184: step: 770/464, loss: 0.05219209939241409 2023-01-24 04:29:41.039979: step: 772/464, loss: 0.008765576407313347 2023-01-24 04:29:41.826570: step: 774/464, loss: 1.222083568572998 2023-01-24 04:29:42.512626: step: 776/464, loss: 0.12024626135826111 2023-01-24 04:29:43.261364: step: 778/464, loss: 0.05762336403131485 2023-01-24 04:29:43.982340: step: 780/464, loss: 0.051064152270555496 2023-01-24 04:29:44.736592: step: 782/464, loss: 0.016113949939608574 2023-01-24 04:29:45.437441: step: 784/464, loss: 0.5698862075805664 2023-01-24 04:29:46.182531: step: 786/464, loss: 0.0769958645105362 2023-01-24 04:29:46.903024: step: 788/464, loss: 0.07791303098201752 2023-01-24 04:29:47.758267: step: 790/464, loss: 0.11422698944807053 2023-01-24 04:29:48.548515: step: 792/464, loss: 0.08424840122461319 2023-01-24 04:29:49.357441: step: 794/464, loss: 0.051752686500549316 2023-01-24 04:29:50.129177: step: 796/464, loss: 0.12153435498476028 2023-01-24 04:29:50.925328: step: 798/464, loss: 0.050786785781383514 2023-01-24 04:29:51.643557: step: 800/464, loss: 0.023236608132719994 2023-01-24 04:29:52.350977: step: 802/464, loss: 0.5370764136314392 2023-01-24 04:29:53.128996: step: 804/464, loss: 0.08696542680263519 2023-01-24 04:29:53.865329: step: 806/464, loss: 0.033709701150655746 2023-01-24 04:29:54.571261: step: 808/464, loss: 0.11055350303649902 2023-01-24 04:29:55.292375: step: 810/464, loss: 0.04368586465716362 2023-01-24 04:29:55.948427: step: 812/464, loss: 0.004434123169630766 2023-01-24 04:29:56.767514: step: 814/464, loss: 0.10015460103750229 2023-01-24 04:29:57.511402: step: 816/464, loss: 0.03871988505125046 2023-01-24 04:29:58.191823: step: 818/464, loss: 0.022155897691845894 2023-01-24 04:29:58.969741: step: 820/464, loss: 0.07232918590307236 2023-01-24 04:29:59.639925: step: 822/464, loss: 0.060673587024211884 2023-01-24 04:30:00.300430: step: 824/464, loss: 0.005866081919521093 2023-01-24 04:30:00.978136: step: 826/464, loss: 0.09922577440738678 2023-01-24 04:30:01.650018: step: 828/464, loss: 0.09058462828397751 2023-01-24 04:30:02.473543: step: 830/464, loss: 0.02790232188999653 2023-01-24 04:30:03.149591: step: 832/464, loss: 0.008085663430392742 2023-01-24 04:30:03.914914: step: 834/464, loss: 0.05334268510341644 2023-01-24 04:30:04.695375: step: 836/464, loss: 0.029737956821918488 2023-01-24 04:30:05.443412: step: 838/464, loss: 0.25331979990005493 2023-01-24 04:30:06.218728: step: 840/464, loss: 0.10104769468307495 2023-01-24 04:30:06.923009: step: 842/464, loss: 0.017015360295772552 2023-01-24 04:30:07.668217: step: 844/464, loss: 0.11380429565906525 2023-01-24 04:30:08.481308: step: 846/464, loss: 0.04035916551947594 2023-01-24 04:30:09.226676: step: 848/464, loss: 0.03218059614300728 2023-01-24 04:30:09.970059: step: 850/464, loss: 0.08396913856267929 2023-01-24 04:30:10.738917: step: 852/464, loss: 0.2091253399848938 2023-01-24 04:30:11.414976: step: 854/464, loss: 0.041528936475515366 2023-01-24 04:30:12.106733: step: 856/464, loss: 0.05493639409542084 2023-01-24 04:30:12.823208: step: 858/464, loss: 0.11059834808111191 2023-01-24 04:30:13.541928: step: 860/464, loss: 0.06909855455160141 2023-01-24 04:30:14.243320: step: 862/464, loss: 0.0038817180320620537 2023-01-24 04:30:14.981859: step: 864/464, loss: 0.01490766741335392 2023-01-24 04:30:15.756478: step: 866/464, loss: 0.058409348130226135 2023-01-24 04:30:16.472131: step: 868/464, loss: 0.052582159638404846 2023-01-24 04:30:17.156995: step: 870/464, loss: 0.011038169264793396 2023-01-24 04:30:17.816330: step: 872/464, loss: 0.002536794636398554 2023-01-24 04:30:18.572793: step: 874/464, loss: 0.1839173287153244 2023-01-24 04:30:19.322913: step: 876/464, loss: 0.01185121014714241 2023-01-24 04:30:20.101764: step: 878/464, loss: 0.04605353623628616 2023-01-24 04:30:20.917772: step: 880/464, loss: 0.056616757065057755 2023-01-24 04:30:21.657286: step: 882/464, loss: 0.04454910755157471 2023-01-24 04:30:22.410072: step: 884/464, loss: 0.10783626139163971 2023-01-24 04:30:23.147890: step: 886/464, loss: 0.029736977070569992 2023-01-24 04:30:23.847921: step: 888/464, loss: 0.00434313528239727 2023-01-24 04:30:24.513346: step: 890/464, loss: 0.02773529291152954 2023-01-24 04:30:25.282178: step: 892/464, loss: 0.14134477078914642 2023-01-24 04:30:26.032949: step: 894/464, loss: 0.038266871124506 2023-01-24 04:30:26.765833: step: 896/464, loss: 0.03223191574215889 2023-01-24 04:30:27.529775: step: 898/464, loss: 0.028650274500250816 2023-01-24 04:30:28.290645: step: 900/464, loss: 0.0798439085483551 2023-01-24 04:30:29.083518: step: 902/464, loss: 0.05823419243097305 2023-01-24 04:30:29.771788: step: 904/464, loss: 0.017082292586565018 2023-01-24 04:30:30.560760: step: 906/464, loss: 0.5384454727172852 2023-01-24 04:30:31.361536: step: 908/464, loss: 0.07013249397277832 2023-01-24 04:30:32.062212: step: 910/464, loss: 0.10660932958126068 2023-01-24 04:30:32.746168: step: 912/464, loss: 0.007429053075611591 2023-01-24 04:30:33.508558: step: 914/464, loss: 0.059720948338508606 2023-01-24 04:30:34.240587: step: 916/464, loss: 0.027867119759321213 2023-01-24 04:30:34.957266: step: 918/464, loss: 0.031830303370952606 2023-01-24 04:30:35.634001: step: 920/464, loss: 0.010990363545715809 2023-01-24 04:30:36.420303: step: 922/464, loss: 0.012598209083080292 2023-01-24 04:30:37.144054: step: 924/464, loss: 0.3085867762565613 2023-01-24 04:30:37.876650: step: 926/464, loss: 0.022714342921972275 2023-01-24 04:30:38.670914: step: 928/464, loss: 0.09439236670732498 2023-01-24 04:30:39.365183: step: 930/464, loss: 0.08354859799146652 ================================================== Loss: 0.114 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32055154064082636, 'r': 0.31507722590502474, 'f1': 0.3177908096688001}, 'combined': 0.23416164922964214, 'epoch': 20} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3121755679271711, 'r': 0.2696061723007387, 'f1': 0.2893334532007928}, 'combined': 0.1796913025141766, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30970540882586706, 'r': 0.31146509864874133, 'f1': 0.3105827612588016}, 'combined': 0.2288504556643801, 'epoch': 20} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.297261045345724, 'r': 0.2587435409537459, 'f1': 0.2766681331615411}, 'combined': 0.17182547217400976, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3325161113961061, 'r': 0.32181010023373147, 'f1': 0.3270755205455442}, 'combined': 0.241003015138822, 'epoch': 20} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3161607538142622, 'r': 0.27028155261910275, 'f1': 0.29142651708690903}, 'combined': 0.18099120534871194, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30128205128205127, 'r': 0.3357142857142857, 'f1': 0.31756756756756754}, 'combined': 0.2117117117117117, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2916666666666667, 'r': 0.3804347826086957, 'f1': 0.3301886792452831}, 'combined': 0.16509433962264156, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5520833333333334, 'r': 0.22844827586206898, 'f1': 0.3231707317073171}, 'combined': 0.21544715447154472, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:33:19.467320: step: 2/464, loss: 0.11975234001874924 2023-01-24 04:33:20.166862: step: 4/464, loss: 0.05728471279144287 2023-01-24 04:33:20.872432: step: 6/464, loss: 0.03545444458723068 2023-01-24 04:33:21.621495: step: 8/464, loss: 0.13810107111930847 2023-01-24 04:33:22.397398: step: 10/464, loss: 0.0021625454537570477 2023-01-24 04:33:23.106526: step: 12/464, loss: 0.24097493290901184 2023-01-24 04:33:23.862687: step: 14/464, loss: 0.5120839476585388 2023-01-24 04:33:24.663101: step: 16/464, loss: 0.07629422098398209 2023-01-24 04:33:25.393284: step: 18/464, loss: 0.35801276564598083 2023-01-24 04:33:26.171793: step: 20/464, loss: 0.021506866440176964 2023-01-24 04:33:26.882472: step: 22/464, loss: 0.07394732534885406 2023-01-24 04:33:27.544842: step: 24/464, loss: 0.007438796106725931 2023-01-24 04:33:28.391395: step: 26/464, loss: 0.07921618223190308 2023-01-24 04:33:29.253496: step: 28/464, loss: 0.03039364144206047 2023-01-24 04:33:30.046140: step: 30/464, loss: 0.040770698338747025 2023-01-24 04:33:30.819870: step: 32/464, loss: 0.0478195957839489 2023-01-24 04:33:31.465392: step: 34/464, loss: 0.011056684888899326 2023-01-24 04:33:32.131717: step: 36/464, loss: 0.033487141132354736 2023-01-24 04:33:32.856730: step: 38/464, loss: 0.028325526043772697 2023-01-24 04:33:33.560684: step: 40/464, loss: 0.11843187361955643 2023-01-24 04:33:34.363023: step: 42/464, loss: 0.03145572170615196 2023-01-24 04:33:35.071616: step: 44/464, loss: 0.11456778645515442 2023-01-24 04:33:35.789790: step: 46/464, loss: 0.00822090357542038 2023-01-24 04:33:36.529905: step: 48/464, loss: 0.05254720151424408 2023-01-24 04:33:37.415667: step: 50/464, loss: 0.02053934521973133 2023-01-24 04:33:38.082068: step: 52/464, loss: 0.01988641545176506 2023-01-24 04:33:38.825401: step: 54/464, loss: 0.19410990178585052 2023-01-24 04:33:39.579927: step: 56/464, loss: 0.06711027026176453 2023-01-24 04:33:40.290594: step: 58/464, loss: 0.012005824595689774 2023-01-24 04:33:41.041777: step: 60/464, loss: 0.02315746806561947 2023-01-24 04:33:41.825075: step: 62/464, loss: 0.01853269897401333 2023-01-24 04:33:42.612078: step: 64/464, loss: 0.026548484340310097 2023-01-24 04:33:43.341060: step: 66/464, loss: 0.008066806942224503 2023-01-24 04:33:44.103357: step: 68/464, loss: 0.08661675453186035 2023-01-24 04:33:44.860301: step: 70/464, loss: 0.025570321828126907 2023-01-24 04:33:45.584617: step: 72/464, loss: 0.016779575496912003 2023-01-24 04:33:46.262127: step: 74/464, loss: 0.017785130068659782 2023-01-24 04:33:47.030898: step: 76/464, loss: 0.007999873720109463 2023-01-24 04:33:47.836223: step: 78/464, loss: 0.02611628547310829 2023-01-24 04:33:48.539688: step: 80/464, loss: 0.052033498883247375 2023-01-24 04:33:49.276700: step: 82/464, loss: 0.007343766279518604 2023-01-24 04:33:50.065531: step: 84/464, loss: 0.013929629698395729 2023-01-24 04:33:50.837193: step: 86/464, loss: 0.05986665561795235 2023-01-24 04:33:51.568119: step: 88/464, loss: 0.054861441254615784 2023-01-24 04:33:52.357210: step: 90/464, loss: 0.018272459506988525 2023-01-24 04:33:52.999568: step: 92/464, loss: 0.00936177372932434 2023-01-24 04:33:53.645192: step: 94/464, loss: 0.02910560928285122 2023-01-24 04:33:54.409522: step: 96/464, loss: 0.014908765442669392 2023-01-24 04:33:55.160876: step: 98/464, loss: 0.030643368139863014 2023-01-24 04:33:55.891084: step: 100/464, loss: 0.04429630935192108 2023-01-24 04:33:56.660646: step: 102/464, loss: 0.11084703356027603 2023-01-24 04:33:57.418592: step: 104/464, loss: 0.013898937962949276 2023-01-24 04:33:58.189411: step: 106/464, loss: 0.09585338830947876 2023-01-24 04:33:58.946897: step: 108/464, loss: 0.03203404322266579 2023-01-24 04:33:59.563220: step: 110/464, loss: 0.012530727311968803 2023-01-24 04:34:00.238248: step: 112/464, loss: 0.36144790053367615 2023-01-24 04:34:00.949516: step: 114/464, loss: 0.04872361198067665 2023-01-24 04:34:01.639728: step: 116/464, loss: 0.05325092375278473 2023-01-24 04:34:02.356499: step: 118/464, loss: 0.013846561312675476 2023-01-24 04:34:03.105932: step: 120/464, loss: 2.050266981124878 2023-01-24 04:34:03.883708: step: 122/464, loss: 0.030106237158179283 2023-01-24 04:34:04.598147: step: 124/464, loss: 0.024658236652612686 2023-01-24 04:34:05.329331: step: 126/464, loss: 0.12699775397777557 2023-01-24 04:34:06.034609: step: 128/464, loss: 0.03840040788054466 2023-01-24 04:34:06.815592: step: 130/464, loss: 0.057593636214733124 2023-01-24 04:34:07.551247: step: 132/464, loss: 0.11989856511354446 2023-01-24 04:34:08.225024: step: 134/464, loss: 0.2313280552625656 2023-01-24 04:34:08.955885: step: 136/464, loss: 0.06446570158004761 2023-01-24 04:34:09.691163: step: 138/464, loss: 0.07844987511634827 2023-01-24 04:34:10.388799: step: 140/464, loss: 0.10319890826940536 2023-01-24 04:34:11.142154: step: 142/464, loss: 0.1371772140264511 2023-01-24 04:34:11.970270: step: 144/464, loss: 0.421131432056427 2023-01-24 04:34:12.594413: step: 146/464, loss: 0.09265350550413132 2023-01-24 04:34:13.365268: step: 148/464, loss: 0.09814798831939697 2023-01-24 04:34:14.115407: step: 150/464, loss: 0.003304727841168642 2023-01-24 04:34:14.933953: step: 152/464, loss: 0.09079311043024063 2023-01-24 04:34:15.661384: step: 154/464, loss: 0.07017511874437332 2023-01-24 04:34:16.367654: step: 156/464, loss: 0.03948606550693512 2023-01-24 04:34:17.165125: step: 158/464, loss: 0.031009254977107048 2023-01-24 04:34:18.010497: step: 160/464, loss: 0.03819343075156212 2023-01-24 04:34:18.770920: step: 162/464, loss: 0.021974503993988037 2023-01-24 04:34:19.481665: step: 164/464, loss: 0.041077401489019394 2023-01-24 04:34:20.197490: step: 166/464, loss: 0.033953309059143066 2023-01-24 04:34:20.949241: step: 168/464, loss: 0.07859236747026443 2023-01-24 04:34:21.659923: step: 170/464, loss: 0.044446911662817 2023-01-24 04:34:22.375143: step: 172/464, loss: 0.044578131288290024 2023-01-24 04:34:23.089736: step: 174/464, loss: 0.029382256790995598 2023-01-24 04:34:23.782850: step: 176/464, loss: 0.16760005056858063 2023-01-24 04:34:24.493463: step: 178/464, loss: 0.013348049484193325 2023-01-24 04:34:25.214805: step: 180/464, loss: 0.04084669426083565 2023-01-24 04:34:25.977455: step: 182/464, loss: 0.3408990204334259 2023-01-24 04:34:26.724138: step: 184/464, loss: 0.9496893286705017 2023-01-24 04:34:27.468460: step: 186/464, loss: 0.0338098406791687 2023-01-24 04:34:28.226473: step: 188/464, loss: 0.06081795319914818 2023-01-24 04:34:28.955365: step: 190/464, loss: 0.06129498407244682 2023-01-24 04:34:29.784905: step: 192/464, loss: 0.018290026113390923 2023-01-24 04:34:30.497027: step: 194/464, loss: 0.029910344630479813 2023-01-24 04:34:31.271660: step: 196/464, loss: 0.00872301310300827 2023-01-24 04:34:32.047998: step: 198/464, loss: 0.04122915118932724 2023-01-24 04:34:32.744293: step: 200/464, loss: 0.018726056441664696 2023-01-24 04:34:33.507344: step: 202/464, loss: 0.0619959831237793 2023-01-24 04:34:34.223059: step: 204/464, loss: 0.02457534149289131 2023-01-24 04:34:34.953084: step: 206/464, loss: 0.009037651121616364 2023-01-24 04:34:35.751787: step: 208/464, loss: 0.10055802017450333 2023-01-24 04:34:36.495499: step: 210/464, loss: 0.06440237164497375 2023-01-24 04:34:37.263061: step: 212/464, loss: 0.4239731729030609 2023-01-24 04:34:38.047030: step: 214/464, loss: 0.03676867112517357 2023-01-24 04:34:38.777150: step: 216/464, loss: 0.03093644417822361 2023-01-24 04:34:39.522367: step: 218/464, loss: 0.04951198399066925 2023-01-24 04:34:40.236036: step: 220/464, loss: 0.03629906848073006 2023-01-24 04:34:41.017159: step: 222/464, loss: 0.02488712966442108 2023-01-24 04:34:41.803257: step: 224/464, loss: 0.047976914793252945 2023-01-24 04:34:42.529552: step: 226/464, loss: 0.0374411940574646 2023-01-24 04:34:43.205553: step: 228/464, loss: 0.01827031560242176 2023-01-24 04:34:43.892129: step: 230/464, loss: 0.012965069152414799 2023-01-24 04:34:44.631015: step: 232/464, loss: 0.05805027112364769 2023-01-24 04:34:45.363544: step: 234/464, loss: 0.002010543365031481 2023-01-24 04:34:46.090110: step: 236/464, loss: 0.004295994061976671 2023-01-24 04:34:46.861875: step: 238/464, loss: 0.05023113265633583 2023-01-24 04:34:47.620672: step: 240/464, loss: 0.04340185225009918 2023-01-24 04:34:48.345126: step: 242/464, loss: 0.028225280344486237 2023-01-24 04:34:49.003592: step: 244/464, loss: 0.032393768429756165 2023-01-24 04:34:49.724870: step: 246/464, loss: 0.048727914690971375 2023-01-24 04:34:50.465996: step: 248/464, loss: 0.007662827614694834 2023-01-24 04:34:51.189366: step: 250/464, loss: 0.023732934147119522 2023-01-24 04:34:51.889870: step: 252/464, loss: 0.06910266727209091 2023-01-24 04:34:52.682555: step: 254/464, loss: 0.03406153991818428 2023-01-24 04:34:53.376057: step: 256/464, loss: 0.0055818636901676655 2023-01-24 04:34:54.161065: step: 258/464, loss: 0.038593094795942307 2023-01-24 04:34:54.821145: step: 260/464, loss: 0.06385339051485062 2023-01-24 04:34:55.476609: step: 262/464, loss: 0.0857200101017952 2023-01-24 04:34:56.211749: step: 264/464, loss: 0.03669006749987602 2023-01-24 04:34:56.909505: step: 266/464, loss: 0.018789229914546013 2023-01-24 04:34:57.676982: step: 268/464, loss: 0.04875718429684639 2023-01-24 04:34:58.368507: step: 270/464, loss: 0.14598001539707184 2023-01-24 04:34:59.105536: step: 272/464, loss: 0.04028183966875076 2023-01-24 04:34:59.824250: step: 274/464, loss: 0.36252397298812866 2023-01-24 04:35:00.690817: step: 276/464, loss: 0.06845507770776749 2023-01-24 04:35:01.431464: step: 278/464, loss: 0.17075254023075104 2023-01-24 04:35:02.279990: step: 280/464, loss: 0.024251427501440048 2023-01-24 04:35:02.997765: step: 282/464, loss: 0.5997735857963562 2023-01-24 04:35:03.751323: step: 284/464, loss: 0.02716807648539543 2023-01-24 04:35:04.499519: step: 286/464, loss: 0.5014903545379639 2023-01-24 04:35:05.233795: step: 288/464, loss: 0.07848905771970749 2023-01-24 04:35:05.957848: step: 290/464, loss: 0.1874598264694214 2023-01-24 04:35:06.695518: step: 292/464, loss: 0.04445421323180199 2023-01-24 04:35:07.386748: step: 294/464, loss: 0.029825005680322647 2023-01-24 04:35:08.090773: step: 296/464, loss: 0.16320347785949707 2023-01-24 04:35:08.831055: step: 298/464, loss: 0.16363082826137543 2023-01-24 04:35:09.513834: step: 300/464, loss: 0.2617831230163574 2023-01-24 04:35:10.284578: step: 302/464, loss: 0.009264235384762287 2023-01-24 04:35:11.043632: step: 304/464, loss: 0.02039198949933052 2023-01-24 04:35:11.849258: step: 306/464, loss: 0.013186992146074772 2023-01-24 04:35:12.618334: step: 308/464, loss: 0.1154455617070198 2023-01-24 04:35:13.350334: step: 310/464, loss: 0.008570646867156029 2023-01-24 04:35:14.144919: step: 312/464, loss: 0.047249674797058105 2023-01-24 04:35:14.874386: step: 314/464, loss: 0.03741049766540527 2023-01-24 04:35:15.550205: step: 316/464, loss: 0.023542368784546852 2023-01-24 04:35:16.334689: step: 318/464, loss: 0.014139845035970211 2023-01-24 04:35:17.136964: step: 320/464, loss: 0.03782298043370247 2023-01-24 04:35:17.871270: step: 322/464, loss: 0.04408476501703262 2023-01-24 04:35:18.630201: step: 324/464, loss: 0.010484587401151657 2023-01-24 04:35:19.396935: step: 326/464, loss: 0.6605069637298584 2023-01-24 04:35:20.166230: step: 328/464, loss: 0.023553185164928436 2023-01-24 04:35:20.919211: step: 330/464, loss: 0.05442517250776291 2023-01-24 04:35:21.641243: step: 332/464, loss: 0.40287327766418457 2023-01-24 04:35:22.399817: step: 334/464, loss: 0.14791059494018555 2023-01-24 04:35:23.080291: step: 336/464, loss: 0.3907485008239746 2023-01-24 04:35:23.890775: step: 338/464, loss: 0.08137544989585876 2023-01-24 04:35:24.677401: step: 340/464, loss: 0.12367463111877441 2023-01-24 04:35:25.408423: step: 342/464, loss: 0.012803366407752037 2023-01-24 04:35:26.074378: step: 344/464, loss: 0.024366678670048714 2023-01-24 04:35:26.844709: step: 346/464, loss: 0.02786608599126339 2023-01-24 04:35:27.589178: step: 348/464, loss: 12.75429916381836 2023-01-24 04:35:28.341761: step: 350/464, loss: 0.0908380076289177 2023-01-24 04:35:29.147073: step: 352/464, loss: 0.016326939687132835 2023-01-24 04:35:29.895936: step: 354/464, loss: 0.023789649829268456 2023-01-24 04:35:30.680685: step: 356/464, loss: 0.1525188535451889 2023-01-24 04:35:31.406999: step: 358/464, loss: 0.05632682889699936 2023-01-24 04:35:32.158751: step: 360/464, loss: 0.04506479203701019 2023-01-24 04:35:32.967194: step: 362/464, loss: 0.07740932703018188 2023-01-24 04:35:33.682642: step: 364/464, loss: 0.11900129169225693 2023-01-24 04:35:34.410644: step: 366/464, loss: 0.020981112495064735 2023-01-24 04:35:35.128634: step: 368/464, loss: 0.024556007236242294 2023-01-24 04:35:35.810169: step: 370/464, loss: 0.05328279733657837 2023-01-24 04:35:36.533183: step: 372/464, loss: 0.019029740244150162 2023-01-24 04:35:37.320004: step: 374/464, loss: 0.007005555089563131 2023-01-24 04:35:38.050094: step: 376/464, loss: 0.1613781452178955 2023-01-24 04:35:38.745766: step: 378/464, loss: 0.1356181651353836 2023-01-24 04:35:39.485284: step: 380/464, loss: 0.035038989037275314 2023-01-24 04:35:40.168414: step: 382/464, loss: 0.01615901291370392 2023-01-24 04:35:40.929448: step: 384/464, loss: 0.021121768280863762 2023-01-24 04:35:41.712865: step: 386/464, loss: 0.25910723209381104 2023-01-24 04:35:42.462791: step: 388/464, loss: 0.5805371403694153 2023-01-24 04:35:43.272214: step: 390/464, loss: 0.03570196032524109 2023-01-24 04:35:43.998836: step: 392/464, loss: 0.05775092914700508 2023-01-24 04:35:44.768425: step: 394/464, loss: 0.011715341359376907 2023-01-24 04:35:45.542661: step: 396/464, loss: 0.20321254432201385 2023-01-24 04:35:46.331003: step: 398/464, loss: 0.04123862460255623 2023-01-24 04:35:47.028678: step: 400/464, loss: 0.04705808311700821 2023-01-24 04:35:47.755923: step: 402/464, loss: 0.08706068992614746 2023-01-24 04:35:48.521032: step: 404/464, loss: 2.7982990741729736 2023-01-24 04:35:49.213538: step: 406/464, loss: 0.05648184195160866 2023-01-24 04:35:49.906840: step: 408/464, loss: 0.004382117185741663 2023-01-24 04:35:50.676014: step: 410/464, loss: 0.008848189376294613 2023-01-24 04:35:51.442770: step: 412/464, loss: 0.0265215914696455 2023-01-24 04:35:52.178098: step: 414/464, loss: 0.07577735930681229 2023-01-24 04:35:52.866917: step: 416/464, loss: 0.5927653908729553 2023-01-24 04:35:53.601087: step: 418/464, loss: 0.13155563175678253 2023-01-24 04:35:54.295253: step: 420/464, loss: 0.0663122907280922 2023-01-24 04:35:54.924908: step: 422/464, loss: 0.04869604483246803 2023-01-24 04:35:55.665589: step: 424/464, loss: 0.01776723749935627 2023-01-24 04:35:56.360235: step: 426/464, loss: 0.13402137160301208 2023-01-24 04:35:57.137483: step: 428/464, loss: 0.02516421489417553 2023-01-24 04:35:57.973530: step: 430/464, loss: 0.03531419485807419 2023-01-24 04:35:58.737030: step: 432/464, loss: 0.009145190007984638 2023-01-24 04:35:59.498665: step: 434/464, loss: 0.015271683223545551 2023-01-24 04:36:00.276516: step: 436/464, loss: 0.007533950265496969 2023-01-24 04:36:00.997710: step: 438/464, loss: 0.026609541848301888 2023-01-24 04:36:01.814515: step: 440/464, loss: 0.13909491896629333 2023-01-24 04:36:02.487683: step: 442/464, loss: 0.007417329587042332 2023-01-24 04:36:03.259433: step: 444/464, loss: 0.03112209588289261 2023-01-24 04:36:04.052358: step: 446/464, loss: 0.07906953245401382 2023-01-24 04:36:04.913093: step: 448/464, loss: 0.03891181945800781 2023-01-24 04:36:05.618691: step: 450/464, loss: 0.13607284426689148 2023-01-24 04:36:06.317247: step: 452/464, loss: 0.04584129899740219 2023-01-24 04:36:07.042995: step: 454/464, loss: 0.07778904587030411 2023-01-24 04:36:07.703416: step: 456/464, loss: 0.014196380972862244 2023-01-24 04:36:08.429732: step: 458/464, loss: 0.42927083373069763 2023-01-24 04:36:09.188628: step: 460/464, loss: 0.029154594987630844 2023-01-24 04:36:09.868222: step: 462/464, loss: 0.0662357434630394 2023-01-24 04:36:10.606826: step: 464/464, loss: 0.05748889595270157 2023-01-24 04:36:11.318339: step: 466/464, loss: 0.09649182856082916 2023-01-24 04:36:12.093638: step: 468/464, loss: 0.08588551729917526 2023-01-24 04:36:12.738404: step: 470/464, loss: 0.3580757677555084 2023-01-24 04:36:13.475465: step: 472/464, loss: 0.049161091446876526 2023-01-24 04:36:14.292624: step: 474/464, loss: 0.019643118605017662 2023-01-24 04:36:14.994042: step: 476/464, loss: 0.13698026537895203 2023-01-24 04:36:15.768200: step: 478/464, loss: 0.04523114860057831 2023-01-24 04:36:16.433675: step: 480/464, loss: 0.013664919883012772 2023-01-24 04:36:17.232882: step: 482/464, loss: 0.032867517322301865 2023-01-24 04:36:17.968955: step: 484/464, loss: 0.028636222705245018 2023-01-24 04:36:18.791798: step: 486/464, loss: 0.04870878905057907 2023-01-24 04:36:19.497660: step: 488/464, loss: 0.10918847471475601 2023-01-24 04:36:20.252435: step: 490/464, loss: 0.046230800449848175 2023-01-24 04:36:21.058185: step: 492/464, loss: 0.08702777326107025 2023-01-24 04:36:21.842031: step: 494/464, loss: 0.104853555560112 2023-01-24 04:36:22.591692: step: 496/464, loss: 0.04353467375040054 2023-01-24 04:36:23.352336: step: 498/464, loss: 0.26607006788253784 2023-01-24 04:36:24.046498: step: 500/464, loss: 0.07972798496484756 2023-01-24 04:36:24.864260: step: 502/464, loss: 0.11201713979244232 2023-01-24 04:36:25.633535: step: 504/464, loss: 0.15039947628974915 2023-01-24 04:36:26.396175: step: 506/464, loss: 0.03871912509202957 2023-01-24 04:36:27.181143: step: 508/464, loss: 0.008508550934493542 2023-01-24 04:36:27.925676: step: 510/464, loss: 0.09333653002977371 2023-01-24 04:36:28.676957: step: 512/464, loss: 0.03663307800889015 2023-01-24 04:36:29.421572: step: 514/464, loss: 0.052844543009996414 2023-01-24 04:36:30.251111: step: 516/464, loss: 0.42462268471717834 2023-01-24 04:36:30.931528: step: 518/464, loss: 0.030073698610067368 2023-01-24 04:36:31.656015: step: 520/464, loss: 0.058907970786094666 2023-01-24 04:36:32.378587: step: 522/464, loss: 0.04350745305418968 2023-01-24 04:36:33.151638: step: 524/464, loss: 0.44929540157318115 2023-01-24 04:36:33.881271: step: 526/464, loss: 0.1174045279622078 2023-01-24 04:36:34.572605: step: 528/464, loss: 0.16680335998535156 2023-01-24 04:36:35.283203: step: 530/464, loss: 0.04566435143351555 2023-01-24 04:36:36.149703: step: 532/464, loss: 0.04176734387874603 2023-01-24 04:36:36.881531: step: 534/464, loss: 0.7629539966583252 2023-01-24 04:36:37.608556: step: 536/464, loss: 0.018504736945033073 2023-01-24 04:36:38.317791: step: 538/464, loss: 0.02703903056681156 2023-01-24 04:36:39.101136: step: 540/464, loss: 0.09397948533296585 2023-01-24 04:36:39.854889: step: 542/464, loss: 0.11122222989797592 2023-01-24 04:36:40.591143: step: 544/464, loss: 0.26820042729377747 2023-01-24 04:36:41.365684: step: 546/464, loss: 0.026853233575820923 2023-01-24 04:36:42.200318: step: 548/464, loss: 0.018640518188476562 2023-01-24 04:36:42.905525: step: 550/464, loss: 0.3911222517490387 2023-01-24 04:36:43.646138: step: 552/464, loss: 0.04636659845709801 2023-01-24 04:36:44.329493: step: 554/464, loss: 0.06301417201757431 2023-01-24 04:36:45.114743: step: 556/464, loss: 0.05418648570775986 2023-01-24 04:36:45.842219: step: 558/464, loss: 0.3370462954044342 2023-01-24 04:36:46.624042: step: 560/464, loss: 0.004634824115782976 2023-01-24 04:36:47.359360: step: 562/464, loss: 0.01641211099922657 2023-01-24 04:36:48.090589: step: 564/464, loss: 0.023339591920375824 2023-01-24 04:36:48.816568: step: 566/464, loss: 0.0751141682267189 2023-01-24 04:36:49.511202: step: 568/464, loss: 0.004145853221416473 2023-01-24 04:36:50.245276: step: 570/464, loss: 0.04671361669898033 2023-01-24 04:36:50.995407: step: 572/464, loss: 0.032283443957567215 2023-01-24 04:36:51.751561: step: 574/464, loss: 0.052826691418886185 2023-01-24 04:36:52.547596: step: 576/464, loss: 0.08888741582632065 2023-01-24 04:36:53.253696: step: 578/464, loss: 0.019992846995592117 2023-01-24 04:36:54.032058: step: 580/464, loss: 0.11506533622741699 2023-01-24 04:36:54.792731: step: 582/464, loss: 0.044647980481386185 2023-01-24 04:36:55.581555: step: 584/464, loss: 0.15260529518127441 2023-01-24 04:36:56.384613: step: 586/464, loss: 0.02975417673587799 2023-01-24 04:36:57.247119: step: 588/464, loss: 0.08308760821819305 2023-01-24 04:36:58.023339: step: 590/464, loss: 0.05899420380592346 2023-01-24 04:36:58.746608: step: 592/464, loss: 0.01571488566696644 2023-01-24 04:36:59.458924: step: 594/464, loss: 0.06790255010128021 2023-01-24 04:37:00.218991: step: 596/464, loss: 0.027487952262163162 2023-01-24 04:37:00.883185: step: 598/464, loss: 0.05656803399324417 2023-01-24 04:37:01.618695: step: 600/464, loss: 0.010896249674260616 2023-01-24 04:37:02.407966: step: 602/464, loss: 0.10806481540203094 2023-01-24 04:37:03.082660: step: 604/464, loss: 0.0048584723845124245 2023-01-24 04:37:03.800951: step: 606/464, loss: 0.057283997535705566 2023-01-24 04:37:04.574472: step: 608/464, loss: 0.02630491741001606 2023-01-24 04:37:05.258266: step: 610/464, loss: 0.027782080695033073 2023-01-24 04:37:05.980964: step: 612/464, loss: 0.04245239496231079 2023-01-24 04:37:06.686289: step: 614/464, loss: 0.05944732576608658 2023-01-24 04:37:07.425861: step: 616/464, loss: 0.007870529778301716 2023-01-24 04:37:08.238231: step: 618/464, loss: 0.05828342214226723 2023-01-24 04:37:08.931373: step: 620/464, loss: 0.1730232685804367 2023-01-24 04:37:09.675503: step: 622/464, loss: 0.05308079347014427 2023-01-24 04:37:10.438328: step: 624/464, loss: 0.07295246422290802 2023-01-24 04:37:11.160760: step: 626/464, loss: 0.017740445211529732 2023-01-24 04:37:11.995971: step: 628/464, loss: 0.24502922594547272 2023-01-24 04:37:12.808591: step: 630/464, loss: 0.04027089849114418 2023-01-24 04:37:13.548103: step: 632/464, loss: 0.08156416565179825 2023-01-24 04:37:14.254999: step: 634/464, loss: 0.11167629808187485 2023-01-24 04:37:15.044768: step: 636/464, loss: 0.00642075389623642 2023-01-24 04:37:15.819478: step: 638/464, loss: 0.0548754557967186 2023-01-24 04:37:16.512804: step: 640/464, loss: 0.10855691879987717 2023-01-24 04:37:17.351121: step: 642/464, loss: 0.01500038243830204 2023-01-24 04:37:18.043272: step: 644/464, loss: 0.05018560588359833 2023-01-24 04:37:18.873563: step: 646/464, loss: 0.02341439574956894 2023-01-24 04:37:19.588560: step: 648/464, loss: 0.09186741709709167 2023-01-24 04:37:20.374209: step: 650/464, loss: 0.039070356637239456 2023-01-24 04:37:21.111063: step: 652/464, loss: 0.07108777016401291 2023-01-24 04:37:21.762981: step: 654/464, loss: 0.03220715373754501 2023-01-24 04:37:22.524849: step: 656/464, loss: 0.17577025294303894 2023-01-24 04:37:23.359508: step: 658/464, loss: 0.15418113768100739 2023-01-24 04:37:24.035349: step: 660/464, loss: 0.058939479291439056 2023-01-24 04:37:24.759017: step: 662/464, loss: 0.2582094371318817 2023-01-24 04:37:25.516084: step: 664/464, loss: 0.008546644821763039 2023-01-24 04:37:26.231213: step: 666/464, loss: 0.00984677579253912 2023-01-24 04:37:26.938904: step: 668/464, loss: 0.10566455870866776 2023-01-24 04:37:27.738833: step: 670/464, loss: 0.0995333343744278 2023-01-24 04:37:28.468193: step: 672/464, loss: 0.14754481613636017 2023-01-24 04:37:29.191407: step: 674/464, loss: 0.0048037185333669186 2023-01-24 04:37:29.954402: step: 676/464, loss: 0.041190601885318756 2023-01-24 04:37:30.683374: step: 678/464, loss: 0.47897103428840637 2023-01-24 04:37:31.460581: step: 680/464, loss: 0.031011898070573807 2023-01-24 04:37:32.246317: step: 682/464, loss: 0.05496121942996979 2023-01-24 04:37:33.052707: step: 684/464, loss: 0.04974808543920517 2023-01-24 04:37:33.748186: step: 686/464, loss: 0.07194700092077255 2023-01-24 04:37:34.482764: step: 688/464, loss: 0.016992082819342613 2023-01-24 04:37:35.223803: step: 690/464, loss: 0.08583711832761765 2023-01-24 04:37:35.973395: step: 692/464, loss: 0.0561419315636158 2023-01-24 04:37:36.710474: step: 694/464, loss: 0.0464247465133667 2023-01-24 04:37:37.523017: step: 696/464, loss: 0.07206926494836807 2023-01-24 04:37:38.371034: step: 698/464, loss: 0.044868115335702896 2023-01-24 04:37:39.153908: step: 700/464, loss: 0.05982567369937897 2023-01-24 04:37:40.032836: step: 702/464, loss: 0.13941554725170135 2023-01-24 04:37:40.699249: step: 704/464, loss: 0.017828669399023056 2023-01-24 04:37:41.409263: step: 706/464, loss: 0.007217045873403549 2023-01-24 04:37:42.147709: step: 708/464, loss: 0.031991638243198395 2023-01-24 04:37:42.858095: step: 710/464, loss: 0.10996410250663757 2023-01-24 04:37:43.568634: step: 712/464, loss: 0.022569648921489716 2023-01-24 04:37:44.309844: step: 714/464, loss: 0.08497320115566254 2023-01-24 04:37:45.166707: step: 716/464, loss: 0.029784033074975014 2023-01-24 04:37:45.848791: step: 718/464, loss: 0.04577137902379036 2023-01-24 04:37:46.516478: step: 720/464, loss: 0.01864718087017536 2023-01-24 04:37:47.278475: step: 722/464, loss: 0.08041205257177353 2023-01-24 04:37:48.039776: step: 724/464, loss: 0.0066671911627054214 2023-01-24 04:37:48.880863: step: 726/464, loss: 0.07465112209320068 2023-01-24 04:37:49.631456: step: 728/464, loss: 0.3429624140262604 2023-01-24 04:37:50.401961: step: 730/464, loss: 0.07083141058683395 2023-01-24 04:37:51.158072: step: 732/464, loss: 0.03474923595786095 2023-01-24 04:37:51.828294: step: 734/464, loss: 0.02197730541229248 2023-01-24 04:37:52.589865: step: 736/464, loss: 0.020554326474666595 2023-01-24 04:37:53.291574: step: 738/464, loss: 0.07115784287452698 2023-01-24 04:37:54.004411: step: 740/464, loss: 0.026381531730294228 2023-01-24 04:37:54.699417: step: 742/464, loss: 0.31829917430877686 2023-01-24 04:37:55.470972: step: 744/464, loss: 0.0353279784321785 2023-01-24 04:37:56.237174: step: 746/464, loss: 0.03324635699391365 2023-01-24 04:37:56.908607: step: 748/464, loss: 0.009753571823239326 2023-01-24 04:37:57.617474: step: 750/464, loss: 0.06722907721996307 2023-01-24 04:37:58.316314: step: 752/464, loss: 0.03678857162594795 2023-01-24 04:37:59.029985: step: 754/464, loss: 0.08187399059534073 2023-01-24 04:37:59.738016: step: 756/464, loss: 0.08561398833990097 2023-01-24 04:38:00.531979: step: 758/464, loss: 0.1783737689256668 2023-01-24 04:38:01.288604: step: 760/464, loss: 0.06394535303115845 2023-01-24 04:38:02.083665: step: 762/464, loss: 0.039629869163036346 2023-01-24 04:38:02.840461: step: 764/464, loss: 0.026326943188905716 2023-01-24 04:38:03.604621: step: 766/464, loss: 0.004333826247602701 2023-01-24 04:38:04.295299: step: 768/464, loss: 0.09951043874025345 2023-01-24 04:38:05.009270: step: 770/464, loss: 0.057956498116254807 2023-01-24 04:38:05.903637: step: 772/464, loss: 0.05867960676550865 2023-01-24 04:38:06.625421: step: 774/464, loss: 0.041094571352005005 2023-01-24 04:38:07.382887: step: 776/464, loss: 0.08115795254707336 2023-01-24 04:38:08.148409: step: 778/464, loss: 0.03187811002135277 2023-01-24 04:38:09.004171: step: 780/464, loss: 6.479488372802734 2023-01-24 04:38:09.753709: step: 782/464, loss: 0.387126088142395 2023-01-24 04:38:10.589047: step: 784/464, loss: 0.08255114406347275 2023-01-24 04:38:11.328435: step: 786/464, loss: 0.13230735063552856 2023-01-24 04:38:12.128881: step: 788/464, loss: 0.04115178436040878 2023-01-24 04:38:12.990538: step: 790/464, loss: 0.03452340140938759 2023-01-24 04:38:13.729326: step: 792/464, loss: 0.06277056038379669 2023-01-24 04:38:14.480379: step: 794/464, loss: 0.039516910910606384 2023-01-24 04:38:15.346750: step: 796/464, loss: 0.05712104216217995 2023-01-24 04:38:16.076018: step: 798/464, loss: 0.03840438649058342 2023-01-24 04:38:16.748922: step: 800/464, loss: 0.010737729258835316 2023-01-24 04:38:17.663463: step: 802/464, loss: 0.01658307947218418 2023-01-24 04:38:18.456821: step: 804/464, loss: 0.20165158808231354 2023-01-24 04:38:19.155581: step: 806/464, loss: 0.038009755313396454 2023-01-24 04:38:19.882236: step: 808/464, loss: 0.031555887311697006 2023-01-24 04:38:20.619746: step: 810/464, loss: 0.04327677935361862 2023-01-24 04:38:21.384000: step: 812/464, loss: 0.030223559588193893 2023-01-24 04:38:22.066294: step: 814/464, loss: 0.13202138245105743 2023-01-24 04:38:22.750927: step: 816/464, loss: 0.039860039949417114 2023-01-24 04:38:23.599763: step: 818/464, loss: 0.14208300411701202 2023-01-24 04:38:24.335722: step: 820/464, loss: 0.03917303681373596 2023-01-24 04:38:25.088491: step: 822/464, loss: 0.053042635321617126 2023-01-24 04:38:25.848093: step: 824/464, loss: 0.24905726313591003 2023-01-24 04:38:26.503767: step: 826/464, loss: 0.005672526080161333 2023-01-24 04:38:27.217885: step: 828/464, loss: 0.14162936806678772 2023-01-24 04:38:27.968540: step: 830/464, loss: 0.1332976073026657 2023-01-24 04:38:28.666908: step: 832/464, loss: 0.052006494253873825 2023-01-24 04:38:29.305745: step: 834/464, loss: 0.024735642597079277 2023-01-24 04:38:29.993088: step: 836/464, loss: 0.07817701250314713 2023-01-24 04:38:30.852846: step: 838/464, loss: 0.023987311869859695 2023-01-24 04:38:31.606016: step: 840/464, loss: 0.06612563133239746 2023-01-24 04:38:32.327007: step: 842/464, loss: 0.2896845042705536 2023-01-24 04:38:33.001211: step: 844/464, loss: 0.03888014703989029 2023-01-24 04:38:33.768865: step: 846/464, loss: 0.11084342002868652 2023-01-24 04:38:34.591621: step: 848/464, loss: 0.045841753482818604 2023-01-24 04:38:35.270768: step: 850/464, loss: 0.035428255796432495 2023-01-24 04:38:36.091937: step: 852/464, loss: 0.027241677045822144 2023-01-24 04:38:36.824822: step: 854/464, loss: 0.05396389588713646 2023-01-24 04:38:37.519062: step: 856/464, loss: 0.03509226068854332 2023-01-24 04:38:38.250953: step: 858/464, loss: 0.02402915060520172 2023-01-24 04:38:38.965350: step: 860/464, loss: 0.03456997871398926 2023-01-24 04:38:39.695393: step: 862/464, loss: 0.09369662404060364 2023-01-24 04:38:40.436786: step: 864/464, loss: 0.05106193199753761 2023-01-24 04:38:41.116660: step: 866/464, loss: 0.013446471653878689 2023-01-24 04:38:42.012498: step: 868/464, loss: 0.08844896405935287 2023-01-24 04:38:42.741730: step: 870/464, loss: 0.0037490795366466045 2023-01-24 04:38:43.442574: step: 872/464, loss: 0.2520065903663635 2023-01-24 04:38:44.240160: step: 874/464, loss: 0.2629806697368622 2023-01-24 04:38:44.957424: step: 876/464, loss: 0.07260128110647202 2023-01-24 04:38:45.748262: step: 878/464, loss: 0.11487490683794022 2023-01-24 04:38:46.454433: step: 880/464, loss: 0.08654702454805374 2023-01-24 04:38:47.126043: step: 882/464, loss: 0.04224324971437454 2023-01-24 04:38:47.790133: step: 884/464, loss: 0.24555733799934387 2023-01-24 04:38:48.529444: step: 886/464, loss: 0.0406196303665638 2023-01-24 04:38:49.244272: step: 888/464, loss: 0.018796822056174278 2023-01-24 04:38:49.866022: step: 890/464, loss: 0.05456920340657234 2023-01-24 04:38:50.599534: step: 892/464, loss: 0.030479077249765396 2023-01-24 04:38:51.441160: step: 894/464, loss: 0.046267785131931305 2023-01-24 04:38:52.176481: step: 896/464, loss: 0.014180081896483898 2023-01-24 04:38:52.924996: step: 898/464, loss: 0.07442369312047958 2023-01-24 04:38:53.704050: step: 900/464, loss: 0.05919457599520683 2023-01-24 04:38:54.406036: step: 902/464, loss: 0.0010191906476393342 2023-01-24 04:38:55.106281: step: 904/464, loss: 0.41506895422935486 2023-01-24 04:38:55.853167: step: 906/464, loss: 0.07254950702190399 2023-01-24 04:38:56.583934: step: 908/464, loss: 0.03722519800066948 2023-01-24 04:38:57.368247: step: 910/464, loss: 0.042995885014534 2023-01-24 04:38:58.020003: step: 912/464, loss: 0.06633169203996658 2023-01-24 04:38:58.816450: step: 914/464, loss: 0.11544479429721832 2023-01-24 04:38:59.590813: step: 916/464, loss: 0.03535765781998634 2023-01-24 04:39:00.294680: step: 918/464, loss: 0.09880461543798447 2023-01-24 04:39:00.997371: step: 920/464, loss: 0.19629810750484467 2023-01-24 04:39:01.789771: step: 922/464, loss: 0.07875211536884308 2023-01-24 04:39:02.604906: step: 924/464, loss: 0.1211591511964798 2023-01-24 04:39:03.296967: step: 926/464, loss: 0.03621485084295273 2023-01-24 04:39:03.979903: step: 928/464, loss: 0.0687384083867073 2023-01-24 04:39:04.642674: step: 930/464, loss: 0.06540561467409134 ================================================== Loss: 0.136 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33904307207601847, 'r': 0.3236027803685717, 'f1': 0.3311430393286161}, 'combined': 0.24400013424213818, 'epoch': 21} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.29535292343538005, 'r': 0.2641248969456709, 'f1': 0.27886739249767234}, 'combined': 0.17319132797223863, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.315728667959865, 'r': 0.3163277735916674, 'f1': 0.3160279368394478}, 'combined': 0.232862690302751, 'epoch': 21} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.29759658698414704, 'r': 0.26175573445496053, 'f1': 0.27852789811226364}, 'combined': 0.17298048409077427, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33670685558294494, 'r': 0.32392860679421837, 'f1': 0.3301941504459441}, 'combined': 0.24330095296016932, 'epoch': 21} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31328614021336565, 'r': 0.27497501458569573, 'f1': 0.29288305124989017}, 'combined': 0.181895789723616, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28289473684210525, 'r': 0.30714285714285716, 'f1': 0.29452054794520544}, 'combined': 0.19634703196347028, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28125, 'r': 0.391304347826087, 'f1': 0.3272727272727273}, 'combined': 0.16363636363636366, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5446428571428571, 'r': 0.2629310344827586, 'f1': 0.3546511627906977}, 'combined': 0.2364341085271318, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:41:44.281599: step: 2/464, loss: 0.022911660373210907 2023-01-24 04:41:45.024201: step: 4/464, loss: 0.048933934420347214 2023-01-24 04:41:45.771738: step: 6/464, loss: 0.19535832107067108 2023-01-24 04:41:46.462367: step: 8/464, loss: 0.014200533740222454 2023-01-24 04:41:47.248574: step: 10/464, loss: 0.06074250862002373 2023-01-24 04:41:47.950690: step: 12/464, loss: 0.04321758449077606 2023-01-24 04:41:48.704285: step: 14/464, loss: 0.020962128415703773 2023-01-24 04:41:49.487543: step: 16/464, loss: 0.028577618300914764 2023-01-24 04:41:50.196485: step: 18/464, loss: 0.026005618274211884 2023-01-24 04:41:50.958045: step: 20/464, loss: 0.06449444591999054 2023-01-24 04:41:51.726870: step: 22/464, loss: 0.010618659667670727 2023-01-24 04:41:52.469954: step: 24/464, loss: 0.07123077660799026 2023-01-24 04:41:53.260849: step: 26/464, loss: 0.03983668237924576 2023-01-24 04:41:53.964614: step: 28/464, loss: 0.03342576324939728 2023-01-24 04:41:54.727988: step: 30/464, loss: 0.039252303540706635 2023-01-24 04:41:55.462063: step: 32/464, loss: 0.018664592877030373 2023-01-24 04:41:56.142101: step: 34/464, loss: 0.1506842076778412 2023-01-24 04:41:56.857044: step: 36/464, loss: 0.9334834814071655 2023-01-24 04:41:57.717017: step: 38/464, loss: 0.007731994614005089 2023-01-24 04:41:58.452734: step: 40/464, loss: 0.020626060664653778 2023-01-24 04:41:59.173472: step: 42/464, loss: 0.011708186939358711 2023-01-24 04:41:59.935794: step: 44/464, loss: 0.11415652185678482 2023-01-24 04:42:00.656893: step: 46/464, loss: 0.03801979497075081 2023-01-24 04:42:01.446643: step: 48/464, loss: 0.05007893592119217 2023-01-24 04:42:02.117788: step: 50/464, loss: 0.02001386322081089 2023-01-24 04:42:02.871975: step: 52/464, loss: 0.03638208657503128 2023-01-24 04:42:03.582266: step: 54/464, loss: 0.02277105487883091 2023-01-24 04:42:04.298629: step: 56/464, loss: 0.05654141679406166 2023-01-24 04:42:05.025780: step: 58/464, loss: 0.04333549737930298 2023-01-24 04:42:05.764700: step: 60/464, loss: 0.06319651007652283 2023-01-24 04:42:06.469594: step: 62/464, loss: 0.03601942956447601 2023-01-24 04:42:07.142710: step: 64/464, loss: 0.018980784341692924 2023-01-24 04:42:07.881973: step: 66/464, loss: 0.5078731775283813 2023-01-24 04:42:08.617020: step: 68/464, loss: 0.013460144400596619 2023-01-24 04:42:09.403807: step: 70/464, loss: 0.24691608548164368 2023-01-24 04:42:10.107942: step: 72/464, loss: 0.03298424929380417 2023-01-24 04:42:10.848180: step: 74/464, loss: 0.03243899345397949 2023-01-24 04:42:11.528672: step: 76/464, loss: 0.061396270990371704 2023-01-24 04:42:12.313297: step: 78/464, loss: 0.044844284653663635 2023-01-24 04:42:13.022285: step: 80/464, loss: 0.2925359606742859 2023-01-24 04:42:13.765102: step: 82/464, loss: 0.053221434354782104 2023-01-24 04:42:14.440555: step: 84/464, loss: 0.02855098433792591 2023-01-24 04:42:15.128220: step: 86/464, loss: 0.04170306771993637 2023-01-24 04:42:15.805790: step: 88/464, loss: 0.1138177141547203 2023-01-24 04:42:16.522417: step: 90/464, loss: 0.008606791496276855 2023-01-24 04:42:17.301013: step: 92/464, loss: 0.006901235319674015 2023-01-24 04:42:17.945411: step: 94/464, loss: 0.13284118473529816 2023-01-24 04:42:18.605299: step: 96/464, loss: 0.018453076481819153 2023-01-24 04:42:19.309596: step: 98/464, loss: 0.06665834039449692 2023-01-24 04:42:20.053365: step: 100/464, loss: 0.07431719452142715 2023-01-24 04:42:20.798061: step: 102/464, loss: 0.009169948287308216 2023-01-24 04:42:21.483572: step: 104/464, loss: 0.007283048704266548 2023-01-24 04:42:22.289267: step: 106/464, loss: 0.10819932818412781 2023-01-24 04:42:23.014570: step: 108/464, loss: 0.02185155265033245 2023-01-24 04:42:23.789778: step: 110/464, loss: 0.015204259194433689 2023-01-24 04:42:24.555095: step: 112/464, loss: 0.02292870543897152 2023-01-24 04:42:25.316176: step: 114/464, loss: 0.19087494909763336 2023-01-24 04:42:26.091176: step: 116/464, loss: 0.07576505094766617 2023-01-24 04:42:26.829459: step: 118/464, loss: 0.01860167644917965 2023-01-24 04:42:27.672816: step: 120/464, loss: 0.04837913438677788 2023-01-24 04:42:28.432728: step: 122/464, loss: 0.05998101457953453 2023-01-24 04:42:29.223159: step: 124/464, loss: 0.09130522608757019 2023-01-24 04:42:29.958354: step: 126/464, loss: 0.06464896351099014 2023-01-24 04:42:30.711715: step: 128/464, loss: 0.014377767220139503 2023-01-24 04:42:31.462493: step: 130/464, loss: 0.015954501926898956 2023-01-24 04:42:32.213449: step: 132/464, loss: 0.09322734922170639 2023-01-24 04:42:32.909273: step: 134/464, loss: 0.007519495207816362 2023-01-24 04:42:33.705702: step: 136/464, loss: 0.24314019083976746 2023-01-24 04:42:34.443003: step: 138/464, loss: 0.08465724438428879 2023-01-24 04:42:35.203502: step: 140/464, loss: 0.04498489573597908 2023-01-24 04:42:35.879790: step: 142/464, loss: 0.05277256295084953 2023-01-24 04:42:36.740629: step: 144/464, loss: 0.0748797133564949 2023-01-24 04:42:37.483867: step: 146/464, loss: 0.6362795233726501 2023-01-24 04:42:38.246683: step: 148/464, loss: 0.0029451518785208464 2023-01-24 04:42:38.955016: step: 150/464, loss: 0.03429803252220154 2023-01-24 04:42:39.801179: step: 152/464, loss: 0.04802894964814186 2023-01-24 04:42:40.684529: step: 154/464, loss: 0.07614947855472565 2023-01-24 04:42:41.486664: step: 156/464, loss: 0.0460069477558136 2023-01-24 04:42:42.278034: step: 158/464, loss: 0.03649597987532616 2023-01-24 04:42:43.060903: step: 160/464, loss: 0.02964697778224945 2023-01-24 04:42:43.822100: step: 162/464, loss: 0.03786936402320862 2023-01-24 04:42:44.529210: step: 164/464, loss: 0.030483728274703026 2023-01-24 04:42:45.206591: step: 166/464, loss: 0.05337598919868469 2023-01-24 04:42:45.967891: step: 168/464, loss: 0.017475681379437447 2023-01-24 04:42:46.755510: step: 170/464, loss: 0.26207685470581055 2023-01-24 04:42:47.499001: step: 172/464, loss: 0.07319401949644089 2023-01-24 04:42:48.179541: step: 174/464, loss: 0.025556493550539017 2023-01-24 04:42:48.944121: step: 176/464, loss: 0.06023148447275162 2023-01-24 04:42:49.748144: step: 178/464, loss: 0.16737209260463715 2023-01-24 04:42:50.453239: step: 180/464, loss: 0.013148444704711437 2023-01-24 04:42:51.172074: step: 182/464, loss: 0.08468661457300186 2023-01-24 04:42:51.981646: step: 184/464, loss: 0.013178776018321514 2023-01-24 04:42:52.707383: step: 186/464, loss: 0.02558089606463909 2023-01-24 04:42:53.475852: step: 188/464, loss: 0.4905921518802643 2023-01-24 04:42:54.150738: step: 190/464, loss: 0.034917011857032776 2023-01-24 04:42:54.796907: step: 192/464, loss: 0.004073809366673231 2023-01-24 04:42:55.531893: step: 194/464, loss: 0.012788454070687294 2023-01-24 04:42:56.351515: step: 196/464, loss: 0.055027078837156296 2023-01-24 04:42:57.076007: step: 198/464, loss: 0.03156561031937599 2023-01-24 04:42:57.860471: step: 200/464, loss: 0.004597052466124296 2023-01-24 04:42:58.568505: step: 202/464, loss: 0.010899416171014309 2023-01-24 04:42:59.321932: step: 204/464, loss: 0.0004813208943232894 2023-01-24 04:43:00.047520: step: 206/464, loss: 0.012121593579649925 2023-01-24 04:43:00.807734: step: 208/464, loss: 0.031046288087964058 2023-01-24 04:43:01.588185: step: 210/464, loss: 0.0077215139754116535 2023-01-24 04:43:02.336950: step: 212/464, loss: 0.02506837248802185 2023-01-24 04:43:03.264585: step: 214/464, loss: 0.027091197669506073 2023-01-24 04:43:04.021774: step: 216/464, loss: 0.020081406459212303 2023-01-24 04:43:04.776636: step: 218/464, loss: 0.07068915665149689 2023-01-24 04:43:05.516438: step: 220/464, loss: 0.06371705234050751 2023-01-24 04:43:06.211759: step: 222/464, loss: 0.07542310655117035 2023-01-24 04:43:07.051033: step: 224/464, loss: 0.02536981925368309 2023-01-24 04:43:07.777810: step: 226/464, loss: 0.019606802612543106 2023-01-24 04:43:08.490625: step: 228/464, loss: 0.006087008863687515 2023-01-24 04:43:09.187738: step: 230/464, loss: 0.019796758890151978 2023-01-24 04:43:09.843542: step: 232/464, loss: 0.00813978910446167 2023-01-24 04:43:10.531364: step: 234/464, loss: 0.008569409139454365 2023-01-24 04:43:11.246861: step: 236/464, loss: 0.0056127277202904224 2023-01-24 04:43:12.041898: step: 238/464, loss: 0.015920937061309814 2023-01-24 04:43:12.805835: step: 240/464, loss: 0.010793288238346577 2023-01-24 04:43:13.596380: step: 242/464, loss: 0.07124904543161392 2023-01-24 04:43:14.355852: step: 244/464, loss: 0.038125790655612946 2023-01-24 04:43:15.069157: step: 246/464, loss: 0.08426562696695328 2023-01-24 04:43:15.798019: step: 248/464, loss: 0.43768274784088135 2023-01-24 04:43:16.485121: step: 250/464, loss: 0.005656179040670395 2023-01-24 04:43:17.174508: step: 252/464, loss: 0.00686601409688592 2023-01-24 04:43:17.861216: step: 254/464, loss: 0.030220884829759598 2023-01-24 04:43:18.650209: step: 256/464, loss: 0.009510435163974762 2023-01-24 04:43:19.293082: step: 258/464, loss: 0.022821389138698578 2023-01-24 04:43:19.993065: step: 260/464, loss: 0.05829579755663872 2023-01-24 04:43:20.814372: step: 262/464, loss: 0.08035297691822052 2023-01-24 04:43:21.524002: step: 264/464, loss: 0.14977802336215973 2023-01-24 04:43:22.209122: step: 266/464, loss: 0.03890952467918396 2023-01-24 04:43:22.922893: step: 268/464, loss: 0.023425551131367683 2023-01-24 04:43:23.631079: step: 270/464, loss: 0.03978399559855461 2023-01-24 04:43:24.362587: step: 272/464, loss: 0.1666889637708664 2023-01-24 04:43:25.054429: step: 274/464, loss: 0.13102814555168152 2023-01-24 04:43:25.752993: step: 276/464, loss: 0.5807995796203613 2023-01-24 04:43:26.421933: step: 278/464, loss: 0.050122637301683426 2023-01-24 04:43:27.159193: step: 280/464, loss: 0.03379981592297554 2023-01-24 04:43:27.864225: step: 282/464, loss: 0.049888014793395996 2023-01-24 04:43:28.644476: step: 284/464, loss: 0.01664917916059494 2023-01-24 04:43:29.398437: step: 286/464, loss: 0.05668970197439194 2023-01-24 04:43:30.131341: step: 288/464, loss: 0.023765837773680687 2023-01-24 04:43:30.865351: step: 290/464, loss: 0.12605422735214233 2023-01-24 04:43:31.598736: step: 292/464, loss: 0.08360307663679123 2023-01-24 04:43:32.369410: step: 294/464, loss: 0.04222423955798149 2023-01-24 04:43:33.219419: step: 296/464, loss: 0.055150359869003296 2023-01-24 04:43:34.022943: step: 298/464, loss: 0.016825374215841293 2023-01-24 04:43:34.765357: step: 300/464, loss: 0.008687654510140419 2023-01-24 04:43:35.449678: step: 302/464, loss: 0.12264332920312881 2023-01-24 04:43:36.143425: step: 304/464, loss: 0.008854144252836704 2023-01-24 04:43:36.899877: step: 306/464, loss: 0.04204096645116806 2023-01-24 04:43:37.736432: step: 308/464, loss: 0.06922618299722672 2023-01-24 04:43:38.516535: step: 310/464, loss: 0.016595102846622467 2023-01-24 04:43:39.219599: step: 312/464, loss: 0.0613955520093441 2023-01-24 04:43:39.983461: step: 314/464, loss: 0.06470753252506256 2023-01-24 04:43:40.688993: step: 316/464, loss: 0.0006224927492439747 2023-01-24 04:43:41.439898: step: 318/464, loss: 0.38640323281288147 2023-01-24 04:43:42.140358: step: 320/464, loss: 0.05243564769625664 2023-01-24 04:43:42.945099: step: 322/464, loss: 0.03029179573059082 2023-01-24 04:43:43.843913: step: 324/464, loss: 0.04183750972151756 2023-01-24 04:43:44.558497: step: 326/464, loss: 0.021579837426543236 2023-01-24 04:43:45.280436: step: 328/464, loss: 0.0030274181626737118 2023-01-24 04:43:46.016040: step: 330/464, loss: 0.016017360612750053 2023-01-24 04:43:46.840896: step: 332/464, loss: 0.1567627489566803 2023-01-24 04:43:47.545214: step: 334/464, loss: 0.009751198813319206 2023-01-24 04:43:48.335226: step: 336/464, loss: 0.2539663314819336 2023-01-24 04:43:49.020550: step: 338/464, loss: 0.0058738901279866695 2023-01-24 04:43:49.854690: step: 340/464, loss: 0.01695852354168892 2023-01-24 04:43:50.613047: step: 342/464, loss: 0.11627886444330215 2023-01-24 04:43:51.441359: step: 344/464, loss: 0.1474836766719818 2023-01-24 04:43:52.208062: step: 346/464, loss: 0.18009726703166962 2023-01-24 04:43:52.890142: step: 348/464, loss: 0.8425463438034058 2023-01-24 04:43:53.616323: step: 350/464, loss: 0.0017548745963722467 2023-01-24 04:43:54.279645: step: 352/464, loss: 0.06723642349243164 2023-01-24 04:43:55.011563: step: 354/464, loss: 0.033363692462444305 2023-01-24 04:43:55.750027: step: 356/464, loss: 0.006056908518075943 2023-01-24 04:43:56.623392: step: 358/464, loss: 0.04731690138578415 2023-01-24 04:43:57.349216: step: 360/464, loss: 0.03328216075897217 2023-01-24 04:43:58.089604: step: 362/464, loss: 0.0012911552330479026 2023-01-24 04:43:58.848803: step: 364/464, loss: 0.02457933872938156 2023-01-24 04:43:59.542422: step: 366/464, loss: 0.07838442176580429 2023-01-24 04:44:00.254227: step: 368/464, loss: 0.06474398076534271 2023-01-24 04:44:01.065377: step: 370/464, loss: 0.05870361626148224 2023-01-24 04:44:01.884354: step: 372/464, loss: 0.24338991940021515 2023-01-24 04:44:02.568612: step: 374/464, loss: 0.04443281888961792 2023-01-24 04:44:03.300407: step: 376/464, loss: 0.041696105152368546 2023-01-24 04:44:03.982564: step: 378/464, loss: 0.014436283148825169 2023-01-24 04:44:04.671363: step: 380/464, loss: 0.06830400973558426 2023-01-24 04:44:05.333240: step: 382/464, loss: 0.06629308313131332 2023-01-24 04:44:06.030682: step: 384/464, loss: 0.0009762270492501557 2023-01-24 04:44:06.683426: step: 386/464, loss: 0.07588119804859161 2023-01-24 04:44:07.425123: step: 388/464, loss: 0.03207830712199211 2023-01-24 04:44:08.144954: step: 390/464, loss: 0.23488739132881165 2023-01-24 04:44:08.951339: step: 392/464, loss: 0.024468503892421722 2023-01-24 04:44:09.749292: step: 394/464, loss: 0.03723740577697754 2023-01-24 04:44:10.504782: step: 396/464, loss: 0.08763992786407471 2023-01-24 04:44:11.174437: step: 398/464, loss: 0.06267467141151428 2023-01-24 04:44:11.932397: step: 400/464, loss: 0.02886023558676243 2023-01-24 04:44:12.672799: step: 402/464, loss: 0.057991873472929 2023-01-24 04:44:13.515853: step: 404/464, loss: 0.018807729706168175 2023-01-24 04:44:14.234283: step: 406/464, loss: 0.011335845105350018 2023-01-24 04:44:15.006587: step: 408/464, loss: 0.058162566274404526 2023-01-24 04:44:15.777103: step: 410/464, loss: 0.1280667930841446 2023-01-24 04:44:16.503055: step: 412/464, loss: 0.02704724669456482 2023-01-24 04:44:17.155223: step: 414/464, loss: 0.02012377791106701 2023-01-24 04:44:17.906721: step: 416/464, loss: 0.041829902678728104 2023-01-24 04:44:18.738914: step: 418/464, loss: 0.08959502726793289 2023-01-24 04:44:19.476151: step: 420/464, loss: 0.017190277576446533 2023-01-24 04:44:20.225432: step: 422/464, loss: 1.137946605682373 2023-01-24 04:44:20.938299: step: 424/464, loss: 0.08984936028718948 2023-01-24 04:44:21.659535: step: 426/464, loss: 0.05588805302977562 2023-01-24 04:44:22.366585: step: 428/464, loss: 0.02053980715572834 2023-01-24 04:44:23.073403: step: 430/464, loss: 0.029160495847463608 2023-01-24 04:44:23.890171: step: 432/464, loss: 0.0642860159277916 2023-01-24 04:44:24.635974: step: 434/464, loss: 0.6985005736351013 2023-01-24 04:44:25.346740: step: 436/464, loss: 0.5311950445175171 2023-01-24 04:44:26.096837: step: 438/464, loss: 0.11107250303030014 2023-01-24 04:44:26.794130: step: 440/464, loss: 0.021011127158999443 2023-01-24 04:44:27.578065: step: 442/464, loss: 0.04673737287521362 2023-01-24 04:44:28.278643: step: 444/464, loss: 0.002503372263163328 2023-01-24 04:44:28.986406: step: 446/464, loss: 0.11988291144371033 2023-01-24 04:44:29.690195: step: 448/464, loss: 0.07330343127250671 2023-01-24 04:44:30.479403: step: 450/464, loss: 0.5222264528274536 2023-01-24 04:44:31.219046: step: 452/464, loss: 0.03502650558948517 2023-01-24 04:44:31.946356: step: 454/464, loss: 0.24348917603492737 2023-01-24 04:44:32.730248: step: 456/464, loss: 0.06922262907028198 2023-01-24 04:44:33.515082: step: 458/464, loss: 0.01801128126680851 2023-01-24 04:44:34.306398: step: 460/464, loss: 0.05330865830183029 2023-01-24 04:44:35.048105: step: 462/464, loss: 0.10185433179140091 2023-01-24 04:44:35.846005: step: 464/464, loss: 0.033372409641742706 2023-01-24 04:44:36.653929: step: 466/464, loss: 0.06386049836874008 2023-01-24 04:44:37.334151: step: 468/464, loss: 0.022253213450312614 2023-01-24 04:44:38.145100: step: 470/464, loss: 0.04618272930383682 2023-01-24 04:44:38.915812: step: 472/464, loss: 0.029864918440580368 2023-01-24 04:44:39.610512: step: 474/464, loss: 0.13251470029354095 2023-01-24 04:44:40.341890: step: 476/464, loss: 0.2079554796218872 2023-01-24 04:44:41.035321: step: 478/464, loss: 0.2565006613731384 2023-01-24 04:44:41.709868: step: 480/464, loss: 0.02998241037130356 2023-01-24 04:44:42.472152: step: 482/464, loss: 0.014584019780158997 2023-01-24 04:44:43.309266: step: 484/464, loss: 0.08024832606315613 2023-01-24 04:44:44.057180: step: 486/464, loss: 0.14153648912906647 2023-01-24 04:44:44.880095: step: 488/464, loss: 0.16485629975795746 2023-01-24 04:44:45.674492: step: 490/464, loss: 0.01724920980632305 2023-01-24 04:44:46.408423: step: 492/464, loss: 0.028226202353835106 2023-01-24 04:44:47.127863: step: 494/464, loss: 0.044736597687006 2023-01-24 04:44:47.939607: step: 496/464, loss: 0.0690031349658966 2023-01-24 04:44:48.635841: step: 498/464, loss: 0.021006660535931587 2023-01-24 04:44:49.464540: step: 500/464, loss: 0.12912136316299438 2023-01-24 04:44:50.250497: step: 502/464, loss: 0.10888178646564484 2023-01-24 04:44:50.949832: step: 504/464, loss: 0.009902720339596272 2023-01-24 04:44:51.722693: step: 506/464, loss: 0.02853531204164028 2023-01-24 04:44:52.454112: step: 508/464, loss: 0.0805187001824379 2023-01-24 04:44:53.188514: step: 510/464, loss: 0.030831599608063698 2023-01-24 04:44:53.888739: step: 512/464, loss: 0.041993558406829834 2023-01-24 04:44:54.621853: step: 514/464, loss: 0.02250916138291359 2023-01-24 04:44:55.449298: step: 516/464, loss: 0.1520540416240692 2023-01-24 04:44:56.211617: step: 518/464, loss: 0.04919816926121712 2023-01-24 04:44:57.115793: step: 520/464, loss: 0.029099134728312492 2023-01-24 04:44:57.822486: step: 522/464, loss: 0.04807744175195694 2023-01-24 04:44:58.503422: step: 524/464, loss: 0.041146181523799896 2023-01-24 04:44:59.220749: step: 526/464, loss: 0.042510777711868286 2023-01-24 04:44:59.955591: step: 528/464, loss: 0.058072157204151154 2023-01-24 04:45:00.704734: step: 530/464, loss: 1.4828964471817017 2023-01-24 04:45:01.398584: step: 532/464, loss: 0.002875608392059803 2023-01-24 04:45:02.304725: step: 534/464, loss: 0.02097652293741703 2023-01-24 04:45:03.039809: step: 536/464, loss: 0.01865016296505928 2023-01-24 04:45:03.753816: step: 538/464, loss: 0.01164599135518074 2023-01-24 04:45:04.565151: step: 540/464, loss: 0.024205351248383522 2023-01-24 04:45:05.300638: step: 542/464, loss: 0.03508354350924492 2023-01-24 04:45:06.136737: step: 544/464, loss: 1.322606086730957 2023-01-24 04:45:06.850183: step: 546/464, loss: 0.07354927062988281 2023-01-24 04:45:07.602233: step: 548/464, loss: 0.0703032910823822 2023-01-24 04:45:08.405264: step: 550/464, loss: 0.016364462673664093 2023-01-24 04:45:09.160990: step: 552/464, loss: 0.010696408338844776 2023-01-24 04:45:09.871041: step: 554/464, loss: 0.03282889723777771 2023-01-24 04:45:10.561973: step: 556/464, loss: 0.004567502066493034 2023-01-24 04:45:11.327694: step: 558/464, loss: 0.2184530347585678 2023-01-24 04:45:12.117638: step: 560/464, loss: 0.016430631279945374 2023-01-24 04:45:12.947194: step: 562/464, loss: 0.1944238543510437 2023-01-24 04:45:13.683052: step: 564/464, loss: 0.030758991837501526 2023-01-24 04:45:14.434719: step: 566/464, loss: 0.046245649456977844 2023-01-24 04:45:15.186497: step: 568/464, loss: 0.03639359399676323 2023-01-24 04:45:15.817763: step: 570/464, loss: 0.03184223920106888 2023-01-24 04:45:16.486938: step: 572/464, loss: 0.0072190905921161175 2023-01-24 04:45:17.132175: step: 574/464, loss: 0.006273357663303614 2023-01-24 04:45:17.941387: step: 576/464, loss: 0.1006399616599083 2023-01-24 04:45:18.627192: step: 578/464, loss: 0.014032825827598572 2023-01-24 04:45:19.512999: step: 580/464, loss: 0.04493585601449013 2023-01-24 04:45:20.265128: step: 582/464, loss: 0.02772378921508789 2023-01-24 04:45:20.955182: step: 584/464, loss: 0.053779810667037964 2023-01-24 04:45:21.698423: step: 586/464, loss: 0.11084982752799988 2023-01-24 04:45:22.482571: step: 588/464, loss: 0.03748084232211113 2023-01-24 04:45:23.230645: step: 590/464, loss: 0.0179930217564106 2023-01-24 04:45:24.050277: step: 592/464, loss: 0.014146368019282818 2023-01-24 04:45:24.835719: step: 594/464, loss: 0.14494703710079193 2023-01-24 04:45:25.595003: step: 596/464, loss: 0.05555734410881996 2023-01-24 04:45:26.292420: step: 598/464, loss: 0.05155664309859276 2023-01-24 04:45:27.005192: step: 600/464, loss: 0.16895079612731934 2023-01-24 04:45:27.763542: step: 602/464, loss: 0.06997605413198471 2023-01-24 04:45:28.471847: step: 604/464, loss: 0.040857136249542236 2023-01-24 04:45:29.215290: step: 606/464, loss: 0.0022094054147601128 2023-01-24 04:45:30.029860: step: 608/464, loss: 0.007077968679368496 2023-01-24 04:45:30.761727: step: 610/464, loss: 0.02948709949851036 2023-01-24 04:45:31.532208: step: 612/464, loss: 0.047195546329021454 2023-01-24 04:45:32.247034: step: 614/464, loss: 0.2917841970920563 2023-01-24 04:45:33.002313: step: 616/464, loss: 0.0415961816906929 2023-01-24 04:45:33.794221: step: 618/464, loss: 0.31351780891418457 2023-01-24 04:45:34.546868: step: 620/464, loss: 0.36635974049568176 2023-01-24 04:45:35.191171: step: 622/464, loss: 0.17023654282093048 2023-01-24 04:45:35.872508: step: 624/464, loss: 0.016414647921919823 2023-01-24 04:45:36.655860: step: 626/464, loss: 0.045262303203344345 2023-01-24 04:45:37.380337: step: 628/464, loss: 0.11020675301551819 2023-01-24 04:45:38.086252: step: 630/464, loss: 0.05932263284921646 2023-01-24 04:45:38.850687: step: 632/464, loss: 0.11531080305576324 2023-01-24 04:45:39.594885: step: 634/464, loss: 0.03160162642598152 2023-01-24 04:45:40.319551: step: 636/464, loss: 0.10523614287376404 2023-01-24 04:45:41.073949: step: 638/464, loss: 0.019944677129387856 2023-01-24 04:45:41.874136: step: 640/464, loss: 0.031220460310578346 2023-01-24 04:45:42.511391: step: 642/464, loss: 0.08424553275108337 2023-01-24 04:45:43.249040: step: 644/464, loss: 0.05185442790389061 2023-01-24 04:45:43.923637: step: 646/464, loss: 0.01238182745873928 2023-01-24 04:45:44.642403: step: 648/464, loss: 0.040707577019929886 2023-01-24 04:45:45.328596: step: 650/464, loss: 0.0012632677098736167 2023-01-24 04:45:46.070294: step: 652/464, loss: 0.06378267705440521 2023-01-24 04:45:46.788570: step: 654/464, loss: 0.033059362322092056 2023-01-24 04:45:47.565031: step: 656/464, loss: 0.015181529335677624 2023-01-24 04:45:48.304828: step: 658/464, loss: 0.04422836750745773 2023-01-24 04:45:49.149205: step: 660/464, loss: 0.12794829905033112 2023-01-24 04:45:49.942812: step: 662/464, loss: 0.04287206009030342 2023-01-24 04:45:50.656662: step: 664/464, loss: 0.008199275471270084 2023-01-24 04:45:51.389414: step: 666/464, loss: 0.024731893092393875 2023-01-24 04:45:52.286277: step: 668/464, loss: 0.028729377314448357 2023-01-24 04:45:53.032129: step: 670/464, loss: 0.056502483785152435 2023-01-24 04:45:53.794581: step: 672/464, loss: 0.022548364475369453 2023-01-24 04:45:54.590215: step: 674/464, loss: 0.037794943898916245 2023-01-24 04:45:55.276754: step: 676/464, loss: 0.006393145769834518 2023-01-24 04:45:55.973900: step: 678/464, loss: 0.28734323382377625 2023-01-24 04:45:56.742032: step: 680/464, loss: 0.16981349885463715 2023-01-24 04:45:57.551639: step: 682/464, loss: 0.13790276646614075 2023-01-24 04:45:58.317073: step: 684/464, loss: 0.07101164013147354 2023-01-24 04:45:59.025560: step: 686/464, loss: 0.04143301770091057 2023-01-24 04:45:59.841645: step: 688/464, loss: 0.03418415039777756 2023-01-24 04:46:00.577973: step: 690/464, loss: 0.005767000373452902 2023-01-24 04:46:01.334826: step: 692/464, loss: 0.04734319448471069 2023-01-24 04:46:02.124528: step: 694/464, loss: 0.04089387133717537 2023-01-24 04:46:02.839052: step: 696/464, loss: 0.02467603236436844 2023-01-24 04:46:03.537934: step: 698/464, loss: 0.041893370449543 2023-01-24 04:46:04.285889: step: 700/464, loss: 0.05832946300506592 2023-01-24 04:46:05.135447: step: 702/464, loss: 0.045026276260614395 2023-01-24 04:46:05.915317: step: 704/464, loss: 0.10983053594827652 2023-01-24 04:46:06.610909: step: 706/464, loss: 0.029054909944534302 2023-01-24 04:46:07.360044: step: 708/464, loss: 0.04096385836601257 2023-01-24 04:46:08.100077: step: 710/464, loss: 0.012389479205012321 2023-01-24 04:46:08.802747: step: 712/464, loss: 0.07159966975450516 2023-01-24 04:46:09.454394: step: 714/464, loss: 0.030216261744499207 2023-01-24 04:46:10.203770: step: 716/464, loss: 0.03051423840224743 2023-01-24 04:46:10.972411: step: 718/464, loss: 0.017640970647335052 2023-01-24 04:46:11.751712: step: 720/464, loss: 0.017679141834378242 2023-01-24 04:46:12.524727: step: 722/464, loss: 0.046189967542886734 2023-01-24 04:46:13.278403: step: 724/464, loss: 0.3321578800678253 2023-01-24 04:46:14.013100: step: 726/464, loss: 0.09389912337064743 2023-01-24 04:46:14.771058: step: 728/464, loss: 0.42146536707878113 2023-01-24 04:46:15.517414: step: 730/464, loss: 0.1609586775302887 2023-01-24 04:46:16.235978: step: 732/464, loss: 0.29355260729789734 2023-01-24 04:46:16.974660: step: 734/464, loss: 0.013211609795689583 2023-01-24 04:46:17.704910: step: 736/464, loss: 0.013481264002621174 2023-01-24 04:46:18.542576: step: 738/464, loss: 0.040613461285829544 2023-01-24 04:46:19.262093: step: 740/464, loss: 0.01638202555477619 2023-01-24 04:46:20.004633: step: 742/464, loss: 0.024240298196673393 2023-01-24 04:46:20.702968: step: 744/464, loss: 0.04148198664188385 2023-01-24 04:46:21.441787: step: 746/464, loss: 0.980080783367157 2023-01-24 04:46:22.228956: step: 748/464, loss: 0.03613200783729553 2023-01-24 04:46:22.963121: step: 750/464, loss: 0.07290980219841003 2023-01-24 04:46:23.878498: step: 752/464, loss: 0.10207195580005646 2023-01-24 04:46:24.569108: step: 754/464, loss: 0.045234184712171555 2023-01-24 04:46:25.327238: step: 756/464, loss: 0.014640030451118946 2023-01-24 04:46:26.050898: step: 758/464, loss: 0.021963372826576233 2023-01-24 04:46:26.762292: step: 760/464, loss: 0.030695024877786636 2023-01-24 04:46:27.540572: step: 762/464, loss: 0.06672321259975433 2023-01-24 04:46:28.270086: step: 764/464, loss: 0.0397285558283329 2023-01-24 04:46:29.006843: step: 766/464, loss: 0.05721195042133331 2023-01-24 04:46:29.752078: step: 768/464, loss: 0.007625281810760498 2023-01-24 04:46:30.558235: step: 770/464, loss: 0.10010773688554764 2023-01-24 04:46:31.225949: step: 772/464, loss: 0.04354416951537132 2023-01-24 04:46:32.046153: step: 774/464, loss: 0.06004622206091881 2023-01-24 04:46:32.821184: step: 776/464, loss: 0.03328992426395416 2023-01-24 04:46:33.555737: step: 778/464, loss: 0.007132671307772398 2023-01-24 04:46:34.290580: step: 780/464, loss: 0.06367408484220505 2023-01-24 04:46:35.074280: step: 782/464, loss: 0.24943578243255615 2023-01-24 04:46:35.804343: step: 784/464, loss: 0.029331697151064873 2023-01-24 04:46:36.491172: step: 786/464, loss: 0.07233763486146927 2023-01-24 04:46:37.189276: step: 788/464, loss: 0.015424920246005058 2023-01-24 04:46:37.969279: step: 790/464, loss: 0.17237427830696106 2023-01-24 04:46:38.642751: step: 792/464, loss: 0.05033355578780174 2023-01-24 04:46:39.351137: step: 794/464, loss: 0.012341322377324104 2023-01-24 04:46:40.030463: step: 796/464, loss: 0.041706882417201996 2023-01-24 04:46:40.820503: step: 798/464, loss: 0.15417589247226715 2023-01-24 04:46:41.609725: step: 800/464, loss: 0.5894966125488281 2023-01-24 04:46:42.433648: step: 802/464, loss: 0.04737395420670509 2023-01-24 04:46:43.160721: step: 804/464, loss: 0.03578581288456917 2023-01-24 04:46:43.845626: step: 806/464, loss: 0.018068134784698486 2023-01-24 04:46:44.631426: step: 808/464, loss: 0.02468440867960453 2023-01-24 04:46:45.401118: step: 810/464, loss: 0.010481053963303566 2023-01-24 04:46:46.147374: step: 812/464, loss: 0.05397513136267662 2023-01-24 04:46:46.944819: step: 814/464, loss: 0.03365986421704292 2023-01-24 04:46:47.745496: step: 816/464, loss: 1.4167505502700806 2023-01-24 04:46:48.381559: step: 818/464, loss: 0.20036783814430237 2023-01-24 04:46:49.090659: step: 820/464, loss: 0.0020387363620102406 2023-01-24 04:46:49.793884: step: 822/464, loss: 0.022577812895178795 2023-01-24 04:46:50.563828: step: 824/464, loss: 0.18894566595554352 2023-01-24 04:46:51.325823: step: 826/464, loss: 0.11747203022241592 2023-01-24 04:46:51.967315: step: 828/464, loss: 0.013679565861821175 2023-01-24 04:46:52.668388: step: 830/464, loss: 0.010823186486959457 2023-01-24 04:46:53.424691: step: 832/464, loss: 0.002872373443096876 2023-01-24 04:46:54.135593: step: 834/464, loss: 0.09699154645204544 2023-01-24 04:46:54.888642: step: 836/464, loss: 0.030660971999168396 2023-01-24 04:46:55.597264: step: 838/464, loss: 0.027780331671237946 2023-01-24 04:46:56.374170: step: 840/464, loss: 0.06117891147732735 2023-01-24 04:46:57.179577: step: 842/464, loss: 0.012357320636510849 2023-01-24 04:46:57.890359: step: 844/464, loss: 0.036991577595472336 2023-01-24 04:46:58.602227: step: 846/464, loss: 0.1404614895582199 2023-01-24 04:46:59.376878: step: 848/464, loss: 0.006232497747987509 2023-01-24 04:47:00.102656: step: 850/464, loss: 0.0432085283100605 2023-01-24 04:47:00.910924: step: 852/464, loss: 0.03327338397502899 2023-01-24 04:47:01.580201: step: 854/464, loss: 0.03004441037774086 2023-01-24 04:47:02.412285: step: 856/464, loss: 0.01888202875852585 2023-01-24 04:47:03.173861: step: 858/464, loss: 0.025666790083050728 2023-01-24 04:47:03.890255: step: 860/464, loss: 0.040373627096414566 2023-01-24 04:47:04.582145: step: 862/464, loss: 0.0444427989423275 2023-01-24 04:47:05.234427: step: 864/464, loss: 0.020637089386582375 2023-01-24 04:47:05.962623: step: 866/464, loss: 0.013471740297973156 2023-01-24 04:47:06.755612: step: 868/464, loss: 0.44508418440818787 2023-01-24 04:47:07.550989: step: 870/464, loss: 0.06201205402612686 2023-01-24 04:47:08.293494: step: 872/464, loss: 0.026107273995876312 2023-01-24 04:47:08.994125: step: 874/464, loss: 0.026736147701740265 2023-01-24 04:47:09.825505: step: 876/464, loss: 0.0658494085073471 2023-01-24 04:47:10.539884: step: 878/464, loss: 0.013066594488918781 2023-01-24 04:47:11.330824: step: 880/464, loss: 0.027758102864027023 2023-01-24 04:47:12.114385: step: 882/464, loss: 0.004973025061190128 2023-01-24 04:47:12.804915: step: 884/464, loss: 0.09600269794464111 2023-01-24 04:47:13.555039: step: 886/464, loss: 0.0065295337699353695 2023-01-24 04:47:14.297175: step: 888/464, loss: 0.07899457216262817 2023-01-24 04:47:15.050369: step: 890/464, loss: 0.05921126902103424 2023-01-24 04:47:15.759184: step: 892/464, loss: 0.07933531701564789 2023-01-24 04:47:16.521165: step: 894/464, loss: 1.9494664669036865 2023-01-24 04:47:17.227403: step: 896/464, loss: 0.02592792920768261 2023-01-24 04:47:18.016061: step: 898/464, loss: 0.03737859055399895 2023-01-24 04:47:18.880007: step: 900/464, loss: 0.040085483342409134 2023-01-24 04:47:19.582187: step: 902/464, loss: 0.02060028165578842 2023-01-24 04:47:20.282600: step: 904/464, loss: 0.012544417753815651 2023-01-24 04:47:20.969360: step: 906/464, loss: 0.005425968207418919 2023-01-24 04:47:21.606691: step: 908/464, loss: 0.02438453957438469 2023-01-24 04:47:22.361444: step: 910/464, loss: 0.06317855417728424 2023-01-24 04:47:23.076791: step: 912/464, loss: 0.009412641637027264 2023-01-24 04:47:23.803932: step: 914/464, loss: 0.032311778515577316 2023-01-24 04:47:24.602693: step: 916/464, loss: 0.018643241375684738 2023-01-24 04:47:25.424955: step: 918/464, loss: 0.0377984344959259 2023-01-24 04:47:26.211903: step: 920/464, loss: 0.10418711602687836 2023-01-24 04:47:26.923278: step: 922/464, loss: 0.198570117354393 2023-01-24 04:47:27.706984: step: 924/464, loss: 1.4531371593475342 2023-01-24 04:47:28.403482: step: 926/464, loss: 0.06543616205453873 2023-01-24 04:47:29.103256: step: 928/464, loss: 0.2391623854637146 2023-01-24 04:47:29.731401: step: 930/464, loss: 0.027653774246573448 ================================================== Loss: 0.093 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3370973802744239, 'r': 0.3447732219505018, 'f1': 0.3408920975007776}, 'combined': 0.2511836507900466, 'epoch': 22} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3115003385075192, 'r': 0.2729864489788941, 'f1': 0.29097448594376596}, 'combined': 0.18071047021770728, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.310254329004329, 'r': 0.32320612262500303, 'f1': 0.31659781900255873}, 'combined': 0.2332826034755696, 'epoch': 22} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3050911537589621, 'r': 0.2646937084983881, 'f1': 0.28346035238134254}, 'combined': 0.17604379779472853, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3307524583380092, 'r': 0.3364009822944458, 'f1': 0.3335528084086038}, 'combined': 0.24577575356423437, 'epoch': 22} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32414788248918663, 'r': 0.27770376889142767, 'f1': 0.2991338095988556}, 'combined': 0.18577783964560507, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32211538461538464, 'r': 0.2392857142857143, 'f1': 0.27459016393442626}, 'combined': 0.1830601092896175, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2727272727272727, 'r': 0.391304347826087, 'f1': 0.32142857142857145}, 'combined': 0.16071428571428573, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.20689655172413793, 'f1': 0.26666666666666666}, 'combined': 0.17777777777777776, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:50:09.183994: step: 2/464, loss: 0.02068914659321308 2023-01-24 04:50:09.948427: step: 4/464, loss: 0.03398273512721062 2023-01-24 04:50:10.643726: step: 6/464, loss: 0.08365309983491898 2023-01-24 04:50:11.269401: step: 8/464, loss: 0.026360979303717613 2023-01-24 04:50:12.091625: step: 10/464, loss: 0.021755725145339966 2023-01-24 04:50:12.875948: step: 12/464, loss: 0.03131546080112457 2023-01-24 04:50:13.765084: step: 14/464, loss: 0.06308458745479584 2023-01-24 04:50:14.501542: step: 16/464, loss: 0.04444117099046707 2023-01-24 04:50:15.253164: step: 18/464, loss: 0.08508457988500595 2023-01-24 04:50:15.970019: step: 20/464, loss: 0.11405806988477707 2023-01-24 04:50:16.679261: step: 22/464, loss: 0.08450333029031754 2023-01-24 04:50:17.465767: step: 24/464, loss: 0.1825638711452484 2023-01-24 04:50:18.142215: step: 26/464, loss: 0.035793039947748184 2023-01-24 04:50:18.906525: step: 28/464, loss: 0.04304810240864754 2023-01-24 04:50:19.702273: step: 30/464, loss: 0.42393800616264343 2023-01-24 04:50:20.485612: step: 32/464, loss: 0.04675702378153801 2023-01-24 04:50:21.127306: step: 34/464, loss: 0.006157548166811466 2023-01-24 04:50:21.837442: step: 36/464, loss: 0.7356515526771545 2023-01-24 04:50:22.500746: step: 38/464, loss: 0.02841765619814396 2023-01-24 04:50:23.207374: step: 40/464, loss: 0.011313747614622116 2023-01-24 04:50:23.952169: step: 42/464, loss: 0.7958417534828186 2023-01-24 04:50:24.658618: step: 44/464, loss: 0.023113884031772614 2023-01-24 04:50:25.422636: step: 46/464, loss: 0.05851215496659279 2023-01-24 04:50:26.228987: step: 48/464, loss: 0.020469114184379578 2023-01-24 04:50:26.989218: step: 50/464, loss: 0.009623406454920769 2023-01-24 04:50:27.775075: step: 52/464, loss: 0.06048066169023514 2023-01-24 04:50:28.549873: step: 54/464, loss: 0.02682196907699108 2023-01-24 04:50:29.253638: step: 56/464, loss: 0.020548773929476738 2023-01-24 04:50:29.963522: step: 58/464, loss: 0.033835507929325104 2023-01-24 04:50:30.750264: step: 60/464, loss: 0.041417691856622696 2023-01-24 04:50:31.455902: step: 62/464, loss: 0.026796171441674232 2023-01-24 04:50:32.179344: step: 64/464, loss: 0.04955141246318817 2023-01-24 04:50:32.910438: step: 66/464, loss: 0.04345294088125229 2023-01-24 04:50:33.639552: step: 68/464, loss: 0.00819335225969553 2023-01-24 04:50:34.369894: step: 70/464, loss: 0.01806209795176983 2023-01-24 04:50:35.112517: step: 72/464, loss: 0.4654695391654968 2023-01-24 04:50:35.817850: step: 74/464, loss: 0.1681564301252365 2023-01-24 04:50:36.493324: step: 76/464, loss: 0.008412414230406284 2023-01-24 04:50:37.211468: step: 78/464, loss: 0.013876117765903473 2023-01-24 04:50:38.012482: step: 80/464, loss: 0.053247906267642975 2023-01-24 04:50:38.894671: step: 82/464, loss: 0.08832264691591263 2023-01-24 04:50:39.677891: step: 84/464, loss: 0.20598171651363373 2023-01-24 04:50:40.442511: step: 86/464, loss: 0.028211485594511032 2023-01-24 04:50:41.188442: step: 88/464, loss: 0.07934857904911041 2023-01-24 04:50:41.903480: step: 90/464, loss: 0.07944697141647339 2023-01-24 04:50:42.677308: step: 92/464, loss: 0.08435480296611786 2023-01-24 04:50:43.321273: step: 94/464, loss: 0.00021403271239250898 2023-01-24 04:50:44.022168: step: 96/464, loss: 0.022179214283823967 2023-01-24 04:50:44.769160: step: 98/464, loss: 0.06918807327747345 2023-01-24 04:50:45.513576: step: 100/464, loss: 0.3057056665420532 2023-01-24 04:50:46.253451: step: 102/464, loss: 0.03395991399884224 2023-01-24 04:50:46.927459: step: 104/464, loss: 0.0005153696401976049 2023-01-24 04:50:47.782899: step: 106/464, loss: 0.010365036316215992 2023-01-24 04:50:48.481934: step: 108/464, loss: 0.1427009552717209 2023-01-24 04:50:49.252617: step: 110/464, loss: 0.017721237614750862 2023-01-24 04:50:49.982940: step: 112/464, loss: 0.09974846243858337 2023-01-24 04:50:50.729966: step: 114/464, loss: 0.02816518023610115 2023-01-24 04:50:51.448458: step: 116/464, loss: 0.06467827409505844 2023-01-24 04:50:52.201465: step: 118/464, loss: 0.004381565377116203 2023-01-24 04:50:52.993752: step: 120/464, loss: 0.0018641493516042829 2023-01-24 04:50:53.857034: step: 122/464, loss: 0.006512850988656282 2023-01-24 04:50:54.593543: step: 124/464, loss: 0.17227263748645782 2023-01-24 04:50:55.372321: step: 126/464, loss: 0.13674922287464142 2023-01-24 04:50:56.146195: step: 128/464, loss: 0.036892298609018326 2023-01-24 04:50:56.849287: step: 130/464, loss: 0.28341665863990784 2023-01-24 04:50:57.584918: step: 132/464, loss: 0.04057060182094574 2023-01-24 04:50:58.319296: step: 134/464, loss: 0.03943055495619774 2023-01-24 04:50:59.078483: step: 136/464, loss: 0.10524342954158783 2023-01-24 04:50:59.876186: step: 138/464, loss: 0.024937961250543594 2023-01-24 04:51:00.620646: step: 140/464, loss: 1.0579050779342651 2023-01-24 04:51:01.364577: step: 142/464, loss: 0.020976925268769264 2023-01-24 04:51:02.130840: step: 144/464, loss: 0.04678136110305786 2023-01-24 04:51:02.906123: step: 146/464, loss: 0.04356539994478226 2023-01-24 04:51:03.675977: step: 148/464, loss: 0.027293233200907707 2023-01-24 04:51:04.407789: step: 150/464, loss: 0.07927026599645615 2023-01-24 04:51:05.171860: step: 152/464, loss: 0.052855201065540314 2023-01-24 04:51:05.935940: step: 154/464, loss: 0.07365193963050842 2023-01-24 04:51:06.694959: step: 156/464, loss: 0.014998635277152061 2023-01-24 04:51:07.383061: step: 158/464, loss: 0.020044660195708275 2023-01-24 04:51:08.135114: step: 160/464, loss: 0.006574376951903105 2023-01-24 04:51:08.921682: step: 162/464, loss: 0.10204543173313141 2023-01-24 04:51:09.673909: step: 164/464, loss: 0.017370322719216347 2023-01-24 04:51:10.410251: step: 166/464, loss: 0.05167322978377342 2023-01-24 04:51:11.157382: step: 168/464, loss: 0.02687511220574379 2023-01-24 04:51:11.955625: step: 170/464, loss: 0.04617585614323616 2023-01-24 04:51:12.758266: step: 172/464, loss: 0.011619649827480316 2023-01-24 04:51:13.498706: step: 174/464, loss: 0.0502808652818203 2023-01-24 04:51:14.277261: step: 176/464, loss: 0.027911610901355743 2023-01-24 04:51:15.060476: step: 178/464, loss: 0.02121800370514393 2023-01-24 04:51:15.824896: step: 180/464, loss: 0.025166582316160202 2023-01-24 04:51:16.499083: step: 182/464, loss: 0.01773116923868656 2023-01-24 04:51:17.213808: step: 184/464, loss: 0.22564925253391266 2023-01-24 04:51:17.947704: step: 186/464, loss: 0.00383604783564806 2023-01-24 04:51:18.730278: step: 188/464, loss: 0.03472012281417847 2023-01-24 04:51:19.484902: step: 190/464, loss: 0.03232753276824951 2023-01-24 04:51:20.214480: step: 192/464, loss: 0.02210436575114727 2023-01-24 04:51:20.926794: step: 194/464, loss: 0.030768616124987602 2023-01-24 04:51:21.655533: step: 196/464, loss: 0.028965983539819717 2023-01-24 04:51:22.399432: step: 198/464, loss: 0.0606737844645977 2023-01-24 04:51:23.153622: step: 200/464, loss: 0.22486914694309235 2023-01-24 04:51:23.872653: step: 202/464, loss: 0.018386442214250565 2023-01-24 04:51:24.653900: step: 204/464, loss: 0.03689342364668846 2023-01-24 04:51:25.433198: step: 206/464, loss: 0.1655503213405609 2023-01-24 04:51:26.143631: step: 208/464, loss: 0.039773356169462204 2023-01-24 04:51:26.986600: step: 210/464, loss: 0.057250939309597015 2023-01-24 04:51:27.760575: step: 212/464, loss: 0.018177485093474388 2023-01-24 04:51:28.494455: step: 214/464, loss: 0.014665749855339527 2023-01-24 04:51:29.282174: step: 216/464, loss: 0.04910998418927193 2023-01-24 04:51:30.027213: step: 218/464, loss: 0.04642369598150253 2023-01-24 04:51:30.709454: step: 220/464, loss: 0.038499731570482254 2023-01-24 04:51:31.392817: step: 222/464, loss: 0.03830535709857941 2023-01-24 04:51:32.083710: step: 224/464, loss: 0.0034814043901860714 2023-01-24 04:51:32.849163: step: 226/464, loss: 0.04010867327451706 2023-01-24 04:51:33.543192: step: 228/464, loss: 0.18558181822299957 2023-01-24 04:51:34.369699: step: 230/464, loss: 0.03923764452338219 2023-01-24 04:51:35.056255: step: 232/464, loss: 0.003573687979951501 2023-01-24 04:51:35.789082: step: 234/464, loss: 0.06191304326057434 2023-01-24 04:51:36.459102: step: 236/464, loss: 0.04891486093401909 2023-01-24 04:51:37.289781: step: 238/464, loss: 0.05465091019868851 2023-01-24 04:51:38.010058: step: 240/464, loss: 0.0303682591766119 2023-01-24 04:51:38.753737: step: 242/464, loss: 0.010735997930169106 2023-01-24 04:51:39.537194: step: 244/464, loss: 0.03328661620616913 2023-01-24 04:51:40.277594: step: 246/464, loss: 0.01912694051861763 2023-01-24 04:51:40.974715: step: 248/464, loss: 0.03149857372045517 2023-01-24 04:51:41.671760: step: 250/464, loss: 0.07606285810470581 2023-01-24 04:51:42.433527: step: 252/464, loss: 0.02646615169942379 2023-01-24 04:51:43.274239: step: 254/464, loss: 0.03380844369530678 2023-01-24 04:51:43.951855: step: 256/464, loss: 0.05554010719060898 2023-01-24 04:51:44.763806: step: 258/464, loss: 0.037372887134552 2023-01-24 04:51:45.501114: step: 260/464, loss: 0.05111650377511978 2023-01-24 04:51:46.283453: step: 262/464, loss: 0.06232859566807747 2023-01-24 04:51:47.003221: step: 264/464, loss: 0.02072969451546669 2023-01-24 04:51:47.823775: step: 266/464, loss: 0.0021110870875418186 2023-01-24 04:51:48.599373: step: 268/464, loss: 0.07640819996595383 2023-01-24 04:51:49.318422: step: 270/464, loss: 0.09810517728328705 2023-01-24 04:51:50.116600: step: 272/464, loss: 0.10910936444997787 2023-01-24 04:51:50.791998: step: 274/464, loss: 0.11092743277549744 2023-01-24 04:51:51.461349: step: 276/464, loss: 0.04382758587598801 2023-01-24 04:51:52.182875: step: 278/464, loss: 0.02791966311633587 2023-01-24 04:51:52.922898: step: 280/464, loss: 0.013097509741783142 2023-01-24 04:51:53.708306: step: 282/464, loss: 0.19788333773612976 2023-01-24 04:51:54.438592: step: 284/464, loss: 0.02221561409533024 2023-01-24 04:51:55.195457: step: 286/464, loss: 0.05585845559835434 2023-01-24 04:51:55.899274: step: 288/464, loss: 0.029758591204881668 2023-01-24 04:51:56.577657: step: 290/464, loss: 0.039395757019519806 2023-01-24 04:51:57.322100: step: 292/464, loss: 0.061954349279403687 2023-01-24 04:51:58.064059: step: 294/464, loss: 0.020964158698916435 2023-01-24 04:51:58.771703: step: 296/464, loss: 0.01524094957858324 2023-01-24 04:51:59.558483: step: 298/464, loss: 0.05274888500571251 2023-01-24 04:52:00.283875: step: 300/464, loss: 0.09849551320075989 2023-01-24 04:52:01.191626: step: 302/464, loss: 0.44920846819877625 2023-01-24 04:52:01.875199: step: 304/464, loss: 0.027357229962944984 2023-01-24 04:52:02.537607: step: 306/464, loss: 0.11521462351083755 2023-01-24 04:52:03.348615: step: 308/464, loss: 0.05012825131416321 2023-01-24 04:52:04.118787: step: 310/464, loss: 0.06689614057540894 2023-01-24 04:52:04.861615: step: 312/464, loss: 0.015322437509894371 2023-01-24 04:52:05.598660: step: 314/464, loss: 0.0024135306011885405 2023-01-24 04:52:06.353334: step: 316/464, loss: 0.04468587040901184 2023-01-24 04:52:07.092013: step: 318/464, loss: 0.11873765289783478 2023-01-24 04:52:07.819440: step: 320/464, loss: 0.019776202738285065 2023-01-24 04:52:08.557516: step: 322/464, loss: 0.05951458588242531 2023-01-24 04:52:09.237959: step: 324/464, loss: 0.0024840477854013443 2023-01-24 04:52:09.951478: step: 326/464, loss: 0.036224596202373505 2023-01-24 04:52:10.735911: step: 328/464, loss: 0.021483929827809334 2023-01-24 04:52:11.468784: step: 330/464, loss: 0.021507222205400467 2023-01-24 04:52:12.209906: step: 332/464, loss: 0.004655972123146057 2023-01-24 04:52:12.914919: step: 334/464, loss: 0.05982765182852745 2023-01-24 04:52:13.709888: step: 336/464, loss: 0.09155252575874329 2023-01-24 04:52:14.422343: step: 338/464, loss: 0.01392027921974659 2023-01-24 04:52:15.133536: step: 340/464, loss: 0.03629325330257416 2023-01-24 04:52:15.919575: step: 342/464, loss: 0.02009829320013523 2023-01-24 04:52:16.630756: step: 344/464, loss: 0.1702081263065338 2023-01-24 04:52:17.258211: step: 346/464, loss: 0.009383754804730415 2023-01-24 04:52:17.940131: step: 348/464, loss: 0.0682232603430748 2023-01-24 04:52:18.680871: step: 350/464, loss: 0.08816039562225342 2023-01-24 04:52:19.344903: step: 352/464, loss: 0.0227990560233593 2023-01-24 04:52:20.082143: step: 354/464, loss: 0.7874547243118286 2023-01-24 04:52:20.762612: step: 356/464, loss: 0.015370347537100315 2023-01-24 04:52:21.542614: step: 358/464, loss: 0.17370478808879852 2023-01-24 04:52:22.321214: step: 360/464, loss: 0.010362098924815655 2023-01-24 04:52:23.031561: step: 362/464, loss: 0.107728011906147 2023-01-24 04:52:23.766649: step: 364/464, loss: 1.1347079277038574 2023-01-24 04:52:24.481711: step: 366/464, loss: 0.06244118884205818 2023-01-24 04:52:25.267265: step: 368/464, loss: 0.021270813420414925 2023-01-24 04:52:26.023919: step: 370/464, loss: 0.042185161262750626 2023-01-24 04:52:26.784802: step: 372/464, loss: 0.022724080830812454 2023-01-24 04:52:27.557933: step: 374/464, loss: 0.3049149811267853 2023-01-24 04:52:28.242558: step: 376/464, loss: 0.04081624746322632 2023-01-24 04:52:29.012006: step: 378/464, loss: 0.019985472783446312 2023-01-24 04:52:29.748925: step: 380/464, loss: 0.013072039932012558 2023-01-24 04:52:30.474818: step: 382/464, loss: 0.007333280052989721 2023-01-24 04:52:31.316804: step: 384/464, loss: 0.051796723157167435 2023-01-24 04:52:32.067411: step: 386/464, loss: 0.013379773125052452 2023-01-24 04:52:32.796685: step: 388/464, loss: 0.01237155869603157 2023-01-24 04:52:33.526196: step: 390/464, loss: 0.05361822247505188 2023-01-24 04:52:34.308123: step: 392/464, loss: 0.044191669672727585 2023-01-24 04:52:35.059928: step: 394/464, loss: 0.07439300417900085 2023-01-24 04:52:35.820208: step: 396/464, loss: 0.10303560644388199 2023-01-24 04:52:36.523958: step: 398/464, loss: 0.04386376962065697 2023-01-24 04:52:37.246504: step: 400/464, loss: 0.02358752302825451 2023-01-24 04:52:37.958974: step: 402/464, loss: 0.025624962523579597 2023-01-24 04:52:38.684539: step: 404/464, loss: 0.13347728550434113 2023-01-24 04:52:39.419588: step: 406/464, loss: 0.011129355989396572 2023-01-24 04:52:40.119730: step: 408/464, loss: 0.11874540150165558 2023-01-24 04:52:41.058965: step: 410/464, loss: 0.03069928288459778 2023-01-24 04:52:41.883739: step: 412/464, loss: 0.06555747240781784 2023-01-24 04:52:42.609080: step: 414/464, loss: 0.11554235965013504 2023-01-24 04:52:43.385009: step: 416/464, loss: 0.056944798678159714 2023-01-24 04:52:44.142850: step: 418/464, loss: 0.06231764703989029 2023-01-24 04:52:44.903522: step: 420/464, loss: 0.006298363674432039 2023-01-24 04:52:45.725269: step: 422/464, loss: 0.20415742695331573 2023-01-24 04:52:46.487563: step: 424/464, loss: 0.021070022135972977 2023-01-24 04:52:47.232429: step: 426/464, loss: 0.007275238633155823 2023-01-24 04:52:47.919747: step: 428/464, loss: 0.0696689710021019 2023-01-24 04:52:48.556010: step: 430/464, loss: 0.06242004409432411 2023-01-24 04:52:49.321036: step: 432/464, loss: 0.019797658547759056 2023-01-24 04:52:50.029496: step: 434/464, loss: 0.029195213690400124 2023-01-24 04:52:50.885853: step: 436/464, loss: 0.29423123598098755 2023-01-24 04:52:51.607553: step: 438/464, loss: 0.03901423513889313 2023-01-24 04:52:52.308821: step: 440/464, loss: 0.02818756178021431 2023-01-24 04:52:53.058283: step: 442/464, loss: 0.09110277891159058 2023-01-24 04:52:53.801915: step: 444/464, loss: 0.5170221328735352 2023-01-24 04:52:54.538763: step: 446/464, loss: 0.0012232566950842738 2023-01-24 04:52:55.212832: step: 448/464, loss: 0.006468184292316437 2023-01-24 04:52:55.995310: step: 450/464, loss: 0.061203423887491226 2023-01-24 04:52:56.746011: step: 452/464, loss: 0.07362731546163559 2023-01-24 04:52:57.608514: step: 454/464, loss: 0.15933118760585785 2023-01-24 04:52:58.368569: step: 456/464, loss: 0.2627415955066681 2023-01-24 04:52:59.142556: step: 458/464, loss: 0.04037583991885185 2023-01-24 04:52:59.842995: step: 460/464, loss: 0.04900494962930679 2023-01-24 04:53:00.598863: step: 462/464, loss: 0.9591463208198547 2023-01-24 04:53:01.422895: step: 464/464, loss: 0.012484485283493996 2023-01-24 04:53:02.228864: step: 466/464, loss: 0.08850326389074326 2023-01-24 04:53:02.981649: step: 468/464, loss: 0.037056587636470795 2023-01-24 04:53:03.807324: step: 470/464, loss: 0.06972339749336243 2023-01-24 04:53:04.669865: step: 472/464, loss: 0.024174122139811516 2023-01-24 04:53:05.283167: step: 474/464, loss: 0.014855777844786644 2023-01-24 04:53:06.056616: step: 476/464, loss: 0.02535218745470047 2023-01-24 04:53:06.781307: step: 478/464, loss: 0.012558290734887123 2023-01-24 04:53:07.461795: step: 480/464, loss: 0.0026807389222085476 2023-01-24 04:53:08.343281: step: 482/464, loss: 0.004997937940061092 2023-01-24 04:53:09.114594: step: 484/464, loss: 0.042675137519836426 2023-01-24 04:53:09.821847: step: 486/464, loss: 0.021459804847836494 2023-01-24 04:53:10.611209: step: 488/464, loss: 0.032483913004398346 2023-01-24 04:53:11.422862: step: 490/464, loss: 0.2530280351638794 2023-01-24 04:53:12.125859: step: 492/464, loss: 0.010920085944235325 2023-01-24 04:53:12.951869: step: 494/464, loss: 0.06375552713871002 2023-01-24 04:53:13.725375: step: 496/464, loss: 0.10358325392007828 2023-01-24 04:53:14.474082: step: 498/464, loss: 0.03224386274814606 2023-01-24 04:53:15.150741: step: 500/464, loss: 0.02806994877755642 2023-01-24 04:53:15.803849: step: 502/464, loss: 0.004388626664876938 2023-01-24 04:53:16.532614: step: 504/464, loss: 0.023558881133794785 2023-01-24 04:53:17.285758: step: 506/464, loss: 0.03565063700079918 2023-01-24 04:53:18.016656: step: 508/464, loss: 0.04556173458695412 2023-01-24 04:53:18.817722: step: 510/464, loss: 0.041230447590351105 2023-01-24 04:53:19.692257: step: 512/464, loss: 0.5097260475158691 2023-01-24 04:53:20.399414: step: 514/464, loss: 0.06440841406583786 2023-01-24 04:53:21.159241: step: 516/464, loss: 0.024644505232572556 2023-01-24 04:53:21.837121: step: 518/464, loss: 0.023807339370250702 2023-01-24 04:53:22.603832: step: 520/464, loss: 0.03844551742076874 2023-01-24 04:53:23.446410: step: 522/464, loss: 0.017746197059750557 2023-01-24 04:53:24.192473: step: 524/464, loss: 0.022382527589797974 2023-01-24 04:53:24.883867: step: 526/464, loss: 0.15803313255310059 2023-01-24 04:53:25.604231: step: 528/464, loss: 0.06795371323823929 2023-01-24 04:53:26.321868: step: 530/464, loss: 0.05073828995227814 2023-01-24 04:53:27.050000: step: 532/464, loss: 0.050155382603406906 2023-01-24 04:53:27.742854: step: 534/464, loss: 0.12563583254814148 2023-01-24 04:53:28.415588: step: 536/464, loss: 0.14176763594150543 2023-01-24 04:53:29.168300: step: 538/464, loss: 0.025613034144043922 2023-01-24 04:53:29.948264: step: 540/464, loss: 0.15801124274730682 2023-01-24 04:53:30.635655: step: 542/464, loss: 0.12523072957992554 2023-01-24 04:53:31.397349: step: 544/464, loss: 0.28735101222991943 2023-01-24 04:53:32.121186: step: 546/464, loss: 0.182787224650383 2023-01-24 04:53:32.993442: step: 548/464, loss: 0.11324718594551086 2023-01-24 04:53:33.776943: step: 550/464, loss: 0.10533501207828522 2023-01-24 04:53:34.453640: step: 552/464, loss: 0.03276410326361656 2023-01-24 04:53:35.203989: step: 554/464, loss: 0.029330329969525337 2023-01-24 04:53:35.961026: step: 556/464, loss: 0.00085727364057675 2023-01-24 04:53:36.653640: step: 558/464, loss: 0.025780213996767998 2023-01-24 04:53:37.411458: step: 560/464, loss: 0.05369078367948532 2023-01-24 04:53:38.151998: step: 562/464, loss: 0.03835272043943405 2023-01-24 04:53:38.913004: step: 564/464, loss: 0.030365869402885437 2023-01-24 04:53:39.659536: step: 566/464, loss: 0.0115794837474823 2023-01-24 04:53:40.393880: step: 568/464, loss: 0.07855859398841858 2023-01-24 04:53:41.187650: step: 570/464, loss: 0.020242253318428993 2023-01-24 04:53:41.949139: step: 572/464, loss: 0.08514165133237839 2023-01-24 04:53:42.670093: step: 574/464, loss: 0.023960469290614128 2023-01-24 04:53:43.443075: step: 576/464, loss: 3.4834632873535156 2023-01-24 04:53:44.170772: step: 578/464, loss: 0.022735727950930595 2023-01-24 04:53:44.878847: step: 580/464, loss: 0.056836120784282684 2023-01-24 04:53:45.623328: step: 582/464, loss: 0.02904140204191208 2023-01-24 04:53:46.423869: step: 584/464, loss: 0.0137387840077281 2023-01-24 04:53:47.130148: step: 586/464, loss: 0.021905794739723206 2023-01-24 04:53:47.877124: step: 588/464, loss: 0.006982157006859779 2023-01-24 04:53:48.604701: step: 590/464, loss: 0.011092767119407654 2023-01-24 04:53:49.264231: step: 592/464, loss: 0.05825098976492882 2023-01-24 04:53:49.951298: step: 594/464, loss: 0.0053213657811284065 2023-01-24 04:53:50.616219: step: 596/464, loss: 0.07439672201871872 2023-01-24 04:53:51.347142: step: 598/464, loss: 0.08712119609117508 2023-01-24 04:53:52.054735: step: 600/464, loss: 0.04311757907271385 2023-01-24 04:53:52.850752: step: 602/464, loss: 0.08456127345561981 2023-01-24 04:53:53.542470: step: 604/464, loss: 0.04044141620397568 2023-01-24 04:53:54.881421: step: 606/464, loss: 0.016841372475028038 2023-01-24 04:53:55.590463: step: 608/464, loss: 0.026384098455309868 2023-01-24 04:53:56.235139: step: 610/464, loss: 0.007268472574651241 2023-01-24 04:53:56.987140: step: 612/464, loss: 0.014984131790697575 2023-01-24 04:53:57.720768: step: 614/464, loss: 0.026813920587301254 2023-01-24 04:53:58.383456: step: 616/464, loss: 0.0704665556550026 2023-01-24 04:53:59.141643: step: 618/464, loss: 0.06092187389731407 2023-01-24 04:53:59.838158: step: 620/464, loss: 0.019842494279146194 2023-01-24 04:54:00.581851: step: 622/464, loss: 0.03070417419075966 2023-01-24 04:54:01.260109: step: 624/464, loss: 0.00864504650235176 2023-01-24 04:54:02.019274: step: 626/464, loss: 0.0526597835123539 2023-01-24 04:54:02.792879: step: 628/464, loss: 0.04984492063522339 2023-01-24 04:54:03.569513: step: 630/464, loss: 0.1512703150510788 2023-01-24 04:54:04.372394: step: 632/464, loss: 0.07364907115697861 2023-01-24 04:54:05.049642: step: 634/464, loss: 0.018775172531604767 2023-01-24 04:54:05.771741: step: 636/464, loss: 0.03580787405371666 2023-01-24 04:54:06.489370: step: 638/464, loss: 0.068617083132267 2023-01-24 04:54:07.281426: step: 640/464, loss: 0.016016459092497826 2023-01-24 04:54:08.028015: step: 642/464, loss: 0.06150452792644501 2023-01-24 04:54:08.822841: step: 644/464, loss: 0.03719574585556984 2023-01-24 04:54:09.554005: step: 646/464, loss: 0.05981617048382759 2023-01-24 04:54:10.243968: step: 648/464, loss: 0.034328460693359375 2023-01-24 04:54:10.968965: step: 650/464, loss: 0.023457909002900124 2023-01-24 04:54:11.756726: step: 652/464, loss: 0.3904138207435608 2023-01-24 04:54:12.583318: step: 654/464, loss: 0.02361692301928997 2023-01-24 04:54:13.323355: step: 656/464, loss: 0.07791952788829803 2023-01-24 04:54:14.037371: step: 658/464, loss: 0.021876579150557518 2023-01-24 04:54:14.728450: step: 660/464, loss: 0.04831695184111595 2023-01-24 04:54:15.446717: step: 662/464, loss: 0.14800116419792175 2023-01-24 04:54:16.241200: step: 664/464, loss: 0.007488205097615719 2023-01-24 04:54:17.001540: step: 666/464, loss: 0.02209978550672531 2023-01-24 04:54:17.637481: step: 668/464, loss: 0.001123339869081974 2023-01-24 04:54:18.348149: step: 670/464, loss: 0.10765864700078964 2023-01-24 04:54:19.156048: step: 672/464, loss: 0.10831757634878159 2023-01-24 04:54:19.899896: step: 674/464, loss: 0.011373594403266907 2023-01-24 04:54:20.682581: step: 676/464, loss: 0.015240014530718327 2023-01-24 04:54:21.424905: step: 678/464, loss: 0.4238124191761017 2023-01-24 04:54:22.255664: step: 680/464, loss: 0.01398735772818327 2023-01-24 04:54:22.989618: step: 682/464, loss: 0.09794515371322632 2023-01-24 04:54:23.706020: step: 684/464, loss: 0.05332712456583977 2023-01-24 04:54:24.419188: step: 686/464, loss: 0.0721079632639885 2023-01-24 04:54:25.172972: step: 688/464, loss: 0.09018091857433319 2023-01-24 04:54:25.955026: step: 690/464, loss: 0.5125716924667358 2023-01-24 04:54:26.722667: step: 692/464, loss: 0.06655218452215195 2023-01-24 04:54:27.513317: step: 694/464, loss: 0.13092640042304993 2023-01-24 04:54:28.255066: step: 696/464, loss: 0.032938260585069656 2023-01-24 04:54:28.940483: step: 698/464, loss: 0.008865829557180405 2023-01-24 04:54:29.611331: step: 700/464, loss: 0.08985569328069687 2023-01-24 04:54:30.405572: step: 702/464, loss: 0.002692323410883546 2023-01-24 04:54:31.166924: step: 704/464, loss: 0.03374726325273514 2023-01-24 04:54:31.918537: step: 706/464, loss: 0.03221918269991875 2023-01-24 04:54:32.644874: step: 708/464, loss: 0.04863818734884262 2023-01-24 04:54:33.518775: step: 710/464, loss: 0.028400612995028496 2023-01-24 04:54:34.285131: step: 712/464, loss: 0.03320635110139847 2023-01-24 04:54:34.983393: step: 714/464, loss: 0.04137527197599411 2023-01-24 04:54:35.752228: step: 716/464, loss: 0.06885325163602829 2023-01-24 04:54:36.478987: step: 718/464, loss: 2.812523126602173 2023-01-24 04:54:37.179106: step: 720/464, loss: 0.05131245404481888 2023-01-24 04:54:37.947582: step: 722/464, loss: 0.03149893507361412 2023-01-24 04:54:38.741161: step: 724/464, loss: 0.023914489895105362 2023-01-24 04:54:39.564697: step: 726/464, loss: 0.10525958240032196 2023-01-24 04:54:40.337414: step: 728/464, loss: 0.015110420063138008 2023-01-24 04:54:41.068964: step: 730/464, loss: 0.0554676316678524 2023-01-24 04:54:41.743617: step: 732/464, loss: 0.047157224267721176 2023-01-24 04:54:42.545732: step: 734/464, loss: 0.12007953226566315 2023-01-24 04:54:43.329157: step: 736/464, loss: 0.05309174582362175 2023-01-24 04:54:44.090694: step: 738/464, loss: 0.011370309628546238 2023-01-24 04:54:44.859803: step: 740/464, loss: 0.05437813326716423 2023-01-24 04:54:45.599674: step: 742/464, loss: 0.02289034053683281 2023-01-24 04:54:46.404315: step: 744/464, loss: 0.03706967830657959 2023-01-24 04:54:47.148135: step: 746/464, loss: 0.11801394075155258 2023-01-24 04:54:47.857348: step: 748/464, loss: 0.019746290519833565 2023-01-24 04:54:48.762204: step: 750/464, loss: 0.014962991699576378 2023-01-24 04:54:49.540616: step: 752/464, loss: 0.07079713046550751 2023-01-24 04:54:50.271921: step: 754/464, loss: 0.07932285964488983 2023-01-24 04:54:51.042995: step: 756/464, loss: 0.03423899784684181 2023-01-24 04:54:51.779145: step: 758/464, loss: 0.0764031857252121 2023-01-24 04:54:52.430375: step: 760/464, loss: 0.08636163175106049 2023-01-24 04:54:53.165066: step: 762/464, loss: 0.021301904693245888 2023-01-24 04:54:53.943154: step: 764/464, loss: 0.017300531268119812 2023-01-24 04:54:54.681036: step: 766/464, loss: 0.029869444668293 2023-01-24 04:54:55.373210: step: 768/464, loss: 0.13179388642311096 2023-01-24 04:54:56.119583: step: 770/464, loss: 0.24576228857040405 2023-01-24 04:54:56.815704: step: 772/464, loss: 0.04601896554231644 2023-01-24 04:54:57.571764: step: 774/464, loss: 0.008786949329078197 2023-01-24 04:54:58.312427: step: 776/464, loss: 0.08430537581443787 2023-01-24 04:54:59.044422: step: 778/464, loss: 0.002615898149088025 2023-01-24 04:54:59.761562: step: 780/464, loss: 0.004254632163792849 2023-01-24 04:55:00.524160: step: 782/464, loss: 0.08581655472517014 2023-01-24 04:55:01.241022: step: 784/464, loss: 0.007409216836094856 2023-01-24 04:55:02.032898: step: 786/464, loss: 0.052561745047569275 2023-01-24 04:55:02.859882: step: 788/464, loss: 0.01425325870513916 2023-01-24 04:55:03.557678: step: 790/464, loss: 0.12579873204231262 2023-01-24 04:55:04.312193: step: 792/464, loss: 0.10305667668581009 2023-01-24 04:55:05.096333: step: 794/464, loss: 0.1198534145951271 2023-01-24 04:55:05.833393: step: 796/464, loss: 0.13133980333805084 2023-01-24 04:55:06.575330: step: 798/464, loss: 0.04805370420217514 2023-01-24 04:55:07.288158: step: 800/464, loss: 0.09546133875846863 2023-01-24 04:55:08.040328: step: 802/464, loss: 0.1074833944439888 2023-01-24 04:55:08.729218: step: 804/464, loss: 0.016533413901925087 2023-01-24 04:55:09.455768: step: 806/464, loss: 0.018085090443491936 2023-01-24 04:55:10.162337: step: 808/464, loss: 0.014931551180779934 2023-01-24 04:55:10.842499: step: 810/464, loss: 0.016278119757771492 2023-01-24 04:55:11.483132: step: 812/464, loss: 0.013235675171017647 2023-01-24 04:55:12.268952: step: 814/464, loss: 0.18394386768341064 2023-01-24 04:55:12.974554: step: 816/464, loss: 0.039952490478754044 2023-01-24 04:55:13.765864: step: 818/464, loss: 0.03714795038104057 2023-01-24 04:55:14.496211: step: 820/464, loss: 0.17323879897594452 2023-01-24 04:55:15.208901: step: 822/464, loss: 0.028041217476129532 2023-01-24 04:55:15.868407: step: 824/464, loss: 0.03805036470293999 2023-01-24 04:55:16.643309: step: 826/464, loss: 0.05020849406719208 2023-01-24 04:55:17.376184: step: 828/464, loss: 0.010961787775158882 2023-01-24 04:55:18.072775: step: 830/464, loss: 0.6328175663948059 2023-01-24 04:55:18.801605: step: 832/464, loss: 0.6535952687263489 2023-01-24 04:55:19.528881: step: 834/464, loss: 0.2235526591539383 2023-01-24 04:55:20.340416: step: 836/464, loss: 0.04729365184903145 2023-01-24 04:55:21.173698: step: 838/464, loss: 0.11663021892309189 2023-01-24 04:55:21.899007: step: 840/464, loss: 0.01592096872627735 2023-01-24 04:55:22.673882: step: 842/464, loss: 0.028326695784926414 2023-01-24 04:55:23.465325: step: 844/464, loss: 0.009754992090165615 2023-01-24 04:55:24.201803: step: 846/464, loss: 0.05774148181080818 2023-01-24 04:55:24.885445: step: 848/464, loss: 0.004699027165770531 2023-01-24 04:55:25.653529: step: 850/464, loss: 0.2547849416732788 2023-01-24 04:55:26.417334: step: 852/464, loss: 0.021360615268349648 2023-01-24 04:55:27.167105: step: 854/464, loss: 0.05056114122271538 2023-01-24 04:55:27.846683: step: 856/464, loss: 0.002268953714519739 2023-01-24 04:55:28.537263: step: 858/464, loss: 0.01847556233406067 2023-01-24 04:55:29.301640: step: 860/464, loss: 0.012106945738196373 2023-01-24 04:55:30.008733: step: 862/464, loss: 0.04129493981599808 2023-01-24 04:55:30.719193: step: 864/464, loss: 0.04316987842321396 2023-01-24 04:55:31.409581: step: 866/464, loss: 0.05905461311340332 2023-01-24 04:55:32.146814: step: 868/464, loss: 0.28730905055999756 2023-01-24 04:55:32.785922: step: 870/464, loss: 0.021385056897997856 2023-01-24 04:55:33.517673: step: 872/464, loss: 0.04858007654547691 2023-01-24 04:55:34.246607: step: 874/464, loss: 0.10534320026636124 2023-01-24 04:55:34.961519: step: 876/464, loss: 0.034968048334121704 2023-01-24 04:55:35.753027: step: 878/464, loss: 0.012991942465305328 2023-01-24 04:55:36.655657: step: 880/464, loss: 0.03125219792127609 2023-01-24 04:55:37.367153: step: 882/464, loss: 0.044226501137018204 2023-01-24 04:55:38.115006: step: 884/464, loss: 0.01766287535429001 2023-01-24 04:55:38.917493: step: 886/464, loss: 0.14172212779521942 2023-01-24 04:55:39.686087: step: 888/464, loss: 0.013373379595577717 2023-01-24 04:55:40.463467: step: 890/464, loss: 0.0199363362044096 2023-01-24 04:55:41.200125: step: 892/464, loss: 0.9622757434844971 2023-01-24 04:55:41.896303: step: 894/464, loss: 0.0009300005622208118 2023-01-24 04:55:42.758037: step: 896/464, loss: 0.013780355453491211 2023-01-24 04:55:43.535018: step: 898/464, loss: 0.01795125938951969 2023-01-24 04:55:44.273596: step: 900/464, loss: 0.15666379034519196 2023-01-24 04:55:45.039349: step: 902/464, loss: 0.09425216168165207 2023-01-24 04:55:45.792018: step: 904/464, loss: 0.04233413562178612 2023-01-24 04:55:46.547438: step: 906/464, loss: 0.07996021956205368 2023-01-24 04:55:47.406472: step: 908/464, loss: 0.03722037002444267 2023-01-24 04:55:48.129611: step: 910/464, loss: 0.020252905786037445 2023-01-24 04:55:48.847683: step: 912/464, loss: 0.06584428250789642 2023-01-24 04:55:49.557327: step: 914/464, loss: 0.36199814081192017 2023-01-24 04:55:50.404135: step: 916/464, loss: 0.0024279439821839333 2023-01-24 04:55:51.133654: step: 918/464, loss: 0.0012322930851951241 2023-01-24 04:55:51.857770: step: 920/464, loss: 0.07466727495193481 2023-01-24 04:55:52.602944: step: 922/464, loss: 0.08562754839658737 2023-01-24 04:55:53.350589: step: 924/464, loss: 0.03290723264217377 2023-01-24 04:55:54.076463: step: 926/464, loss: 0.034934818744659424 2023-01-24 04:55:54.792623: step: 928/464, loss: 0.04039369150996208 2023-01-24 04:55:55.464812: step: 930/464, loss: 0.010865515097975731 ================================================== Loss: 0.092 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3461410018552876, 'r': 0.3218388821804382, 'f1': 0.33354786806114245}, 'combined': 0.24577211330821022, 'epoch': 23} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3128959684906148, 'r': 0.26899159346525187, 'f1': 0.2892874522708129}, 'combined': 0.17966273351555748, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.333237854072628, 'r': 0.31806193661960513, 'f1': 0.3254730885408387}, 'combined': 0.23982227576693374, 'epoch': 23} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.30942131119762306, 'r': 0.26416997319638963, 'f1': 0.28501067470655256}, 'combined': 0.1770066295545958, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35085194045720364, 'r': 0.3295478378108459, 'f1': 0.3398663611082501}, 'combined': 0.2504278450271316, 'epoch': 23} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32373813027865517, 'r': 0.2729744589627714, 'f1': 0.29619700523087233}, 'combined': 0.1839539295644365, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32211538461538464, 'r': 0.2392857142857143, 'f1': 0.27459016393442626}, 'combined': 0.1830601092896175, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.325, 'r': 0.42391304347826086, 'f1': 0.36792452830188677}, 'combined': 0.18396226415094338, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5079887218045113, 'r': 0.2452359346642468, 'f1': 0.3307833537331701}, 'combined': 0.2205222358221134, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:58:36.465322: step: 2/464, loss: 0.04216277226805687 2023-01-24 04:58:37.238092: step: 4/464, loss: 0.025450855493545532 2023-01-24 04:58:37.962197: step: 6/464, loss: 1.791204810142517 2023-01-24 04:58:38.708794: step: 8/464, loss: 0.01918802782893181 2023-01-24 04:58:39.466137: step: 10/464, loss: 0.03409218788146973 2023-01-24 04:58:40.198357: step: 12/464, loss: 0.00027603565831668675 2023-01-24 04:58:40.979398: step: 14/464, loss: 0.03904130682349205 2023-01-24 04:58:41.751103: step: 16/464, loss: 0.003379831090569496 2023-01-24 04:58:42.520250: step: 18/464, loss: 0.03145407885313034 2023-01-24 04:58:43.241239: step: 20/464, loss: 0.013981604017317295 2023-01-24 04:58:43.970115: step: 22/464, loss: 0.052782196551561356 2023-01-24 04:58:44.728261: step: 24/464, loss: 0.09911604225635529 2023-01-24 04:58:45.452999: step: 26/464, loss: 0.4873526692390442 2023-01-24 04:58:46.157137: step: 28/464, loss: 0.007562725339084864 2023-01-24 04:58:46.957688: step: 30/464, loss: 0.08361497521400452 2023-01-24 04:58:47.729788: step: 32/464, loss: 0.6130539774894714 2023-01-24 04:58:48.512005: step: 34/464, loss: 0.005319906864315271 2023-01-24 04:58:49.295407: step: 36/464, loss: 0.014001567848026752 2023-01-24 04:58:50.070507: step: 38/464, loss: 0.05576007440686226 2023-01-24 04:58:50.833248: step: 40/464, loss: 0.01852346770465374 2023-01-24 04:58:51.566627: step: 42/464, loss: 0.005907487124204636 2023-01-24 04:58:52.435676: step: 44/464, loss: 0.06917471438646317 2023-01-24 04:58:53.246667: step: 46/464, loss: 0.0684736967086792 2023-01-24 04:58:54.067572: step: 48/464, loss: 0.019859906286001205 2023-01-24 04:58:54.889967: step: 50/464, loss: 0.010839635506272316 2023-01-24 04:58:55.604317: step: 52/464, loss: 0.050964005291461945 2023-01-24 04:58:56.342300: step: 54/464, loss: 0.04273327812552452 2023-01-24 04:58:57.089546: step: 56/464, loss: 0.3398931324481964 2023-01-24 04:58:57.883986: step: 58/464, loss: 0.018881939351558685 2023-01-24 04:58:58.701082: step: 60/464, loss: 0.10342434793710709 2023-01-24 04:58:59.473495: step: 62/464, loss: 0.019913988187909126 2023-01-24 04:59:00.278747: step: 64/464, loss: 0.2856569290161133 2023-01-24 04:59:01.017861: step: 66/464, loss: 0.006631938274949789 2023-01-24 04:59:01.760782: step: 68/464, loss: 0.003249118337407708 2023-01-24 04:59:02.657903: step: 70/464, loss: 0.025199271738529205 2023-01-24 04:59:03.526916: step: 72/464, loss: 0.04742187634110451 2023-01-24 04:59:04.225753: step: 74/464, loss: 0.023487098515033722 2023-01-24 04:59:04.972215: step: 76/464, loss: 0.0440724715590477 2023-01-24 04:59:05.855323: step: 78/464, loss: 0.03205224126577377 2023-01-24 04:59:06.629682: step: 80/464, loss: 0.32313501834869385 2023-01-24 04:59:07.428460: step: 82/464, loss: 0.017892520874738693 2023-01-24 04:59:08.166239: step: 84/464, loss: 0.00491137383505702 2023-01-24 04:59:08.941842: step: 86/464, loss: 0.013730145059525967 2023-01-24 04:59:09.626534: step: 88/464, loss: 0.15901567041873932 2023-01-24 04:59:10.436989: step: 90/464, loss: 0.029119687154889107 2023-01-24 04:59:11.188239: step: 92/464, loss: 0.04215722158551216 2023-01-24 04:59:11.877831: step: 94/464, loss: 0.02104552648961544 2023-01-24 04:59:12.598411: step: 96/464, loss: 0.06284292042255402 2023-01-24 04:59:13.426475: step: 98/464, loss: 0.030893992632627487 2023-01-24 04:59:14.168620: step: 100/464, loss: 0.046926919370889664 2023-01-24 04:59:14.982704: step: 102/464, loss: 0.02747052162885666 2023-01-24 04:59:15.782017: step: 104/464, loss: 0.04495851695537567 2023-01-24 04:59:16.548188: step: 106/464, loss: 0.005997125059366226 2023-01-24 04:59:17.326094: step: 108/464, loss: 0.021033380180597305 2023-01-24 04:59:18.036433: step: 110/464, loss: 0.03415670990943909 2023-01-24 04:59:18.913328: step: 112/464, loss: 0.05024714022874832 2023-01-24 04:59:19.692383: step: 114/464, loss: 0.006801267620176077 2023-01-24 04:59:20.440825: step: 116/464, loss: 0.11825337260961533 2023-01-24 04:59:21.176144: step: 118/464, loss: 0.025873728096485138 2023-01-24 04:59:21.923803: step: 120/464, loss: 0.0497831255197525 2023-01-24 04:59:22.704656: step: 122/464, loss: 0.047892551869153976 2023-01-24 04:59:23.450368: step: 124/464, loss: 0.05598433315753937 2023-01-24 04:59:24.214781: step: 126/464, loss: 0.007054745685309172 2023-01-24 04:59:24.998336: step: 128/464, loss: 0.07335645705461502 2023-01-24 04:59:25.749863: step: 130/464, loss: 0.040367502719163895 2023-01-24 04:59:26.540261: step: 132/464, loss: 0.2059478759765625 2023-01-24 04:59:27.242023: step: 134/464, loss: 0.023334026336669922 2023-01-24 04:59:28.023727: step: 136/464, loss: 0.2638107240200043 2023-01-24 04:59:28.794022: step: 138/464, loss: 0.021714024245738983 2023-01-24 04:59:29.566353: step: 140/464, loss: 0.024914352223277092 2023-01-24 04:59:30.342036: step: 142/464, loss: 0.03169897198677063 2023-01-24 04:59:31.052884: step: 144/464, loss: 0.02856578677892685 2023-01-24 04:59:31.781721: step: 146/464, loss: 0.02671072632074356 2023-01-24 04:59:32.531305: step: 148/464, loss: 0.05585016682744026 2023-01-24 04:59:33.371367: step: 150/464, loss: 0.022833239287137985 2023-01-24 04:59:34.089620: step: 152/464, loss: 0.023303162306547165 2023-01-24 04:59:34.896872: step: 154/464, loss: 0.057167548686265945 2023-01-24 04:59:35.666882: step: 156/464, loss: 0.0038631537463515997 2023-01-24 04:59:36.438518: step: 158/464, loss: 7.0057597160339355 2023-01-24 04:59:37.282689: step: 160/464, loss: 0.038201894611120224 2023-01-24 04:59:37.985460: step: 162/464, loss: 0.006435670889914036 2023-01-24 04:59:38.759765: step: 164/464, loss: 0.05948295816779137 2023-01-24 04:59:39.541483: step: 166/464, loss: 0.004582865629345179 2023-01-24 04:59:40.311166: step: 168/464, loss: 0.03444823622703552 2023-01-24 04:59:41.051306: step: 170/464, loss: 0.07894705981016159 2023-01-24 04:59:41.869315: step: 172/464, loss: 0.019771507009863853 2023-01-24 04:59:42.570759: step: 174/464, loss: 0.01344778761267662 2023-01-24 04:59:43.329483: step: 176/464, loss: 0.09892649203538895 2023-01-24 04:59:44.203698: step: 178/464, loss: 0.02090064436197281 2023-01-24 04:59:45.087358: step: 180/464, loss: 0.3622448444366455 2023-01-24 04:59:45.808226: step: 182/464, loss: 0.014399930834770203 2023-01-24 04:59:46.613731: step: 184/464, loss: 0.019762679934501648 2023-01-24 04:59:47.448840: step: 186/464, loss: 0.007910501211881638 2023-01-24 04:59:48.369194: step: 188/464, loss: 0.05793405696749687 2023-01-24 04:59:49.144771: step: 190/464, loss: 0.01847977377474308 2023-01-24 04:59:49.967855: step: 192/464, loss: 0.25239136815071106 2023-01-24 04:59:50.626975: step: 194/464, loss: 0.053811557590961456 2023-01-24 04:59:51.362042: step: 196/464, loss: 0.11339523643255234 2023-01-24 04:59:52.049625: step: 198/464, loss: 0.03743233159184456 2023-01-24 04:59:52.841004: step: 200/464, loss: 0.026974214240908623 2023-01-24 04:59:53.535025: step: 202/464, loss: 0.0056051891297101974 2023-01-24 04:59:54.288545: step: 204/464, loss: 2.888148069381714 2023-01-24 04:59:55.073002: step: 206/464, loss: 0.22232641279697418 2023-01-24 04:59:55.896512: step: 208/464, loss: 0.05838814750313759 2023-01-24 04:59:56.622739: step: 210/464, loss: 0.08797691762447357 2023-01-24 04:59:57.359886: step: 212/464, loss: 0.014637548476457596 2023-01-24 04:59:58.081370: step: 214/464, loss: 0.017743902280926704 2023-01-24 04:59:58.795392: step: 216/464, loss: 0.0002531524805817753 2023-01-24 04:59:59.508059: step: 218/464, loss: 0.0060111405327916145 2023-01-24 05:00:00.341238: step: 220/464, loss: 0.10804308950901031 2023-01-24 05:00:01.081411: step: 222/464, loss: 0.03683672472834587 2023-01-24 05:00:01.879834: step: 224/464, loss: 0.2199680656194687 2023-01-24 05:00:02.571701: step: 226/464, loss: 0.18807777762413025 2023-01-24 05:00:03.368415: step: 228/464, loss: 0.06853378564119339 2023-01-24 05:00:04.144307: step: 230/464, loss: 1.3367910385131836 2023-01-24 05:00:04.838813: step: 232/464, loss: 0.0589243546128273 2023-01-24 05:00:05.653595: step: 234/464, loss: 0.07898443192243576 2023-01-24 05:00:06.382423: step: 236/464, loss: 0.22820298373699188 2023-01-24 05:00:07.030462: step: 238/464, loss: 0.03967048227787018 2023-01-24 05:00:07.784649: step: 240/464, loss: 0.037137553095817566 2023-01-24 05:00:08.461806: step: 242/464, loss: 0.004308091476559639 2023-01-24 05:00:09.175193: step: 244/464, loss: 0.006603443995118141 2023-01-24 05:00:09.925177: step: 246/464, loss: 0.02261928655207157 2023-01-24 05:00:10.620631: step: 248/464, loss: 0.010544451884925365 2023-01-24 05:00:11.442978: step: 250/464, loss: 0.014210019260644913 2023-01-24 05:00:12.236170: step: 252/464, loss: 0.07230827957391739 2023-01-24 05:00:12.947747: step: 254/464, loss: 0.015503766015172005 2023-01-24 05:00:13.706779: step: 256/464, loss: 0.024298088625073433 2023-01-24 05:00:14.487958: step: 258/464, loss: 0.08183684200048447 2023-01-24 05:00:15.241139: step: 260/464, loss: 0.03380714729428291 2023-01-24 05:00:15.919872: step: 262/464, loss: 0.003679390996694565 2023-01-24 05:00:16.636948: step: 264/464, loss: 0.02577679231762886 2023-01-24 05:00:17.400834: step: 266/464, loss: 0.2571578919887543 2023-01-24 05:00:18.208287: step: 268/464, loss: 0.01715640164911747 2023-01-24 05:00:18.981893: step: 270/464, loss: 0.07080483436584473 2023-01-24 05:00:19.721752: step: 272/464, loss: 0.030466442927718163 2023-01-24 05:00:20.459763: step: 274/464, loss: 0.23173189163208008 2023-01-24 05:00:21.235669: step: 276/464, loss: 0.02331365831196308 2023-01-24 05:00:21.978151: step: 278/464, loss: 0.018626758828759193 2023-01-24 05:00:22.642455: step: 280/464, loss: 0.02581346221268177 2023-01-24 05:00:23.369190: step: 282/464, loss: 0.07633237540721893 2023-01-24 05:00:24.097746: step: 284/464, loss: 0.0020407966803759336 2023-01-24 05:00:24.788466: step: 286/464, loss: 0.07563275098800659 2023-01-24 05:00:25.494532: step: 288/464, loss: 0.08578566461801529 2023-01-24 05:00:26.223305: step: 290/464, loss: 0.02253068797290325 2023-01-24 05:00:26.975008: step: 292/464, loss: 0.07634179294109344 2023-01-24 05:00:27.745394: step: 294/464, loss: 0.017030959948897362 2023-01-24 05:00:28.581633: step: 296/464, loss: 0.05161159113049507 2023-01-24 05:00:29.333741: step: 298/464, loss: 0.1865466833114624 2023-01-24 05:00:30.085702: step: 300/464, loss: 0.034873880445957184 2023-01-24 05:00:30.790888: step: 302/464, loss: 0.22604385018348694 2023-01-24 05:00:31.556013: step: 304/464, loss: 0.03261805325746536 2023-01-24 05:00:32.285436: step: 306/464, loss: 0.24040238559246063 2023-01-24 05:00:33.038429: step: 308/464, loss: 0.382816880941391 2023-01-24 05:00:33.816968: step: 310/464, loss: 0.02216779626905918 2023-01-24 05:00:34.516238: step: 312/464, loss: 0.00621431227773428 2023-01-24 05:00:35.269534: step: 314/464, loss: 0.01933230273425579 2023-01-24 05:00:35.936164: step: 316/464, loss: 0.10341157764196396 2023-01-24 05:00:36.804229: step: 318/464, loss: 0.05525139346718788 2023-01-24 05:00:37.532703: step: 320/464, loss: 0.03290149196982384 2023-01-24 05:00:38.300693: step: 322/464, loss: 0.009247610345482826 2023-01-24 05:00:39.037683: step: 324/464, loss: 0.009450143203139305 2023-01-24 05:00:39.730039: step: 326/464, loss: 0.006791172549128532 2023-01-24 05:00:40.422563: step: 328/464, loss: 0.012052402831614017 2023-01-24 05:00:41.232309: step: 330/464, loss: 0.2342667579650879 2023-01-24 05:00:42.013756: step: 332/464, loss: 0.2897554039955139 2023-01-24 05:00:42.693084: step: 334/464, loss: 0.01669563166797161 2023-01-24 05:00:43.397530: step: 336/464, loss: 0.06249774247407913 2023-01-24 05:00:44.128791: step: 338/464, loss: 0.005403649061918259 2023-01-24 05:00:44.874921: step: 340/464, loss: 0.01682235673069954 2023-01-24 05:00:45.627358: step: 342/464, loss: 0.050066784024238586 2023-01-24 05:00:46.338671: step: 344/464, loss: 0.02224194072186947 2023-01-24 05:00:47.044493: step: 346/464, loss: 0.28325849771499634 2023-01-24 05:00:47.722634: step: 348/464, loss: 0.14532120525836945 2023-01-24 05:00:48.444029: step: 350/464, loss: 0.04144451767206192 2023-01-24 05:00:49.149083: step: 352/464, loss: 0.015775060281157494 2023-01-24 05:00:49.904587: step: 354/464, loss: 0.027337608858942986 2023-01-24 05:00:50.648218: step: 356/464, loss: 0.015663186088204384 2023-01-24 05:00:51.389262: step: 358/464, loss: 0.022179163992404938 2023-01-24 05:00:52.137218: step: 360/464, loss: 0.025821568444371223 2023-01-24 05:00:52.849856: step: 362/464, loss: 0.05381513386964798 2023-01-24 05:00:53.577196: step: 364/464, loss: 0.03104221634566784 2023-01-24 05:00:54.305795: step: 366/464, loss: 0.0401470810174942 2023-01-24 05:00:54.993711: step: 368/464, loss: 0.05643609166145325 2023-01-24 05:00:55.758754: step: 370/464, loss: 0.05265054106712341 2023-01-24 05:00:56.487763: step: 372/464, loss: 0.10396604984998703 2023-01-24 05:00:57.204530: step: 374/464, loss: 0.057258524000644684 2023-01-24 05:00:57.940625: step: 376/464, loss: 0.0088130421936512 2023-01-24 05:00:58.666571: step: 378/464, loss: 0.06175927817821503 2023-01-24 05:00:59.412558: step: 380/464, loss: 0.0916573703289032 2023-01-24 05:01:00.120287: step: 382/464, loss: 0.08674547076225281 2023-01-24 05:01:00.850149: step: 384/464, loss: 0.1691557765007019 2023-01-24 05:01:01.515110: step: 386/464, loss: 0.08958717435598373 2023-01-24 05:01:02.355444: step: 388/464, loss: 0.016225000843405724 2023-01-24 05:01:03.073331: step: 390/464, loss: 0.020185474306344986 2023-01-24 05:01:03.861326: step: 392/464, loss: 0.03769129142165184 2023-01-24 05:01:04.509272: step: 394/464, loss: 0.009174306876957417 2023-01-24 05:01:05.162603: step: 396/464, loss: 0.005557133350521326 2023-01-24 05:01:05.944486: step: 398/464, loss: 0.00031553933513350785 2023-01-24 05:01:06.761620: step: 400/464, loss: 0.009889748878777027 2023-01-24 05:01:07.585411: step: 402/464, loss: 0.09212450683116913 2023-01-24 05:01:08.379875: step: 404/464, loss: 0.016208630055189133 2023-01-24 05:01:09.068528: step: 406/464, loss: 0.0028690879698842764 2023-01-24 05:01:09.924022: step: 408/464, loss: 0.04372553899884224 2023-01-24 05:01:10.640615: step: 410/464, loss: 0.042513079941272736 2023-01-24 05:01:11.315777: step: 412/464, loss: 0.04648435860872269 2023-01-24 05:01:12.092422: step: 414/464, loss: 0.0648239478468895 2023-01-24 05:01:12.823460: step: 416/464, loss: 0.03131159394979477 2023-01-24 05:01:13.561075: step: 418/464, loss: 0.04443042725324631 2023-01-24 05:01:14.332896: step: 420/464, loss: 0.029055485501885414 2023-01-24 05:01:15.124792: step: 422/464, loss: 0.04159461706876755 2023-01-24 05:01:15.913873: step: 424/464, loss: 0.5957244634628296 2023-01-24 05:01:16.720311: step: 426/464, loss: 0.09278106689453125 2023-01-24 05:01:17.503653: step: 428/464, loss: 0.018769947811961174 2023-01-24 05:01:18.338352: step: 430/464, loss: 0.35789287090301514 2023-01-24 05:01:19.035835: step: 432/464, loss: 0.061263084411621094 2023-01-24 05:01:19.772893: step: 434/464, loss: 0.11250555515289307 2023-01-24 05:01:20.472780: step: 436/464, loss: 0.016005242243409157 2023-01-24 05:01:21.153615: step: 438/464, loss: 0.024398449808359146 2023-01-24 05:01:21.815175: step: 440/464, loss: 0.053523093461990356 2023-01-24 05:01:22.573292: step: 442/464, loss: 0.09610924869775772 2023-01-24 05:01:23.292324: step: 444/464, loss: 0.018684133887290955 2023-01-24 05:01:23.989751: step: 446/464, loss: 0.2663451135158539 2023-01-24 05:01:24.715567: step: 448/464, loss: 0.01431179791688919 2023-01-24 05:01:25.398082: step: 450/464, loss: 0.03379271551966667 2023-01-24 05:01:26.185481: step: 452/464, loss: 0.07740333676338196 2023-01-24 05:01:26.988803: step: 454/464, loss: 0.03083229996263981 2023-01-24 05:01:27.677935: step: 456/464, loss: 0.10461738705635071 2023-01-24 05:01:28.395409: step: 458/464, loss: 0.4135224223136902 2023-01-24 05:01:29.093056: step: 460/464, loss: 0.28587913513183594 2023-01-24 05:01:29.888555: step: 462/464, loss: 0.3790675103664398 2023-01-24 05:01:30.657311: step: 464/464, loss: 0.10238826274871826 2023-01-24 05:01:31.359280: step: 466/464, loss: 0.00831583235412836 2023-01-24 05:01:32.165333: step: 468/464, loss: 0.3245208263397217 2023-01-24 05:01:32.860621: step: 470/464, loss: 0.0015388050815090537 2023-01-24 05:01:33.605398: step: 472/464, loss: 0.12672469019889832 2023-01-24 05:01:34.349494: step: 474/464, loss: 1.2395349740982056 2023-01-24 05:01:35.090477: step: 476/464, loss: 16.51024055480957 2023-01-24 05:01:35.924198: step: 478/464, loss: 0.003449058858677745 2023-01-24 05:01:36.645334: step: 480/464, loss: 0.009907940402626991 2023-01-24 05:01:37.448239: step: 482/464, loss: 0.8217645287513733 2023-01-24 05:01:38.240434: step: 484/464, loss: 0.1157865822315216 2023-01-24 05:01:39.012785: step: 486/464, loss: 0.11795174330472946 2023-01-24 05:01:39.779526: step: 488/464, loss: 0.03714916482567787 2023-01-24 05:01:40.584777: step: 490/464, loss: 0.2336808443069458 2023-01-24 05:01:41.292277: step: 492/464, loss: 0.19613677263259888 2023-01-24 05:01:42.118344: step: 494/464, loss: 0.07602205127477646 2023-01-24 05:01:42.900177: step: 496/464, loss: 0.03136870637536049 2023-01-24 05:01:43.590682: step: 498/464, loss: 1.29381263256073 2023-01-24 05:01:44.327339: step: 500/464, loss: 0.019961200654506683 2023-01-24 05:01:45.056810: step: 502/464, loss: 6.027398109436035 2023-01-24 05:01:45.865538: step: 504/464, loss: 2.7483906745910645 2023-01-24 05:01:46.621902: step: 506/464, loss: 0.21104967594146729 2023-01-24 05:01:47.369346: step: 508/464, loss: 0.13606393337249756 2023-01-24 05:01:48.152640: step: 510/464, loss: 4.976814270019531 2023-01-24 05:01:48.861461: step: 512/464, loss: 0.10234958678483963 2023-01-24 05:01:49.595804: step: 514/464, loss: 0.05969827249646187 2023-01-24 05:01:50.359112: step: 516/464, loss: 1.065191626548767 2023-01-24 05:01:51.069764: step: 518/464, loss: 0.04677828773856163 2023-01-24 05:01:51.852391: step: 520/464, loss: 0.24411696195602417 2023-01-24 05:01:52.559442: step: 522/464, loss: 0.11171295493841171 2023-01-24 05:01:53.270170: step: 524/464, loss: 0.0074959914200007915 2023-01-24 05:01:54.126443: step: 526/464, loss: 0.9222694039344788 2023-01-24 05:01:54.910900: step: 528/464, loss: 0.0855538472533226 2023-01-24 05:01:55.603864: step: 530/464, loss: 0.011772975325584412 2023-01-24 05:01:56.297537: step: 532/464, loss: 0.09362389147281647 2023-01-24 05:01:57.032498: step: 534/464, loss: 0.016605759039521217 2023-01-24 05:01:57.838131: step: 536/464, loss: 2.961850881576538 2023-01-24 05:01:58.467130: step: 538/464, loss: 0.00629340810701251 2023-01-24 05:01:59.225654: step: 540/464, loss: 0.01514369435608387 2023-01-24 05:02:00.065666: step: 542/464, loss: 0.875588595867157 2023-01-24 05:02:00.834411: step: 544/464, loss: 0.009261378087103367 2023-01-24 05:02:01.537466: step: 546/464, loss: 0.021717533469200134 2023-01-24 05:02:02.333988: step: 548/464, loss: 0.1673237681388855 2023-01-24 05:02:03.022251: step: 550/464, loss: 0.05481730401515961 2023-01-24 05:02:03.849986: step: 552/464, loss: 0.06743796169757843 2023-01-24 05:02:04.684456: step: 554/464, loss: 0.5204348564147949 2023-01-24 05:02:05.392285: step: 556/464, loss: 0.09271502494812012 2023-01-24 05:02:06.141540: step: 558/464, loss: 0.4915308654308319 2023-01-24 05:02:06.824058: step: 560/464, loss: 0.18924763798713684 2023-01-24 05:02:07.519303: step: 562/464, loss: 0.01631811447441578 2023-01-24 05:02:08.319343: step: 564/464, loss: 0.00829684641212225 2023-01-24 05:02:08.996384: step: 566/464, loss: 0.016613049432635307 2023-01-24 05:02:09.762908: step: 568/464, loss: 0.01600293442606926 2023-01-24 05:02:10.417941: step: 570/464, loss: 0.1619260311126709 2023-01-24 05:02:11.166814: step: 572/464, loss: 0.05017637461423874 2023-01-24 05:02:11.887946: step: 574/464, loss: 0.017390595749020576 2023-01-24 05:02:12.644498: step: 576/464, loss: 0.007992753759026527 2023-01-24 05:02:13.411264: step: 578/464, loss: 0.09127692133188248 2023-01-24 05:02:14.112909: step: 580/464, loss: 0.024530908092856407 2023-01-24 05:02:14.906725: step: 582/464, loss: 0.2850531041622162 2023-01-24 05:02:15.643468: step: 584/464, loss: 0.02502652071416378 2023-01-24 05:02:16.423332: step: 586/464, loss: 0.020360369235277176 2023-01-24 05:02:17.156010: step: 588/464, loss: 0.047007378190755844 2023-01-24 05:02:17.867768: step: 590/464, loss: 0.005273542366921902 2023-01-24 05:02:18.635100: step: 592/464, loss: 0.0321476086974144 2023-01-24 05:02:19.323730: step: 594/464, loss: 0.023537082597613335 2023-01-24 05:02:20.005172: step: 596/464, loss: 0.021900134161114693 2023-01-24 05:02:20.767696: step: 598/464, loss: 0.06477120518684387 2023-01-24 05:02:21.505708: step: 600/464, loss: 0.022067628800868988 2023-01-24 05:02:22.186520: step: 602/464, loss: 0.02224505878984928 2023-01-24 05:02:23.047653: step: 604/464, loss: 0.0633058026432991 2023-01-24 05:02:23.844929: step: 606/464, loss: 0.0420142337679863 2023-01-24 05:02:24.521167: step: 608/464, loss: 0.023906320333480835 2023-01-24 05:02:25.211329: step: 610/464, loss: 0.19567719101905823 2023-01-24 05:02:25.939086: step: 612/464, loss: 0.006387477740645409 2023-01-24 05:02:26.705232: step: 614/464, loss: 0.00764105562120676 2023-01-24 05:02:27.404977: step: 616/464, loss: 0.07847554236650467 2023-01-24 05:02:28.115010: step: 618/464, loss: 0.007820271886885166 2023-01-24 05:02:28.816928: step: 620/464, loss: 0.06269571930170059 2023-01-24 05:02:29.578574: step: 622/464, loss: 0.02810000814497471 2023-01-24 05:02:30.423282: step: 624/464, loss: 0.011240391060709953 2023-01-24 05:02:31.157100: step: 626/464, loss: 0.026797929778695107 2023-01-24 05:02:31.884982: step: 628/464, loss: 0.11885682493448257 2023-01-24 05:02:32.612214: step: 630/464, loss: 0.038971733301877975 2023-01-24 05:02:33.382962: step: 632/464, loss: 0.014642666094005108 2023-01-24 05:02:34.179587: step: 634/464, loss: 0.08038690686225891 2023-01-24 05:02:34.933805: step: 636/464, loss: 0.3002861738204956 2023-01-24 05:02:35.712510: step: 638/464, loss: 0.10866020619869232 2023-01-24 05:02:36.384831: step: 640/464, loss: 0.031055618077516556 2023-01-24 05:02:37.132918: step: 642/464, loss: 0.8886435031890869 2023-01-24 05:02:37.879158: step: 644/464, loss: 0.3501538336277008 2023-01-24 05:02:38.651789: step: 646/464, loss: 0.040703218430280685 2023-01-24 05:02:39.460743: step: 648/464, loss: 0.027664896100759506 2023-01-24 05:02:40.217189: step: 650/464, loss: 0.0026877825148403645 2023-01-24 05:02:40.960881: step: 652/464, loss: 0.03684217855334282 2023-01-24 05:02:41.722817: step: 654/464, loss: 0.050308216363191605 2023-01-24 05:02:42.479404: step: 656/464, loss: 0.046196311712265015 2023-01-24 05:02:43.216800: step: 658/464, loss: 0.01166035607457161 2023-01-24 05:02:43.939821: step: 660/464, loss: 0.08269614726305008 2023-01-24 05:02:44.650675: step: 662/464, loss: 0.035362984985113144 2023-01-24 05:02:45.433594: step: 664/464, loss: 0.0011275168508291245 2023-01-24 05:02:46.127285: step: 666/464, loss: 0.000661600090097636 2023-01-24 05:02:46.888281: step: 668/464, loss: 0.053751636296510696 2023-01-24 05:02:47.579452: step: 670/464, loss: 0.024816978722810745 2023-01-24 05:02:48.303394: step: 672/464, loss: 0.0922572910785675 2023-01-24 05:02:49.076137: step: 674/464, loss: 0.04353713616728783 2023-01-24 05:02:49.848072: step: 676/464, loss: 0.00623973598703742 2023-01-24 05:02:50.525056: step: 678/464, loss: 0.007656637113541365 2023-01-24 05:02:51.309625: step: 680/464, loss: 0.11449252814054489 2023-01-24 05:02:52.096818: step: 682/464, loss: 0.023578859865665436 2023-01-24 05:02:52.830711: step: 684/464, loss: 0.0682889074087143 2023-01-24 05:02:53.583062: step: 686/464, loss: 0.02251061610877514 2023-01-24 05:02:54.331662: step: 688/464, loss: 0.1295761913061142 2023-01-24 05:02:54.995278: step: 690/464, loss: 0.008971642702817917 2023-01-24 05:02:55.671597: step: 692/464, loss: 0.02738947980105877 2023-01-24 05:02:56.286108: step: 694/464, loss: 0.016341639682650566 2023-01-24 05:02:57.027060: step: 696/464, loss: 0.014974063262343407 2023-01-24 05:02:57.736645: step: 698/464, loss: 0.0013749344507232308 2023-01-24 05:02:58.473395: step: 700/464, loss: 0.016520194709300995 2023-01-24 05:02:59.307705: step: 702/464, loss: 0.052617549896240234 2023-01-24 05:03:00.041508: step: 704/464, loss: 0.024146053940057755 2023-01-24 05:03:00.888913: step: 706/464, loss: 0.026096755638718605 2023-01-24 05:03:01.610560: step: 708/464, loss: 0.014157273806631565 2023-01-24 05:03:02.363182: step: 710/464, loss: 0.04489751532673836 2023-01-24 05:03:03.150093: step: 712/464, loss: 0.2788737416267395 2023-01-24 05:03:03.902713: step: 714/464, loss: 0.012423225678503513 2023-01-24 05:03:04.641329: step: 716/464, loss: 0.05163278803229332 2023-01-24 05:03:05.359933: step: 718/464, loss: 0.03783539682626724 2023-01-24 05:03:06.233099: step: 720/464, loss: 0.007425676565617323 2023-01-24 05:03:06.930852: step: 722/464, loss: 0.03953913599252701 2023-01-24 05:03:07.705540: step: 724/464, loss: 0.22150929272174835 2023-01-24 05:03:08.473750: step: 726/464, loss: 0.010052965953946114 2023-01-24 05:03:09.215551: step: 728/464, loss: 0.051495082676410675 2023-01-24 05:03:10.000418: step: 730/464, loss: 0.05625370517373085 2023-01-24 05:03:10.714991: step: 732/464, loss: 0.06636309623718262 2023-01-24 05:03:11.450873: step: 734/464, loss: 0.07243285328149796 2023-01-24 05:03:12.351694: step: 736/464, loss: 0.13123223185539246 2023-01-24 05:03:13.108522: step: 738/464, loss: 0.04448264464735985 2023-01-24 05:03:13.896579: step: 740/464, loss: 0.05884762108325958 2023-01-24 05:03:14.612867: step: 742/464, loss: 0.02446148730814457 2023-01-24 05:03:15.414139: step: 744/464, loss: 0.0030263373628258705 2023-01-24 05:03:16.211672: step: 746/464, loss: 0.04585908725857735 2023-01-24 05:03:16.911052: step: 748/464, loss: 0.06436149030923843 2023-01-24 05:03:17.587118: step: 750/464, loss: 0.0034831571392714977 2023-01-24 05:03:18.330680: step: 752/464, loss: 0.0066585722379386425 2023-01-24 05:03:19.080861: step: 754/464, loss: 0.08963311463594437 2023-01-24 05:03:19.770424: step: 756/464, loss: 0.028069909662008286 2023-01-24 05:03:20.453179: step: 758/464, loss: 0.03629578277468681 2023-01-24 05:03:21.183023: step: 760/464, loss: 0.09160567820072174 2023-01-24 05:03:21.873644: step: 762/464, loss: 0.03636811301112175 2023-01-24 05:03:22.630714: step: 764/464, loss: 0.012255738489329815 2023-01-24 05:03:23.472864: step: 766/464, loss: 0.07985013723373413 2023-01-24 05:03:24.239415: step: 768/464, loss: 0.011398863978683949 2023-01-24 05:03:24.976561: step: 770/464, loss: 0.10022206604480743 2023-01-24 05:03:25.743140: step: 772/464, loss: 0.2913207709789276 2023-01-24 05:03:26.458230: step: 774/464, loss: 0.016932744532823563 2023-01-24 05:03:27.215169: step: 776/464, loss: 0.006578746717423201 2023-01-24 05:03:27.854391: step: 778/464, loss: 0.423820823431015 2023-01-24 05:03:28.499403: step: 780/464, loss: 0.027954528108239174 2023-01-24 05:03:29.251407: step: 782/464, loss: 0.34176504611968994 2023-01-24 05:03:30.019785: step: 784/464, loss: 0.47238412499427795 2023-01-24 05:03:30.766072: step: 786/464, loss: 0.019191065803170204 2023-01-24 05:03:31.549634: step: 788/464, loss: 0.006832434795796871 2023-01-24 05:03:32.318037: step: 790/464, loss: 0.051002223044633865 2023-01-24 05:03:32.971152: step: 792/464, loss: 0.009259670041501522 2023-01-24 05:03:33.642843: step: 794/464, loss: 0.02580624632537365 2023-01-24 05:03:34.300858: step: 796/464, loss: 0.057828933000564575 2023-01-24 05:03:35.056701: step: 798/464, loss: 0.05048080533742905 2023-01-24 05:03:35.818749: step: 800/464, loss: 0.06187806278467178 2023-01-24 05:03:36.659344: step: 802/464, loss: 0.04779934138059616 2023-01-24 05:03:37.391554: step: 804/464, loss: 0.04217812046408653 2023-01-24 05:03:38.115198: step: 806/464, loss: 0.03599544242024422 2023-01-24 05:03:38.850251: step: 808/464, loss: 0.0035362495109438896 2023-01-24 05:03:39.533290: step: 810/464, loss: 0.019715817645192146 2023-01-24 05:03:40.234855: step: 812/464, loss: 0.026319032534956932 2023-01-24 05:03:41.009496: step: 814/464, loss: 0.1430322378873825 2023-01-24 05:03:41.832261: step: 816/464, loss: 0.02330571413040161 2023-01-24 05:03:42.588843: step: 818/464, loss: 0.01277852151542902 2023-01-24 05:03:43.251194: step: 820/464, loss: 0.0470539890229702 2023-01-24 05:03:43.946632: step: 822/464, loss: 0.02166724018752575 2023-01-24 05:03:44.635084: step: 824/464, loss: 0.0243095550686121 2023-01-24 05:03:45.390196: step: 826/464, loss: 0.007452270481735468 2023-01-24 05:03:46.014321: step: 828/464, loss: 0.04806208238005638 2023-01-24 05:03:46.759037: step: 830/464, loss: 0.0014277611626312137 2023-01-24 05:03:47.575184: step: 832/464, loss: 0.051559727638959885 2023-01-24 05:03:48.304199: step: 834/464, loss: 0.017420530319213867 2023-01-24 05:03:49.129526: step: 836/464, loss: 0.048703331500291824 2023-01-24 05:03:49.816243: step: 838/464, loss: 0.539999783039093 2023-01-24 05:03:50.574889: step: 840/464, loss: 0.05690797418355942 2023-01-24 05:03:51.324995: step: 842/464, loss: 0.03980085253715515 2023-01-24 05:03:52.020195: step: 844/464, loss: 0.4376107156276703 2023-01-24 05:03:52.823793: step: 846/464, loss: 0.02877359464764595 2023-01-24 05:03:53.718640: step: 848/464, loss: 0.006814028136432171 2023-01-24 05:03:54.388512: step: 850/464, loss: 0.03682967647910118 2023-01-24 05:03:55.100533: step: 852/464, loss: 0.048403650522232056 2023-01-24 05:03:55.944450: step: 854/464, loss: 0.009567310102283955 2023-01-24 05:03:56.768002: step: 856/464, loss: 0.010289072059094906 2023-01-24 05:03:57.531842: step: 858/464, loss: 0.025864794850349426 2023-01-24 05:03:58.211252: step: 860/464, loss: 0.003975290339440107 2023-01-24 05:03:59.019607: step: 862/464, loss: 0.2906065285205841 2023-01-24 05:03:59.753368: step: 864/464, loss: 0.04651524871587753 2023-01-24 05:04:00.474793: step: 866/464, loss: 0.020465346053242683 2023-01-24 05:04:01.181159: step: 868/464, loss: 0.07506394386291504 2023-01-24 05:04:01.896237: step: 870/464, loss: 0.0477907694876194 2023-01-24 05:04:02.625203: step: 872/464, loss: 0.03773835673928261 2023-01-24 05:04:03.379500: step: 874/464, loss: 0.01569550670683384 2023-01-24 05:04:04.103008: step: 876/464, loss: 0.024225924164056778 2023-01-24 05:04:04.929366: step: 878/464, loss: 0.03536829352378845 2023-01-24 05:04:05.617051: step: 880/464, loss: 0.01074038352817297 2023-01-24 05:04:06.332350: step: 882/464, loss: 0.010568689554929733 2023-01-24 05:04:07.084024: step: 884/464, loss: 0.05296974256634712 2023-01-24 05:04:07.827140: step: 886/464, loss: 0.027974413707852364 2023-01-24 05:04:08.631558: step: 888/464, loss: 0.00846959464251995 2023-01-24 05:04:09.347106: step: 890/464, loss: 0.1548718512058258 2023-01-24 05:04:10.038819: step: 892/464, loss: 0.013443255797028542 2023-01-24 05:04:10.725368: step: 894/464, loss: 0.011265892535448074 2023-01-24 05:04:11.521520: step: 896/464, loss: 0.042480919510126114 2023-01-24 05:04:12.306205: step: 898/464, loss: 0.3838881254196167 2023-01-24 05:04:13.004093: step: 900/464, loss: 0.010052017867565155 2023-01-24 05:04:13.732620: step: 902/464, loss: 0.1091848760843277 2023-01-24 05:04:14.466025: step: 904/464, loss: 0.004417903255671263 2023-01-24 05:04:15.157249: step: 906/464, loss: 0.034344952553510666 2023-01-24 05:04:15.872698: step: 908/464, loss: 0.008729771710932255 2023-01-24 05:04:16.595131: step: 910/464, loss: 0.19502171874046326 2023-01-24 05:04:17.293172: step: 912/464, loss: 0.029527118429541588 2023-01-24 05:04:18.160260: step: 914/464, loss: 0.01878412254154682 2023-01-24 05:04:18.912680: step: 916/464, loss: 0.15181230008602142 2023-01-24 05:04:19.652827: step: 918/464, loss: 0.004631921648979187 2023-01-24 05:04:20.423079: step: 920/464, loss: 0.04741932079195976 2023-01-24 05:04:21.229705: step: 922/464, loss: 0.0586797297000885 2023-01-24 05:04:21.969932: step: 924/464, loss: 0.05027880519628525 2023-01-24 05:04:22.590979: step: 926/464, loss: 0.18030577898025513 2023-01-24 05:04:23.424216: step: 928/464, loss: 0.004381497856229544 2023-01-24 05:04:24.152500: step: 930/464, loss: 0.010361275635659695 ================================================== Loss: 0.184 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34602455689802003, 'r': 0.3204174265013923, 'f1': 0.33272903205169213}, 'combined': 0.24516876045914154, 'epoch': 24} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3211916436679063, 'r': 0.2670261889665138, 'f1': 0.29161503027683905}, 'combined': 0.18110828196140533, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3256896958640574, 'r': 0.31456556962961135, 'f1': 0.320030994584566}, 'combined': 0.2358123117991539, 'epoch': 24} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3142045130064909, 'r': 0.2624568269393469, 'f1': 0.286008836664694}, 'combined': 0.17762654066544156, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3542049966392071, 'r': 0.32328767245438067, 'f1': 0.33804087972908453}, 'combined': 0.24908275348458858, 'epoch': 24} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3359795641603727, 'r': 0.2756755398238956, 'f1': 0.3028548183980824}, 'combined': 0.18808878195249332, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3605769230769231, 'r': 0.26785714285714285, 'f1': 0.3073770491803278}, 'combined': 0.2049180327868852, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28125, 'r': 0.391304347826087, 'f1': 0.3272727272727273}, 'combined': 0.16363636363636366, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5096153846153846, 'r': 0.22844827586206898, 'f1': 0.31547619047619047}, 'combined': 0.2103174603174603, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:07:07.340725: step: 2/464, loss: 0.0419095940887928 2023-01-24 05:07:08.079666: step: 4/464, loss: 0.033328909426927567 2023-01-24 05:07:08.799684: step: 6/464, loss: 0.018745923414826393 2023-01-24 05:07:09.540229: step: 8/464, loss: 0.029888661578297615 2023-01-24 05:07:10.286540: step: 10/464, loss: 0.06249646469950676 2023-01-24 05:07:11.052345: step: 12/464, loss: 0.06099044531583786 2023-01-24 05:07:11.747080: step: 14/464, loss: 0.0551176480948925 2023-01-24 05:07:12.460193: step: 16/464, loss: 0.051852885633707047 2023-01-24 05:07:13.193292: step: 18/464, loss: 0.015076231211423874 2023-01-24 05:07:14.039506: step: 20/464, loss: 0.0029031329322606325 2023-01-24 05:07:14.769724: step: 22/464, loss: 0.018571581691503525 2023-01-24 05:07:15.588239: step: 24/464, loss: 0.03045966476202011 2023-01-24 05:07:16.256362: step: 26/464, loss: 0.047910746186971664 2023-01-24 05:07:17.096209: step: 28/464, loss: 0.016626769676804543 2023-01-24 05:07:17.756435: step: 30/464, loss: 0.05166122689843178 2023-01-24 05:07:18.553309: step: 32/464, loss: 0.020833736285567284 2023-01-24 05:07:19.284417: step: 34/464, loss: 0.04184484854340553 2023-01-24 05:07:20.220949: step: 36/464, loss: 0.0944288820028305 2023-01-24 05:07:20.910748: step: 38/464, loss: 0.0076257940381765366 2023-01-24 05:07:21.659193: step: 40/464, loss: 0.0417209193110466 2023-01-24 05:07:22.374716: step: 42/464, loss: 0.026369836181402206 2023-01-24 05:07:23.225046: step: 44/464, loss: 0.021718092262744904 2023-01-24 05:07:24.001270: step: 46/464, loss: 0.021722707897424698 2023-01-24 05:07:24.710746: step: 48/464, loss: 0.08610419183969498 2023-01-24 05:07:25.507239: step: 50/464, loss: 0.01406539510935545 2023-01-24 05:07:26.209746: step: 52/464, loss: 0.009583326987922192 2023-01-24 05:07:27.005070: step: 54/464, loss: 0.0708504468202591 2023-01-24 05:07:27.706218: step: 56/464, loss: 0.023223470896482468 2023-01-24 05:07:28.423861: step: 58/464, loss: 0.026299627497792244 2023-01-24 05:07:29.142991: step: 60/464, loss: 0.47032177448272705 2023-01-24 05:07:29.913138: step: 62/464, loss: 0.017070485278964043 2023-01-24 05:07:30.633525: step: 64/464, loss: 0.24147924780845642 2023-01-24 05:07:31.402301: step: 66/464, loss: 0.05244443193078041 2023-01-24 05:07:32.130408: step: 68/464, loss: 0.030533207580447197 2023-01-24 05:07:32.870443: step: 70/464, loss: 0.014197474345564842 2023-01-24 05:07:33.601449: step: 72/464, loss: 0.06051541492342949 2023-01-24 05:07:34.322681: step: 74/464, loss: 0.03234916180372238 2023-01-24 05:07:35.043856: step: 76/464, loss: 0.011735406704246998 2023-01-24 05:07:35.779629: step: 78/464, loss: 0.03225342929363251 2023-01-24 05:07:36.486514: step: 80/464, loss: 0.019292151555418968 2023-01-24 05:07:37.333584: step: 82/464, loss: 0.019184516742825508 2023-01-24 05:07:38.071230: step: 84/464, loss: 0.008848621509969234 2023-01-24 05:07:38.766878: step: 86/464, loss: 0.022780464962124825 2023-01-24 05:07:39.510434: step: 88/464, loss: 0.00024235923774540424 2023-01-24 05:07:40.235422: step: 90/464, loss: 0.04511115700006485 2023-01-24 05:07:40.974940: step: 92/464, loss: 0.007133756764233112 2023-01-24 05:07:41.755134: step: 94/464, loss: 0.055271781980991364 2023-01-24 05:07:42.556234: step: 96/464, loss: 0.006807959638535976 2023-01-24 05:07:43.205074: step: 98/464, loss: 0.0008343493682332337 2023-01-24 05:07:43.935595: step: 100/464, loss: 0.7415191531181335 2023-01-24 05:07:44.622962: step: 102/464, loss: 0.016804508864879608 2023-01-24 05:07:45.382569: step: 104/464, loss: 0.03757983446121216 2023-01-24 05:07:46.237562: step: 106/464, loss: 0.07727273553609848 2023-01-24 05:07:46.942282: step: 108/464, loss: 0.0360029935836792 2023-01-24 05:07:47.672824: step: 110/464, loss: 0.026875462383031845 2023-01-24 05:07:48.322343: step: 112/464, loss: 0.033845219761133194 2023-01-24 05:07:48.961329: step: 114/464, loss: 0.22472339868545532 2023-01-24 05:07:49.613447: step: 116/464, loss: 0.012281347066164017 2023-01-24 05:07:50.335427: step: 118/464, loss: 0.0384163036942482 2023-01-24 05:07:51.063572: step: 120/464, loss: 0.020371444523334503 2023-01-24 05:07:51.790593: step: 122/464, loss: 0.005753284320235252 2023-01-24 05:07:52.442106: step: 124/464, loss: 0.009750408120453358 2023-01-24 05:07:53.083775: step: 126/464, loss: 0.007648147642612457 2023-01-24 05:07:53.815249: step: 128/464, loss: 0.006110684014856815 2023-01-24 05:07:54.603917: step: 130/464, loss: 0.008586232550442219 2023-01-24 05:07:55.303761: step: 132/464, loss: 0.020226802676916122 2023-01-24 05:07:56.037825: step: 134/464, loss: 0.023586483672261238 2023-01-24 05:07:56.720822: step: 136/464, loss: 0.022161681205034256 2023-01-24 05:07:57.483518: step: 138/464, loss: 0.2518976628780365 2023-01-24 05:07:58.284131: step: 140/464, loss: 0.02171619050204754 2023-01-24 05:07:59.058909: step: 142/464, loss: 0.0016933977603912354 2023-01-24 05:07:59.812784: step: 144/464, loss: 0.002125772647559643 2023-01-24 05:08:00.503074: step: 146/464, loss: 0.0029265161138027906 2023-01-24 05:08:01.220352: step: 148/464, loss: 0.17299233376979828 2023-01-24 05:08:01.922000: step: 150/464, loss: 0.04470699280500412 2023-01-24 05:08:02.608290: step: 152/464, loss: 0.028527090325951576 2023-01-24 05:08:03.330836: step: 154/464, loss: 0.005463603418320417 2023-01-24 05:08:04.135248: step: 156/464, loss: 0.030952492728829384 2023-01-24 05:08:04.805505: step: 158/464, loss: 0.010252845473587513 2023-01-24 05:08:05.474478: step: 160/464, loss: 0.01254748459905386 2023-01-24 05:08:06.136356: step: 162/464, loss: 0.009948525577783585 2023-01-24 05:08:06.835076: step: 164/464, loss: 0.01122485101222992 2023-01-24 05:08:07.566464: step: 166/464, loss: 0.028889335691928864 2023-01-24 05:08:08.280568: step: 168/464, loss: 0.2363210767507553 2023-01-24 05:08:08.997570: step: 170/464, loss: 0.04092266410589218 2023-01-24 05:08:09.697799: step: 172/464, loss: 0.059117868542671204 2023-01-24 05:08:10.413547: step: 174/464, loss: 0.007250105030834675 2023-01-24 05:08:11.221197: step: 176/464, loss: 0.03736590966582298 2023-01-24 05:08:11.968761: step: 178/464, loss: 0.019464800134301186 2023-01-24 05:08:12.717503: step: 180/464, loss: 0.030115772038698196 2023-01-24 05:08:13.459528: step: 182/464, loss: 0.03560752794146538 2023-01-24 05:08:14.216950: step: 184/464, loss: 0.20950794219970703 2023-01-24 05:08:14.957853: step: 186/464, loss: 0.031240640208125114 2023-01-24 05:08:15.665347: step: 188/464, loss: 0.018869325518608093 2023-01-24 05:08:16.519502: step: 190/464, loss: 1.1431554555892944 2023-01-24 05:08:17.231373: step: 192/464, loss: 0.02737320400774479 2023-01-24 05:08:17.916451: step: 194/464, loss: 0.003148356219753623 2023-01-24 05:08:18.665517: step: 196/464, loss: 0.04883582890033722 2023-01-24 05:08:19.354943: step: 198/464, loss: 0.03754701465368271 2023-01-24 05:08:20.148584: step: 200/464, loss: 0.011245599016547203 2023-01-24 05:08:20.901236: step: 202/464, loss: 0.07296408712863922 2023-01-24 05:08:21.578067: step: 204/464, loss: 0.024501923471689224 2023-01-24 05:08:22.377430: step: 206/464, loss: 0.1741705983877182 2023-01-24 05:08:23.142716: step: 208/464, loss: 0.0009615622693672776 2023-01-24 05:08:23.795954: step: 210/464, loss: 0.005018910858780146 2023-01-24 05:08:24.486306: step: 212/464, loss: 0.06708942353725433 2023-01-24 05:08:25.243566: step: 214/464, loss: 0.0673610121011734 2023-01-24 05:08:26.097971: step: 216/464, loss: 0.0976266860961914 2023-01-24 05:08:26.901351: step: 218/464, loss: 0.005886498838663101 2023-01-24 05:08:27.633550: step: 220/464, loss: 0.05451471731066704 2023-01-24 05:08:28.404590: step: 222/464, loss: 0.03318226337432861 2023-01-24 05:08:29.109903: step: 224/464, loss: 0.035258155316114426 2023-01-24 05:08:29.836272: step: 226/464, loss: 0.04683135449886322 2023-01-24 05:08:30.568255: step: 228/464, loss: 0.04850374534726143 2023-01-24 05:08:31.358117: step: 230/464, loss: 0.017435938119888306 2023-01-24 05:08:32.098694: step: 232/464, loss: 0.0573580302298069 2023-01-24 05:08:32.839825: step: 234/464, loss: 1.5812244415283203 2023-01-24 05:08:33.487974: step: 236/464, loss: 0.005821664817631245 2023-01-24 05:08:34.205259: step: 238/464, loss: 0.01273046899586916 2023-01-24 05:08:34.907644: step: 240/464, loss: 0.008258101530373096 2023-01-24 05:08:35.686050: step: 242/464, loss: 0.012394250370562077 2023-01-24 05:08:36.383466: step: 244/464, loss: 0.022714046761393547 2023-01-24 05:08:37.150801: step: 246/464, loss: 0.04308302700519562 2023-01-24 05:08:38.001072: step: 248/464, loss: 0.07067999243736267 2023-01-24 05:08:38.804038: step: 250/464, loss: 0.02938844822347164 2023-01-24 05:08:39.555332: step: 252/464, loss: 0.07796920835971832 2023-01-24 05:08:40.214154: step: 254/464, loss: 0.017405791208148003 2023-01-24 05:08:40.995053: step: 256/464, loss: 0.004125073552131653 2023-01-24 05:08:41.878045: step: 258/464, loss: 0.03315935656428337 2023-01-24 05:08:42.542541: step: 260/464, loss: 0.005836012773215771 2023-01-24 05:08:43.292283: step: 262/464, loss: 0.04530177637934685 2023-01-24 05:08:44.045864: step: 264/464, loss: 0.32245516777038574 2023-01-24 05:08:44.742898: step: 266/464, loss: 0.0796879455447197 2023-01-24 05:08:45.576524: step: 268/464, loss: 0.1779918223619461 2023-01-24 05:08:46.346962: step: 270/464, loss: 0.015152917243540287 2023-01-24 05:08:47.052778: step: 272/464, loss: 0.017251847311854362 2023-01-24 05:08:47.770409: step: 274/464, loss: 0.07568525522947311 2023-01-24 05:08:48.526100: step: 276/464, loss: 0.010382352396845818 2023-01-24 05:08:49.288676: step: 278/464, loss: 0.050715453922748566 2023-01-24 05:08:49.999163: step: 280/464, loss: 0.009335944429039955 2023-01-24 05:08:50.643227: step: 282/464, loss: 0.03318656235933304 2023-01-24 05:08:51.375743: step: 284/464, loss: 0.03201092779636383 2023-01-24 05:08:52.085717: step: 286/464, loss: 0.028812000527977943 2023-01-24 05:08:52.807948: step: 288/464, loss: 0.001550150802358985 2023-01-24 05:08:53.533346: step: 290/464, loss: 0.003170878393575549 2023-01-24 05:08:54.311143: step: 292/464, loss: 0.01939692534506321 2023-01-24 05:08:55.182847: step: 294/464, loss: 0.0947684794664383 2023-01-24 05:08:55.889655: step: 296/464, loss: 0.3248078525066376 2023-01-24 05:08:56.642114: step: 298/464, loss: 0.06562580913305283 2023-01-24 05:08:57.364183: step: 300/464, loss: 0.028699979186058044 2023-01-24 05:08:58.127373: step: 302/464, loss: 0.024749379605054855 2023-01-24 05:08:58.857478: step: 304/464, loss: 0.012792816385626793 2023-01-24 05:08:59.663784: step: 306/464, loss: 0.008358832448720932 2023-01-24 05:09:00.359256: step: 308/464, loss: 0.2585357129573822 2023-01-24 05:09:01.108801: step: 310/464, loss: 0.0026077909860759974 2023-01-24 05:09:01.764102: step: 312/464, loss: 0.04927883297204971 2023-01-24 05:09:02.565365: step: 314/464, loss: 0.051128946244716644 2023-01-24 05:09:03.336147: step: 316/464, loss: 0.05816463753581047 2023-01-24 05:09:04.120672: step: 318/464, loss: 0.2281452864408493 2023-01-24 05:09:04.825449: step: 320/464, loss: 0.04876122996211052 2023-01-24 05:09:05.569128: step: 322/464, loss: 0.03142109885811806 2023-01-24 05:09:06.347298: step: 324/464, loss: 0.08197657018899918 2023-01-24 05:09:07.158626: step: 326/464, loss: 0.03738683834671974 2023-01-24 05:09:07.848301: step: 328/464, loss: 0.005808872636407614 2023-01-24 05:09:08.549534: step: 330/464, loss: 0.022158043459057808 2023-01-24 05:09:09.304004: step: 332/464, loss: 0.005494985263794661 2023-01-24 05:09:10.046372: step: 334/464, loss: 0.03324667736887932 2023-01-24 05:09:10.874891: step: 336/464, loss: 0.029779573902487755 2023-01-24 05:09:11.600875: step: 338/464, loss: 0.01161265093833208 2023-01-24 05:09:12.368940: step: 340/464, loss: 0.0024349091108888388 2023-01-24 05:09:13.131351: step: 342/464, loss: 0.08912298828363419 2023-01-24 05:09:13.858131: step: 344/464, loss: 0.07514897733926773 2023-01-24 05:09:14.622195: step: 346/464, loss: 0.020701896399259567 2023-01-24 05:09:15.305093: step: 348/464, loss: 0.015195302665233612 2023-01-24 05:09:16.020582: step: 350/464, loss: 0.004897533915936947 2023-01-24 05:09:16.748507: step: 352/464, loss: 0.015249923802912235 2023-01-24 05:09:17.463855: step: 354/464, loss: 0.0631057620048523 2023-01-24 05:09:18.153610: step: 356/464, loss: 0.005183662287890911 2023-01-24 05:09:18.816278: step: 358/464, loss: 0.015517822466790676 2023-01-24 05:09:19.629759: step: 360/464, loss: 0.027061475440859795 2023-01-24 05:09:20.385916: step: 362/464, loss: 0.02585103176534176 2023-01-24 05:09:21.121135: step: 364/464, loss: 0.014051264151930809 2023-01-24 05:09:21.822382: step: 366/464, loss: 0.007386281155049801 2023-01-24 05:09:22.556728: step: 368/464, loss: 0.01598726026713848 2023-01-24 05:09:23.224575: step: 370/464, loss: 0.03127245977520943 2023-01-24 05:09:24.029139: step: 372/464, loss: 0.04506853222846985 2023-01-24 05:09:24.805223: step: 374/464, loss: 0.02675584889948368 2023-01-24 05:09:25.621896: step: 376/464, loss: 0.021385207772254944 2023-01-24 05:09:26.351718: step: 378/464, loss: 0.03744923695921898 2023-01-24 05:09:27.058176: step: 380/464, loss: 0.017804961651563644 2023-01-24 05:09:27.799871: step: 382/464, loss: 0.031217500567436218 2023-01-24 05:09:28.497098: step: 384/464, loss: 0.003439696505665779 2023-01-24 05:09:29.241723: step: 386/464, loss: 0.020607760176062584 2023-01-24 05:09:30.002316: step: 388/464, loss: 0.02362990193068981 2023-01-24 05:09:30.750170: step: 390/464, loss: 0.06575938314199448 2023-01-24 05:09:31.449993: step: 392/464, loss: 0.08327308297157288 2023-01-24 05:09:32.290154: step: 394/464, loss: 0.05336488410830498 2023-01-24 05:09:33.017703: step: 396/464, loss: 0.028403114527463913 2023-01-24 05:09:33.811794: step: 398/464, loss: 0.004095328506082296 2023-01-24 05:09:34.524451: step: 400/464, loss: 0.017295170575380325 2023-01-24 05:09:35.289185: step: 402/464, loss: 0.01599576510488987 2023-01-24 05:09:36.039152: step: 404/464, loss: 0.04949192702770233 2023-01-24 05:09:36.784925: step: 406/464, loss: 0.005515251308679581 2023-01-24 05:09:37.557642: step: 408/464, loss: 0.01241600513458252 2023-01-24 05:09:38.284167: step: 410/464, loss: 0.009431494399905205 2023-01-24 05:09:38.972067: step: 412/464, loss: 0.025457508862018585 2023-01-24 05:09:39.676045: step: 414/464, loss: 0.1266927570104599 2023-01-24 05:09:40.362953: step: 416/464, loss: 0.0686836913228035 2023-01-24 05:09:41.220550: step: 418/464, loss: 0.005135274026542902 2023-01-24 05:09:41.930838: step: 420/464, loss: 0.032037705183029175 2023-01-24 05:09:42.702802: step: 422/464, loss: 0.05591385066509247 2023-01-24 05:09:43.428144: step: 424/464, loss: 0.0294948797672987 2023-01-24 05:09:44.138441: step: 426/464, loss: 0.00016107734700199217 2023-01-24 05:09:44.822554: step: 428/464, loss: 0.008500921539962292 2023-01-24 05:09:45.524274: step: 430/464, loss: 0.02550342306494713 2023-01-24 05:09:46.254298: step: 432/464, loss: 0.0052842143923044205 2023-01-24 05:09:47.014463: step: 434/464, loss: 0.011525984853506088 2023-01-24 05:09:47.750421: step: 436/464, loss: 0.047036126255989075 2023-01-24 05:09:48.484382: step: 438/464, loss: 0.07947475463151932 2023-01-24 05:09:49.144684: step: 440/464, loss: 0.013226899318397045 2023-01-24 05:09:49.943901: step: 442/464, loss: 0.033193811774253845 2023-01-24 05:09:50.651441: step: 444/464, loss: 0.007622615899890661 2023-01-24 05:09:51.504193: step: 446/464, loss: 0.023654088377952576 2023-01-24 05:09:52.286254: step: 448/464, loss: 0.021606845781207085 2023-01-24 05:09:53.101171: step: 450/464, loss: 0.020649321377277374 2023-01-24 05:09:53.803014: step: 452/464, loss: 0.1784026324748993 2023-01-24 05:09:54.490752: step: 454/464, loss: 0.003845152212306857 2023-01-24 05:09:55.173141: step: 456/464, loss: 0.0008206103229895234 2023-01-24 05:09:55.855679: step: 458/464, loss: 0.011938661336898804 2023-01-24 05:09:56.577126: step: 460/464, loss: 0.06470710039138794 2023-01-24 05:09:57.300143: step: 462/464, loss: 0.07462415844202042 2023-01-24 05:09:58.047661: step: 464/464, loss: 0.010974127799272537 2023-01-24 05:09:58.807573: step: 466/464, loss: 0.054856494069099426 2023-01-24 05:09:59.567630: step: 468/464, loss: 0.10140056908130646 2023-01-24 05:10:00.291793: step: 470/464, loss: 0.10893024504184723 2023-01-24 05:10:01.065900: step: 472/464, loss: 0.14633715152740479 2023-01-24 05:10:01.843518: step: 474/464, loss: 0.007225559558719397 2023-01-24 05:10:02.542631: step: 476/464, loss: 0.13329027593135834 2023-01-24 05:10:03.322916: step: 478/464, loss: 0.0937078669667244 2023-01-24 05:10:04.081496: step: 480/464, loss: 0.10952486842870712 2023-01-24 05:10:04.783717: step: 482/464, loss: 0.048163872212171555 2023-01-24 05:10:05.584386: step: 484/464, loss: 0.0025707564782351255 2023-01-24 05:10:06.295249: step: 486/464, loss: 0.011743676848709583 2023-01-24 05:10:07.063508: step: 488/464, loss: 0.01013951189815998 2023-01-24 05:10:07.824053: step: 490/464, loss: 0.030154544860124588 2023-01-24 05:10:08.564371: step: 492/464, loss: 0.011203690432012081 2023-01-24 05:10:09.325566: step: 494/464, loss: 0.09164541959762573 2023-01-24 05:10:10.054689: step: 496/464, loss: 0.06809283792972565 2023-01-24 05:10:10.891428: step: 498/464, loss: 0.07905484735965729 2023-01-24 05:10:11.609115: step: 500/464, loss: 0.1469649225473404 2023-01-24 05:10:12.357624: step: 502/464, loss: 0.03829202055931091 2023-01-24 05:10:13.108100: step: 504/464, loss: 0.20179483294487 2023-01-24 05:10:13.791966: step: 506/464, loss: 0.011438527144491673 2023-01-24 05:10:14.562982: step: 508/464, loss: 0.012148118577897549 2023-01-24 05:10:15.253488: step: 510/464, loss: 0.009396116249263287 2023-01-24 05:10:15.995840: step: 512/464, loss: 0.006800531875342131 2023-01-24 05:10:16.711064: step: 514/464, loss: 0.021514644846320152 2023-01-24 05:10:17.478600: step: 516/464, loss: 0.023618390783667564 2023-01-24 05:10:18.201973: step: 518/464, loss: 0.06186339259147644 2023-01-24 05:10:18.880497: step: 520/464, loss: 0.7646817564964294 2023-01-24 05:10:19.524555: step: 522/464, loss: 0.005214558448642492 2023-01-24 05:10:20.310221: step: 524/464, loss: 0.49541327357292175 2023-01-24 05:10:21.033601: step: 526/464, loss: 0.012311800383031368 2023-01-24 05:10:21.775207: step: 528/464, loss: 0.0192912295460701 2023-01-24 05:10:22.547730: step: 530/464, loss: 0.020535197108983994 2023-01-24 05:10:23.305433: step: 532/464, loss: 0.10043486952781677 2023-01-24 05:10:24.033303: step: 534/464, loss: 0.03558532893657684 2023-01-24 05:10:24.818641: step: 536/464, loss: 0.01853848062455654 2023-01-24 05:10:25.598389: step: 538/464, loss: 0.009096699766814709 2023-01-24 05:10:26.413645: step: 540/464, loss: 0.01217248011380434 2023-01-24 05:10:27.115570: step: 542/464, loss: 0.0004268806369509548 2023-01-24 05:10:27.878950: step: 544/464, loss: 0.06526768952608109 2023-01-24 05:10:28.565554: step: 546/464, loss: 0.07047459483146667 2023-01-24 05:10:29.253152: step: 548/464, loss: 0.004196105059236288 2023-01-24 05:10:30.035405: step: 550/464, loss: 0.019067952409386635 2023-01-24 05:10:30.797880: step: 552/464, loss: 0.08304072916507721 2023-01-24 05:10:31.465963: step: 554/464, loss: 0.0038383365608751774 2023-01-24 05:10:32.282572: step: 556/464, loss: 0.03883401304483414 2023-01-24 05:10:33.027078: step: 558/464, loss: 0.06817010045051575 2023-01-24 05:10:33.746607: step: 560/464, loss: 0.16193494200706482 2023-01-24 05:10:34.506206: step: 562/464, loss: 0.043620552867650986 2023-01-24 05:10:35.228663: step: 564/464, loss: 0.037637632340192795 2023-01-24 05:10:35.918061: step: 566/464, loss: 0.00019179204537067562 2023-01-24 05:10:36.751540: step: 568/464, loss: 0.005051923915743828 2023-01-24 05:10:37.501196: step: 570/464, loss: 0.03895522281527519 2023-01-24 05:10:38.321147: step: 572/464, loss: 0.08474939316511154 2023-01-24 05:10:39.137352: step: 574/464, loss: 0.06918489933013916 2023-01-24 05:10:39.844045: step: 576/464, loss: 0.041286442428827286 2023-01-24 05:10:40.688553: step: 578/464, loss: 0.06198017671704292 2023-01-24 05:10:41.483072: step: 580/464, loss: 0.06712707132101059 2023-01-24 05:10:42.203586: step: 582/464, loss: 0.02944483980536461 2023-01-24 05:10:42.964420: step: 584/464, loss: 0.190678671002388 2023-01-24 05:10:43.641604: step: 586/464, loss: 0.013495603576302528 2023-01-24 05:10:44.374198: step: 588/464, loss: 0.07886006683111191 2023-01-24 05:10:45.129259: step: 590/464, loss: 0.2840629518032074 2023-01-24 05:10:45.846115: step: 592/464, loss: 0.004939667880535126 2023-01-24 05:10:46.566954: step: 594/464, loss: 0.0687856450676918 2023-01-24 05:10:47.269572: step: 596/464, loss: 0.003516688011586666 2023-01-24 05:10:47.959389: step: 598/464, loss: 0.05601877719163895 2023-01-24 05:10:48.652894: step: 600/464, loss: 0.1229490116238594 2023-01-24 05:10:49.496701: step: 602/464, loss: 0.034515511244535446 2023-01-24 05:10:50.228605: step: 604/464, loss: 0.03390450030565262 2023-01-24 05:10:51.123572: step: 606/464, loss: 0.017845941707491875 2023-01-24 05:10:51.798183: step: 608/464, loss: 0.04194774106144905 2023-01-24 05:10:52.528351: step: 610/464, loss: 0.06085171550512314 2023-01-24 05:10:53.248494: step: 612/464, loss: 0.029198765754699707 2023-01-24 05:10:54.024079: step: 614/464, loss: 0.04369807988405228 2023-01-24 05:10:54.767317: step: 616/464, loss: 0.04185190424323082 2023-01-24 05:10:55.466135: step: 618/464, loss: 0.060502730309963226 2023-01-24 05:10:56.189727: step: 620/464, loss: 0.012164751067757607 2023-01-24 05:10:56.988162: step: 622/464, loss: 0.034595951437950134 2023-01-24 05:10:57.807753: step: 624/464, loss: 0.01952231489121914 2023-01-24 05:10:58.493481: step: 626/464, loss: 0.013642151840031147 2023-01-24 05:10:59.178819: step: 628/464, loss: 0.046000026166439056 2023-01-24 05:10:59.897796: step: 630/464, loss: 0.03781093284487724 2023-01-24 05:11:00.663920: step: 632/464, loss: 0.01693800650537014 2023-01-24 05:11:01.422578: step: 634/464, loss: 0.01331486739218235 2023-01-24 05:11:02.146204: step: 636/464, loss: 0.04176183044910431 2023-01-24 05:11:02.895096: step: 638/464, loss: 0.09693517535924911 2023-01-24 05:11:03.697335: step: 640/464, loss: 0.027495836839079857 2023-01-24 05:11:04.466833: step: 642/464, loss: 0.09919502586126328 2023-01-24 05:11:05.148332: step: 644/464, loss: 0.012383551336824894 2023-01-24 05:11:05.828107: step: 646/464, loss: 0.0037764981389045715 2023-01-24 05:11:06.569385: step: 648/464, loss: 0.03265717625617981 2023-01-24 05:11:07.343159: step: 650/464, loss: 0.06838352233171463 2023-01-24 05:11:08.125700: step: 652/464, loss: 0.034150850027799606 2023-01-24 05:11:08.838489: step: 654/464, loss: 0.023890497162938118 2023-01-24 05:11:09.555815: step: 656/464, loss: 0.3121313154697418 2023-01-24 05:11:10.329059: step: 658/464, loss: 0.0807483047246933 2023-01-24 05:11:11.028023: step: 660/464, loss: 1.5089443922042847 2023-01-24 05:11:11.832531: step: 662/464, loss: 0.023560039699077606 2023-01-24 05:11:12.570593: step: 664/464, loss: 0.12527887523174286 2023-01-24 05:11:13.282837: step: 666/464, loss: 0.009700451046228409 2023-01-24 05:11:13.973305: step: 668/464, loss: 0.016948828473687172 2023-01-24 05:11:14.724590: step: 670/464, loss: 0.08660709857940674 2023-01-24 05:11:15.487125: step: 672/464, loss: 0.016221001744270325 2023-01-24 05:11:16.257518: step: 674/464, loss: 0.10388290137052536 2023-01-24 05:11:16.964516: step: 676/464, loss: 0.037814389914274216 2023-01-24 05:11:17.653242: step: 678/464, loss: 0.014574986882507801 2023-01-24 05:11:18.397295: step: 680/464, loss: 0.02477123774588108 2023-01-24 05:11:19.243237: step: 682/464, loss: 0.10053754597902298 2023-01-24 05:11:20.076699: step: 684/464, loss: 0.04018322750926018 2023-01-24 05:11:20.743977: step: 686/464, loss: 0.11917680501937866 2023-01-24 05:11:21.488057: step: 688/464, loss: 0.044764451682567596 2023-01-24 05:11:22.296864: step: 690/464, loss: 0.03544643893837929 2023-01-24 05:11:23.054343: step: 692/464, loss: 0.07452749460935593 2023-01-24 05:11:23.781210: step: 694/464, loss: 0.02271382510662079 2023-01-24 05:11:24.502067: step: 696/464, loss: 0.058031462132930756 2023-01-24 05:11:25.193550: step: 698/464, loss: 0.019309645518660545 2023-01-24 05:11:25.941270: step: 700/464, loss: 0.02176925353705883 2023-01-24 05:11:26.600368: step: 702/464, loss: 0.017063865438103676 2023-01-24 05:11:27.344193: step: 704/464, loss: 0.004486426245421171 2023-01-24 05:11:28.182114: step: 706/464, loss: 0.8419817686080933 2023-01-24 05:11:28.909414: step: 708/464, loss: 0.009446037001907825 2023-01-24 05:11:29.668013: step: 710/464, loss: 0.027423586696386337 2023-01-24 05:11:30.379468: step: 712/464, loss: 0.015616148710250854 2023-01-24 05:11:31.192950: step: 714/464, loss: 0.06453923135995865 2023-01-24 05:11:31.914746: step: 716/464, loss: 0.090809166431427 2023-01-24 05:11:32.680902: step: 718/464, loss: 0.07786067575216293 2023-01-24 05:11:33.477359: step: 720/464, loss: 0.022998638451099396 2023-01-24 05:11:34.154186: step: 722/464, loss: 0.00016782456077635288 2023-01-24 05:11:34.875795: step: 724/464, loss: 0.06510668992996216 2023-01-24 05:11:35.713564: step: 726/464, loss: 0.03561001271009445 2023-01-24 05:11:36.459234: step: 728/464, loss: 0.09167484194040298 2023-01-24 05:11:37.227045: step: 730/464, loss: 0.08646490424871445 2023-01-24 05:11:37.941837: step: 732/464, loss: 0.027139829471707344 2023-01-24 05:11:38.639626: step: 734/464, loss: 0.10090488195419312 2023-01-24 05:11:39.430947: step: 736/464, loss: 0.0418844036757946 2023-01-24 05:11:40.118431: step: 738/464, loss: 0.04323262348771095 2023-01-24 05:11:40.898653: step: 740/464, loss: 0.011654680594801903 2023-01-24 05:11:41.594313: step: 742/464, loss: 0.036519430577754974 2023-01-24 05:11:42.300030: step: 744/464, loss: 3.667783260345459 2023-01-24 05:11:43.006053: step: 746/464, loss: 0.033932533115148544 2023-01-24 05:11:43.745314: step: 748/464, loss: 0.015561865642666817 2023-01-24 05:11:44.519930: step: 750/464, loss: 0.11233902722597122 2023-01-24 05:11:45.193275: step: 752/464, loss: 0.10170744359493256 2023-01-24 05:11:45.942717: step: 754/464, loss: 0.0084531893953681 2023-01-24 05:11:46.638712: step: 756/464, loss: 0.04653886705636978 2023-01-24 05:11:47.385602: step: 758/464, loss: 0.004033430479466915 2023-01-24 05:11:48.113931: step: 760/464, loss: 0.14108993113040924 2023-01-24 05:11:48.912767: step: 762/464, loss: 0.013614281080663204 2023-01-24 05:11:49.606446: step: 764/464, loss: 0.016145724803209305 2023-01-24 05:11:50.362699: step: 766/464, loss: 0.01627621054649353 2023-01-24 05:11:51.077886: step: 768/464, loss: 0.006332142744213343 2023-01-24 05:11:51.784014: step: 770/464, loss: 0.010430836118757725 2023-01-24 05:11:52.519730: step: 772/464, loss: 0.006132225971668959 2023-01-24 05:11:53.232591: step: 774/464, loss: 0.015794722363352776 2023-01-24 05:11:53.960088: step: 776/464, loss: 0.015555287711322308 2023-01-24 05:11:54.585957: step: 778/464, loss: 0.006737166550010443 2023-01-24 05:11:55.425103: step: 780/464, loss: 0.03899889439344406 2023-01-24 05:11:56.162179: step: 782/464, loss: 0.061172544956207275 2023-01-24 05:11:56.916982: step: 784/464, loss: 0.03725861385464668 2023-01-24 05:11:57.592819: step: 786/464, loss: 0.03506654128432274 2023-01-24 05:11:58.261050: step: 788/464, loss: 0.008974473923444748 2023-01-24 05:11:58.991069: step: 790/464, loss: 0.03022157959640026 2023-01-24 05:11:59.712844: step: 792/464, loss: 0.06096022576093674 2023-01-24 05:12:00.455568: step: 794/464, loss: 0.014920524321496487 2023-01-24 05:12:01.139108: step: 796/464, loss: 0.0718575119972229 2023-01-24 05:12:01.832138: step: 798/464, loss: 0.018265772610902786 2023-01-24 05:12:02.566497: step: 800/464, loss: 0.006621572654694319 2023-01-24 05:12:03.358202: step: 802/464, loss: 0.008757734671235085 2023-01-24 05:12:04.030354: step: 804/464, loss: 0.009250654838979244 2023-01-24 05:12:04.767021: step: 806/464, loss: 0.03962739557027817 2023-01-24 05:12:05.478283: step: 808/464, loss: 0.007000217214226723 2023-01-24 05:12:06.186044: step: 810/464, loss: 0.020241227000951767 2023-01-24 05:12:06.948961: step: 812/464, loss: 0.013793728314340115 2023-01-24 05:12:07.634024: step: 814/464, loss: 0.0265958309173584 2023-01-24 05:12:08.393117: step: 816/464, loss: 0.020706037059426308 2023-01-24 05:12:09.163134: step: 818/464, loss: 0.0056082699447870255 2023-01-24 05:12:09.934449: step: 820/464, loss: 0.017334256321191788 2023-01-24 05:12:10.673240: step: 822/464, loss: 0.013314464129507542 2023-01-24 05:12:11.447982: step: 824/464, loss: 0.042475175112485886 2023-01-24 05:12:12.148204: step: 826/464, loss: 0.04164883866906166 2023-01-24 05:12:12.917616: step: 828/464, loss: 0.060853902250528336 2023-01-24 05:12:13.676961: step: 830/464, loss: 0.03174266591668129 2023-01-24 05:12:14.545341: step: 832/464, loss: 0.0642930120229721 2023-01-24 05:12:15.265632: step: 834/464, loss: 0.05120242014527321 2023-01-24 05:12:15.989077: step: 836/464, loss: 0.02251666598021984 2023-01-24 05:12:16.640802: step: 838/464, loss: 0.01608583703637123 2023-01-24 05:12:17.366159: step: 840/464, loss: 0.003405903000384569 2023-01-24 05:12:18.159742: step: 842/464, loss: 0.10293716937303543 2023-01-24 05:12:18.916990: step: 844/464, loss: 0.005280649289488792 2023-01-24 05:12:19.721247: step: 846/464, loss: 0.040373336523771286 2023-01-24 05:12:20.432468: step: 848/464, loss: 0.0019112235167995095 2023-01-24 05:12:21.320436: step: 850/464, loss: 0.01604733057320118 2023-01-24 05:12:22.120267: step: 852/464, loss: 0.05956016480922699 2023-01-24 05:12:22.803267: step: 854/464, loss: 0.04864457622170448 2023-01-24 05:12:23.551462: step: 856/464, loss: 0.17723682522773743 2023-01-24 05:12:24.352295: step: 858/464, loss: 0.010293657891452312 2023-01-24 05:12:25.074195: step: 860/464, loss: 0.05993131175637245 2023-01-24 05:12:25.777610: step: 862/464, loss: 0.015174886211752892 2023-01-24 05:12:26.473252: step: 864/464, loss: 0.07661637663841248 2023-01-24 05:12:27.151063: step: 866/464, loss: 0.010754907503724098 2023-01-24 05:12:27.871553: step: 868/464, loss: 0.03574849292635918 2023-01-24 05:12:28.665125: step: 870/464, loss: 0.012999016791582108 2023-01-24 05:12:29.435637: step: 872/464, loss: 0.009559421800076962 2023-01-24 05:12:30.209395: step: 874/464, loss: 0.07725869119167328 2023-01-24 05:12:31.028294: step: 876/464, loss: 0.05234874412417412 2023-01-24 05:12:31.698494: step: 878/464, loss: 0.13046760857105255 2023-01-24 05:12:32.463178: step: 880/464, loss: 0.10775807499885559 2023-01-24 05:12:33.184968: step: 882/464, loss: 0.017713425680994987 2023-01-24 05:12:33.988966: step: 884/464, loss: 0.06358397006988525 2023-01-24 05:12:34.686749: step: 886/464, loss: 0.012726555578410625 2023-01-24 05:12:35.420685: step: 888/464, loss: 0.002486567012965679 2023-01-24 05:12:36.247834: step: 890/464, loss: 0.0012825526064261794 2023-01-24 05:12:36.992081: step: 892/464, loss: 0.024141965433955193 2023-01-24 05:12:37.718973: step: 894/464, loss: 0.015300101600587368 2023-01-24 05:12:38.483213: step: 896/464, loss: 0.02721925638616085 2023-01-24 05:12:39.170374: step: 898/464, loss: 0.07990097254514694 2023-01-24 05:12:39.946808: step: 900/464, loss: 0.036514200270175934 2023-01-24 05:12:40.809824: step: 902/464, loss: 0.02834787406027317 2023-01-24 05:12:41.469549: step: 904/464, loss: 0.02153974585235119 2023-01-24 05:12:42.197646: step: 906/464, loss: 0.003995648585259914 2023-01-24 05:12:42.855340: step: 908/464, loss: 0.09679343551397324 2023-01-24 05:12:43.588980: step: 910/464, loss: 0.03607809543609619 2023-01-24 05:12:44.292692: step: 912/464, loss: 0.03484540060162544 2023-01-24 05:12:45.082920: step: 914/464, loss: 0.055608466267585754 2023-01-24 05:12:45.877336: step: 916/464, loss: 0.04974879324436188 2023-01-24 05:12:46.652377: step: 918/464, loss: 0.003665907308459282 2023-01-24 05:12:47.356703: step: 920/464, loss: 0.07038435339927673 2023-01-24 05:12:48.143410: step: 922/464, loss: 0.08866694569587708 2023-01-24 05:12:48.847934: step: 924/464, loss: 0.3629743158817291 2023-01-24 05:12:49.619408: step: 926/464, loss: 0.006601552478969097 2023-01-24 05:12:50.368742: step: 928/464, loss: 0.045003585517406464 2023-01-24 05:12:51.015362: step: 930/464, loss: 0.004139734897762537 ================================================== Loss: 0.067 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32718892446277403, 'r': 0.3365017021989061, 'f1': 0.33177997578825724}, 'combined': 0.244469455843979, 'epoch': 25} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32177443954383633, 'r': 0.2743985586228565, 'f1': 0.29620409741475284}, 'combined': 0.18395833418389915, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3121769334269334, 'r': 0.32876318415929423, 'f1': 0.3202554492642293}, 'combined': 0.23597769945785313, 'epoch': 25} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31449866671403126, 'r': 0.2642037170519651, 'f1': 0.2871656280833054}, 'combined': 0.17834496902015812, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32769559643100354, 'r': 0.3364009822944458, 'f1': 0.3319912315902115}, 'combined': 0.24462511801384004, 'epoch': 25} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3266270362803424, 'r': 0.2715449621147225, 'f1': 0.2965499101608278}, 'combined': 0.18417310209988255, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36041666666666666, 'r': 0.30892857142857144, 'f1': 0.33269230769230773}, 'combined': 0.2217948717948718, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3046875, 'r': 0.42391304347826086, 'f1': 0.3545454545454545}, 'combined': 0.17727272727272725, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4259868421052631, 'r': 0.1762704174228675, 'f1': 0.2493581514762516}, 'combined': 0.1662387676508344, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:15:31.865658: step: 2/464, loss: 0.03979220986366272 2023-01-24 05:15:32.495800: step: 4/464, loss: 0.0028875789139419794 2023-01-24 05:15:33.258677: step: 6/464, loss: 0.08068735897541046 2023-01-24 05:15:33.986554: step: 8/464, loss: 0.012192752212285995 2023-01-24 05:15:34.720638: step: 10/464, loss: 0.31130632758140564 2023-01-24 05:15:35.435883: step: 12/464, loss: 0.027175702154636383 2023-01-24 05:15:36.152775: step: 14/464, loss: 0.039750535041093826 2023-01-24 05:15:36.932215: step: 16/464, loss: 0.010211330838501453 2023-01-24 05:15:37.693960: step: 18/464, loss: 0.030984103679656982 2023-01-24 05:15:38.406678: step: 20/464, loss: 0.003631194122135639 2023-01-24 05:15:39.201166: step: 22/464, loss: 0.06238573417067528 2023-01-24 05:15:40.003636: step: 24/464, loss: 0.032683271914720535 2023-01-24 05:15:40.749086: step: 26/464, loss: 0.038223229348659515 2023-01-24 05:15:41.470984: step: 28/464, loss: 0.05012506991624832 2023-01-24 05:15:42.235080: step: 30/464, loss: 0.01869715377688408 2023-01-24 05:15:43.046105: step: 32/464, loss: 0.18817028403282166 2023-01-24 05:15:43.740189: step: 34/464, loss: 0.0033002072013914585 2023-01-24 05:15:44.509286: step: 36/464, loss: 0.0033029152546077967 2023-01-24 05:15:45.326939: step: 38/464, loss: 0.016620703041553497 2023-01-24 05:15:46.069326: step: 40/464, loss: 0.06675605475902557 2023-01-24 05:15:46.878119: step: 42/464, loss: 0.01723124273121357 2023-01-24 05:15:47.612770: step: 44/464, loss: 0.009953220374882221 2023-01-24 05:15:48.351703: step: 46/464, loss: 0.00018864394223783165 2023-01-24 05:15:49.133860: step: 48/464, loss: 0.003479225095361471 2023-01-24 05:15:49.896260: step: 50/464, loss: 0.016302935779094696 2023-01-24 05:15:50.617210: step: 52/464, loss: 0.02298845537006855 2023-01-24 05:15:51.312779: step: 54/464, loss: 0.013436194509267807 2023-01-24 05:15:52.036449: step: 56/464, loss: 0.01852521114051342 2023-01-24 05:15:52.691969: step: 58/464, loss: 0.03060929849743843 2023-01-24 05:15:53.429203: step: 60/464, loss: 0.02657526358962059 2023-01-24 05:15:54.206096: step: 62/464, loss: 0.057143684476614 2023-01-24 05:15:54.943940: step: 64/464, loss: 0.013616350479424 2023-01-24 05:15:55.720405: step: 66/464, loss: 0.004383295774459839 2023-01-24 05:15:56.461256: step: 68/464, loss: 0.01339748129248619 2023-01-24 05:15:57.256430: step: 70/464, loss: 0.014214854687452316 2023-01-24 05:15:57.872930: step: 72/464, loss: 0.001885912730358541 2023-01-24 05:15:58.561659: step: 74/464, loss: 0.05567344278097153 2023-01-24 05:15:59.243438: step: 76/464, loss: 0.1963343620300293 2023-01-24 05:16:00.012336: step: 78/464, loss: 0.19187796115875244 2023-01-24 05:16:00.724824: step: 80/464, loss: 0.10916055738925934 2023-01-24 05:16:01.569234: step: 82/464, loss: 0.03477245569229126 2023-01-24 05:16:02.287694: step: 84/464, loss: 0.024640601128339767 2023-01-24 05:16:02.978120: step: 86/464, loss: 0.01551698986440897 2023-01-24 05:16:03.705398: step: 88/464, loss: 0.02397140860557556 2023-01-24 05:16:04.431549: step: 90/464, loss: 0.07582138478755951 2023-01-24 05:16:05.099085: step: 92/464, loss: 0.03233405947685242 2023-01-24 05:16:05.853970: step: 94/464, loss: 0.006312546785920858 2023-01-24 05:16:06.557229: step: 96/464, loss: 0.05362057313323021 2023-01-24 05:16:07.355238: step: 98/464, loss: 0.028060076758265495 2023-01-24 05:16:08.107310: step: 100/464, loss: 0.008015839383006096 2023-01-24 05:16:08.866477: step: 102/464, loss: 0.011111016385257244 2023-01-24 05:16:09.594598: step: 104/464, loss: 0.012221268378198147 2023-01-24 05:16:10.377995: step: 106/464, loss: 0.04969086870551109 2023-01-24 05:16:11.077829: step: 108/464, loss: 0.019485192373394966 2023-01-24 05:16:11.878282: step: 110/464, loss: 0.05747364088892937 2023-01-24 05:16:12.645431: step: 112/464, loss: 0.010442215949296951 2023-01-24 05:16:13.346783: step: 114/464, loss: 0.01553972065448761 2023-01-24 05:16:14.100194: step: 116/464, loss: 0.08189647644758224 2023-01-24 05:16:14.811561: step: 118/464, loss: 0.04713998734951019 2023-01-24 05:16:15.470622: step: 120/464, loss: 0.015384606085717678 2023-01-24 05:16:16.231481: step: 122/464, loss: 0.06837371736764908 2023-01-24 05:16:16.930345: step: 124/464, loss: 0.0416836179792881 2023-01-24 05:16:17.678230: step: 126/464, loss: 0.010772022418677807 2023-01-24 05:16:18.485770: step: 128/464, loss: 0.006497004069387913 2023-01-24 05:16:19.234619: step: 130/464, loss: 0.005042895209044218 2023-01-24 05:16:19.998825: step: 132/464, loss: 0.009416569024324417 2023-01-24 05:16:20.785182: step: 134/464, loss: 0.03312671184539795 2023-01-24 05:16:21.529609: step: 136/464, loss: 0.023155786097049713 2023-01-24 05:16:22.272131: step: 138/464, loss: 0.08792765438556671 2023-01-24 05:16:22.999496: step: 140/464, loss: 0.01703648269176483 2023-01-24 05:16:23.802538: step: 142/464, loss: 0.006686758249998093 2023-01-24 05:16:24.541464: step: 144/464, loss: 0.00440249452367425 2023-01-24 05:16:25.195778: step: 146/464, loss: 0.002988354070112109 2023-01-24 05:16:25.942660: step: 148/464, loss: 0.00513819744810462 2023-01-24 05:16:26.708750: step: 150/464, loss: 0.025694118812680244 2023-01-24 05:16:27.439528: step: 152/464, loss: 0.021511996164917946 2023-01-24 05:16:28.168805: step: 154/464, loss: 0.0037316379602998495 2023-01-24 05:16:28.872030: step: 156/464, loss: 0.01730765588581562 2023-01-24 05:16:29.553897: step: 158/464, loss: 0.03143288195133209 2023-01-24 05:16:30.282413: step: 160/464, loss: 0.006385265849530697 2023-01-24 05:16:31.007727: step: 162/464, loss: 0.004676298703998327 2023-01-24 05:16:31.685488: step: 164/464, loss: 0.03778495639562607 2023-01-24 05:16:32.440228: step: 166/464, loss: 0.041014038026332855 2023-01-24 05:16:33.139970: step: 168/464, loss: 0.006660899613052607 2023-01-24 05:16:33.854380: step: 170/464, loss: 0.009321154095232487 2023-01-24 05:16:34.585493: step: 172/464, loss: 0.048160020262002945 2023-01-24 05:16:35.310633: step: 174/464, loss: 0.032669879496097565 2023-01-24 05:16:36.074136: step: 176/464, loss: 0.0007618823437951505 2023-01-24 05:16:36.783916: step: 178/464, loss: 0.018640637397766113 2023-01-24 05:16:37.508804: step: 180/464, loss: 0.03052210435271263 2023-01-24 05:16:38.238553: step: 182/464, loss: 0.03019343689084053 2023-01-24 05:16:38.986022: step: 184/464, loss: 0.09650694578886032 2023-01-24 05:16:39.682536: step: 186/464, loss: 0.022672880440950394 2023-01-24 05:16:40.392739: step: 188/464, loss: 0.006853120867162943 2023-01-24 05:16:41.113203: step: 190/464, loss: 0.11154012382030487 2023-01-24 05:16:41.945867: step: 192/464, loss: 0.02759392186999321 2023-01-24 05:16:42.641811: step: 194/464, loss: 0.010374793782830238 2023-01-24 05:16:43.301279: step: 196/464, loss: 0.0036212902050465345 2023-01-24 05:16:44.062940: step: 198/464, loss: 0.4379126727581024 2023-01-24 05:16:44.826396: step: 200/464, loss: 0.006713510025292635 2023-01-24 05:16:45.589452: step: 202/464, loss: 0.037869472056627274 2023-01-24 05:16:46.328820: step: 204/464, loss: 0.1598629355430603 2023-01-24 05:16:47.035287: step: 206/464, loss: 0.05931587144732475 2023-01-24 05:16:47.811177: step: 208/464, loss: 0.01667376235127449 2023-01-24 05:16:48.537526: step: 210/464, loss: 0.020716484636068344 2023-01-24 05:16:49.277176: step: 212/464, loss: 0.06653925031423569 2023-01-24 05:16:50.038134: step: 214/464, loss: 0.15032264590263367 2023-01-24 05:16:50.782666: step: 216/464, loss: 0.01681193895637989 2023-01-24 05:16:51.539260: step: 218/464, loss: 0.08305919915437698 2023-01-24 05:16:52.244304: step: 220/464, loss: 0.01702980510890484 2023-01-24 05:16:52.974550: step: 222/464, loss: 0.0005055239307694137 2023-01-24 05:16:53.732710: step: 224/464, loss: 0.004098755773156881 2023-01-24 05:16:54.492195: step: 226/464, loss: 0.028517749160528183 2023-01-24 05:16:55.293859: step: 228/464, loss: 0.02958236075937748 2023-01-24 05:16:56.061480: step: 230/464, loss: 0.010918030515313148 2023-01-24 05:16:56.831749: step: 232/464, loss: 0.01749340072274208 2023-01-24 05:16:57.620442: step: 234/464, loss: 0.08216807246208191 2023-01-24 05:16:58.343767: step: 236/464, loss: 0.13501933217048645 2023-01-24 05:16:59.165682: step: 238/464, loss: 0.28800854086875916 2023-01-24 05:16:59.848202: step: 240/464, loss: 0.035141170024871826 2023-01-24 05:17:00.576915: step: 242/464, loss: 0.03113182634115219 2023-01-24 05:17:01.280534: step: 244/464, loss: 0.0638260543346405 2023-01-24 05:17:02.128724: step: 246/464, loss: 0.02870015986263752 2023-01-24 05:17:02.888270: step: 248/464, loss: 0.04577351361513138 2023-01-24 05:17:03.642710: step: 250/464, loss: 0.06411606073379517 2023-01-24 05:17:04.415155: step: 252/464, loss: 0.263606995344162 2023-01-24 05:17:05.242437: step: 254/464, loss: 0.02350165694952011 2023-01-24 05:17:06.035106: step: 256/464, loss: 0.1228107437491417 2023-01-24 05:17:06.813291: step: 258/464, loss: 0.008794655092060566 2023-01-24 05:17:07.612000: step: 260/464, loss: 0.16834703087806702 2023-01-24 05:17:08.451531: step: 262/464, loss: 0.003793665673583746 2023-01-24 05:17:09.206689: step: 264/464, loss: 0.009521029889583588 2023-01-24 05:17:09.912277: step: 266/464, loss: 0.007902123034000397 2023-01-24 05:17:10.606660: step: 268/464, loss: 0.016075551509857178 2023-01-24 05:17:11.354079: step: 270/464, loss: 0.004283635877072811 2023-01-24 05:17:12.133421: step: 272/464, loss: 0.03238300979137421 2023-01-24 05:17:12.864003: step: 274/464, loss: 0.023233672603964806 2023-01-24 05:17:13.573980: step: 276/464, loss: 0.011083599179983139 2023-01-24 05:17:14.366948: step: 278/464, loss: 0.009170886129140854 2023-01-24 05:17:15.075604: step: 280/464, loss: 0.19675010442733765 2023-01-24 05:17:15.805472: step: 282/464, loss: 0.04092198982834816 2023-01-24 05:17:16.543984: step: 284/464, loss: 0.08173315227031708 2023-01-24 05:17:17.288119: step: 286/464, loss: 0.022941935807466507 2023-01-24 05:17:18.108165: step: 288/464, loss: 0.027069993317127228 2023-01-24 05:17:18.881128: step: 290/464, loss: 0.054724760353565216 2023-01-24 05:17:19.611379: step: 292/464, loss: 0.02735094539821148 2023-01-24 05:17:20.294911: step: 294/464, loss: 0.04187670722603798 2023-01-24 05:17:21.028010: step: 296/464, loss: 0.02951827645301819 2023-01-24 05:17:21.773246: step: 298/464, loss: 0.009366628713905811 2023-01-24 05:17:22.480685: step: 300/464, loss: 0.006680920720100403 2023-01-24 05:17:23.151165: step: 302/464, loss: 0.000542443071026355 2023-01-24 05:17:23.904756: step: 304/464, loss: 0.06525389105081558 2023-01-24 05:17:24.646411: step: 306/464, loss: 0.041585177183151245 2023-01-24 05:17:25.317445: step: 308/464, loss: 0.0009010765352286398 2023-01-24 05:17:26.018691: step: 310/464, loss: 0.043093081563711166 2023-01-24 05:17:26.815672: step: 312/464, loss: 0.08462139219045639 2023-01-24 05:17:27.525487: step: 314/464, loss: 0.029180768877267838 2023-01-24 05:17:28.274002: step: 316/464, loss: 0.2786836624145508 2023-01-24 05:17:29.024427: step: 318/464, loss: 0.007162583060562611 2023-01-24 05:17:29.691442: step: 320/464, loss: 0.0022578127682209015 2023-01-24 05:17:30.480372: step: 322/464, loss: 0.029035838320851326 2023-01-24 05:17:31.276404: step: 324/464, loss: 0.0048900507390499115 2023-01-24 05:17:32.078536: step: 326/464, loss: 0.019386572763323784 2023-01-24 05:17:32.789976: step: 328/464, loss: 0.07106909155845642 2023-01-24 05:17:33.539451: step: 330/464, loss: 0.010740678757429123 2023-01-24 05:17:34.315236: step: 332/464, loss: 0.03408215939998627 2023-01-24 05:17:34.991264: step: 334/464, loss: 0.10296069830656052 2023-01-24 05:17:35.665316: step: 336/464, loss: 0.02283811941742897 2023-01-24 05:17:36.396310: step: 338/464, loss: 0.00903315469622612 2023-01-24 05:17:37.213429: step: 340/464, loss: 0.1214098259806633 2023-01-24 05:17:37.903554: step: 342/464, loss: 0.0025513179134577513 2023-01-24 05:17:38.663560: step: 344/464, loss: 0.006067641545087099 2023-01-24 05:17:39.412238: step: 346/464, loss: 0.023233452811837196 2023-01-24 05:17:40.159887: step: 348/464, loss: 0.04339459165930748 2023-01-24 05:17:40.849300: step: 350/464, loss: 0.012552225030958652 2023-01-24 05:17:41.599340: step: 352/464, loss: 0.0010526591213420033 2023-01-24 05:17:42.419753: step: 354/464, loss: 0.005083049647510052 2023-01-24 05:17:43.048144: step: 356/464, loss: 0.012266969308257103 2023-01-24 05:17:43.736571: step: 358/464, loss: 6.913995265960693 2023-01-24 05:17:44.674040: step: 360/464, loss: 0.02275826968252659 2023-01-24 05:17:45.374320: step: 362/464, loss: 0.04937801882624626 2023-01-24 05:17:46.206744: step: 364/464, loss: 0.04725099727511406 2023-01-24 05:17:47.083224: step: 366/464, loss: 0.04512450471520424 2023-01-24 05:17:47.874278: step: 368/464, loss: 0.07886477559804916 2023-01-24 05:17:48.564078: step: 370/464, loss: 0.017831819131970406 2023-01-24 05:17:49.254651: step: 372/464, loss: 0.006999008823186159 2023-01-24 05:17:49.912038: step: 374/464, loss: 0.1305447369813919 2023-01-24 05:17:50.699227: step: 376/464, loss: 0.005541624501347542 2023-01-24 05:17:51.422398: step: 378/464, loss: 0.02828267030417919 2023-01-24 05:17:52.194400: step: 380/464, loss: 0.007844929583370686 2023-01-24 05:17:52.965034: step: 382/464, loss: 0.014002975076436996 2023-01-24 05:17:53.695187: step: 384/464, loss: 0.018246008083224297 2023-01-24 05:17:54.442734: step: 386/464, loss: 0.044929563999176025 2023-01-24 05:17:55.122272: step: 388/464, loss: 0.011728995479643345 2023-01-24 05:17:55.776856: step: 390/464, loss: 0.08749885112047195 2023-01-24 05:17:56.530404: step: 392/464, loss: 0.09420124441385269 2023-01-24 05:17:57.265462: step: 394/464, loss: 0.0393458716571331 2023-01-24 05:17:58.066263: step: 396/464, loss: 0.4072987139225006 2023-01-24 05:17:58.845783: step: 398/464, loss: 0.003674858482554555 2023-01-24 05:17:59.587360: step: 400/464, loss: 0.01733958162367344 2023-01-24 05:18:00.296541: step: 402/464, loss: 0.010459842160344124 2023-01-24 05:18:01.052862: step: 404/464, loss: 0.09545612335205078 2023-01-24 05:18:01.744464: step: 406/464, loss: 0.07118836045265198 2023-01-24 05:18:02.488950: step: 408/464, loss: 0.08896943926811218 2023-01-24 05:18:03.211702: step: 410/464, loss: 0.006611619610339403 2023-01-24 05:18:03.931824: step: 412/464, loss: 0.004229702055454254 2023-01-24 05:18:04.734840: step: 414/464, loss: 0.07376924157142639 2023-01-24 05:18:05.460757: step: 416/464, loss: 0.05804646015167236 2023-01-24 05:18:06.218922: step: 418/464, loss: 0.06894968450069427 2023-01-24 05:18:07.096635: step: 420/464, loss: 0.019484853371977806 2023-01-24 05:18:07.876973: step: 422/464, loss: 0.00924522802233696 2023-01-24 05:18:08.686856: step: 424/464, loss: 0.030628271400928497 2023-01-24 05:18:09.398118: step: 426/464, loss: 0.026602789759635925 2023-01-24 05:18:10.101198: step: 428/464, loss: 0.0014539804542437196 2023-01-24 05:18:10.804082: step: 430/464, loss: 0.013244767673313618 2023-01-24 05:18:11.506354: step: 432/464, loss: 0.01868750900030136 2023-01-24 05:18:12.209351: step: 434/464, loss: 0.01659456081688404 2023-01-24 05:18:13.090585: step: 436/464, loss: 0.13781066238880157 2023-01-24 05:18:13.877760: step: 438/464, loss: 0.0003242420207243413 2023-01-24 05:18:14.610082: step: 440/464, loss: 0.059889595955610275 2023-01-24 05:18:15.327161: step: 442/464, loss: 0.09946936368942261 2023-01-24 05:18:16.073425: step: 444/464, loss: 0.01946125365793705 2023-01-24 05:18:16.867903: step: 446/464, loss: 0.02001163363456726 2023-01-24 05:18:17.584390: step: 448/464, loss: 0.03295079246163368 2023-01-24 05:18:18.350502: step: 450/464, loss: 0.009511049836874008 2023-01-24 05:18:19.099380: step: 452/464, loss: 0.020733296871185303 2023-01-24 05:18:19.775833: step: 454/464, loss: 0.05650974065065384 2023-01-24 05:18:20.498802: step: 456/464, loss: 0.00144986214581877 2023-01-24 05:18:21.225082: step: 458/464, loss: 0.013950464315712452 2023-01-24 05:18:21.929387: step: 460/464, loss: 0.00036745882243849337 2023-01-24 05:18:22.713627: step: 462/464, loss: 0.046166226267814636 2023-01-24 05:18:23.649587: step: 464/464, loss: 0.004961703438311815 2023-01-24 05:18:24.432274: step: 466/464, loss: 0.027376022189855576 2023-01-24 05:18:25.206305: step: 468/464, loss: 0.004060413688421249 2023-01-24 05:18:25.917020: step: 470/464, loss: 0.029101349413394928 2023-01-24 05:18:26.677462: step: 472/464, loss: 0.01377090159803629 2023-01-24 05:18:27.380324: step: 474/464, loss: 0.017810342833399773 2023-01-24 05:18:28.107044: step: 476/464, loss: 0.013420073315501213 2023-01-24 05:18:28.934438: step: 478/464, loss: 0.03463403135538101 2023-01-24 05:18:29.646261: step: 480/464, loss: 0.012036774307489395 2023-01-24 05:18:30.327821: step: 482/464, loss: 0.05726030468940735 2023-01-24 05:18:31.123209: step: 484/464, loss: 0.02741643413901329 2023-01-24 05:18:31.897076: step: 486/464, loss: 0.016417313367128372 2023-01-24 05:18:32.649984: step: 488/464, loss: 0.22834038734436035 2023-01-24 05:18:33.369568: step: 490/464, loss: 0.028050106018781662 2023-01-24 05:18:34.155684: step: 492/464, loss: 0.12751196324825287 2023-01-24 05:18:34.864440: step: 494/464, loss: 0.0044830976985394955 2023-01-24 05:18:35.602741: step: 496/464, loss: 0.0003841651196125895 2023-01-24 05:18:36.360410: step: 498/464, loss: 0.00019563184469006956 2023-01-24 05:18:37.174338: step: 500/464, loss: 0.022695457562804222 2023-01-24 05:18:37.909398: step: 502/464, loss: 0.02632814832031727 2023-01-24 05:18:38.595479: step: 504/464, loss: 0.03141283243894577 2023-01-24 05:18:39.317113: step: 506/464, loss: 0.021201007068157196 2023-01-24 05:18:40.107058: step: 508/464, loss: 0.044027432799339294 2023-01-24 05:18:40.824304: step: 510/464, loss: 0.0025048651732504368 2023-01-24 05:18:41.554588: step: 512/464, loss: 0.004569270648062229 2023-01-24 05:18:42.296826: step: 514/464, loss: 0.0010739141143858433 2023-01-24 05:18:43.101710: step: 516/464, loss: 0.017874712124466896 2023-01-24 05:18:43.890453: step: 518/464, loss: 0.019535720348358154 2023-01-24 05:18:44.783115: step: 520/464, loss: 0.05086197704076767 2023-01-24 05:18:45.482891: step: 522/464, loss: 0.044072188436985016 2023-01-24 05:18:46.242078: step: 524/464, loss: 0.038728076964616776 2023-01-24 05:18:47.060746: step: 526/464, loss: 0.008920364081859589 2023-01-24 05:18:47.776572: step: 528/464, loss: 0.042976170778274536 2023-01-24 05:18:48.470632: step: 530/464, loss: 0.11609112471342087 2023-01-24 05:18:49.215614: step: 532/464, loss: 0.07067747414112091 2023-01-24 05:18:50.071826: step: 534/464, loss: 0.019455162808299065 2023-01-24 05:18:50.825933: step: 536/464, loss: 0.08025778830051422 2023-01-24 05:18:51.541718: step: 538/464, loss: 0.037985384464263916 2023-01-24 05:18:52.253330: step: 540/464, loss: 0.008334038779139519 2023-01-24 05:18:53.011069: step: 542/464, loss: 0.051863398402929306 2023-01-24 05:18:53.746381: step: 544/464, loss: 0.016841573640704155 2023-01-24 05:18:54.486579: step: 546/464, loss: 0.035958219319581985 2023-01-24 05:18:55.183579: step: 548/464, loss: 0.006521169561892748 2023-01-24 05:18:55.930451: step: 550/464, loss: 0.003640382084995508 2023-01-24 05:18:56.700145: step: 552/464, loss: 0.004826993215829134 2023-01-24 05:18:57.500549: step: 554/464, loss: 0.14086350798606873 2023-01-24 05:18:58.267751: step: 556/464, loss: 0.009819505736231804 2023-01-24 05:18:59.027040: step: 558/464, loss: 0.06159777566790581 2023-01-24 05:18:59.811032: step: 560/464, loss: 0.017736200243234634 2023-01-24 05:19:00.482619: step: 562/464, loss: 0.04474842548370361 2023-01-24 05:19:01.152590: step: 564/464, loss: 0.028115447610616684 2023-01-24 05:19:01.906232: step: 566/464, loss: 0.01623515598475933 2023-01-24 05:19:02.668275: step: 568/464, loss: 0.02251431718468666 2023-01-24 05:19:03.350458: step: 570/464, loss: 0.0011539249680936337 2023-01-24 05:19:04.063103: step: 572/464, loss: 0.0839838832616806 2023-01-24 05:19:04.738622: step: 574/464, loss: 0.006072110962122679 2023-01-24 05:19:05.488577: step: 576/464, loss: 0.0036800343077629805 2023-01-24 05:19:06.269116: step: 578/464, loss: 0.00892962608486414 2023-01-24 05:19:06.943103: step: 580/464, loss: 0.013826594687998295 2023-01-24 05:19:07.684389: step: 582/464, loss: 0.07251441478729248 2023-01-24 05:19:08.373137: step: 584/464, loss: 0.006227490957826376 2023-01-24 05:19:09.068711: step: 586/464, loss: 0.013700391165912151 2023-01-24 05:19:09.849730: step: 588/464, loss: 0.06363295018672943 2023-01-24 05:19:10.565779: step: 590/464, loss: 0.021341076120734215 2023-01-24 05:19:11.363603: step: 592/464, loss: 0.011852027848362923 2023-01-24 05:19:12.182344: step: 594/464, loss: 0.02045590803027153 2023-01-24 05:19:12.892135: step: 596/464, loss: 0.03566189855337143 2023-01-24 05:19:13.634250: step: 598/464, loss: 0.0173689853399992 2023-01-24 05:19:14.369351: step: 600/464, loss: 0.357320100069046 2023-01-24 05:19:15.080321: step: 602/464, loss: 0.11469513177871704 2023-01-24 05:19:15.747798: step: 604/464, loss: 0.007034547161310911 2023-01-24 05:19:16.423698: step: 606/464, loss: 0.01139721181243658 2023-01-24 05:19:17.216821: step: 608/464, loss: 0.032890621572732925 2023-01-24 05:19:17.906034: step: 610/464, loss: 0.01588939130306244 2023-01-24 05:19:18.671152: step: 612/464, loss: 0.11238836497068405 2023-01-24 05:19:19.400281: step: 614/464, loss: 0.24029462039470673 2023-01-24 05:19:20.180514: step: 616/464, loss: 0.03379188850522041 2023-01-24 05:19:20.883569: step: 618/464, loss: 0.09208420664072037 2023-01-24 05:19:21.623390: step: 620/464, loss: 0.6428366303443909 2023-01-24 05:19:22.354280: step: 622/464, loss: 0.13242271542549133 2023-01-24 05:19:23.093248: step: 624/464, loss: 0.08914587646722794 2023-01-24 05:19:23.859075: step: 626/464, loss: 0.01581208035349846 2023-01-24 05:19:24.616137: step: 628/464, loss: 0.2521286904811859 2023-01-24 05:19:25.385383: step: 630/464, loss: 0.1282821148633957 2023-01-24 05:19:26.110388: step: 632/464, loss: 0.07224276661872864 2023-01-24 05:19:26.927375: step: 634/464, loss: 0.028242819011211395 2023-01-24 05:19:27.759021: step: 636/464, loss: 0.008096271194517612 2023-01-24 05:19:28.472837: step: 638/464, loss: 0.04342019557952881 2023-01-24 05:19:29.189651: step: 640/464, loss: 0.012415018863976002 2023-01-24 05:19:29.969372: step: 642/464, loss: 0.037182290107011795 2023-01-24 05:19:30.622699: step: 644/464, loss: 0.21031810343265533 2023-01-24 05:19:31.429595: step: 646/464, loss: 0.007701578550040722 2023-01-24 05:19:32.099255: step: 648/464, loss: 0.006078493315726519 2023-01-24 05:19:32.834512: step: 650/464, loss: 0.016446499153971672 2023-01-24 05:19:33.626565: step: 652/464, loss: 0.053291235119104385 2023-01-24 05:19:34.431707: step: 654/464, loss: 0.05071832984685898 2023-01-24 05:19:35.164006: step: 656/464, loss: 0.06600752472877502 2023-01-24 05:19:36.019425: step: 658/464, loss: 0.12854290008544922 2023-01-24 05:19:36.802781: step: 660/464, loss: 0.016413327306509018 2023-01-24 05:19:37.663460: step: 662/464, loss: 0.013318453915417194 2023-01-24 05:19:38.436362: step: 664/464, loss: 0.021495165303349495 2023-01-24 05:19:39.106147: step: 666/464, loss: 0.004696085583418608 2023-01-24 05:19:39.797531: step: 668/464, loss: 0.015222067944705486 2023-01-24 05:19:40.534021: step: 670/464, loss: 0.0056726341135799885 2023-01-24 05:19:41.441047: step: 672/464, loss: 0.06520474702119827 2023-01-24 05:19:42.212838: step: 674/464, loss: 0.025041626766324043 2023-01-24 05:19:42.921377: step: 676/464, loss: 0.01942654699087143 2023-01-24 05:19:43.638713: step: 678/464, loss: 0.009234144352376461 2023-01-24 05:19:44.360841: step: 680/464, loss: 0.024104246869683266 2023-01-24 05:19:45.114383: step: 682/464, loss: 0.0200289748609066 2023-01-24 05:19:45.863672: step: 684/464, loss: 0.043629322201013565 2023-01-24 05:19:46.614323: step: 686/464, loss: 0.05797417834401131 2023-01-24 05:19:47.375687: step: 688/464, loss: 0.08065325766801834 2023-01-24 05:19:48.132975: step: 690/464, loss: 0.03184974193572998 2023-01-24 05:19:48.856260: step: 692/464, loss: 0.015992360189557076 2023-01-24 05:19:49.548888: step: 694/464, loss: 0.010463027283549309 2023-01-24 05:19:50.352280: step: 696/464, loss: 0.022422725334763527 2023-01-24 05:19:51.128254: step: 698/464, loss: 0.03707791492342949 2023-01-24 05:19:51.837656: step: 700/464, loss: 0.09422971308231354 2023-01-24 05:19:52.545611: step: 702/464, loss: 0.056585729122161865 2023-01-24 05:19:53.318472: step: 704/464, loss: 0.05239574611186981 2023-01-24 05:19:54.021864: step: 706/464, loss: 0.1525680422782898 2023-01-24 05:19:54.755069: step: 708/464, loss: 0.06405721604824066 2023-01-24 05:19:55.474662: step: 710/464, loss: 0.03996497765183449 2023-01-24 05:19:56.158997: step: 712/464, loss: 0.07094903290271759 2023-01-24 05:19:56.836536: step: 714/464, loss: 0.010407168418169022 2023-01-24 05:19:57.566401: step: 716/464, loss: 0.049233485013246536 2023-01-24 05:19:58.258935: step: 718/464, loss: 0.015937745571136475 2023-01-24 05:19:58.953730: step: 720/464, loss: 0.09470876306295395 2023-01-24 05:19:59.687088: step: 722/464, loss: 0.01487759593874216 2023-01-24 05:20:00.421098: step: 724/464, loss: 0.011564928106963634 2023-01-24 05:20:01.206516: step: 726/464, loss: 0.040957946330308914 2023-01-24 05:20:02.007823: step: 728/464, loss: 0.03968048840761185 2023-01-24 05:20:02.671317: step: 730/464, loss: 0.04555586352944374 2023-01-24 05:20:03.429156: step: 732/464, loss: 0.029980555176734924 2023-01-24 05:20:04.177574: step: 734/464, loss: 0.07169114798307419 2023-01-24 05:20:04.893854: step: 736/464, loss: 0.006400719750672579 2023-01-24 05:20:05.685739: step: 738/464, loss: 0.0033728305716067553 2023-01-24 05:20:06.370339: step: 740/464, loss: 0.03094770386815071 2023-01-24 05:20:07.116905: step: 742/464, loss: 0.023393241688609123 2023-01-24 05:20:07.913592: step: 744/464, loss: 0.05773283913731575 2023-01-24 05:20:08.595989: step: 746/464, loss: 0.0021309382282197475 2023-01-24 05:20:09.281720: step: 748/464, loss: 0.05149630084633827 2023-01-24 05:20:10.044704: step: 750/464, loss: 0.04184247553348541 2023-01-24 05:20:10.755853: step: 752/464, loss: 0.007670742925256491 2023-01-24 05:20:11.543111: step: 754/464, loss: 0.3596675992012024 2023-01-24 05:20:12.330281: step: 756/464, loss: 0.003946928307414055 2023-01-24 05:20:12.998229: step: 758/464, loss: 0.09387727081775665 2023-01-24 05:20:13.651906: step: 760/464, loss: 0.002123955637216568 2023-01-24 05:20:14.429673: step: 762/464, loss: 0.03694995492696762 2023-01-24 05:20:15.146885: step: 764/464, loss: 0.02468699961900711 2023-01-24 05:20:15.876746: step: 766/464, loss: 0.04791996255517006 2023-01-24 05:20:16.580281: step: 768/464, loss: 0.0017564924200996757 2023-01-24 05:20:17.336865: step: 770/464, loss: 0.004304036498069763 2023-01-24 05:20:18.118301: step: 772/464, loss: 0.013958727940917015 2023-01-24 05:20:18.885327: step: 774/464, loss: 0.007403239607810974 2023-01-24 05:20:19.636128: step: 776/464, loss: 0.012045308947563171 2023-01-24 05:20:20.422086: step: 778/464, loss: 0.11195321381092072 2023-01-24 05:20:21.204190: step: 780/464, loss: 0.006845542695373297 2023-01-24 05:20:21.928658: step: 782/464, loss: 0.1624700129032135 2023-01-24 05:20:22.692649: step: 784/464, loss: 0.042097195982933044 2023-01-24 05:20:23.449495: step: 786/464, loss: 0.11464142799377441 2023-01-24 05:20:24.162066: step: 788/464, loss: 1.7824957370758057 2023-01-24 05:20:24.831018: step: 790/464, loss: 0.034144118428230286 2023-01-24 05:20:25.508800: step: 792/464, loss: 0.003950945101678371 2023-01-24 05:20:26.236143: step: 794/464, loss: 0.009137485176324844 2023-01-24 05:20:26.967639: step: 796/464, loss: 0.017941009253263474 2023-01-24 05:20:27.688926: step: 798/464, loss: 0.0004914068267680705 2023-01-24 05:20:28.474039: step: 800/464, loss: 0.026943765580654144 2023-01-24 05:20:29.285515: step: 802/464, loss: 0.07514145970344543 2023-01-24 05:20:29.992084: step: 804/464, loss: 1.424988865852356 2023-01-24 05:20:30.866093: step: 806/464, loss: 0.005588890518993139 2023-01-24 05:20:31.671448: step: 808/464, loss: 0.082185298204422 2023-01-24 05:20:32.337093: step: 810/464, loss: 0.005688100587576628 2023-01-24 05:20:33.119782: step: 812/464, loss: 0.024905728176236153 2023-01-24 05:20:33.847020: step: 814/464, loss: 0.0005005030543543398 2023-01-24 05:20:34.570303: step: 816/464, loss: 0.001384145813062787 2023-01-24 05:20:35.202450: step: 818/464, loss: 0.016171080991625786 2023-01-24 05:20:35.918648: step: 820/464, loss: 0.0553651861846447 2023-01-24 05:20:36.621106: step: 822/464, loss: 0.02234969288110733 2023-01-24 05:20:37.351680: step: 824/464, loss: 0.03491181135177612 2023-01-24 05:20:38.157810: step: 826/464, loss: 0.0041991109028458595 2023-01-24 05:20:38.933117: step: 828/464, loss: 0.0028750444762408733 2023-01-24 05:20:39.639910: step: 830/464, loss: 0.016252554953098297 2023-01-24 05:20:40.418648: step: 832/464, loss: 0.09442304819822311 2023-01-24 05:20:41.251849: step: 834/464, loss: 0.00023691660317126662 2023-01-24 05:20:42.049692: step: 836/464, loss: 0.09088034927845001 2023-01-24 05:20:42.800772: step: 838/464, loss: 0.031029315665364265 2023-01-24 05:20:43.474011: step: 840/464, loss: 0.06107908859848976 2023-01-24 05:20:44.221004: step: 842/464, loss: 0.10602616518735886 2023-01-24 05:20:45.003326: step: 844/464, loss: 0.05736605450510979 2023-01-24 05:20:45.770163: step: 846/464, loss: 0.01999666728079319 2023-01-24 05:20:46.514752: step: 848/464, loss: 0.007230349816381931 2023-01-24 05:20:47.205984: step: 850/464, loss: 0.03765219822525978 2023-01-24 05:20:47.867720: step: 852/464, loss: 0.043977733701467514 2023-01-24 05:20:48.614911: step: 854/464, loss: 0.031765200197696686 2023-01-24 05:20:49.373660: step: 856/464, loss: 0.020025676116347313 2023-01-24 05:20:50.040934: step: 858/464, loss: 0.001896336441859603 2023-01-24 05:20:50.727928: step: 860/464, loss: 0.009502287954092026 2023-01-24 05:20:51.482542: step: 862/464, loss: 0.02049115113914013 2023-01-24 05:20:52.268083: step: 864/464, loss: 0.04266717657446861 2023-01-24 05:20:52.936665: step: 866/464, loss: 0.00879158079624176 2023-01-24 05:20:53.626962: step: 868/464, loss: 0.14739181101322174 2023-01-24 05:20:54.454158: step: 870/464, loss: 0.044605158269405365 2023-01-24 05:20:55.180531: step: 872/464, loss: 0.45157331228256226 2023-01-24 05:20:56.045003: step: 874/464, loss: 0.027765942737460136 2023-01-24 05:20:56.722723: step: 876/464, loss: 0.03756709396839142 2023-01-24 05:20:57.468105: step: 878/464, loss: 0.0074466816149652 2023-01-24 05:20:58.165636: step: 880/464, loss: 0.03392532840371132 2023-01-24 05:20:59.016436: step: 882/464, loss: 0.0901467427611351 2023-01-24 05:20:59.768832: step: 884/464, loss: 0.021974651142954826 2023-01-24 05:21:00.512875: step: 886/464, loss: 0.002986679784953594 2023-01-24 05:21:01.393451: step: 888/464, loss: 0.004938908386975527 2023-01-24 05:21:02.101108: step: 890/464, loss: 0.14711670577526093 2023-01-24 05:21:02.795702: step: 892/464, loss: 0.10275810211896896 2023-01-24 05:21:03.563278: step: 894/464, loss: 0.02016407996416092 2023-01-24 05:21:04.299173: step: 896/464, loss: 0.019757527858018875 2023-01-24 05:21:05.002327: step: 898/464, loss: 0.0035847180988639593 2023-01-24 05:21:05.782867: step: 900/464, loss: 0.05121622979640961 2023-01-24 05:21:06.478327: step: 902/464, loss: 0.02605106495320797 2023-01-24 05:21:07.217508: step: 904/464, loss: 0.007822553627192974 2023-01-24 05:21:07.944970: step: 906/464, loss: 0.017992887645959854 2023-01-24 05:21:08.667439: step: 908/464, loss: 0.2159995585680008 2023-01-24 05:21:09.392212: step: 910/464, loss: 0.0006608831463381648 2023-01-24 05:21:10.121629: step: 912/464, loss: 0.005477035418152809 2023-01-24 05:21:10.856235: step: 914/464, loss: 0.021487193182110786 2023-01-24 05:21:11.633284: step: 916/464, loss: 0.013565809465944767 2023-01-24 05:21:12.376736: step: 918/464, loss: 0.048289552330970764 2023-01-24 05:21:13.105978: step: 920/464, loss: 1.4374891519546509 2023-01-24 05:21:13.842213: step: 922/464, loss: 0.02214394509792328 2023-01-24 05:21:14.582877: step: 924/464, loss: 0.01092542801052332 2023-01-24 05:21:15.331433: step: 926/464, loss: 0.038795508444309235 2023-01-24 05:21:16.059834: step: 928/464, loss: 0.0012786659644916654 2023-01-24 05:21:16.663733: step: 930/464, loss: 0.14401409029960632 ================================================== Loss: 0.070 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34067594800824796, 'r': 0.3413223919323623, 'f1': 0.3409988635987771}, 'combined': 0.25126232054646735, 'epoch': 26} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3156877008899806, 'r': 0.2660394868034258, 'f1': 0.2887449502504171}, 'combined': 0.17932581120815377, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3127425870547538, 'r': 0.3234244970490338, 'f1': 0.31799386183738954}, 'combined': 0.23431126661702387, 'epoch': 26} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3126589163716822, 'r': 0.26291772513073275, 'f1': 0.28563901001857384}, 'combined': 0.1773968588536406, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33144577006319675, 'r': 0.3333325581280726, 'f1': 0.33238648653452085}, 'combined': 0.2449163584991206, 'epoch': 26} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32839821993999374, 'r': 0.2728531204249206, 'f1': 0.2980599727170121}, 'combined': 0.18511093042424964, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3389423076923077, 'r': 0.2517857142857143, 'f1': 0.2889344262295082}, 'combined': 0.19262295081967212, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25806451612903225, 'r': 0.34782608695652173, 'f1': 0.2962962962962963}, 'combined': 0.14814814814814814, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.23275862068965517, 'f1': 0.3068181818181818}, 'combined': 0.20454545454545453, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:23:59.111328: step: 2/464, loss: 0.004643636755645275 2023-01-24 05:23:59.958523: step: 4/464, loss: 0.05969075858592987 2023-01-24 05:24:00.767851: step: 6/464, loss: 0.05660129338502884 2023-01-24 05:24:01.484881: step: 8/464, loss: 0.009980532340705395 2023-01-24 05:24:02.164808: step: 10/464, loss: 0.008995480835437775 2023-01-24 05:24:02.840182: step: 12/464, loss: 0.011057031340897083 2023-01-24 05:24:03.562704: step: 14/464, loss: 0.04951007291674614 2023-01-24 05:24:04.323545: step: 16/464, loss: 0.00735006807371974 2023-01-24 05:24:05.094519: step: 18/464, loss: 0.0010468775872141123 2023-01-24 05:24:05.862230: step: 20/464, loss: 0.03526607155799866 2023-01-24 05:24:06.571101: step: 22/464, loss: 0.010531190782785416 2023-01-24 05:24:07.341044: step: 24/464, loss: 0.006157663185149431 2023-01-24 05:24:07.999337: step: 26/464, loss: 0.0033533810637891293 2023-01-24 05:24:08.683736: step: 28/464, loss: 0.011918006464838982 2023-01-24 05:24:09.364911: step: 30/464, loss: 0.01299851480871439 2023-01-24 05:24:10.077984: step: 32/464, loss: 0.0388779379427433 2023-01-24 05:24:10.807354: step: 34/464, loss: 0.04373202845454216 2023-01-24 05:24:11.493557: step: 36/464, loss: 0.011047535575926304 2023-01-24 05:24:12.295531: step: 38/464, loss: 0.030027193948626518 2023-01-24 05:24:13.005123: step: 40/464, loss: 0.0028982798103243113 2023-01-24 05:24:13.746127: step: 42/464, loss: 0.16715313494205475 2023-01-24 05:24:14.471533: step: 44/464, loss: 0.037980757653713226 2023-01-24 05:24:15.200453: step: 46/464, loss: 0.05919745936989784 2023-01-24 05:24:15.938822: step: 48/464, loss: 0.003755714511498809 2023-01-24 05:24:16.686582: step: 50/464, loss: 0.02528190053999424 2023-01-24 05:24:17.489608: step: 52/464, loss: 0.03390359506011009 2023-01-24 05:24:18.229918: step: 54/464, loss: 0.01960902288556099 2023-01-24 05:24:18.972053: step: 56/464, loss: 0.0009774720529094338 2023-01-24 05:24:19.781620: step: 58/464, loss: 0.07767617702484131 2023-01-24 05:24:20.570878: step: 60/464, loss: 0.009920057840645313 2023-01-24 05:24:21.281578: step: 62/464, loss: 0.05598536878824234 2023-01-24 05:24:22.097744: step: 64/464, loss: 0.002746915677562356 2023-01-24 05:24:22.874639: step: 66/464, loss: 0.13891775906085968 2023-01-24 05:24:23.579893: step: 68/464, loss: 0.012845741584897041 2023-01-24 05:24:24.335518: step: 70/464, loss: 0.0018104149494320154 2023-01-24 05:24:25.026351: step: 72/464, loss: 0.07344887405633926 2023-01-24 05:24:25.794197: step: 74/464, loss: 0.02678999863564968 2023-01-24 05:24:26.526135: step: 76/464, loss: 0.011106455698609352 2023-01-24 05:24:27.266731: step: 78/464, loss: 0.0064119272865355015 2023-01-24 05:24:27.958738: step: 80/464, loss: 0.014576753601431847 2023-01-24 05:24:28.762228: step: 82/464, loss: 0.08922789990901947 2023-01-24 05:24:29.517740: step: 84/464, loss: 0.002564490307122469 2023-01-24 05:24:30.239473: step: 86/464, loss: 0.042727261781692505 2023-01-24 05:24:30.974058: step: 88/464, loss: 0.02241584286093712 2023-01-24 05:24:31.726789: step: 90/464, loss: 0.03405474126338959 2023-01-24 05:24:32.473311: step: 92/464, loss: 0.02004759944975376 2023-01-24 05:24:33.150862: step: 94/464, loss: 0.007997794076800346 2023-01-24 05:24:33.939196: step: 96/464, loss: 0.005616101436316967 2023-01-24 05:24:34.648459: step: 98/464, loss: 0.015116574242711067 2023-01-24 05:24:35.386400: step: 100/464, loss: 0.0059138028882443905 2023-01-24 05:24:36.115192: step: 102/464, loss: 0.005302782170474529 2023-01-24 05:24:36.855586: step: 104/464, loss: 0.0039724973030388355 2023-01-24 05:24:37.584119: step: 106/464, loss: 0.0138095049187541 2023-01-24 05:24:38.373678: step: 108/464, loss: 0.021086279302835464 2023-01-24 05:24:39.116083: step: 110/464, loss: 0.01628832332789898 2023-01-24 05:24:39.811650: step: 112/464, loss: 0.011404899880290031 2023-01-24 05:24:40.545289: step: 114/464, loss: 0.006049105431884527 2023-01-24 05:24:41.375622: step: 116/464, loss: 0.027632322162389755 2023-01-24 05:24:42.164723: step: 118/464, loss: 0.011812705546617508 2023-01-24 05:24:42.878895: step: 120/464, loss: 0.03694211319088936 2023-01-24 05:24:43.582641: step: 122/464, loss: 0.0032955994829535484 2023-01-24 05:24:44.295304: step: 124/464, loss: 0.032637108117341995 2023-01-24 05:24:45.050678: step: 126/464, loss: 0.008958675898611546 2023-01-24 05:24:45.880626: step: 128/464, loss: 0.0314217209815979 2023-01-24 05:24:46.643584: step: 130/464, loss: 0.05973093584179878 2023-01-24 05:24:47.364389: step: 132/464, loss: 0.0837894082069397 2023-01-24 05:24:48.177528: step: 134/464, loss: 2.221879243850708 2023-01-24 05:24:48.915902: step: 136/464, loss: 0.031027106568217278 2023-01-24 05:24:49.650687: step: 138/464, loss: 0.008257872425019741 2023-01-24 05:24:50.382252: step: 140/464, loss: 0.008987409062683582 2023-01-24 05:24:51.102275: step: 142/464, loss: 0.05197222903370857 2023-01-24 05:24:51.852310: step: 144/464, loss: 0.0036501206923276186 2023-01-24 05:24:52.597493: step: 146/464, loss: 0.23644611239433289 2023-01-24 05:24:53.355151: step: 148/464, loss: 0.01297426875680685 2023-01-24 05:24:54.049067: step: 150/464, loss: 0.0007067727274261415 2023-01-24 05:24:54.748745: step: 152/464, loss: 0.059547603130340576 2023-01-24 05:24:55.454639: step: 154/464, loss: 0.05646130442619324 2023-01-24 05:24:56.178613: step: 156/464, loss: 0.08381831645965576 2023-01-24 05:24:56.979916: step: 158/464, loss: 0.01727456972002983 2023-01-24 05:24:57.758118: step: 160/464, loss: 0.019560186192393303 2023-01-24 05:24:58.531939: step: 162/464, loss: 0.036793388426303864 2023-01-24 05:24:59.234459: step: 164/464, loss: 0.020215477794408798 2023-01-24 05:24:59.987270: step: 166/464, loss: 0.0006976730655878782 2023-01-24 05:25:00.763573: step: 168/464, loss: 0.007520411163568497 2023-01-24 05:25:01.451897: step: 170/464, loss: 0.10326247662305832 2023-01-24 05:25:02.272130: step: 172/464, loss: 0.013687805272638798 2023-01-24 05:25:03.119596: step: 174/464, loss: 0.016566680744290352 2023-01-24 05:25:03.879279: step: 176/464, loss: 0.009976368397474289 2023-01-24 05:25:04.586819: step: 178/464, loss: 0.013758454471826553 2023-01-24 05:25:05.380345: step: 180/464, loss: 0.06455282121896744 2023-01-24 05:25:06.064566: step: 182/464, loss: 0.004204621072858572 2023-01-24 05:25:06.774456: step: 184/464, loss: 0.041804660111665726 2023-01-24 05:25:07.442437: step: 186/464, loss: 0.07819627225399017 2023-01-24 05:25:08.165802: step: 188/464, loss: 0.0023909374140203 2023-01-24 05:25:08.982516: step: 190/464, loss: 0.01398144755512476 2023-01-24 05:25:09.709374: step: 192/464, loss: 0.007653918582946062 2023-01-24 05:25:10.398276: step: 194/464, loss: 0.005307620856910944 2023-01-24 05:25:11.135314: step: 196/464, loss: 0.012562950141727924 2023-01-24 05:25:11.908693: step: 198/464, loss: 0.01593020185828209 2023-01-24 05:25:12.698909: step: 200/464, loss: 0.0009638724150136113 2023-01-24 05:25:13.539472: step: 202/464, loss: 0.048679254949092865 2023-01-24 05:25:14.277318: step: 204/464, loss: 0.006392453797161579 2023-01-24 05:25:14.979625: step: 206/464, loss: 0.01042192429304123 2023-01-24 05:25:15.670291: step: 208/464, loss: 0.01366907637566328 2023-01-24 05:25:16.367387: step: 210/464, loss: 0.005037717055529356 2023-01-24 05:25:17.092889: step: 212/464, loss: 1.1862741708755493 2023-01-24 05:25:17.808379: step: 214/464, loss: 0.02484513819217682 2023-01-24 05:25:18.489970: step: 216/464, loss: 0.07665397226810455 2023-01-24 05:25:19.269574: step: 218/464, loss: 0.029446804895997047 2023-01-24 05:25:19.977809: step: 220/464, loss: 0.042743612080812454 2023-01-24 05:25:20.747351: step: 222/464, loss: 0.0012197594624012709 2023-01-24 05:25:21.472960: step: 224/464, loss: 0.022778544574975967 2023-01-24 05:25:22.225839: step: 226/464, loss: 0.0036713809240609407 2023-01-24 05:25:22.934125: step: 228/464, loss: 0.0013936725445091724 2023-01-24 05:25:23.690784: step: 230/464, loss: 0.021703477948904037 2023-01-24 05:25:24.504887: step: 232/464, loss: 0.025985587388277054 2023-01-24 05:25:25.201044: step: 234/464, loss: 0.012457039207220078 2023-01-24 05:25:25.982365: step: 236/464, loss: 0.01900213584303856 2023-01-24 05:25:26.682677: step: 238/464, loss: 0.03523466736078262 2023-01-24 05:25:27.421799: step: 240/464, loss: 0.013470427133142948 2023-01-24 05:25:28.055765: step: 242/464, loss: 0.010749666020274162 2023-01-24 05:25:28.770593: step: 244/464, loss: 0.024799073114991188 2023-01-24 05:25:29.575195: step: 246/464, loss: 0.00432855449616909 2023-01-24 05:25:30.310381: step: 248/464, loss: 0.04193061962723732 2023-01-24 05:25:31.080290: step: 250/464, loss: 0.026824962347745895 2023-01-24 05:25:31.780695: step: 252/464, loss: 0.1815386265516281 2023-01-24 05:25:32.491321: step: 254/464, loss: 0.030335990712046623 2023-01-24 05:25:33.215290: step: 256/464, loss: 0.042864929884672165 2023-01-24 05:25:33.942977: step: 258/464, loss: 0.019191723316907883 2023-01-24 05:25:34.671775: step: 260/464, loss: 0.040823861956596375 2023-01-24 05:25:35.315658: step: 262/464, loss: 0.016286570578813553 2023-01-24 05:25:36.003665: step: 264/464, loss: 0.09178738296031952 2023-01-24 05:25:36.848698: step: 266/464, loss: 0.03739660233259201 2023-01-24 05:25:37.668460: step: 268/464, loss: 0.01867361180484295 2023-01-24 05:25:38.378034: step: 270/464, loss: 0.0024883486330509186 2023-01-24 05:25:39.106361: step: 272/464, loss: 0.003901525866240263 2023-01-24 05:25:39.834615: step: 274/464, loss: 0.029833318665623665 2023-01-24 05:25:40.550626: step: 276/464, loss: 0.02278943918645382 2023-01-24 05:25:41.260793: step: 278/464, loss: 0.2309410125017166 2023-01-24 05:25:42.022715: step: 280/464, loss: 0.022811105474829674 2023-01-24 05:25:42.798102: step: 282/464, loss: 0.02011258341372013 2023-01-24 05:25:43.575430: step: 284/464, loss: 0.010888233780860901 2023-01-24 05:25:44.317619: step: 286/464, loss: 0.03074740245938301 2023-01-24 05:25:45.058361: step: 288/464, loss: 0.011113407090306282 2023-01-24 05:25:45.729186: step: 290/464, loss: 0.024814600124955177 2023-01-24 05:25:46.411908: step: 292/464, loss: 0.029330937191843987 2023-01-24 05:25:47.174130: step: 294/464, loss: 0.017294079065322876 2023-01-24 05:25:47.868779: step: 296/464, loss: 0.12338482588529587 2023-01-24 05:25:48.604873: step: 298/464, loss: 0.027676189318299294 2023-01-24 05:25:49.421021: step: 300/464, loss: 0.2536686360836029 2023-01-24 05:25:50.202178: step: 302/464, loss: 0.02364785224199295 2023-01-24 05:25:51.050986: step: 304/464, loss: 0.13542599976062775 2023-01-24 05:25:51.736279: step: 306/464, loss: 0.002939973957836628 2023-01-24 05:25:52.504011: step: 308/464, loss: 0.009112313389778137 2023-01-24 05:25:53.218353: step: 310/464, loss: 0.005944511387497187 2023-01-24 05:25:53.977988: step: 312/464, loss: 0.024802474305033684 2023-01-24 05:25:54.726293: step: 314/464, loss: 0.11303552240133286 2023-01-24 05:25:55.459753: step: 316/464, loss: 0.001480169128626585 2023-01-24 05:25:56.200236: step: 318/464, loss: 0.021034857258200645 2023-01-24 05:25:56.885259: step: 320/464, loss: 0.008457268588244915 2023-01-24 05:25:57.637011: step: 322/464, loss: 0.024015389382839203 2023-01-24 05:25:58.317272: step: 324/464, loss: 0.0032060788944363594 2023-01-24 05:25:59.002018: step: 326/464, loss: 0.00801069661974907 2023-01-24 05:25:59.713924: step: 328/464, loss: 0.009024497121572495 2023-01-24 05:26:00.411969: step: 330/464, loss: 0.015846198424696922 2023-01-24 05:26:01.083792: step: 332/464, loss: 0.02931729145348072 2023-01-24 05:26:01.960258: step: 334/464, loss: 0.06721168756484985 2023-01-24 05:26:02.732897: step: 336/464, loss: 0.05309503898024559 2023-01-24 05:26:03.477575: step: 338/464, loss: 0.025797521695494652 2023-01-24 05:26:04.302696: step: 340/464, loss: 16.31406593322754 2023-01-24 05:26:04.983570: step: 342/464, loss: 0.032370854169130325 2023-01-24 05:26:05.775143: step: 344/464, loss: 0.011842369101941586 2023-01-24 05:26:06.523635: step: 346/464, loss: 0.07969319820404053 2023-01-24 05:26:07.264009: step: 348/464, loss: 0.0680510401725769 2023-01-24 05:26:07.966031: step: 350/464, loss: 0.0023781214840710163 2023-01-24 05:26:08.668407: step: 352/464, loss: 0.0967128649353981 2023-01-24 05:26:09.415396: step: 354/464, loss: 0.046918027102947235 2023-01-24 05:26:10.157377: step: 356/464, loss: 0.018872834742069244 2023-01-24 05:26:10.906215: step: 358/464, loss: 0.0149329649284482 2023-01-24 05:26:11.636442: step: 360/464, loss: 0.03147076815366745 2023-01-24 05:26:12.382858: step: 362/464, loss: 0.042903654277324677 2023-01-24 05:26:13.075588: step: 364/464, loss: 0.021718693897128105 2023-01-24 05:26:13.870628: step: 366/464, loss: 0.016806410625576973 2023-01-24 05:26:14.606485: step: 368/464, loss: 0.02991560660302639 2023-01-24 05:26:15.212602: step: 370/464, loss: 0.00047591744805686176 2023-01-24 05:26:15.956928: step: 372/464, loss: 0.018689189106225967 2023-01-24 05:26:16.767078: step: 374/464, loss: 0.0032049152068793774 2023-01-24 05:26:17.481766: step: 376/464, loss: 0.011647749692201614 2023-01-24 05:26:18.216458: step: 378/464, loss: 0.032072123140096664 2023-01-24 05:26:19.060593: step: 380/464, loss: 1.2236533164978027 2023-01-24 05:26:19.780289: step: 382/464, loss: 0.1918351650238037 2023-01-24 05:26:20.590793: step: 384/464, loss: 0.007488076109439135 2023-01-24 05:26:21.400002: step: 386/464, loss: 0.029658658429980278 2023-01-24 05:26:22.112447: step: 388/464, loss: 0.0018123077461495996 2023-01-24 05:26:22.834408: step: 390/464, loss: 0.3252991735935211 2023-01-24 05:26:23.541523: step: 392/464, loss: 0.0037515009753406048 2023-01-24 05:26:24.205562: step: 394/464, loss: 0.01372498832643032 2023-01-24 05:26:24.916113: step: 396/464, loss: 0.02327573671936989 2023-01-24 05:26:25.644566: step: 398/464, loss: 0.02104596234858036 2023-01-24 05:26:26.423105: step: 400/464, loss: 0.006648695096373558 2023-01-24 05:26:27.220912: step: 402/464, loss: 0.013397648930549622 2023-01-24 05:26:28.004792: step: 404/464, loss: 0.08620434254407883 2023-01-24 05:26:28.786435: step: 406/464, loss: 0.061332013458013535 2023-01-24 05:26:29.549200: step: 408/464, loss: 0.006002719514071941 2023-01-24 05:26:30.313791: step: 410/464, loss: 0.03985309228301048 2023-01-24 05:26:31.039361: step: 412/464, loss: 0.054387107491493225 2023-01-24 05:26:31.765356: step: 414/464, loss: 0.025079520419239998 2023-01-24 05:26:32.464287: step: 416/464, loss: 0.004718593787401915 2023-01-24 05:26:33.251823: step: 418/464, loss: 0.03148522600531578 2023-01-24 05:26:34.019755: step: 420/464, loss: 0.010869407095015049 2023-01-24 05:26:34.844927: step: 422/464, loss: 0.019349634647369385 2023-01-24 05:26:35.566816: step: 424/464, loss: 0.067308709025383 2023-01-24 05:26:36.235354: step: 426/464, loss: 0.23457546532154083 2023-01-24 05:26:36.969065: step: 428/464, loss: 0.02192721515893936 2023-01-24 05:26:37.647931: step: 430/464, loss: 0.00015661267389077693 2023-01-24 05:26:38.329176: step: 432/464, loss: 0.007550378330051899 2023-01-24 05:26:39.085502: step: 434/464, loss: 0.0132135059684515 2023-01-24 05:26:39.806812: step: 436/464, loss: 0.003985149785876274 2023-01-24 05:26:40.542931: step: 438/464, loss: 0.07638142257928848 2023-01-24 05:26:41.222313: step: 440/464, loss: 0.013048535212874413 2023-01-24 05:26:41.966397: step: 442/464, loss: 0.015374964103102684 2023-01-24 05:26:42.704307: step: 444/464, loss: 0.0020470223389565945 2023-01-24 05:26:43.439839: step: 446/464, loss: 0.006991243921220303 2023-01-24 05:26:44.142500: step: 448/464, loss: 0.02094842866063118 2023-01-24 05:26:44.816478: step: 450/464, loss: 0.054518863558769226 2023-01-24 05:26:45.559253: step: 452/464, loss: 0.06273539364337921 2023-01-24 05:26:46.324645: step: 454/464, loss: 0.006025554146617651 2023-01-24 05:26:47.006646: step: 456/464, loss: 0.05231739953160286 2023-01-24 05:26:47.783737: step: 458/464, loss: 0.001241020974703133 2023-01-24 05:26:48.489856: step: 460/464, loss: 0.0381249338388443 2023-01-24 05:26:49.213667: step: 462/464, loss: 0.0033619175665080547 2023-01-24 05:26:49.971945: step: 464/464, loss: 0.04729428142309189 2023-01-24 05:26:50.651355: step: 466/464, loss: 0.02667922154068947 2023-01-24 05:26:51.418992: step: 468/464, loss: 0.2248188853263855 2023-01-24 05:26:52.145588: step: 470/464, loss: 0.33987537026405334 2023-01-24 05:26:52.919469: step: 472/464, loss: 3.5491943359375 2023-01-24 05:26:53.587761: step: 474/464, loss: 0.017293350771069527 2023-01-24 05:26:54.326383: step: 476/464, loss: 0.022785795852541924 2023-01-24 05:26:55.077802: step: 478/464, loss: 0.017341703176498413 2023-01-24 05:26:55.745628: step: 480/464, loss: 0.0009825509041547775 2023-01-24 05:26:56.462805: step: 482/464, loss: 0.003864610567688942 2023-01-24 05:26:57.177965: step: 484/464, loss: 0.0037028638180345297 2023-01-24 05:26:57.931709: step: 486/464, loss: 0.017254330217838287 2023-01-24 05:26:58.718184: step: 488/464, loss: 0.06554201245307922 2023-01-24 05:26:59.528306: step: 490/464, loss: 0.23053628206253052 2023-01-24 05:27:00.258765: step: 492/464, loss: 0.08253292739391327 2023-01-24 05:27:00.943607: step: 494/464, loss: 0.012563752010464668 2023-01-24 05:27:01.719025: step: 496/464, loss: 0.004077407997101545 2023-01-24 05:27:02.468634: step: 498/464, loss: 0.013352013193070889 2023-01-24 05:27:03.160264: step: 500/464, loss: 0.014727744273841381 2023-01-24 05:27:03.880787: step: 502/464, loss: 0.10225684940814972 2023-01-24 05:27:04.568797: step: 504/464, loss: 0.0051687248051166534 2023-01-24 05:27:05.289150: step: 506/464, loss: 0.00040302166598849 2023-01-24 05:27:05.997986: step: 508/464, loss: 0.025112995877861977 2023-01-24 05:27:06.827539: step: 510/464, loss: 0.08379174023866653 2023-01-24 05:27:07.592781: step: 512/464, loss: 0.009772256948053837 2023-01-24 05:27:08.296602: step: 514/464, loss: 0.004904484376311302 2023-01-24 05:27:09.048532: step: 516/464, loss: 0.005667173303663731 2023-01-24 05:27:09.783926: step: 518/464, loss: 0.0352930948138237 2023-01-24 05:27:10.554776: step: 520/464, loss: 0.03419114649295807 2023-01-24 05:27:11.341170: step: 522/464, loss: 0.0036346132401376963 2023-01-24 05:27:12.094654: step: 524/464, loss: 0.04692724719643593 2023-01-24 05:27:12.859645: step: 526/464, loss: 0.5098605155944824 2023-01-24 05:27:13.738538: step: 528/464, loss: 0.022590486332774162 2023-01-24 05:27:14.449654: step: 530/464, loss: 0.018303902819752693 2023-01-24 05:27:15.143417: step: 532/464, loss: 0.008179579861462116 2023-01-24 05:27:15.894680: step: 534/464, loss: 0.004101166967302561 2023-01-24 05:27:16.609286: step: 536/464, loss: 0.06652644276618958 2023-01-24 05:27:17.392615: step: 538/464, loss: 0.22143836319446564 2023-01-24 05:27:18.243183: step: 540/464, loss: 0.05717802420258522 2023-01-24 05:27:18.980797: step: 542/464, loss: 0.008329024538397789 2023-01-24 05:27:19.810145: step: 544/464, loss: 0.016076695173978806 2023-01-24 05:27:20.519718: step: 546/464, loss: 0.023170355707406998 2023-01-24 05:27:21.280974: step: 548/464, loss: 0.03448038920760155 2023-01-24 05:27:22.090517: step: 550/464, loss: 0.03607087582349777 2023-01-24 05:27:22.851451: step: 552/464, loss: 0.11305753141641617 2023-01-24 05:27:23.658237: step: 554/464, loss: 0.006824792828410864 2023-01-24 05:27:24.488452: step: 556/464, loss: 0.0006629024283029139 2023-01-24 05:27:25.262251: step: 558/464, loss: 0.05242105573415756 2023-01-24 05:27:25.960591: step: 560/464, loss: 0.017612749710679054 2023-01-24 05:27:26.615496: step: 562/464, loss: 0.036728858947753906 2023-01-24 05:27:27.336501: step: 564/464, loss: 0.002265622839331627 2023-01-24 05:27:27.995335: step: 566/464, loss: 0.0009037033887580037 2023-01-24 05:27:28.784276: step: 568/464, loss: 0.020430414006114006 2023-01-24 05:27:29.505784: step: 570/464, loss: 0.02256557159125805 2023-01-24 05:27:30.148257: step: 572/464, loss: 0.04098518192768097 2023-01-24 05:27:30.925960: step: 574/464, loss: 0.14411544799804688 2023-01-24 05:27:31.712200: step: 576/464, loss: 0.0064660003408789635 2023-01-24 05:27:32.400465: step: 578/464, loss: 0.016785940155386925 2023-01-24 05:27:33.138701: step: 580/464, loss: 0.15375134348869324 2023-01-24 05:27:33.873480: step: 582/464, loss: 0.008497925475239754 2023-01-24 05:27:34.617160: step: 584/464, loss: 0.18103736639022827 2023-01-24 05:27:35.280789: step: 586/464, loss: 0.004437592811882496 2023-01-24 05:27:36.160840: step: 588/464, loss: 0.022399378940463066 2023-01-24 05:27:36.998415: step: 590/464, loss: 0.03182051330804825 2023-01-24 05:27:37.769205: step: 592/464, loss: 0.06716489046812057 2023-01-24 05:27:38.482196: step: 594/464, loss: 0.029364589601755142 2023-01-24 05:27:39.184844: step: 596/464, loss: 0.028085386380553246 2023-01-24 05:27:39.937339: step: 598/464, loss: 0.047149620950222015 2023-01-24 05:27:40.689320: step: 600/464, loss: 0.6726506948471069 2023-01-24 05:27:41.382210: step: 602/464, loss: 0.021409453824162483 2023-01-24 05:27:42.089053: step: 604/464, loss: 0.027266275137662888 2023-01-24 05:27:42.893762: step: 606/464, loss: 0.012125412002205849 2023-01-24 05:27:43.588344: step: 608/464, loss: 0.004791960120201111 2023-01-24 05:27:44.299589: step: 610/464, loss: 0.021916117519140244 2023-01-24 05:27:45.048085: step: 612/464, loss: 0.07800301909446716 2023-01-24 05:27:45.843194: step: 614/464, loss: 0.06997323036193848 2023-01-24 05:27:46.516670: step: 616/464, loss: 0.01541509572416544 2023-01-24 05:27:47.211636: step: 618/464, loss: 0.01439857017248869 2023-01-24 05:27:47.925230: step: 620/464, loss: 0.09722045809030533 2023-01-24 05:27:48.690107: step: 622/464, loss: 0.3760955035686493 2023-01-24 05:27:49.511169: step: 624/464, loss: 0.03853768855333328 2023-01-24 05:27:50.250683: step: 626/464, loss: 0.20559030771255493 2023-01-24 05:27:50.986073: step: 628/464, loss: 0.36281540989875793 2023-01-24 05:27:51.681469: step: 630/464, loss: 0.000528940639924258 2023-01-24 05:27:52.461586: step: 632/464, loss: 0.059164758771657944 2023-01-24 05:27:53.278555: step: 634/464, loss: 0.03127744048833847 2023-01-24 05:27:54.071886: step: 636/464, loss: 0.017752759158611298 2023-01-24 05:27:54.738532: step: 638/464, loss: 0.017974853515625 2023-01-24 05:27:55.474012: step: 640/464, loss: 0.004527505021542311 2023-01-24 05:27:56.192077: step: 642/464, loss: 0.8789519667625427 2023-01-24 05:27:56.883764: step: 644/464, loss: 0.022091975435614586 2023-01-24 05:27:57.631297: step: 646/464, loss: 0.029383093118667603 2023-01-24 05:27:58.379071: step: 648/464, loss: 0.014885574579238892 2023-01-24 05:27:59.110350: step: 650/464, loss: 0.09605132788419724 2023-01-24 05:28:00.004322: step: 652/464, loss: 0.015228205360472202 2023-01-24 05:28:00.709194: step: 654/464, loss: 0.03227443993091583 2023-01-24 05:28:01.486093: step: 656/464, loss: 0.02475246600806713 2023-01-24 05:28:02.279184: step: 658/464, loss: 0.05495090410113335 2023-01-24 05:28:03.052043: step: 660/464, loss: 0.023350244387984276 2023-01-24 05:28:03.803858: step: 662/464, loss: 0.00836243201047182 2023-01-24 05:28:04.527740: step: 664/464, loss: 0.05023346468806267 2023-01-24 05:28:05.287450: step: 666/464, loss: 0.019876714795827866 2023-01-24 05:28:06.028948: step: 668/464, loss: 0.04238751158118248 2023-01-24 05:28:06.773549: step: 670/464, loss: 0.00583220599219203 2023-01-24 05:28:07.570884: step: 672/464, loss: 0.027970099821686745 2023-01-24 05:28:08.301623: step: 674/464, loss: 0.06498485803604126 2023-01-24 05:28:09.026753: step: 676/464, loss: 0.06316278874874115 2023-01-24 05:28:09.927227: step: 678/464, loss: 0.011693473905324936 2023-01-24 05:28:10.693639: step: 680/464, loss: 0.04206886515021324 2023-01-24 05:28:11.410425: step: 682/464, loss: 0.01298786886036396 2023-01-24 05:28:12.184496: step: 684/464, loss: 0.0017875705379992723 2023-01-24 05:28:12.915135: step: 686/464, loss: 0.001596881658770144 2023-01-24 05:28:13.602476: step: 688/464, loss: 0.04022899642586708 2023-01-24 05:28:14.316310: step: 690/464, loss: 0.02122717723250389 2023-01-24 05:28:14.985200: step: 692/464, loss: 0.01992715150117874 2023-01-24 05:28:15.792440: step: 694/464, loss: 0.01370147429406643 2023-01-24 05:28:16.479889: step: 696/464, loss: 0.0580449253320694 2023-01-24 05:28:17.190721: step: 698/464, loss: 0.042913101613521576 2023-01-24 05:28:17.911228: step: 700/464, loss: 0.0175530593842268 2023-01-24 05:28:18.670492: step: 702/464, loss: 0.024001671001315117 2023-01-24 05:28:19.320740: step: 704/464, loss: 0.3223625719547272 2023-01-24 05:28:20.033249: step: 706/464, loss: 0.05440326780080795 2023-01-24 05:28:20.761642: step: 708/464, loss: 0.04393898695707321 2023-01-24 05:28:21.448208: step: 710/464, loss: 0.0779217854142189 2023-01-24 05:28:22.280651: step: 712/464, loss: 0.010698176920413971 2023-01-24 05:28:23.005969: step: 714/464, loss: 0.020368900150060654 2023-01-24 05:28:23.714466: step: 716/464, loss: 0.003039130475372076 2023-01-24 05:28:24.390257: step: 718/464, loss: 0.0023006058763712645 2023-01-24 05:28:25.081145: step: 720/464, loss: 0.05625094100832939 2023-01-24 05:28:25.739536: step: 722/464, loss: 0.025456681847572327 2023-01-24 05:28:26.544450: step: 724/464, loss: 0.18045490980148315 2023-01-24 05:28:27.295577: step: 726/464, loss: 0.021235687658190727 2023-01-24 05:28:28.045586: step: 728/464, loss: 0.11662095040082932 2023-01-24 05:28:28.822558: step: 730/464, loss: 0.013974427245557308 2023-01-24 05:28:29.548656: step: 732/464, loss: 0.01940869353711605 2023-01-24 05:28:30.322687: step: 734/464, loss: 0.24936962127685547 2023-01-24 05:28:31.077582: step: 736/464, loss: 0.0662817656993866 2023-01-24 05:28:31.773854: step: 738/464, loss: 0.005788351409137249 2023-01-24 05:28:32.501950: step: 740/464, loss: 0.00926015991717577 2023-01-24 05:28:33.223338: step: 742/464, loss: 0.03753805533051491 2023-01-24 05:28:34.003869: step: 744/464, loss: 0.06381073594093323 2023-01-24 05:28:34.739427: step: 746/464, loss: 0.025666510686278343 2023-01-24 05:28:35.477500: step: 748/464, loss: 0.012702060863375664 2023-01-24 05:28:36.294921: step: 750/464, loss: 0.05052117258310318 2023-01-24 05:28:36.983566: step: 752/464, loss: 0.0011634822003543377 2023-01-24 05:28:37.706228: step: 754/464, loss: 0.01186416856944561 2023-01-24 05:28:38.464309: step: 756/464, loss: 0.008127476088702679 2023-01-24 05:28:39.098974: step: 758/464, loss: 1.6381367444992065 2023-01-24 05:28:39.820398: step: 760/464, loss: 0.006272918079048395 2023-01-24 05:28:40.580304: step: 762/464, loss: 0.005116637796163559 2023-01-24 05:28:41.291197: step: 764/464, loss: 0.010884278453886509 2023-01-24 05:28:42.087913: step: 766/464, loss: 0.03772607445716858 2023-01-24 05:28:42.875433: step: 768/464, loss: 0.012753071263432503 2023-01-24 05:28:43.629819: step: 770/464, loss: 0.040373656898736954 2023-01-24 05:28:44.424468: step: 772/464, loss: 0.006438211537897587 2023-01-24 05:28:45.203647: step: 774/464, loss: 0.041876643896102905 2023-01-24 05:28:46.005610: step: 776/464, loss: 0.014445491135120392 2023-01-24 05:28:46.691729: step: 778/464, loss: 0.01640855334699154 2023-01-24 05:28:47.492691: step: 780/464, loss: 0.010428578592836857 2023-01-24 05:28:48.308259: step: 782/464, loss: 0.06605575233697891 2023-01-24 05:28:49.086014: step: 784/464, loss: 0.14504900574684143 2023-01-24 05:28:49.805831: step: 786/464, loss: 0.013085847720503807 2023-01-24 05:28:50.561248: step: 788/464, loss: 0.05918606370687485 2023-01-24 05:28:51.372836: step: 790/464, loss: 0.009224585257470608 2023-01-24 05:28:52.045238: step: 792/464, loss: 0.01954095996916294 2023-01-24 05:28:52.760209: step: 794/464, loss: 0.005487607326358557 2023-01-24 05:28:53.489870: step: 796/464, loss: 0.027371792122721672 2023-01-24 05:28:54.230332: step: 798/464, loss: 0.043121811002492905 2023-01-24 05:28:54.976466: step: 800/464, loss: 0.13599644601345062 2023-01-24 05:28:55.731845: step: 802/464, loss: 0.15219353139400482 2023-01-24 05:28:56.396935: step: 804/464, loss: 0.003684660419821739 2023-01-24 05:28:57.119195: step: 806/464, loss: 0.05656978115439415 2023-01-24 05:28:57.816441: step: 808/464, loss: 0.017085609957575798 2023-01-24 05:28:58.590937: step: 810/464, loss: 0.02435958757996559 2023-01-24 05:28:59.301798: step: 812/464, loss: 0.001792858587577939 2023-01-24 05:29:00.104335: step: 814/464, loss: 0.005744806956499815 2023-01-24 05:29:00.838833: step: 816/464, loss: 0.014488141983747482 2023-01-24 05:29:01.589358: step: 818/464, loss: 0.01181106548756361 2023-01-24 05:29:02.303956: step: 820/464, loss: 0.01934506557881832 2023-01-24 05:29:03.049246: step: 822/464, loss: 0.009796754457056522 2023-01-24 05:29:03.858551: step: 824/464, loss: 0.00042967224726453424 2023-01-24 05:29:04.590623: step: 826/464, loss: 0.01379795465618372 2023-01-24 05:29:05.320895: step: 828/464, loss: 0.014302251860499382 2023-01-24 05:29:06.171278: step: 830/464, loss: 0.016044262796640396 2023-01-24 05:29:06.897584: step: 832/464, loss: 0.04799362272024155 2023-01-24 05:29:07.515046: step: 834/464, loss: 0.006738803815096617 2023-01-24 05:29:08.188368: step: 836/464, loss: 0.001439097453840077 2023-01-24 05:29:08.950874: step: 838/464, loss: 0.11067359149456024 2023-01-24 05:29:09.653486: step: 840/464, loss: 0.0274420827627182 2023-01-24 05:29:10.363218: step: 842/464, loss: 0.013492869213223457 2023-01-24 05:29:11.117813: step: 844/464, loss: 0.0159356277436018 2023-01-24 05:29:11.850119: step: 846/464, loss: 0.0019269096665084362 2023-01-24 05:29:12.565297: step: 848/464, loss: 0.014913782477378845 2023-01-24 05:29:13.402285: step: 850/464, loss: 0.04737095534801483 2023-01-24 05:29:14.115399: step: 852/464, loss: 0.08423922210931778 2023-01-24 05:29:14.808791: step: 854/464, loss: 0.030404379591345787 2023-01-24 05:29:15.572727: step: 856/464, loss: 0.06620021164417267 2023-01-24 05:29:16.317500: step: 858/464, loss: 0.08976007252931595 2023-01-24 05:29:17.043923: step: 860/464, loss: 0.029361475259065628 2023-01-24 05:29:17.806688: step: 862/464, loss: 0.012592900544404984 2023-01-24 05:29:18.500793: step: 864/464, loss: 0.011326616629958153 2023-01-24 05:29:19.164332: step: 866/464, loss: 0.007855894044041634 2023-01-24 05:29:19.915943: step: 868/464, loss: 0.01106798741966486 2023-01-24 05:29:20.636531: step: 870/464, loss: 0.008292165584862232 2023-01-24 05:29:21.414412: step: 872/464, loss: 0.01627056859433651 2023-01-24 05:29:22.326700: step: 874/464, loss: 0.010699544101953506 2023-01-24 05:29:23.043325: step: 876/464, loss: 0.01734847202897072 2023-01-24 05:29:23.741764: step: 878/464, loss: 0.6863678097724915 2023-01-24 05:29:24.539804: step: 880/464, loss: 0.20707592368125916 2023-01-24 05:29:25.298633: step: 882/464, loss: 0.0022819163277745247 2023-01-24 05:29:26.020852: step: 884/464, loss: 0.013131760060787201 2023-01-24 05:29:26.660399: step: 886/464, loss: 0.008258351124823093 2023-01-24 05:29:27.434745: step: 888/464, loss: 0.10258307307958603 2023-01-24 05:29:28.166220: step: 890/464, loss: 0.010877292603254318 2023-01-24 05:29:28.929513: step: 892/464, loss: 0.02519080974161625 2023-01-24 05:29:29.626472: step: 894/464, loss: 0.02485913224518299 2023-01-24 05:29:30.296105: step: 896/464, loss: 0.00023697837605141103 2023-01-24 05:29:31.115468: step: 898/464, loss: 0.008679088205099106 2023-01-24 05:29:31.827904: step: 900/464, loss: 0.0034459216985851526 2023-01-24 05:29:32.552481: step: 902/464, loss: 0.0010122188832610846 2023-01-24 05:29:33.262627: step: 904/464, loss: 0.00626129936426878 2023-01-24 05:29:34.044208: step: 906/464, loss: 0.03434443101286888 2023-01-24 05:29:34.869371: step: 908/464, loss: 0.027134211733937263 2023-01-24 05:29:35.684926: step: 910/464, loss: 0.012275014072656631 2023-01-24 05:29:36.418511: step: 912/464, loss: 0.016073843464255333 2023-01-24 05:29:37.166900: step: 914/464, loss: 0.010795571841299534 2023-01-24 05:29:37.874687: step: 916/464, loss: 0.014998020604252815 2023-01-24 05:29:38.610215: step: 918/464, loss: 0.010157154873013496 2023-01-24 05:29:39.344374: step: 920/464, loss: 0.07434345781803131 2023-01-24 05:29:40.028670: step: 922/464, loss: 0.028125915676355362 2023-01-24 05:29:40.729694: step: 924/464, loss: 0.0008021766552701592 2023-01-24 05:29:41.464755: step: 926/464, loss: 0.023643728345632553 2023-01-24 05:29:42.180961: step: 928/464, loss: 0.009946424514055252 2023-01-24 05:29:42.870913: step: 930/464, loss: 0.0014948910102248192 ================================================== Loss: 0.099 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3399507947197117, 'r': 0.3309198438163417, 'f1': 0.3353745340215617}, 'combined': 0.24711807770009808, 'epoch': 27} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.30889419702122933, 'r': 0.2579205498843269, 'f1': 0.28111534354651463}, 'combined': 0.17458742388678278, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32841653501025725, 'r': 0.32218472220171346, 'f1': 0.3252707827592012}, 'combined': 0.23967320834888506, 'epoch': 27} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.30537472162717255, 'r': 0.2540765964526475, 'f1': 0.27737380324711897}, 'combined': 0.17226373043768442, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3426913173789137, 'r': 0.32838541798169146, 'f1': 0.33538588231851046}, 'combined': 0.24712643960311295, 'epoch': 27} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3147440294037032, 'r': 0.26130279242696386, 'f1': 0.2855444754352906}, 'combined': 0.17733814790191732, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.390625, 'r': 0.22321428571428573, 'f1': 0.2840909090909091}, 'combined': 0.1893939393939394, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26515151515151514, 'r': 0.3804347826086957, 'f1': 0.3125}, 'combined': 0.15625, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5079887218045113, 'r': 0.2452359346642468, 'f1': 0.3307833537331701}, 'combined': 0.2205222358221134, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:32:23.197907: step: 2/464, loss: 0.11802484840154648 2023-01-24 05:32:23.990732: step: 4/464, loss: 0.01309600193053484 2023-01-24 05:32:24.774895: step: 6/464, loss: 0.03771432489156723 2023-01-24 05:32:25.526085: step: 8/464, loss: 0.04672054946422577 2023-01-24 05:32:26.353200: step: 10/464, loss: 0.05573376640677452 2023-01-24 05:32:27.047920: step: 12/464, loss: 0.014672880060970783 2023-01-24 05:32:27.799515: step: 14/464, loss: 0.0027975246775895357 2023-01-24 05:32:28.474473: step: 16/464, loss: 0.011845177039504051 2023-01-24 05:32:29.178280: step: 18/464, loss: 0.0013881749473512173 2023-01-24 05:32:29.902305: step: 20/464, loss: 0.02932727336883545 2023-01-24 05:32:30.553474: step: 22/464, loss: 0.005997839383780956 2023-01-24 05:32:31.226671: step: 24/464, loss: 0.0056749386712908745 2023-01-24 05:32:32.040551: step: 26/464, loss: 0.017113935202360153 2023-01-24 05:32:32.760421: step: 28/464, loss: 0.014125406742095947 2023-01-24 05:32:33.509891: step: 30/464, loss: 0.03675199672579765 2023-01-24 05:32:34.257943: step: 32/464, loss: 0.019042707979679108 2023-01-24 05:32:34.922706: step: 34/464, loss: 0.006434685550630093 2023-01-24 05:32:35.714419: step: 36/464, loss: 0.03755321726202965 2023-01-24 05:32:36.456861: step: 38/464, loss: 0.023875346407294273 2023-01-24 05:32:37.191046: step: 40/464, loss: 0.34607502818107605 2023-01-24 05:32:37.943833: step: 42/464, loss: 0.0430816151201725 2023-01-24 05:32:38.694813: step: 44/464, loss: 0.034060847014188766 2023-01-24 05:32:39.422379: step: 46/464, loss: 0.0461505763232708 2023-01-24 05:32:40.146552: step: 48/464, loss: 0.02029733918607235 2023-01-24 05:32:40.915449: step: 50/464, loss: 0.0033927804324775934 2023-01-24 05:32:41.611262: step: 52/464, loss: 0.011056846007704735 2023-01-24 05:32:42.453873: step: 54/464, loss: 0.00704342732205987 2023-01-24 05:32:43.167013: step: 56/464, loss: 0.00036773111787624657 2023-01-24 05:32:43.901357: step: 58/464, loss: 0.003657267428934574 2023-01-24 05:32:44.629789: step: 60/464, loss: 0.08691982924938202 2023-01-24 05:32:45.433621: step: 62/464, loss: 0.003310913685709238 2023-01-24 05:32:46.132827: step: 64/464, loss: 0.033577486872673035 2023-01-24 05:32:46.846410: step: 66/464, loss: 0.01103971991688013 2023-01-24 05:32:47.601863: step: 68/464, loss: 0.27612701058387756 2023-01-24 05:32:48.408979: step: 70/464, loss: 0.0013540396466851234 2023-01-24 05:32:49.096059: step: 72/464, loss: 0.01651098020374775 2023-01-24 05:32:49.884684: step: 74/464, loss: 0.027707085013389587 2023-01-24 05:32:50.686521: step: 76/464, loss: 0.002544702496379614 2023-01-24 05:32:51.453979: step: 78/464, loss: 0.003023700788617134 2023-01-24 05:32:52.182104: step: 80/464, loss: 0.0047032893635332584 2023-01-24 05:32:52.858773: step: 82/464, loss: 0.00022587347484659404 2023-01-24 05:32:53.657376: step: 84/464, loss: 0.5387089252471924 2023-01-24 05:32:54.398470: step: 86/464, loss: 0.0016945467796176672 2023-01-24 05:32:55.153935: step: 88/464, loss: 0.010113071650266647 2023-01-24 05:32:55.845852: step: 90/464, loss: 0.0036650216206908226 2023-01-24 05:32:56.581729: step: 92/464, loss: 0.05071362480521202 2023-01-24 05:32:57.428664: step: 94/464, loss: 0.08658748120069504 2023-01-24 05:32:58.131732: step: 96/464, loss: 0.05931760370731354 2023-01-24 05:32:58.855437: step: 98/464, loss: 0.002367552602663636 2023-01-24 05:32:59.602498: step: 100/464, loss: 0.05622279644012451 2023-01-24 05:33:00.302621: step: 102/464, loss: 0.122266985476017 2023-01-24 05:33:01.045497: step: 104/464, loss: 0.10930424183607101 2023-01-24 05:33:01.855730: step: 106/464, loss: 0.01408810168504715 2023-01-24 05:33:02.612033: step: 108/464, loss: 6.7664475440979 2023-01-24 05:33:03.291410: step: 110/464, loss: 0.010170192457735538 2023-01-24 05:33:03.998067: step: 112/464, loss: 0.032275233417749405 2023-01-24 05:33:04.727272: step: 114/464, loss: 0.003709939308464527 2023-01-24 05:33:05.532991: step: 116/464, loss: 0.0035400777123868465 2023-01-24 05:33:06.303272: step: 118/464, loss: 0.1314106434583664 2023-01-24 05:33:07.175202: step: 120/464, loss: 0.0016641796100884676 2023-01-24 05:33:07.946395: step: 122/464, loss: 0.015162871219217777 2023-01-24 05:33:08.724228: step: 124/464, loss: 0.021675320342183113 2023-01-24 05:33:09.475485: step: 126/464, loss: 0.009907363913953304 2023-01-24 05:33:10.162360: step: 128/464, loss: 0.0007932535954751074 2023-01-24 05:33:10.851276: step: 130/464, loss: 0.011510932818055153 2023-01-24 05:33:11.703234: step: 132/464, loss: 0.06883779913187027 2023-01-24 05:33:12.503510: step: 134/464, loss: 0.015645796433091164 2023-01-24 05:33:13.267105: step: 136/464, loss: 0.07446697354316711 2023-01-24 05:33:13.992548: step: 138/464, loss: 0.019342824816703796 2023-01-24 05:33:14.726065: step: 140/464, loss: 0.0017768784891813993 2023-01-24 05:33:15.403029: step: 142/464, loss: 0.00040327577153220773 2023-01-24 05:33:16.094869: step: 144/464, loss: 0.004029213450849056 2023-01-24 05:33:16.790508: step: 146/464, loss: 0.0003138831234537065 2023-01-24 05:33:17.475518: step: 148/464, loss: 0.07467584311962128 2023-01-24 05:33:18.170012: step: 150/464, loss: 0.020751923322677612 2023-01-24 05:33:18.853301: step: 152/464, loss: 0.00014375359751284122 2023-01-24 05:33:19.588108: step: 154/464, loss: 0.005737461615353823 2023-01-24 05:33:20.269687: step: 156/464, loss: 0.020710809156298637 2023-01-24 05:33:20.958058: step: 158/464, loss: 0.16590137779712677 2023-01-24 05:33:21.638723: step: 160/464, loss: 0.0020352245774120092 2023-01-24 05:33:22.382455: step: 162/464, loss: 0.020962907001376152 2023-01-24 05:33:23.163827: step: 164/464, loss: 0.1972610354423523 2023-01-24 05:33:23.916470: step: 166/464, loss: 4.758993625640869 2023-01-24 05:33:24.617135: step: 168/464, loss: 0.08988256752490997 2023-01-24 05:33:25.425944: step: 170/464, loss: 0.0248276200145483 2023-01-24 05:33:26.170572: step: 172/464, loss: 0.005605712067335844 2023-01-24 05:33:26.871293: step: 174/464, loss: 0.02840728685259819 2023-01-24 05:33:27.679248: step: 176/464, loss: 0.00861166138201952 2023-01-24 05:33:28.390645: step: 178/464, loss: 0.004647831432521343 2023-01-24 05:33:29.115749: step: 180/464, loss: 0.008284042589366436 2023-01-24 05:33:29.852176: step: 182/464, loss: 0.038902122527360916 2023-01-24 05:33:30.553324: step: 184/464, loss: 0.013399926014244556 2023-01-24 05:33:31.252645: step: 186/464, loss: 0.0023059824015945196 2023-01-24 05:33:32.046114: step: 188/464, loss: 0.04218491166830063 2023-01-24 05:33:32.782382: step: 190/464, loss: 0.03554745763540268 2023-01-24 05:33:33.487820: step: 192/464, loss: 0.008880481123924255 2023-01-24 05:33:34.209924: step: 194/464, loss: 0.03854241967201233 2023-01-24 05:33:35.068866: step: 196/464, loss: 0.007150109391659498 2023-01-24 05:33:35.823595: step: 198/464, loss: 0.0255377609282732 2023-01-24 05:33:36.591542: step: 200/464, loss: 0.0677812471985817 2023-01-24 05:33:37.234120: step: 202/464, loss: 0.00023945064458530396 2023-01-24 05:33:38.015163: step: 204/464, loss: 0.3639880120754242 2023-01-24 05:33:38.791689: step: 206/464, loss: 0.0019438682356849313 2023-01-24 05:33:39.511368: step: 208/464, loss: 0.00396093400195241 2023-01-24 05:33:40.262833: step: 210/464, loss: 0.019185908138751984 2023-01-24 05:33:41.015807: step: 212/464, loss: 0.013483088463544846 2023-01-24 05:33:41.850895: step: 214/464, loss: 0.03135254606604576 2023-01-24 05:33:42.554440: step: 216/464, loss: 0.01842799037694931 2023-01-24 05:33:43.250989: step: 218/464, loss: 0.007716724649071693 2023-01-24 05:33:43.971795: step: 220/464, loss: 0.0006132380221970379 2023-01-24 05:33:44.698533: step: 222/464, loss: 0.025128891691565514 2023-01-24 05:33:45.438779: step: 224/464, loss: 0.015200333669781685 2023-01-24 05:33:46.234694: step: 226/464, loss: 0.033424243330955505 2023-01-24 05:33:46.983703: step: 228/464, loss: 0.01700105145573616 2023-01-24 05:33:47.693815: step: 230/464, loss: 0.45445510745048523 2023-01-24 05:33:48.410652: step: 232/464, loss: 0.00553317554295063 2023-01-24 05:33:49.231267: step: 234/464, loss: 0.06307711452245712 2023-01-24 05:33:49.959921: step: 236/464, loss: 0.024076784029603004 2023-01-24 05:33:50.818585: step: 238/464, loss: 0.049010127782821655 2023-01-24 05:33:51.541604: step: 240/464, loss: 0.003527000779286027 2023-01-24 05:33:52.322591: step: 242/464, loss: 0.04622814804315567 2023-01-24 05:33:53.088165: step: 244/464, loss: 0.0006396998069249094 2023-01-24 05:33:53.810325: step: 246/464, loss: 0.004830328747630119 2023-01-24 05:33:54.559736: step: 248/464, loss: 0.0254517775028944 2023-01-24 05:33:55.266578: step: 250/464, loss: 0.034026939421892166 2023-01-24 05:33:56.054563: step: 252/464, loss: 0.020695265382528305 2023-01-24 05:33:56.781553: step: 254/464, loss: 0.01600899174809456 2023-01-24 05:33:57.545170: step: 256/464, loss: 0.010306455194950104 2023-01-24 05:33:58.278800: step: 258/464, loss: 0.02375786192715168 2023-01-24 05:33:59.036912: step: 260/464, loss: 0.0194417554885149 2023-01-24 05:33:59.722109: step: 262/464, loss: 0.004774979781359434 2023-01-24 05:34:00.451284: step: 264/464, loss: 0.007717948406934738 2023-01-24 05:34:01.070051: step: 266/464, loss: 0.03315155208110809 2023-01-24 05:34:01.867373: step: 268/464, loss: 0.0035224573221057653 2023-01-24 05:34:02.660236: step: 270/464, loss: 0.0005481951520778239 2023-01-24 05:34:03.393230: step: 272/464, loss: 0.03926454111933708 2023-01-24 05:34:04.181769: step: 274/464, loss: 0.1650092452764511 2023-01-24 05:34:04.865119: step: 276/464, loss: 0.00951425638049841 2023-01-24 05:34:05.611013: step: 278/464, loss: 0.009358447976410389 2023-01-24 05:34:06.262436: step: 280/464, loss: 0.06159272417426109 2023-01-24 05:34:07.042906: step: 282/464, loss: 0.003746254835277796 2023-01-24 05:34:07.782763: step: 284/464, loss: 0.012985536828637123 2023-01-24 05:34:08.535489: step: 286/464, loss: 0.035654276609420776 2023-01-24 05:34:09.260422: step: 288/464, loss: 0.03835088014602661 2023-01-24 05:34:09.947680: step: 290/464, loss: 0.015234079211950302 2023-01-24 05:34:10.684911: step: 292/464, loss: 0.038829442113637924 2023-01-24 05:34:11.399483: step: 294/464, loss: 0.04349800571799278 2023-01-24 05:34:12.144353: step: 296/464, loss: 0.03376410901546478 2023-01-24 05:34:12.886069: step: 298/464, loss: 0.0038537858054041862 2023-01-24 05:34:13.603070: step: 300/464, loss: 0.0397925041615963 2023-01-24 05:34:14.300642: step: 302/464, loss: 0.013765391893684864 2023-01-24 05:34:14.996499: step: 304/464, loss: 0.008335323072969913 2023-01-24 05:34:15.703528: step: 306/464, loss: 0.04323340952396393 2023-01-24 05:34:16.435748: step: 308/464, loss: 0.010128780268132687 2023-01-24 05:34:17.147099: step: 310/464, loss: 0.013416034169495106 2023-01-24 05:34:17.902922: step: 312/464, loss: 0.00013531593140214682 2023-01-24 05:34:18.627844: step: 314/464, loss: 0.09947311133146286 2023-01-24 05:34:19.407504: step: 316/464, loss: 0.017739834263920784 2023-01-24 05:34:20.238348: step: 318/464, loss: 0.0012596314772963524 2023-01-24 05:34:20.972596: step: 320/464, loss: 0.0014567991020157933 2023-01-24 05:34:21.717124: step: 322/464, loss: 0.008959956467151642 2023-01-24 05:34:22.420864: step: 324/464, loss: 0.019878460094332695 2023-01-24 05:34:23.141453: step: 326/464, loss: 0.0139300636947155 2023-01-24 05:34:23.948370: step: 328/464, loss: 0.032952453941106796 2023-01-24 05:34:24.706193: step: 330/464, loss: 0.025093283504247665 2023-01-24 05:34:25.476951: step: 332/464, loss: 0.024029474705457687 2023-01-24 05:34:26.171967: step: 334/464, loss: 0.0024554578121751547 2023-01-24 05:34:26.874750: step: 336/464, loss: 0.09956522285938263 2023-01-24 05:34:27.598413: step: 338/464, loss: 0.20657359063625336 2023-01-24 05:34:28.280513: step: 340/464, loss: 0.052003707736730576 2023-01-24 05:34:29.051208: step: 342/464, loss: 0.3483356535434723 2023-01-24 05:34:29.810067: step: 344/464, loss: 0.012462802231311798 2023-01-24 05:34:30.498039: step: 346/464, loss: 0.0649271160364151 2023-01-24 05:34:31.195036: step: 348/464, loss: 0.031710434705019 2023-01-24 05:34:31.960002: step: 350/464, loss: 0.00686945766210556 2023-01-24 05:34:32.759199: step: 352/464, loss: 0.31140774488449097 2023-01-24 05:34:33.479325: step: 354/464, loss: 0.012221035547554493 2023-01-24 05:34:34.130382: step: 356/464, loss: 0.03105190582573414 2023-01-24 05:34:34.954998: step: 358/464, loss: 0.0019033612916246057 2023-01-24 05:34:35.663665: step: 360/464, loss: 0.007821962237358093 2023-01-24 05:34:36.470433: step: 362/464, loss: 0.1137755960226059 2023-01-24 05:34:37.137443: step: 364/464, loss: 0.007979289628565311 2023-01-24 05:34:37.784013: step: 366/464, loss: 0.002275730948895216 2023-01-24 05:34:38.440265: step: 368/464, loss: 0.05832372605800629 2023-01-24 05:34:39.167895: step: 370/464, loss: 0.03142813593149185 2023-01-24 05:34:39.871906: step: 372/464, loss: 0.016409264877438545 2023-01-24 05:34:40.594751: step: 374/464, loss: 0.010367317125201225 2023-01-24 05:34:41.292669: step: 376/464, loss: 0.001882447162643075 2023-01-24 05:34:41.996251: step: 378/464, loss: 0.00920257717370987 2023-01-24 05:34:42.737533: step: 380/464, loss: 0.025457818061113358 2023-01-24 05:34:43.493480: step: 382/464, loss: 0.007789787836372852 2023-01-24 05:34:44.227327: step: 384/464, loss: 0.057693831622600555 2023-01-24 05:34:44.932183: step: 386/464, loss: 0.008637432008981705 2023-01-24 05:34:45.632417: step: 388/464, loss: 0.014804269187152386 2023-01-24 05:34:46.344273: step: 390/464, loss: 0.03306438773870468 2023-01-24 05:34:47.041851: step: 392/464, loss: 0.013410833664238453 2023-01-24 05:34:47.766543: step: 394/464, loss: 0.0015661357901990414 2023-01-24 05:34:48.615283: step: 396/464, loss: 0.00610681576654315 2023-01-24 05:34:49.373640: step: 398/464, loss: 0.0018912320956587791 2023-01-24 05:34:50.023010: step: 400/464, loss: 0.005749634001404047 2023-01-24 05:34:50.732748: step: 402/464, loss: 0.008570604026317596 2023-01-24 05:34:51.526962: step: 404/464, loss: 0.05428850278258324 2023-01-24 05:34:52.284004: step: 406/464, loss: 0.02714327536523342 2023-01-24 05:34:53.070944: step: 408/464, loss: 0.005631400737911463 2023-01-24 05:34:53.898292: step: 410/464, loss: 0.02358373999595642 2023-01-24 05:34:54.707603: step: 412/464, loss: 0.011837883852422237 2023-01-24 05:34:55.388247: step: 414/464, loss: 0.010881095193326473 2023-01-24 05:34:56.114643: step: 416/464, loss: 0.009863450191915035 2023-01-24 05:34:56.839169: step: 418/464, loss: 0.11035460233688354 2023-01-24 05:34:57.505556: step: 420/464, loss: 0.017884111031889915 2023-01-24 05:34:58.272140: step: 422/464, loss: 0.05123107507824898 2023-01-24 05:34:59.132491: step: 424/464, loss: 0.02582388184964657 2023-01-24 05:34:59.877725: step: 426/464, loss: 0.044229090213775635 2023-01-24 05:35:00.566018: step: 428/464, loss: 0.03117489628493786 2023-01-24 05:35:01.278791: step: 430/464, loss: 0.011095764115452766 2023-01-24 05:35:02.068856: step: 432/464, loss: 0.6051144599914551 2023-01-24 05:35:02.827996: step: 434/464, loss: 0.018150007352232933 2023-01-24 05:35:03.565514: step: 436/464, loss: 0.0067179263569414616 2023-01-24 05:35:04.317221: step: 438/464, loss: 0.010419225320219994 2023-01-24 05:35:05.051020: step: 440/464, loss: 0.0008295488078147173 2023-01-24 05:35:05.749376: step: 442/464, loss: 0.0167152788490057 2023-01-24 05:35:06.572903: step: 444/464, loss: 0.024527952075004578 2023-01-24 05:35:07.302610: step: 446/464, loss: 0.0478508397936821 2023-01-24 05:35:07.986838: step: 448/464, loss: 0.00022361334413290024 2023-01-24 05:35:08.720813: step: 450/464, loss: 0.03885860741138458 2023-01-24 05:35:09.390799: step: 452/464, loss: 0.03442401811480522 2023-01-24 05:35:10.088185: step: 454/464, loss: 1.4656697511672974 2023-01-24 05:35:10.870327: step: 456/464, loss: 0.02225828543305397 2023-01-24 05:35:11.679227: step: 458/464, loss: 0.03477256000041962 2023-01-24 05:35:12.460612: step: 460/464, loss: 0.011446974240243435 2023-01-24 05:35:13.216875: step: 462/464, loss: 0.33820757269859314 2023-01-24 05:35:13.939437: step: 464/464, loss: 0.0659744068980217 2023-01-24 05:35:14.697441: step: 466/464, loss: 0.004842453636229038 2023-01-24 05:35:15.450555: step: 468/464, loss: 0.03637324273586273 2023-01-24 05:35:16.266069: step: 470/464, loss: 0.0970153734087944 2023-01-24 05:35:17.061072: step: 472/464, loss: 0.02549644187092781 2023-01-24 05:35:17.834397: step: 474/464, loss: 0.3136069178581238 2023-01-24 05:35:18.552687: step: 476/464, loss: 0.017255626618862152 2023-01-24 05:35:19.321070: step: 478/464, loss: 0.18557414412498474 2023-01-24 05:35:20.070699: step: 480/464, loss: 0.010793224908411503 2023-01-24 05:35:20.769077: step: 482/464, loss: 0.028504427522420883 2023-01-24 05:35:21.502416: step: 484/464, loss: 0.016538219526410103 2023-01-24 05:35:22.231154: step: 486/464, loss: 0.013090861961245537 2023-01-24 05:35:22.928346: step: 488/464, loss: 0.0014066733419895172 2023-01-24 05:35:23.706798: step: 490/464, loss: 0.01350055355578661 2023-01-24 05:35:24.411856: step: 492/464, loss: 0.016357606276869774 2023-01-24 05:35:25.129256: step: 494/464, loss: 0.07683868706226349 2023-01-24 05:35:25.983752: step: 496/464, loss: 0.028659097850322723 2023-01-24 05:35:26.743634: step: 498/464, loss: 0.06337568908929825 2023-01-24 05:35:27.442378: step: 500/464, loss: 0.0016648870659992099 2023-01-24 05:35:28.157752: step: 502/464, loss: 0.012672177515923977 2023-01-24 05:35:28.870700: step: 504/464, loss: 0.0046038078144192696 2023-01-24 05:35:29.647502: step: 506/464, loss: 0.01888994127511978 2023-01-24 05:35:30.371627: step: 508/464, loss: 0.01926352269947529 2023-01-24 05:35:31.126386: step: 510/464, loss: 0.03320739045739174 2023-01-24 05:35:31.815396: step: 512/464, loss: 0.013302896171808243 2023-01-24 05:35:32.500060: step: 514/464, loss: 0.01819944754242897 2023-01-24 05:35:33.204212: step: 516/464, loss: 0.007101289462298155 2023-01-24 05:35:34.028067: step: 518/464, loss: 0.06670579314231873 2023-01-24 05:35:34.684371: step: 520/464, loss: 0.3770799934864044 2023-01-24 05:35:35.391813: step: 522/464, loss: 0.010539170354604721 2023-01-24 05:35:36.167043: step: 524/464, loss: 0.32410728931427 2023-01-24 05:35:36.837065: step: 526/464, loss: 0.004301424603909254 2023-01-24 05:35:37.550752: step: 528/464, loss: 0.01964602991938591 2023-01-24 05:35:38.239265: step: 530/464, loss: 0.005726655479520559 2023-01-24 05:35:38.909602: step: 532/464, loss: 0.005193687044084072 2023-01-24 05:35:39.651143: step: 534/464, loss: 0.042894672602415085 2023-01-24 05:35:40.366328: step: 536/464, loss: 0.03241725265979767 2023-01-24 05:35:41.172443: step: 538/464, loss: 0.5052769780158997 2023-01-24 05:35:41.961559: step: 540/464, loss: 0.06744624674320221 2023-01-24 05:35:42.677182: step: 542/464, loss: 0.04684813320636749 2023-01-24 05:35:43.341610: step: 544/464, loss: 0.03330191969871521 2023-01-24 05:35:44.014420: step: 546/464, loss: 0.007320890203118324 2023-01-24 05:35:44.760688: step: 548/464, loss: 0.0037253601476550102 2023-01-24 05:35:45.594736: step: 550/464, loss: 0.21355178952217102 2023-01-24 05:35:46.326555: step: 552/464, loss: 0.05642886087298393 2023-01-24 05:35:47.036328: step: 554/464, loss: 0.02629709430038929 2023-01-24 05:35:47.745786: step: 556/464, loss: 0.017702855169773102 2023-01-24 05:35:48.456057: step: 558/464, loss: 0.003917295020073652 2023-01-24 05:35:49.255356: step: 560/464, loss: 0.0066395653411746025 2023-01-24 05:35:49.965528: step: 562/464, loss: 0.05594123154878616 2023-01-24 05:35:50.746512: step: 564/464, loss: 0.07240436226129532 2023-01-24 05:35:51.520444: step: 566/464, loss: 0.0046186018735170364 2023-01-24 05:35:52.320677: step: 568/464, loss: 0.00958124827593565 2023-01-24 05:35:53.031219: step: 570/464, loss: 0.04715810716152191 2023-01-24 05:35:53.931977: step: 572/464, loss: 0.011579863727092743 2023-01-24 05:35:54.612260: step: 574/464, loss: 0.02096959389746189 2023-01-24 05:35:55.351622: step: 576/464, loss: 0.033328570425510406 2023-01-24 05:35:56.089225: step: 578/464, loss: 0.004885158967226744 2023-01-24 05:35:56.901894: step: 580/464, loss: 0.04350057244300842 2023-01-24 05:35:57.678227: step: 582/464, loss: 0.020800787955522537 2023-01-24 05:35:58.346450: step: 584/464, loss: 0.005818777251988649 2023-01-24 05:35:59.087234: step: 586/464, loss: 0.1312158703804016 2023-01-24 05:35:59.839697: step: 588/464, loss: 0.008737059310078621 2023-01-24 05:36:00.517679: step: 590/464, loss: 0.03691103309392929 2023-01-24 05:36:01.289802: step: 592/464, loss: 0.011228111572563648 2023-01-24 05:36:02.051661: step: 594/464, loss: 0.0035349982790648937 2023-01-24 05:36:02.743924: step: 596/464, loss: 0.009966027922928333 2023-01-24 05:36:03.595025: step: 598/464, loss: 0.030326692387461662 2023-01-24 05:36:04.241652: step: 600/464, loss: 0.010724910534918308 2023-01-24 05:36:04.956824: step: 602/464, loss: 0.005237458273768425 2023-01-24 05:36:05.722838: step: 604/464, loss: 0.003180861007422209 2023-01-24 05:36:06.450369: step: 606/464, loss: 0.1141379177570343 2023-01-24 05:36:07.171561: step: 608/464, loss: 0.04503513500094414 2023-01-24 05:36:07.983474: step: 610/464, loss: 0.009239607490599155 2023-01-24 05:36:08.753560: step: 612/464, loss: 0.0586148202419281 2023-01-24 05:36:09.516483: step: 614/464, loss: 0.07480555772781372 2023-01-24 05:36:10.217591: step: 616/464, loss: 0.0015483495080843568 2023-01-24 05:36:10.951540: step: 618/464, loss: 0.029120994731783867 2023-01-24 05:36:11.668827: step: 620/464, loss: 0.0019428718369454145 2023-01-24 05:36:12.452560: step: 622/464, loss: 0.1298806518316269 2023-01-24 05:36:13.223988: step: 624/464, loss: 0.008236655034124851 2023-01-24 05:36:13.933535: step: 626/464, loss: 0.008735090494155884 2023-01-24 05:36:14.717406: step: 628/464, loss: 0.0024231334682554007 2023-01-24 05:36:15.439680: step: 630/464, loss: 0.015083258971571922 2023-01-24 05:36:16.124075: step: 632/464, loss: 0.027918506413698196 2023-01-24 05:36:16.899936: step: 634/464, loss: 0.017090972512960434 2023-01-24 05:36:17.665168: step: 636/464, loss: 0.008627823553979397 2023-01-24 05:36:18.416970: step: 638/464, loss: 0.000644507585093379 2023-01-24 05:36:19.119343: step: 640/464, loss: 0.00273617310449481 2023-01-24 05:36:19.851495: step: 642/464, loss: 0.10561248660087585 2023-01-24 05:36:20.633384: step: 644/464, loss: 0.2085895985364914 2023-01-24 05:36:21.388070: step: 646/464, loss: 0.17949917912483215 2023-01-24 05:36:22.133731: step: 648/464, loss: 0.00046418761485256255 2023-01-24 05:36:22.820988: step: 650/464, loss: 0.016573699191212654 2023-01-24 05:36:23.571722: step: 652/464, loss: 0.04553362727165222 2023-01-24 05:36:24.294819: step: 654/464, loss: 0.6091349720954895 2023-01-24 05:36:25.011364: step: 656/464, loss: 0.020109454169869423 2023-01-24 05:36:25.752710: step: 658/464, loss: 0.10786572843790054 2023-01-24 05:36:26.502283: step: 660/464, loss: 0.06860281527042389 2023-01-24 05:36:27.328490: step: 662/464, loss: 0.022475754842162132 2023-01-24 05:36:28.072922: step: 664/464, loss: 0.09202992916107178 2023-01-24 05:36:28.782926: step: 666/464, loss: 0.21183981001377106 2023-01-24 05:36:29.543894: step: 668/464, loss: 0.037260301411151886 2023-01-24 05:36:30.394613: step: 670/464, loss: 0.0022160960361361504 2023-01-24 05:36:31.130614: step: 672/464, loss: 0.0008184523903764784 2023-01-24 05:36:31.853003: step: 674/464, loss: 0.0066426536068320274 2023-01-24 05:36:32.617880: step: 676/464, loss: 0.03934881463646889 2023-01-24 05:36:33.314262: step: 678/464, loss: 0.03135685250163078 2023-01-24 05:36:34.026419: step: 680/464, loss: 0.003920204471796751 2023-01-24 05:36:34.889120: step: 682/464, loss: 0.05742049589753151 2023-01-24 05:36:35.635267: step: 684/464, loss: 0.006650918163359165 2023-01-24 05:36:36.350295: step: 686/464, loss: 0.00517960824072361 2023-01-24 05:36:37.073779: step: 688/464, loss: 0.027552543208003044 2023-01-24 05:36:37.837969: step: 690/464, loss: 0.01750790700316429 2023-01-24 05:36:38.713962: step: 692/464, loss: 0.21500498056411743 2023-01-24 05:36:39.459347: step: 694/464, loss: 0.0054095140658319 2023-01-24 05:36:40.206595: step: 696/464, loss: 0.36011627316474915 2023-01-24 05:36:40.929802: step: 698/464, loss: 0.04345450922846794 2023-01-24 05:36:41.698140: step: 700/464, loss: 3.994520664215088 2023-01-24 05:36:42.481000: step: 702/464, loss: 0.031575389206409454 2023-01-24 05:36:43.195741: step: 704/464, loss: 0.018297553062438965 2023-01-24 05:36:43.878624: step: 706/464, loss: 0.006059832405298948 2023-01-24 05:36:44.613517: step: 708/464, loss: 0.04579806327819824 2023-01-24 05:36:45.322456: step: 710/464, loss: 0.000498030858580023 2023-01-24 05:36:46.023479: step: 712/464, loss: 0.02703806944191456 2023-01-24 05:36:46.724117: step: 714/464, loss: 0.013390639796853065 2023-01-24 05:36:47.638583: step: 716/464, loss: 0.01270141638815403 2023-01-24 05:36:48.280827: step: 718/464, loss: 0.016825536265969276 2023-01-24 05:36:49.062292: step: 720/464, loss: 0.022613557055592537 2023-01-24 05:36:49.804315: step: 722/464, loss: 0.018898295238614082 2023-01-24 05:36:50.580111: step: 724/464, loss: 0.14791519939899445 2023-01-24 05:36:51.329926: step: 726/464, loss: 0.011607524938881397 2023-01-24 05:36:52.122722: step: 728/464, loss: 0.01842688024044037 2023-01-24 05:36:52.860600: step: 730/464, loss: 0.023921027779579163 2023-01-24 05:36:53.586547: step: 732/464, loss: 0.015431285835802555 2023-01-24 05:36:54.390813: step: 734/464, loss: 0.01931067556142807 2023-01-24 05:36:55.151951: step: 736/464, loss: 0.05411611124873161 2023-01-24 05:36:55.854755: step: 738/464, loss: 0.005268834065645933 2023-01-24 05:36:56.581944: step: 740/464, loss: 0.04305311292409897 2023-01-24 05:36:57.329617: step: 742/464, loss: 0.008710721507668495 2023-01-24 05:36:58.026352: step: 744/464, loss: 0.007986211217939854 2023-01-24 05:36:58.826651: step: 746/464, loss: 0.03394348919391632 2023-01-24 05:36:59.506101: step: 748/464, loss: 0.00660968292504549 2023-01-24 05:37:00.221885: step: 750/464, loss: 0.015248794108629227 2023-01-24 05:37:00.965717: step: 752/464, loss: 0.049215108156204224 2023-01-24 05:37:01.635695: step: 754/464, loss: 0.0020705063361674547 2023-01-24 05:37:02.316130: step: 756/464, loss: 0.009546882472932339 2023-01-24 05:37:03.109093: step: 758/464, loss: 0.010692611336708069 2023-01-24 05:37:03.843715: step: 760/464, loss: 0.002319645369425416 2023-01-24 05:37:04.541010: step: 762/464, loss: 0.33690714836120605 2023-01-24 05:37:05.286075: step: 764/464, loss: 0.013944489881396294 2023-01-24 05:37:06.047865: step: 766/464, loss: 0.011860419996082783 2023-01-24 05:37:06.858571: step: 768/464, loss: 0.10408028960227966 2023-01-24 05:37:07.630230: step: 770/464, loss: 0.007643452845513821 2023-01-24 05:37:08.359698: step: 772/464, loss: 0.037697263062000275 2023-01-24 05:37:09.095385: step: 774/464, loss: 0.012840951792895794 2023-01-24 05:37:09.751725: step: 776/464, loss: 0.0415637344121933 2023-01-24 05:37:10.513703: step: 778/464, loss: 0.0444411039352417 2023-01-24 05:37:11.311357: step: 780/464, loss: 0.012284292839467525 2023-01-24 05:37:12.134195: step: 782/464, loss: 0.0014497153460979462 2023-01-24 05:37:12.906834: step: 784/464, loss: 0.4550762474536896 2023-01-24 05:37:13.606157: step: 786/464, loss: 0.009761333465576172 2023-01-24 05:37:14.305329: step: 788/464, loss: 0.0007776019629091024 2023-01-24 05:37:15.006403: step: 790/464, loss: 0.025154652073979378 2023-01-24 05:37:15.677376: step: 792/464, loss: 0.026213031262159348 2023-01-24 05:37:16.473222: step: 794/464, loss: 0.007201395928859711 2023-01-24 05:37:17.216731: step: 796/464, loss: 0.020271655172109604 2023-01-24 05:37:17.923659: step: 798/464, loss: 0.025903355330228806 2023-01-24 05:37:18.632876: step: 800/464, loss: 0.029932750388979912 2023-01-24 05:37:19.367433: step: 802/464, loss: 0.2351268082857132 2023-01-24 05:37:20.082416: step: 804/464, loss: 0.05926898866891861 2023-01-24 05:37:20.781378: step: 806/464, loss: 0.034163087606430054 2023-01-24 05:37:21.459765: step: 808/464, loss: 0.024908997118473053 2023-01-24 05:37:22.204129: step: 810/464, loss: 0.013467278331518173 2023-01-24 05:37:22.945313: step: 812/464, loss: 0.006014563608914614 2023-01-24 05:37:23.661406: step: 814/464, loss: 0.11496934294700623 2023-01-24 05:37:24.362893: step: 816/464, loss: 0.013000456616282463 2023-01-24 05:37:25.017635: step: 818/464, loss: 0.0011854059994220734 2023-01-24 05:37:25.684226: step: 820/464, loss: 0.007496209349483252 2023-01-24 05:37:26.461122: step: 822/464, loss: 0.5041012167930603 2023-01-24 05:37:27.208345: step: 824/464, loss: 0.01389066781848669 2023-01-24 05:37:27.999631: step: 826/464, loss: 0.013370354659855366 2023-01-24 05:37:28.730230: step: 828/464, loss: 0.020678000524640083 2023-01-24 05:37:29.516458: step: 830/464, loss: 0.018191656097769737 2023-01-24 05:37:30.272883: step: 832/464, loss: 0.009238002821803093 2023-01-24 05:37:31.002462: step: 834/464, loss: 0.04901707544922829 2023-01-24 05:37:31.783558: step: 836/464, loss: 0.0111688869073987 2023-01-24 05:37:32.555056: step: 838/464, loss: 0.036923848092556 2023-01-24 05:37:33.340693: step: 840/464, loss: 0.003734600730240345 2023-01-24 05:37:34.124153: step: 842/464, loss: 0.0461021289229393 2023-01-24 05:37:34.884131: step: 844/464, loss: 0.17386293411254883 2023-01-24 05:37:35.575547: step: 846/464, loss: 0.31301191449165344 2023-01-24 05:37:36.423064: step: 848/464, loss: 0.04938540980219841 2023-01-24 05:37:37.225314: step: 850/464, loss: 0.0695425271987915 2023-01-24 05:37:38.010623: step: 852/464, loss: 0.043353304266929626 2023-01-24 05:37:38.835299: step: 854/464, loss: 0.021765680983662605 2023-01-24 05:37:39.721633: step: 856/464, loss: 0.10840235650539398 2023-01-24 05:37:40.482857: step: 858/464, loss: 0.010906664654612541 2023-01-24 05:37:41.159567: step: 860/464, loss: 0.0002889096213039011 2023-01-24 05:37:41.853326: step: 862/464, loss: 0.0025182217359542847 2023-01-24 05:37:42.603294: step: 864/464, loss: 0.1939670443534851 2023-01-24 05:37:43.402322: step: 866/464, loss: 0.038164377212524414 2023-01-24 05:37:44.057030: step: 868/464, loss: 0.0014197065029293299 2023-01-24 05:37:44.816659: step: 870/464, loss: 0.04917750880122185 2023-01-24 05:37:45.520087: step: 872/464, loss: 0.02373397909104824 2023-01-24 05:37:46.256314: step: 874/464, loss: 0.0019780031871050596 2023-01-24 05:37:47.013581: step: 876/464, loss: 0.005515122786164284 2023-01-24 05:37:47.734634: step: 878/464, loss: 0.09901914745569229 2023-01-24 05:37:48.467336: step: 880/464, loss: 0.02087888866662979 2023-01-24 05:37:49.220477: step: 882/464, loss: 0.025759685784578323 2023-01-24 05:37:49.939189: step: 884/464, loss: 0.004240122158080339 2023-01-24 05:37:50.708010: step: 886/464, loss: 0.03213540464639664 2023-01-24 05:37:51.344066: step: 888/464, loss: 0.004417374264448881 2023-01-24 05:37:52.142060: step: 890/464, loss: 0.06850989907979965 2023-01-24 05:37:52.830823: step: 892/464, loss: 0.007251196075230837 2023-01-24 05:37:53.608603: step: 894/464, loss: 0.0011848622234538198 2023-01-24 05:37:54.317468: step: 896/464, loss: 0.011365242302417755 2023-01-24 05:37:55.059536: step: 898/464, loss: 0.019021768122911453 2023-01-24 05:37:55.876969: step: 900/464, loss: 4.568624496459961 2023-01-24 05:37:56.624295: step: 902/464, loss: 0.001643879571929574 2023-01-24 05:37:57.383597: step: 904/464, loss: 0.005207096692174673 2023-01-24 05:37:58.094280: step: 906/464, loss: 0.00832145381718874 2023-01-24 05:37:58.864152: step: 908/464, loss: 0.007098441943526268 2023-01-24 05:37:59.601195: step: 910/464, loss: 0.00881668645888567 2023-01-24 05:38:00.404138: step: 912/464, loss: 0.015401276759803295 2023-01-24 05:38:01.084787: step: 914/464, loss: 0.07423000037670135 2023-01-24 05:38:01.830862: step: 916/464, loss: 0.06360428035259247 2023-01-24 05:38:02.519443: step: 918/464, loss: 0.08443159610033035 2023-01-24 05:38:03.289240: step: 920/464, loss: 0.0022100761998444796 2023-01-24 05:38:04.128146: step: 922/464, loss: 0.027309654280543327 2023-01-24 05:38:04.820926: step: 924/464, loss: 0.02387995645403862 2023-01-24 05:38:05.579207: step: 926/464, loss: 0.009875823743641376 2023-01-24 05:38:06.335167: step: 928/464, loss: 0.06939181685447693 2023-01-24 05:38:06.965100: step: 930/464, loss: 0.011594748124480247 ================================================== Loss: 0.092 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34709082823306964, 'r': 0.3437977463712758, 'f1': 0.34543643915664896}, 'combined': 0.25453211306279394, 'epoch': 28} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.30995924628808863, 'r': 0.2682634426331133, 'f1': 0.28760799629064426}, 'combined': 0.17861970295945276, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31738256211575544, 'r': 0.31617807421398786, 'f1': 0.3167791732143947}, 'combined': 0.23341623289481714, 'epoch': 28} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.308905275977782, 'r': 0.2728510498992674, 'f1': 0.28976093639512535}, 'combined': 0.1799567920769726, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3338515911971416, 'r': 0.3300506243144417, 'f1': 0.3319402271254023}, 'combined': 0.24458753577661219, 'epoch': 28} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32183676473605377, 'r': 0.2731314976691732, 'f1': 0.29549057693262076}, 'combined': 0.18351520041078553, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2734375, 'r': 0.25, 'f1': 0.26119402985074625}, 'combined': 0.17412935323383083, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3225806451612903, 'r': 0.43478260869565216, 'f1': 0.37037037037037035}, 'combined': 0.18518518518518517, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.47412280701754383, 'r': 0.2452359346642468, 'f1': 0.3232655502392345}, 'combined': 0.21551036682615632, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:40:45.845620: step: 2/464, loss: 0.0029547016602009535 2023-01-24 05:40:46.555248: step: 4/464, loss: 0.0018905532779172063 2023-01-24 05:40:47.273882: step: 6/464, loss: 0.07960596680641174 2023-01-24 05:40:48.002773: step: 8/464, loss: 0.034294113516807556 2023-01-24 05:40:48.719095: step: 10/464, loss: 0.02181868441402912 2023-01-24 05:40:49.446858: step: 12/464, loss: 0.007761118467897177 2023-01-24 05:40:50.239391: step: 14/464, loss: 0.17579518258571625 2023-01-24 05:40:50.980308: step: 16/464, loss: 0.008423620834946632 2023-01-24 05:40:51.653920: step: 18/464, loss: 0.002608527196571231 2023-01-24 05:40:52.456833: step: 20/464, loss: 0.022364985197782516 2023-01-24 05:40:53.202076: step: 22/464, loss: 0.002348007168620825 2023-01-24 05:40:53.884316: step: 24/464, loss: 0.009676797315478325 2023-01-24 05:40:54.587870: step: 26/464, loss: 0.015824243426322937 2023-01-24 05:40:55.397557: step: 28/464, loss: 0.0011271298862993717 2023-01-24 05:40:56.086473: step: 30/464, loss: 0.015557849779725075 2023-01-24 05:40:56.840845: step: 32/464, loss: 0.0047828881070017815 2023-01-24 05:40:57.613434: step: 34/464, loss: 0.013710367493331432 2023-01-24 05:40:58.372921: step: 36/464, loss: 0.0010676380479708314 2023-01-24 05:40:59.014784: step: 38/464, loss: 0.006321811582893133 2023-01-24 05:40:59.750246: step: 40/464, loss: 0.03331097215414047 2023-01-24 05:41:00.541972: step: 42/464, loss: 0.05714160203933716 2023-01-24 05:41:01.240065: step: 44/464, loss: 0.010108050890266895 2023-01-24 05:41:01.959484: step: 46/464, loss: 0.0493391752243042 2023-01-24 05:41:02.724663: step: 48/464, loss: 0.029232880100607872 2023-01-24 05:41:03.388620: step: 50/464, loss: 0.025543780997395515 2023-01-24 05:41:04.123308: step: 52/464, loss: 0.003706761635839939 2023-01-24 05:41:04.843650: step: 54/464, loss: 3.4575983590912074e-05 2023-01-24 05:41:05.563818: step: 56/464, loss: 0.0759250670671463 2023-01-24 05:41:06.275118: step: 58/464, loss: 0.016924351453781128 2023-01-24 05:41:06.974986: step: 60/464, loss: 0.00038268452044576406 2023-01-24 05:41:07.746678: step: 62/464, loss: 0.007408566307276487 2023-01-24 05:41:08.442866: step: 64/464, loss: 0.000976721872575581 2023-01-24 05:41:09.172326: step: 66/464, loss: 0.04439283907413483 2023-01-24 05:41:09.954817: step: 68/464, loss: 0.025368480011820793 2023-01-24 05:41:10.643426: step: 70/464, loss: 0.04029041528701782 2023-01-24 05:41:11.371841: step: 72/464, loss: 0.007327367551624775 2023-01-24 05:41:12.091235: step: 74/464, loss: 0.025254884734749794 2023-01-24 05:41:12.856621: step: 76/464, loss: 0.020946325734257698 2023-01-24 05:41:13.579348: step: 78/464, loss: 0.02133583091199398 2023-01-24 05:41:14.420383: step: 80/464, loss: 0.11129105091094971 2023-01-24 05:41:15.108013: step: 82/464, loss: 0.1564992368221283 2023-01-24 05:41:15.845855: step: 84/464, loss: 0.026285603642463684 2023-01-24 05:41:16.605111: step: 86/464, loss: 0.014006583951413631 2023-01-24 05:41:17.374495: step: 88/464, loss: 0.08667285740375519 2023-01-24 05:41:18.116432: step: 90/464, loss: 0.022317009046673775 2023-01-24 05:41:18.766917: step: 92/464, loss: 0.012138741090893745 2023-01-24 05:41:19.506585: step: 94/464, loss: 0.04680892825126648 2023-01-24 05:41:20.310825: step: 96/464, loss: 0.06309213489294052 2023-01-24 05:41:21.133447: step: 98/464, loss: 0.0006251411396078765 2023-01-24 05:41:21.893152: step: 100/464, loss: 0.06023294851183891 2023-01-24 05:41:22.535822: step: 102/464, loss: 0.012864368967711926 2023-01-24 05:41:23.243538: step: 104/464, loss: 0.00024760988890193403 2023-01-24 05:41:23.972141: step: 106/464, loss: 0.027757912874221802 2023-01-24 05:41:24.748581: step: 108/464, loss: 0.054326508194208145 2023-01-24 05:41:25.405400: step: 110/464, loss: 0.0025361785665154457 2023-01-24 05:41:26.129524: step: 112/464, loss: 0.010986367240548134 2023-01-24 05:41:26.859803: step: 114/464, loss: 0.018608594313263893 2023-01-24 05:41:27.523128: step: 116/464, loss: 0.008475798182189465 2023-01-24 05:41:28.301593: step: 118/464, loss: 0.003442601067945361 2023-01-24 05:41:29.134974: step: 120/464, loss: 0.010374759323894978 2023-01-24 05:41:29.933052: step: 122/464, loss: 0.03338068723678589 2023-01-24 05:41:30.702345: step: 124/464, loss: 0.056789930909872055 2023-01-24 05:41:31.451523: step: 126/464, loss: 0.012232386507093906 2023-01-24 05:41:32.239256: step: 128/464, loss: 0.013899753801524639 2023-01-24 05:41:33.004809: step: 130/464, loss: 0.043581441044807434 2023-01-24 05:41:33.723135: step: 132/464, loss: 0.016672799363732338 2023-01-24 05:41:34.515436: step: 134/464, loss: 0.44271788001060486 2023-01-24 05:41:35.243899: step: 136/464, loss: 0.07831883430480957 2023-01-24 05:41:35.943409: step: 138/464, loss: 0.016645532101392746 2023-01-24 05:41:36.665098: step: 140/464, loss: 0.09777562320232391 2023-01-24 05:41:37.459136: step: 142/464, loss: 0.010654406622052193 2023-01-24 05:41:38.223196: step: 144/464, loss: 0.0020998416002839804 2023-01-24 05:41:38.938957: step: 146/464, loss: 0.030081741511821747 2023-01-24 05:41:39.642462: step: 148/464, loss: 0.013528715819120407 2023-01-24 05:41:40.407156: step: 150/464, loss: 0.002756628207862377 2023-01-24 05:41:41.105000: step: 152/464, loss: 0.02021130919456482 2023-01-24 05:41:41.886804: step: 154/464, loss: 0.026756566017866135 2023-01-24 05:41:42.558969: step: 156/464, loss: 0.005173517391085625 2023-01-24 05:41:43.324305: step: 158/464, loss: 0.20922529697418213 2023-01-24 05:41:44.007736: step: 160/464, loss: 0.0023500677198171616 2023-01-24 05:41:44.771215: step: 162/464, loss: 0.02557515911757946 2023-01-24 05:41:45.532114: step: 164/464, loss: 0.0018051753286272287 2023-01-24 05:41:46.376222: step: 166/464, loss: 0.03364339470863342 2023-01-24 05:41:47.143895: step: 168/464, loss: 0.019174523651599884 2023-01-24 05:41:47.852475: step: 170/464, loss: 0.06015171855688095 2023-01-24 05:41:48.535355: step: 172/464, loss: 0.002911254530772567 2023-01-24 05:41:49.354909: step: 174/464, loss: 0.028410404920578003 2023-01-24 05:41:50.109972: step: 176/464, loss: 0.012645215727388859 2023-01-24 05:41:50.975606: step: 178/464, loss: 0.004745079670101404 2023-01-24 05:41:51.672664: step: 180/464, loss: 0.01845906488597393 2023-01-24 05:41:52.411200: step: 182/464, loss: 0.03830547630786896 2023-01-24 05:41:53.165647: step: 184/464, loss: 0.014404712244868279 2023-01-24 05:41:53.901431: step: 186/464, loss: 0.008285674266517162 2023-01-24 05:41:54.562654: step: 188/464, loss: 0.0003698143409565091 2023-01-24 05:41:55.356745: step: 190/464, loss: 0.02062177285552025 2023-01-24 05:41:56.096185: step: 192/464, loss: 0.02673025242984295 2023-01-24 05:41:56.768898: step: 194/464, loss: 0.010820874013006687 2023-01-24 05:41:57.490368: step: 196/464, loss: 0.017097724601626396 2023-01-24 05:41:58.313557: step: 198/464, loss: 0.0009626204846426845 2023-01-24 05:41:59.130690: step: 200/464, loss: 0.00660901702940464 2023-01-24 05:41:59.946323: step: 202/464, loss: 0.008002715185284615 2023-01-24 05:42:00.681099: step: 204/464, loss: 0.00957464799284935 2023-01-24 05:42:01.363084: step: 206/464, loss: 0.0024384590797126293 2023-01-24 05:42:02.317559: step: 208/464, loss: 0.0061497800052165985 2023-01-24 05:42:03.078409: step: 210/464, loss: 0.007444137241691351 2023-01-24 05:42:03.786021: step: 212/464, loss: 0.026912638917565346 2023-01-24 05:42:04.470018: step: 214/464, loss: 0.0040068114176392555 2023-01-24 05:42:05.220142: step: 216/464, loss: 0.0004780768067575991 2023-01-24 05:42:05.945098: step: 218/464, loss: 0.3938106596469879 2023-01-24 05:42:06.629723: step: 220/464, loss: 0.009369696490466595 2023-01-24 05:42:07.384228: step: 222/464, loss: 0.0011333172442391515 2023-01-24 05:42:08.082312: step: 224/464, loss: 0.02935289777815342 2023-01-24 05:42:08.740232: step: 226/464, loss: 0.0071406010538339615 2023-01-24 05:42:09.472138: step: 228/464, loss: 0.01660071313381195 2023-01-24 05:42:10.220505: step: 230/464, loss: 0.29751288890838623 2023-01-24 05:42:10.908765: step: 232/464, loss: 0.0016616106731817126 2023-01-24 05:42:11.708184: step: 234/464, loss: 0.04663500189781189 2023-01-24 05:42:12.445900: step: 236/464, loss: 0.011690995655953884 2023-01-24 05:42:13.246118: step: 238/464, loss: 0.000685846374835819 2023-01-24 05:42:14.050713: step: 240/464, loss: 0.015121880918741226 2023-01-24 05:42:14.798333: step: 242/464, loss: 0.010790755040943623 2023-01-24 05:42:15.513740: step: 244/464, loss: 0.00013384269550442696 2023-01-24 05:42:16.353743: step: 246/464, loss: 0.005154958460479975 2023-01-24 05:42:17.002184: step: 248/464, loss: 0.009239543229341507 2023-01-24 05:42:17.744444: step: 250/464, loss: 0.016478722915053368 2023-01-24 05:42:18.560660: step: 252/464, loss: 0.00974748283624649 2023-01-24 05:42:19.249282: step: 254/464, loss: 0.008561811409890652 2023-01-24 05:42:19.987217: step: 256/464, loss: 0.0009420202695764601 2023-01-24 05:42:20.735847: step: 258/464, loss: 0.020443571731448174 2023-01-24 05:42:21.553077: step: 260/464, loss: 0.0278155654668808 2023-01-24 05:42:22.404547: step: 262/464, loss: 0.004828313831239939 2023-01-24 05:42:23.130149: step: 264/464, loss: 0.001934122759848833 2023-01-24 05:42:23.817002: step: 266/464, loss: 0.024927956983447075 2023-01-24 05:42:24.541743: step: 268/464, loss: 0.045292165130376816 2023-01-24 05:42:25.228224: step: 270/464, loss: 0.020056474953889847 2023-01-24 05:42:25.972165: step: 272/464, loss: 0.020017310976982117 2023-01-24 05:42:26.648994: step: 274/464, loss: 0.011287958361208439 2023-01-24 05:42:27.338432: step: 276/464, loss: 0.003020766656845808 2023-01-24 05:42:28.114860: step: 278/464, loss: 0.8390296101570129 2023-01-24 05:42:28.912303: step: 280/464, loss: 0.03157031536102295 2023-01-24 05:42:29.704133: step: 282/464, loss: 0.011981689371168613 2023-01-24 05:42:30.365502: step: 284/464, loss: 0.022621948271989822 2023-01-24 05:42:31.115330: step: 286/464, loss: 0.04245922714471817 2023-01-24 05:42:31.871433: step: 288/464, loss: 0.04538784548640251 2023-01-24 05:42:32.605135: step: 290/464, loss: 0.011972902342677116 2023-01-24 05:42:33.393569: step: 292/464, loss: 0.01967264525592327 2023-01-24 05:42:34.085449: step: 294/464, loss: 0.024140847846865654 2023-01-24 05:42:34.805872: step: 296/464, loss: 0.007386627607047558 2023-01-24 05:42:35.604496: step: 298/464, loss: 0.05708402022719383 2023-01-24 05:42:36.358546: step: 300/464, loss: 0.01678168959915638 2023-01-24 05:42:37.180511: step: 302/464, loss: 0.010171451605856419 2023-01-24 05:42:37.933224: step: 304/464, loss: 0.0012677903287112713 2023-01-24 05:42:38.664336: step: 306/464, loss: 0.0006428569904528558 2023-01-24 05:42:39.472515: step: 308/464, loss: 0.004791866987943649 2023-01-24 05:42:40.245940: step: 310/464, loss: 0.007664674427360296 2023-01-24 05:42:41.043280: step: 312/464, loss: 0.021277105435729027 2023-01-24 05:42:41.781798: step: 314/464, loss: 0.012961627915501595 2023-01-24 05:42:42.573760: step: 316/464, loss: 0.005071598570793867 2023-01-24 05:42:43.392908: step: 318/464, loss: 0.013653067871928215 2023-01-24 05:42:44.118529: step: 320/464, loss: 0.03530142456293106 2023-01-24 05:42:44.863266: step: 322/464, loss: 0.033330876380205154 2023-01-24 05:42:45.603544: step: 324/464, loss: 0.09162920713424683 2023-01-24 05:42:46.406000: step: 326/464, loss: 0.018649160861968994 2023-01-24 05:42:47.226511: step: 328/464, loss: 0.3532303273677826 2023-01-24 05:42:47.995481: step: 330/464, loss: 0.006986723281443119 2023-01-24 05:42:48.800455: step: 332/464, loss: 0.0072303530760109425 2023-01-24 05:42:49.595823: step: 334/464, loss: 0.04930386319756508 2023-01-24 05:42:50.281418: step: 336/464, loss: 0.02361915074288845 2023-01-24 05:42:50.980950: step: 338/464, loss: 0.05951722711324692 2023-01-24 05:42:51.767989: step: 340/464, loss: 0.00894229020923376 2023-01-24 05:42:52.517682: step: 342/464, loss: 0.022131511941552162 2023-01-24 05:42:53.218702: step: 344/464, loss: 0.10531365126371384 2023-01-24 05:42:53.901978: step: 346/464, loss: 0.002158042509108782 2023-01-24 05:42:54.673222: step: 348/464, loss: 0.01175218727439642 2023-01-24 05:42:55.442974: step: 350/464, loss: 0.003121676156297326 2023-01-24 05:42:56.227911: step: 352/464, loss: 0.0032621140126138926 2023-01-24 05:42:56.915209: step: 354/464, loss: 0.03148592263460159 2023-01-24 05:42:57.638822: step: 356/464, loss: 0.041832827031612396 2023-01-24 05:42:58.362027: step: 358/464, loss: 0.003561746794730425 2023-01-24 05:42:59.121074: step: 360/464, loss: 0.0007038118201307952 2023-01-24 05:42:59.797327: step: 362/464, loss: 0.005218755453824997 2023-01-24 05:43:00.555712: step: 364/464, loss: 0.05850491300225258 2023-01-24 05:43:01.324948: step: 366/464, loss: 0.01553135085850954 2023-01-24 05:43:02.148782: step: 368/464, loss: 0.0332174189388752 2023-01-24 05:43:02.846208: step: 370/464, loss: 0.009961692616343498 2023-01-24 05:43:03.590746: step: 372/464, loss: 0.03649236634373665 2023-01-24 05:43:04.311832: step: 374/464, loss: 0.022455479949712753 2023-01-24 05:43:05.121873: step: 376/464, loss: 0.07478629052639008 2023-01-24 05:43:05.867696: step: 378/464, loss: 0.020302915945649147 2023-01-24 05:43:06.672592: step: 380/464, loss: 0.3236561715602875 2023-01-24 05:43:07.442912: step: 382/464, loss: 0.005948520265519619 2023-01-24 05:43:08.177535: step: 384/464, loss: 0.019666900858283043 2023-01-24 05:43:08.949180: step: 386/464, loss: 0.003094746032729745 2023-01-24 05:43:09.815010: step: 388/464, loss: 0.010951261967420578 2023-01-24 05:43:10.637103: step: 390/464, loss: 0.004105293191969395 2023-01-24 05:43:11.518373: step: 392/464, loss: 0.016868796199560165 2023-01-24 05:43:12.249516: step: 394/464, loss: 0.020495926961302757 2023-01-24 05:43:12.996445: step: 396/464, loss: 0.13992923498153687 2023-01-24 05:43:13.738781: step: 398/464, loss: 0.004130828194320202 2023-01-24 05:43:14.440109: step: 400/464, loss: 0.01311857532709837 2023-01-24 05:43:15.220139: step: 402/464, loss: 0.002984261605888605 2023-01-24 05:43:16.018265: step: 404/464, loss: 0.007484616246074438 2023-01-24 05:43:16.735585: step: 406/464, loss: 0.0018927458440884948 2023-01-24 05:43:17.450976: step: 408/464, loss: 0.04563937708735466 2023-01-24 05:43:18.191570: step: 410/464, loss: 0.43953967094421387 2023-01-24 05:43:18.933115: step: 412/464, loss: 0.012645837850868702 2023-01-24 05:43:19.697885: step: 414/464, loss: 0.8327988386154175 2023-01-24 05:43:20.434585: step: 416/464, loss: 0.018644271418452263 2023-01-24 05:43:21.154791: step: 418/464, loss: 0.015001460909843445 2023-01-24 05:43:21.846749: step: 420/464, loss: 0.007504230365157127 2023-01-24 05:43:22.630164: step: 422/464, loss: 0.029034053906798363 2023-01-24 05:43:23.389479: step: 424/464, loss: 0.005283354315906763 2023-01-24 05:43:24.107045: step: 426/464, loss: 0.03228038176894188 2023-01-24 05:43:24.877834: step: 428/464, loss: 0.051429443061351776 2023-01-24 05:43:25.596914: step: 430/464, loss: 0.0792945921421051 2023-01-24 05:43:26.284338: step: 432/464, loss: 0.010515585541725159 2023-01-24 05:43:26.939038: step: 434/464, loss: 0.044191498309373856 2023-01-24 05:43:27.659364: step: 436/464, loss: 0.002709238789975643 2023-01-24 05:43:28.542020: step: 438/464, loss: 0.07902417331933975 2023-01-24 05:43:29.303863: step: 440/464, loss: 0.28564509749412537 2023-01-24 05:43:30.072354: step: 442/464, loss: 0.004908657167106867 2023-01-24 05:43:30.801023: step: 444/464, loss: 0.015626709908246994 2023-01-24 05:43:31.596748: step: 446/464, loss: 0.134577214717865 2023-01-24 05:43:32.370842: step: 448/464, loss: 0.006557188928127289 2023-01-24 05:43:33.122028: step: 450/464, loss: 0.06196486949920654 2023-01-24 05:43:33.887838: step: 452/464, loss: 0.009614722803235054 2023-01-24 05:43:34.650450: step: 454/464, loss: 0.32114338874816895 2023-01-24 05:43:35.385330: step: 456/464, loss: 0.08962027728557587 2023-01-24 05:43:36.107249: step: 458/464, loss: 0.0753081664443016 2023-01-24 05:43:36.839172: step: 460/464, loss: 0.02802230417728424 2023-01-24 05:43:37.526986: step: 462/464, loss: 0.006656539160758257 2023-01-24 05:43:38.193802: step: 464/464, loss: 0.009935715235769749 2023-01-24 05:43:38.892217: step: 466/464, loss: 0.025795668363571167 2023-01-24 05:43:39.592704: step: 468/464, loss: 0.0011418864596635103 2023-01-24 05:43:40.404094: step: 470/464, loss: 0.014790812507271767 2023-01-24 05:43:41.174007: step: 472/464, loss: 0.0025786729529500008 2023-01-24 05:43:42.038230: step: 474/464, loss: 0.08187508583068848 2023-01-24 05:43:42.724887: step: 476/464, loss: 0.021638842299580574 2023-01-24 05:43:43.446793: step: 478/464, loss: 0.004650462418794632 2023-01-24 05:43:44.132663: step: 480/464, loss: 0.031360264867544174 2023-01-24 05:43:44.884126: step: 482/464, loss: 0.0027292489539831877 2023-01-24 05:43:45.673947: step: 484/464, loss: 0.003940966445952654 2023-01-24 05:43:46.417944: step: 486/464, loss: 0.0316765122115612 2023-01-24 05:43:47.174555: step: 488/464, loss: 0.02473451755940914 2023-01-24 05:43:47.995908: step: 490/464, loss: 0.07920479029417038 2023-01-24 05:43:48.701464: step: 492/464, loss: 0.26752930879592896 2023-01-24 05:43:49.452349: step: 494/464, loss: 0.04645884409546852 2023-01-24 05:43:50.206199: step: 496/464, loss: 0.014812164008617401 2023-01-24 05:43:50.930839: step: 498/464, loss: 0.048786476254463196 2023-01-24 05:43:51.580157: step: 500/464, loss: 0.023341378197073936 2023-01-24 05:43:52.276188: step: 502/464, loss: 0.006561134476214647 2023-01-24 05:43:53.045907: step: 504/464, loss: 0.029558319598436356 2023-01-24 05:43:53.898336: step: 506/464, loss: 0.051057860255241394 2023-01-24 05:43:54.706847: step: 508/464, loss: 0.00839884765446186 2023-01-24 05:43:55.333022: step: 510/464, loss: 0.03669516742229462 2023-01-24 05:43:56.092493: step: 512/464, loss: 0.03794403746724129 2023-01-24 05:43:56.795199: step: 514/464, loss: 0.020145447924733162 2023-01-24 05:43:57.655309: step: 516/464, loss: 0.0722341388463974 2023-01-24 05:43:58.406206: step: 518/464, loss: 0.03425714001059532 2023-01-24 05:43:59.136120: step: 520/464, loss: 0.05341227725148201 2023-01-24 05:43:59.897438: step: 522/464, loss: 0.014097227714955807 2023-01-24 05:44:00.577120: step: 524/464, loss: 0.004033136647194624 2023-01-24 05:44:01.290825: step: 526/464, loss: 0.008113464340567589 2023-01-24 05:44:01.966529: step: 528/464, loss: 0.013366038911044598 2023-01-24 05:44:02.704898: step: 530/464, loss: 0.7725804448127747 2023-01-24 05:44:03.421584: step: 532/464, loss: 0.07331458479166031 2023-01-24 05:44:04.125404: step: 534/464, loss: 0.0014355615712702274 2023-01-24 05:44:04.839941: step: 536/464, loss: 0.06860658526420593 2023-01-24 05:44:05.539732: step: 538/464, loss: 0.025517934933304787 2023-01-24 05:44:06.273296: step: 540/464, loss: 0.009260217659175396 2023-01-24 05:44:07.015616: step: 542/464, loss: 0.019623389467597008 2023-01-24 05:44:07.637493: step: 544/464, loss: 0.008851874619722366 2023-01-24 05:44:08.320863: step: 546/464, loss: 0.02663077786564827 2023-01-24 05:44:09.096113: step: 548/464, loss: 0.020357387140393257 2023-01-24 05:44:09.857812: step: 550/464, loss: 0.002843934576958418 2023-01-24 05:44:10.536782: step: 552/464, loss: 0.0904221311211586 2023-01-24 05:44:11.324432: step: 554/464, loss: 0.01523869764059782 2023-01-24 05:44:11.996922: step: 556/464, loss: 0.03416203334927559 2023-01-24 05:44:12.714805: step: 558/464, loss: 0.03625953197479248 2023-01-24 05:44:13.459901: step: 560/464, loss: 0.30425795912742615 2023-01-24 05:44:14.224396: step: 562/464, loss: 0.023968152701854706 2023-01-24 05:44:14.979712: step: 564/464, loss: 0.06374843418598175 2023-01-24 05:44:15.718668: step: 566/464, loss: 0.03259289264678955 2023-01-24 05:44:16.420558: step: 568/464, loss: 0.02984294667840004 2023-01-24 05:44:17.127088: step: 570/464, loss: 0.013688970357179642 2023-01-24 05:44:17.866566: step: 572/464, loss: 0.015799837186932564 2023-01-24 05:44:18.733593: step: 574/464, loss: 0.08788468688726425 2023-01-24 05:44:19.408126: step: 576/464, loss: 0.022226519882678986 2023-01-24 05:44:20.157397: step: 578/464, loss: 0.0028302574064582586 2023-01-24 05:44:20.835408: step: 580/464, loss: 0.0019513736478984356 2023-01-24 05:44:21.632842: step: 582/464, loss: 0.029709680005908012 2023-01-24 05:44:22.428303: step: 584/464, loss: 0.06255444884300232 2023-01-24 05:44:23.168626: step: 586/464, loss: 0.0240376777946949 2023-01-24 05:44:23.821143: step: 588/464, loss: 0.002091553993523121 2023-01-24 05:44:24.557800: step: 590/464, loss: 0.008803433738648891 2023-01-24 05:44:25.256673: step: 592/464, loss: 0.02129915915429592 2023-01-24 05:44:26.005999: step: 594/464, loss: 0.01786724478006363 2023-01-24 05:44:26.799751: step: 596/464, loss: 0.008490340784192085 2023-01-24 05:44:27.505424: step: 598/464, loss: 1.899601697921753 2023-01-24 05:44:28.288874: step: 600/464, loss: 0.014766247011721134 2023-01-24 05:44:28.999030: step: 602/464, loss: 0.004581265151500702 2023-01-24 05:44:29.665702: step: 604/464, loss: 0.008937230333685875 2023-01-24 05:44:30.520160: step: 606/464, loss: 0.014192801900207996 2023-01-24 05:44:31.302355: step: 608/464, loss: 0.0029364165384322405 2023-01-24 05:44:32.020190: step: 610/464, loss: 0.005007661413401365 2023-01-24 05:44:32.797507: step: 612/464, loss: 0.1304178237915039 2023-01-24 05:44:33.516499: step: 614/464, loss: 0.0005642864853143692 2023-01-24 05:44:34.164035: step: 616/464, loss: 0.0005818564095534384 2023-01-24 05:44:34.886489: step: 618/464, loss: 0.018873225897550583 2023-01-24 05:44:35.637300: step: 620/464, loss: 0.0623464472591877 2023-01-24 05:44:36.361725: step: 622/464, loss: 0.05250658467411995 2023-01-24 05:44:37.050734: step: 624/464, loss: 0.20492692291736603 2023-01-24 05:44:37.746067: step: 626/464, loss: 0.007355514448136091 2023-01-24 05:44:38.436693: step: 628/464, loss: 0.006511658895760775 2023-01-24 05:44:39.135194: step: 630/464, loss: 0.43961629271507263 2023-01-24 05:44:39.904346: step: 632/464, loss: 0.02192610315978527 2023-01-24 05:44:40.571199: step: 634/464, loss: 0.011408335529267788 2023-01-24 05:44:41.329754: step: 636/464, loss: 0.03662848100066185 2023-01-24 05:44:42.103276: step: 638/464, loss: 0.00030731482547707856 2023-01-24 05:44:42.878280: step: 640/464, loss: 0.02446604333817959 2023-01-24 05:44:43.575847: step: 642/464, loss: 0.0036431632470339537 2023-01-24 05:44:44.346838: step: 644/464, loss: 0.0008363331435248256 2023-01-24 05:44:45.065407: step: 646/464, loss: 0.6851815581321716 2023-01-24 05:44:45.771788: step: 648/464, loss: 0.0022539652418345213 2023-01-24 05:44:46.427543: step: 650/464, loss: 0.004135349299758673 2023-01-24 05:44:47.202573: step: 652/464, loss: 0.01510525867342949 2023-01-24 05:44:47.920112: step: 654/464, loss: 0.02965255081653595 2023-01-24 05:44:48.713389: step: 656/464, loss: 0.015557816252112389 2023-01-24 05:44:49.422263: step: 658/464, loss: 0.02469242550432682 2023-01-24 05:44:50.109999: step: 660/464, loss: 0.2715980112552643 2023-01-24 05:44:50.989968: step: 662/464, loss: 0.01316728163510561 2023-01-24 05:44:51.791231: step: 664/464, loss: 0.004120450001209974 2023-01-24 05:44:52.512116: step: 666/464, loss: 0.004591710865497589 2023-01-24 05:44:53.147511: step: 668/464, loss: 0.03265814483165741 2023-01-24 05:44:53.854471: step: 670/464, loss: 0.0098827900364995 2023-01-24 05:44:54.551752: step: 672/464, loss: 0.026648204773664474 2023-01-24 05:44:55.282444: step: 674/464, loss: 0.010770948603749275 2023-01-24 05:44:55.991320: step: 676/464, loss: 0.012302640825510025 2023-01-24 05:44:56.796259: step: 678/464, loss: 0.013544720597565174 2023-01-24 05:44:57.580760: step: 680/464, loss: 0.01645328849554062 2023-01-24 05:44:58.312203: step: 682/464, loss: 0.015378853306174278 2023-01-24 05:44:59.020412: step: 684/464, loss: 0.08601805567741394 2023-01-24 05:44:59.780584: step: 686/464, loss: 0.001477090292610228 2023-01-24 05:45:00.471510: step: 688/464, loss: 0.08889120817184448 2023-01-24 05:45:01.163157: step: 690/464, loss: 0.014964031986892223 2023-01-24 05:45:01.912770: step: 692/464, loss: 0.027231650426983833 2023-01-24 05:45:02.681279: step: 694/464, loss: 0.08971980959177017 2023-01-24 05:45:03.516148: step: 696/464, loss: 0.2868679463863373 2023-01-24 05:45:04.304355: step: 698/464, loss: 0.0069223428145051 2023-01-24 05:45:04.942992: step: 700/464, loss: 0.017354296520352364 2023-01-24 05:45:05.606284: step: 702/464, loss: 0.00427975133061409 2023-01-24 05:45:06.341954: step: 704/464, loss: 0.0057968138717114925 2023-01-24 05:45:07.045055: step: 706/464, loss: 0.028853895142674446 2023-01-24 05:45:07.786203: step: 708/464, loss: 0.0894998162984848 2023-01-24 05:45:08.444430: step: 710/464, loss: 0.0027538700960576534 2023-01-24 05:45:09.210345: step: 712/464, loss: 0.0010236428352072835 2023-01-24 05:45:09.882289: step: 714/464, loss: 0.09417460858821869 2023-01-24 05:45:10.665023: step: 716/464, loss: 0.03869073837995529 2023-01-24 05:45:11.409562: step: 718/464, loss: 0.004569509066641331 2023-01-24 05:45:12.169985: step: 720/464, loss: 0.042331527918577194 2023-01-24 05:45:12.934961: step: 722/464, loss: 0.03878622502088547 2023-01-24 05:45:13.671370: step: 724/464, loss: 0.0014780150959268212 2023-01-24 05:45:14.323657: step: 726/464, loss: 0.0032074516639113426 2023-01-24 05:45:15.029148: step: 728/464, loss: 0.01672195829451084 2023-01-24 05:45:15.698178: step: 730/464, loss: 0.008378949947655201 2023-01-24 05:45:16.435631: step: 732/464, loss: 0.005625077523291111 2023-01-24 05:45:17.137906: step: 734/464, loss: 0.01687205582857132 2023-01-24 05:45:17.890078: step: 736/464, loss: 0.00461554154753685 2023-01-24 05:45:18.596414: step: 738/464, loss: 0.06471758335828781 2023-01-24 05:45:19.316848: step: 740/464, loss: 0.009379612281918526 2023-01-24 05:45:19.986259: step: 742/464, loss: 0.00973975658416748 2023-01-24 05:45:20.799816: step: 744/464, loss: 0.0017307644011452794 2023-01-24 05:45:21.449450: step: 746/464, loss: 0.046656664460897446 2023-01-24 05:45:22.221109: step: 748/464, loss: 0.009203736670315266 2023-01-24 05:45:23.009957: step: 750/464, loss: 0.08026555925607681 2023-01-24 05:45:23.733379: step: 752/464, loss: 0.011526745744049549 2023-01-24 05:45:24.490370: step: 754/464, loss: 0.02811591513454914 2023-01-24 05:45:25.252961: step: 756/464, loss: 0.011061850935220718 2023-01-24 05:45:25.905669: step: 758/464, loss: 0.002207051729783416 2023-01-24 05:45:26.585982: step: 760/464, loss: 0.07346905767917633 2023-01-24 05:45:27.286847: step: 762/464, loss: 0.09290290623903275 2023-01-24 05:45:28.072681: step: 764/464, loss: 0.004080015700310469 2023-01-24 05:45:28.825995: step: 766/464, loss: 0.01330144889652729 2023-01-24 05:45:29.556809: step: 768/464, loss: 0.057446204125881195 2023-01-24 05:45:30.300738: step: 770/464, loss: 0.002051639137789607 2023-01-24 05:45:31.043619: step: 772/464, loss: 0.04154136776924133 2023-01-24 05:45:31.687533: step: 774/464, loss: 0.0038536693900823593 2023-01-24 05:45:32.433564: step: 776/464, loss: 0.05529932305216789 2023-01-24 05:45:33.188564: step: 778/464, loss: 0.05054425820708275 2023-01-24 05:45:33.879554: step: 780/464, loss: 0.09152450412511826 2023-01-24 05:45:34.655450: step: 782/464, loss: 0.07490304112434387 2023-01-24 05:45:35.405105: step: 784/464, loss: 0.015331946313381195 2023-01-24 05:45:36.113295: step: 786/464, loss: 0.004108444321900606 2023-01-24 05:45:36.878127: step: 788/464, loss: 0.005928609520196915 2023-01-24 05:45:37.566485: step: 790/464, loss: 0.0025638609658926725 2023-01-24 05:45:38.248676: step: 792/464, loss: 6.599909102078527e-05 2023-01-24 05:45:38.954707: step: 794/464, loss: 0.006902155466377735 2023-01-24 05:45:39.713132: step: 796/464, loss: 0.01019245758652687 2023-01-24 05:45:40.406223: step: 798/464, loss: 0.0029711266979575157 2023-01-24 05:45:41.123617: step: 800/464, loss: 0.01275500375777483 2023-01-24 05:45:41.877320: step: 802/464, loss: 0.004396913107484579 2023-01-24 05:45:42.648796: step: 804/464, loss: 0.013614018447697163 2023-01-24 05:45:43.343237: step: 806/464, loss: 0.0005398777429945767 2023-01-24 05:45:43.973746: step: 808/464, loss: 0.09579914063215256 2023-01-24 05:45:44.726925: step: 810/464, loss: 0.03894653171300888 2023-01-24 05:45:45.536382: step: 812/464, loss: 0.1491641104221344 2023-01-24 05:45:46.256821: step: 814/464, loss: 0.0009035103139467537 2023-01-24 05:45:47.014279: step: 816/464, loss: 0.06483941525220871 2023-01-24 05:45:47.725695: step: 818/464, loss: 0.039540067315101624 2023-01-24 05:45:48.363259: step: 820/464, loss: 0.0060846139676868916 2023-01-24 05:45:49.115160: step: 822/464, loss: 0.025557899847626686 2023-01-24 05:45:49.813392: step: 824/464, loss: 0.006232323590666056 2023-01-24 05:45:50.524469: step: 826/464, loss: 0.0024852799251675606 2023-01-24 05:45:51.195437: step: 828/464, loss: 0.06190106272697449 2023-01-24 05:45:51.873417: step: 830/464, loss: 0.007011691574007273 2023-01-24 05:45:52.640246: step: 832/464, loss: 0.012379252351820469 2023-01-24 05:45:53.334601: step: 834/464, loss: 0.3338109254837036 2023-01-24 05:45:54.058172: step: 836/464, loss: 0.005469049327075481 2023-01-24 05:45:54.770308: step: 838/464, loss: 0.00754694314673543 2023-01-24 05:45:55.516654: step: 840/464, loss: 0.037966564297676086 2023-01-24 05:45:56.233268: step: 842/464, loss: 0.41443148255348206 2023-01-24 05:45:56.975814: step: 844/464, loss: 0.018485354259610176 2023-01-24 05:45:57.702681: step: 846/464, loss: 0.025510603561997414 2023-01-24 05:45:58.470449: step: 848/464, loss: 0.013038084842264652 2023-01-24 05:45:59.211119: step: 850/464, loss: 0.00841561984270811 2023-01-24 05:45:59.958809: step: 852/464, loss: 0.06011233106255531 2023-01-24 05:46:00.682255: step: 854/464, loss: 0.0247954148799181 2023-01-24 05:46:01.366748: step: 856/464, loss: 0.00681648775935173 2023-01-24 05:46:02.129362: step: 858/464, loss: 0.037794142961502075 2023-01-24 05:46:02.889537: step: 860/464, loss: 0.020910143852233887 2023-01-24 05:46:03.561241: step: 862/464, loss: 0.09857627749443054 2023-01-24 05:46:04.287876: step: 864/464, loss: 0.03198883682489395 2023-01-24 05:46:05.003752: step: 866/464, loss: 0.05377843603491783 2023-01-24 05:46:05.675779: step: 868/464, loss: 0.013491624034941196 2023-01-24 05:46:06.398126: step: 870/464, loss: 0.018908044323325157 2023-01-24 05:46:07.113080: step: 872/464, loss: 0.04457402229309082 2023-01-24 05:46:07.871701: step: 874/464, loss: 0.0008078064420260489 2023-01-24 05:46:08.593110: step: 876/464, loss: 0.07652704417705536 2023-01-24 05:46:09.391857: step: 878/464, loss: 0.018900269642472267 2023-01-24 05:46:10.141658: step: 880/464, loss: 2.967814725707285e-05 2023-01-24 05:46:11.075408: step: 882/464, loss: 0.04611194506287575 2023-01-24 05:46:11.818072: step: 884/464, loss: 0.01773378811776638 2023-01-24 05:46:12.620792: step: 886/464, loss: 0.01149496715515852 2023-01-24 05:46:13.378172: step: 888/464, loss: 0.04522504657506943 2023-01-24 05:46:14.080799: step: 890/464, loss: 0.0013022801140323281 2023-01-24 05:46:14.774688: step: 892/464, loss: 0.009706506505608559 2023-01-24 05:46:15.466131: step: 894/464, loss: 0.00707974610850215 2023-01-24 05:46:16.149028: step: 896/464, loss: 0.026814324781298637 2023-01-24 05:46:16.925648: step: 898/464, loss: 0.013630975037813187 2023-01-24 05:46:17.603988: step: 900/464, loss: 0.024357259273529053 2023-01-24 05:46:18.251024: step: 902/464, loss: 0.01049887202680111 2023-01-24 05:46:19.038386: step: 904/464, loss: 0.03636635094881058 2023-01-24 05:46:19.750106: step: 906/464, loss: 0.026952916756272316 2023-01-24 05:46:20.536112: step: 908/464, loss: 0.04131436347961426 2023-01-24 05:46:21.292687: step: 910/464, loss: 0.010415855795145035 2023-01-24 05:46:22.006681: step: 912/464, loss: 0.0918290987610817 2023-01-24 05:46:22.711498: step: 914/464, loss: 0.028102584183216095 2023-01-24 05:46:23.427416: step: 916/464, loss: 0.01105829793959856 2023-01-24 05:46:24.094826: step: 918/464, loss: 0.044654421508312225 2023-01-24 05:46:24.852944: step: 920/464, loss: 0.011219141073524952 2023-01-24 05:46:25.652061: step: 922/464, loss: 0.039576154202222824 2023-01-24 05:46:26.352599: step: 924/464, loss: 0.12635676562786102 2023-01-24 05:46:27.014247: step: 926/464, loss: 0.03394423425197601 2023-01-24 05:46:27.798545: step: 928/464, loss: 0.043058667331933975 2023-01-24 05:46:28.429347: step: 930/464, loss: 0.0004498627968132496 ================================================== Loss: 0.047 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36621235359732296, 'r': 0.33980614973262036, 'f1': 0.3525154348604152}, 'combined': 0.25974821516030594, 'epoch': 29} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31272884594762634, 'r': 0.27035114872228033, 'f1': 0.29000001205116754}, 'combined': 0.18010527064230406, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34464829035062294, 'r': 0.32241291677961503, 'f1': 0.3331600140056022}, 'combined': 0.24548632610939108, 'epoch': 29} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.309085290750516, 'r': 0.26693729655726384, 'f1': 0.2864692938663319}, 'combined': 0.17791250882224824, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.364603059775996, 'r': 0.33346997118032273, 'f1': 0.3483422692012489}, 'combined': 0.2566732509903939, 'epoch': 29} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3237990910628562, 'r': 0.27799961527453926, 'f1': 0.29915658439867926}, 'combined': 0.18579198399496924, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35597826086956524, 'r': 0.23392857142857143, 'f1': 0.2823275862068966}, 'combined': 0.1882183908045977, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31451612903225806, 'r': 0.42391304347826086, 'f1': 0.36111111111111105}, 'combined': 0.18055555555555552, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5113636363636364, 'r': 0.1939655172413793, 'f1': 0.28125}, 'combined': 0.1875, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:49:08.845449: step: 2/464, loss: 0.2376316487789154 2023-01-24 05:49:09.694246: step: 4/464, loss: 0.021520117297768593 2023-01-24 05:49:10.453404: step: 6/464, loss: 0.025482257828116417 2023-01-24 05:49:11.120765: step: 8/464, loss: 0.026679327711462975 2023-01-24 05:49:11.876462: step: 10/464, loss: 0.11775459349155426 2023-01-24 05:49:12.613445: step: 12/464, loss: 0.011979639530181885 2023-01-24 05:49:13.382893: step: 14/464, loss: 0.016509508714079857 2023-01-24 05:49:14.062522: step: 16/464, loss: 0.001127360388636589 2023-01-24 05:49:14.794849: step: 18/464, loss: 0.0002913039061240852 2023-01-24 05:49:15.639731: step: 20/464, loss: 0.0073746065609157085 2023-01-24 05:49:16.371570: step: 22/464, loss: 0.0017119019757956266 2023-01-24 05:49:17.125306: step: 24/464, loss: 0.01746697537600994 2023-01-24 05:49:17.917325: step: 26/464, loss: 0.003907793201506138 2023-01-24 05:49:18.701057: step: 28/464, loss: 0.03076671063899994 2023-01-24 05:49:19.417439: step: 30/464, loss: 0.02289789356291294 2023-01-24 05:49:20.205140: step: 32/464, loss: 1.358851671218872 2023-01-24 05:49:20.961838: step: 34/464, loss: 0.0058686695992946625 2023-01-24 05:49:21.721234: step: 36/464, loss: 0.000436199305113405 2023-01-24 05:49:22.420348: step: 38/464, loss: 0.08685613423585892 2023-01-24 05:49:23.177072: step: 40/464, loss: 0.013516401872038841 2023-01-24 05:49:23.852534: step: 42/464, loss: 0.005017260555177927 2023-01-24 05:49:24.620428: step: 44/464, loss: 0.007543819025158882 2023-01-24 05:49:25.420888: step: 46/464, loss: 0.23378436267375946 2023-01-24 05:49:26.071806: step: 48/464, loss: 0.024231471121311188 2023-01-24 05:49:26.775442: step: 50/464, loss: 0.021258708089590073 2023-01-24 05:49:27.511778: step: 52/464, loss: 0.011662538163363934 2023-01-24 05:49:28.239973: step: 54/464, loss: 0.2266659438610077 2023-01-24 05:49:29.055224: step: 56/464, loss: 0.024330206215381622 2023-01-24 05:49:29.762351: step: 58/464, loss: 0.0032375429291278124 2023-01-24 05:49:30.510371: step: 60/464, loss: 0.018813833594322205 2023-01-24 05:49:31.216748: step: 62/464, loss: 0.040911901742219925 2023-01-24 05:49:31.934428: step: 64/464, loss: 0.01895122602581978 2023-01-24 05:49:32.620380: step: 66/464, loss: 0.00867029745131731 2023-01-24 05:49:33.348628: step: 68/464, loss: 0.023470614105463028 2023-01-24 05:49:34.090175: step: 70/464, loss: 0.018438637256622314 2023-01-24 05:49:34.843283: step: 72/464, loss: 0.0023036121856421232 2023-01-24 05:49:35.567259: step: 74/464, loss: 0.002937216544523835 2023-01-24 05:49:36.306717: step: 76/464, loss: 0.5913159847259521 2023-01-24 05:49:37.066744: step: 78/464, loss: 0.007344986777752638 2023-01-24 05:49:37.815538: step: 80/464, loss: 0.009015419520437717 2023-01-24 05:49:38.598119: step: 82/464, loss: 0.10811667144298553 2023-01-24 05:49:39.268327: step: 84/464, loss: 0.010973623022437096 2023-01-24 05:49:39.959313: step: 86/464, loss: 0.00012188367691123858 2023-01-24 05:49:40.732043: step: 88/464, loss: 0.04058532416820526 2023-01-24 05:49:41.563304: step: 90/464, loss: 0.038530707359313965 2023-01-24 05:49:42.333955: step: 92/464, loss: 0.008467328734695911 2023-01-24 05:49:43.061300: step: 94/464, loss: 0.003021214623004198 2023-01-24 05:49:43.809191: step: 96/464, loss: 0.07035733014345169 2023-01-24 05:49:44.528774: step: 98/464, loss: 0.007195932790637016 2023-01-24 05:49:45.309395: step: 100/464, loss: 0.012332151643931866 2023-01-24 05:49:46.072326: step: 102/464, loss: 0.003940146416425705 2023-01-24 05:49:46.751760: step: 104/464, loss: 0.0043151951394975185 2023-01-24 05:49:47.480213: step: 106/464, loss: 0.062128935009241104 2023-01-24 05:49:48.226692: step: 108/464, loss: 0.021147824823856354 2023-01-24 05:49:49.008165: step: 110/464, loss: 0.004816403146833181 2023-01-24 05:49:49.783840: step: 112/464, loss: 0.07347775250673294 2023-01-24 05:49:50.540139: step: 114/464, loss: 0.0327020138502121 2023-01-24 05:49:51.247558: step: 116/464, loss: 0.0873594582080841 2023-01-24 05:49:51.954700: step: 118/464, loss: 0.03273965045809746 2023-01-24 05:49:52.639924: step: 120/464, loss: 0.03554631397128105 2023-01-24 05:49:53.455289: step: 122/464, loss: 0.14380039274692535 2023-01-24 05:49:54.221914: step: 124/464, loss: 0.05833486095070839 2023-01-24 05:49:54.949361: step: 126/464, loss: 0.006037155166268349 2023-01-24 05:49:55.603875: step: 128/464, loss: 0.0014223945327103138 2023-01-24 05:49:56.346926: step: 130/464, loss: 0.01266569271683693 2023-01-24 05:49:57.097686: step: 132/464, loss: 0.02276783436536789 2023-01-24 05:49:57.893217: step: 134/464, loss: 0.010872559621930122 2023-01-24 05:49:58.831351: step: 136/464, loss: 0.005215835757553577 2023-01-24 05:49:59.504014: step: 138/464, loss: 0.002627200447022915 2023-01-24 05:50:00.186331: step: 140/464, loss: 0.10269683599472046 2023-01-24 05:50:00.990988: step: 142/464, loss: 0.0073095522820949554 2023-01-24 05:50:01.769020: step: 144/464, loss: 0.13490694761276245 2023-01-24 05:50:02.530033: step: 146/464, loss: 0.008064567111432552 2023-01-24 05:50:03.232774: step: 148/464, loss: 0.0015113273402675986 2023-01-24 05:50:03.991160: step: 150/464, loss: 0.0027289805002510548 2023-01-24 05:50:04.751802: step: 152/464, loss: 0.012556140311062336 2023-01-24 05:50:05.433770: step: 154/464, loss: 0.013792785815894604 2023-01-24 05:50:06.207381: step: 156/464, loss: 0.037203893065452576 2023-01-24 05:50:06.947343: step: 158/464, loss: 0.13290239870548248 2023-01-24 05:50:07.721893: step: 160/464, loss: 0.03926850110292435 2023-01-24 05:50:08.481217: step: 162/464, loss: 0.09682679176330566 2023-01-24 05:50:09.206572: step: 164/464, loss: 0.05842824652791023 2023-01-24 05:50:09.933828: step: 166/464, loss: 0.00015586864901706576 2023-01-24 05:50:10.725746: step: 168/464, loss: 0.01858036406338215 2023-01-24 05:50:11.412673: step: 170/464, loss: 0.005358466412872076 2023-01-24 05:50:12.237430: step: 172/464, loss: 0.021037913858890533 2023-01-24 05:50:13.012774: step: 174/464, loss: 0.02369670197367668 2023-01-24 05:50:13.776031: step: 176/464, loss: 0.009025280363857746 2023-01-24 05:50:14.436318: step: 178/464, loss: 0.025579875335097313 2023-01-24 05:50:15.189022: step: 180/464, loss: 0.18517696857452393 2023-01-24 05:50:15.989921: step: 182/464, loss: 0.4869004189968109 2023-01-24 05:50:16.782068: step: 184/464, loss: 0.0266411192715168 2023-01-24 05:50:17.432096: step: 186/464, loss: 0.10192040354013443 2023-01-24 05:50:18.233356: step: 188/464, loss: 0.02971969172358513 2023-01-24 05:50:19.050694: step: 190/464, loss: 0.04456546530127525 2023-01-24 05:50:19.866139: step: 192/464, loss: 0.05243932828307152 2023-01-24 05:50:20.729458: step: 194/464, loss: 0.045412931591272354 2023-01-24 05:50:21.519878: step: 196/464, loss: 0.02161242440342903 2023-01-24 05:50:22.180252: step: 198/464, loss: 0.0315740630030632 2023-01-24 05:50:22.920043: step: 200/464, loss: 0.051023464649915695 2023-01-24 05:50:23.651406: step: 202/464, loss: 0.010172654874622822 2023-01-24 05:50:24.498294: step: 204/464, loss: 0.0024958052672445774 2023-01-24 05:50:25.214190: step: 206/464, loss: 0.011833332479000092 2023-01-24 05:50:25.994955: step: 208/464, loss: 0.05117599666118622 2023-01-24 05:50:26.828557: step: 210/464, loss: 0.09916941076517105 2023-01-24 05:50:27.611237: step: 212/464, loss: 0.014036203734576702 2023-01-24 05:50:28.356967: step: 214/464, loss: 0.0511137992143631 2023-01-24 05:50:29.095556: step: 216/464, loss: 0.06060526520013809 2023-01-24 05:50:29.781108: step: 218/464, loss: 0.003811330534517765 2023-01-24 05:50:30.494603: step: 220/464, loss: 0.020781518891453743 2023-01-24 05:50:31.242545: step: 222/464, loss: 0.006058407947421074 2023-01-24 05:50:31.990119: step: 224/464, loss: 0.003760756691917777 2023-01-24 05:50:32.833295: step: 226/464, loss: 0.10233800113201141 2023-01-24 05:50:33.587960: step: 228/464, loss: 0.004481780342757702 2023-01-24 05:50:34.300612: step: 230/464, loss: 0.01326355617493391 2023-01-24 05:50:35.039726: step: 232/464, loss: 0.0025657941587269306 2023-01-24 05:50:35.744174: step: 234/464, loss: 0.005028776824474335 2023-01-24 05:50:36.510224: step: 236/464, loss: 0.0007116646156646311 2023-01-24 05:50:37.165548: step: 238/464, loss: 0.05916238948702812 2023-01-24 05:50:37.994540: step: 240/464, loss: 0.007669151294976473 2023-01-24 05:50:38.726995: step: 242/464, loss: 0.030830563977360725 2023-01-24 05:50:39.540307: step: 244/464, loss: 0.012058115564286709 2023-01-24 05:50:40.326849: step: 246/464, loss: 0.005145329050719738 2023-01-24 05:50:41.053462: step: 248/464, loss: 0.0026552604977041483 2023-01-24 05:50:41.820413: step: 250/464, loss: 0.015001723542809486 2023-01-24 05:50:42.605356: step: 252/464, loss: 0.07432577013969421 2023-01-24 05:50:43.324521: step: 254/464, loss: 0.011798490770161152 2023-01-24 05:50:44.068922: step: 256/464, loss: 0.0030184625647962093 2023-01-24 05:50:44.817801: step: 258/464, loss: 0.012257128022611141 2023-01-24 05:50:45.468787: step: 260/464, loss: 0.0036082889419049025 2023-01-24 05:50:46.180031: step: 262/464, loss: 0.005945158191025257 2023-01-24 05:50:46.951386: step: 264/464, loss: 0.0023442127276211977 2023-01-24 05:50:47.627113: step: 266/464, loss: 0.02518412098288536 2023-01-24 05:50:48.369260: step: 268/464, loss: 0.01687704771757126 2023-01-24 05:50:49.163436: step: 270/464, loss: 0.06268581748008728 2023-01-24 05:50:49.867356: step: 272/464, loss: 0.3735574781894684 2023-01-24 05:50:50.571323: step: 274/464, loss: 0.011284289881587029 2023-01-24 05:50:51.340345: step: 276/464, loss: 0.014095884747803211 2023-01-24 05:50:52.004420: step: 278/464, loss: 0.018179569393396378 2023-01-24 05:50:52.722513: step: 280/464, loss: 0.04955065995454788 2023-01-24 05:50:53.482151: step: 282/464, loss: 0.003387266304343939 2023-01-24 05:50:54.172699: step: 284/464, loss: 0.01337368693202734 2023-01-24 05:50:54.947336: step: 286/464, loss: 0.024945693090558052 2023-01-24 05:50:55.648659: step: 288/464, loss: 0.013176217675209045 2023-01-24 05:50:56.425324: step: 290/464, loss: 0.00446714460849762 2023-01-24 05:50:57.181297: step: 292/464, loss: 0.011248644441366196 2023-01-24 05:50:57.984128: step: 294/464, loss: 0.012288033030927181 2023-01-24 05:50:58.724590: step: 296/464, loss: 0.017076879739761353 2023-01-24 05:50:59.510173: step: 298/464, loss: 0.2934398949146271 2023-01-24 05:51:00.193123: step: 300/464, loss: 0.034979406744241714 2023-01-24 05:51:00.934262: step: 302/464, loss: 0.06029561907052994 2023-01-24 05:51:01.664290: step: 304/464, loss: 0.001526201725937426 2023-01-24 05:51:02.379014: step: 306/464, loss: 0.021015586331486702 2023-01-24 05:51:03.089328: step: 308/464, loss: 0.005308941472321749 2023-01-24 05:51:03.826745: step: 310/464, loss: 0.26065701246261597 2023-01-24 05:51:04.469654: step: 312/464, loss: 0.0007154019549489021 2023-01-24 05:51:05.228550: step: 314/464, loss: 0.03025280497968197 2023-01-24 05:51:05.928294: step: 316/464, loss: 0.08667907863855362 2023-01-24 05:51:06.636465: step: 318/464, loss: 0.08646897971630096 2023-01-24 05:51:07.412152: step: 320/464, loss: 0.005823382176458836 2023-01-24 05:51:08.144447: step: 322/464, loss: 0.07302051782608032 2023-01-24 05:51:08.888042: step: 324/464, loss: 0.014572813175618649 2023-01-24 05:51:09.646040: step: 326/464, loss: 0.03465670347213745 2023-01-24 05:51:10.390901: step: 328/464, loss: 0.027535736560821533 2023-01-24 05:51:11.116215: step: 330/464, loss: 0.0012932810932397842 2023-01-24 05:51:11.851917: step: 332/464, loss: 0.0062098451890051365 2023-01-24 05:51:12.516391: step: 334/464, loss: 0.02418820932507515 2023-01-24 05:51:13.275784: step: 336/464, loss: 0.03034006431698799 2023-01-24 05:51:14.108169: step: 338/464, loss: 0.02313992753624916 2023-01-24 05:51:14.805826: step: 340/464, loss: 0.004631619900465012 2023-01-24 05:51:15.533068: step: 342/464, loss: 0.08408119529485703 2023-01-24 05:51:16.312383: step: 344/464, loss: 0.005881492979824543 2023-01-24 05:51:16.981712: step: 346/464, loss: 0.020383819937705994 2023-01-24 05:51:17.724460: step: 348/464, loss: 0.00032111912150867283 2023-01-24 05:51:18.571268: step: 350/464, loss: 0.0056900642812252045 2023-01-24 05:51:19.243135: step: 352/464, loss: 0.0036925787571817636 2023-01-24 05:51:20.023507: step: 354/464, loss: 0.0058372109197080135 2023-01-24 05:51:20.768011: step: 356/464, loss: 0.025614218786358833 2023-01-24 05:51:21.580971: step: 358/464, loss: 0.10615309327840805 2023-01-24 05:51:22.358547: step: 360/464, loss: 0.06694373488426208 2023-01-24 05:51:23.105838: step: 362/464, loss: 0.015115071088075638 2023-01-24 05:51:23.814918: step: 364/464, loss: 0.6433719992637634 2023-01-24 05:51:24.552802: step: 366/464, loss: 0.02050706557929516 2023-01-24 05:51:25.226604: step: 368/464, loss: 0.009005571715533733 2023-01-24 05:51:25.870248: step: 370/464, loss: 0.04691807180643082 2023-01-24 05:51:26.685507: step: 372/464, loss: 0.0010442383354529738 2023-01-24 05:51:27.388017: step: 374/464, loss: 0.014815493486821651 2023-01-24 05:51:28.164936: step: 376/464, loss: 0.9234954118728638 2023-01-24 05:51:28.917454: step: 378/464, loss: 0.05523635447025299 2023-01-24 05:51:29.792908: step: 380/464, loss: 0.008017596788704395 2023-01-24 05:51:30.514490: step: 382/464, loss: 0.006063917186111212 2023-01-24 05:51:31.220440: step: 384/464, loss: 0.005335265304893255 2023-01-24 05:51:31.926759: step: 386/464, loss: 0.010618263855576515 2023-01-24 05:51:32.668007: step: 388/464, loss: 0.00885799154639244 2023-01-24 05:51:33.448825: step: 390/464, loss: 0.026505211368203163 2023-01-24 05:51:34.163008: step: 392/464, loss: 0.0025265130680054426 2023-01-24 05:51:34.881042: step: 394/464, loss: 0.04610437527298927 2023-01-24 05:51:35.591316: step: 396/464, loss: 0.02633264847099781 2023-01-24 05:51:36.265753: step: 398/464, loss: 0.0013205071445554495 2023-01-24 05:51:37.002884: step: 400/464, loss: 0.006168350577354431 2023-01-24 05:51:37.695265: step: 402/464, loss: 0.00683568837121129 2023-01-24 05:51:38.425391: step: 404/464, loss: 0.05415549874305725 2023-01-24 05:51:39.153205: step: 406/464, loss: 0.019467797130346298 2023-01-24 05:51:39.841809: step: 408/464, loss: 0.1880302131175995 2023-01-24 05:51:40.614859: step: 410/464, loss: 0.059606973081827164 2023-01-24 05:51:41.296953: step: 412/464, loss: 0.010953270830214024 2023-01-24 05:51:42.091184: step: 414/464, loss: 0.017974497750401497 2023-01-24 05:51:42.881372: step: 416/464, loss: 0.021628299728035927 2023-01-24 05:51:43.586182: step: 418/464, loss: 0.0038115577772259712 2023-01-24 05:51:44.329883: step: 420/464, loss: 0.01009564008563757 2023-01-24 05:51:45.039712: step: 422/464, loss: 0.006931070238351822 2023-01-24 05:51:45.716550: step: 424/464, loss: 0.01273108460009098 2023-01-24 05:51:46.434343: step: 426/464, loss: 0.029455358162522316 2023-01-24 05:51:47.090301: step: 428/464, loss: 0.01086959894746542 2023-01-24 05:51:47.813058: step: 430/464, loss: 0.021176839247345924 2023-01-24 05:51:48.656453: step: 432/464, loss: 0.02633567713201046 2023-01-24 05:51:49.339344: step: 434/464, loss: 0.031902752816677094 2023-01-24 05:51:50.101364: step: 436/464, loss: 0.011048228479921818 2023-01-24 05:51:50.788683: step: 438/464, loss: 0.0007115676999092102 2023-01-24 05:51:51.583240: step: 440/464, loss: 0.07314342260360718 2023-01-24 05:51:52.261723: step: 442/464, loss: 0.0017838759813457727 2023-01-24 05:51:53.006281: step: 444/464, loss: 0.021234095096588135 2023-01-24 05:51:53.766247: step: 446/464, loss: 0.14134712517261505 2023-01-24 05:51:54.575214: step: 448/464, loss: 0.030335931107401848 2023-01-24 05:51:55.316142: step: 450/464, loss: 0.3117372691631317 2023-01-24 05:51:56.058204: step: 452/464, loss: 0.25308120250701904 2023-01-24 05:51:56.806168: step: 454/464, loss: 0.027010025456547737 2023-01-24 05:51:57.608042: step: 456/464, loss: 0.009400018490850925 2023-01-24 05:51:58.355366: step: 458/464, loss: 0.009423406794667244 2023-01-24 05:51:59.059760: step: 460/464, loss: 0.047979265451431274 2023-01-24 05:51:59.795658: step: 462/464, loss: 0.301637202501297 2023-01-24 05:52:00.538641: step: 464/464, loss: 0.5655904412269592 2023-01-24 05:52:01.244158: step: 466/464, loss: 0.005549916531890631 2023-01-24 05:52:01.973395: step: 468/464, loss: 0.040917810052633286 2023-01-24 05:52:02.653494: step: 470/464, loss: 0.03385911136865616 2023-01-24 05:52:03.408503: step: 472/464, loss: 0.0006872548838146031 2023-01-24 05:52:04.281870: step: 474/464, loss: 0.012125966139137745 2023-01-24 05:52:04.989486: step: 476/464, loss: 0.005242605693638325 2023-01-24 05:52:05.728879: step: 478/464, loss: 0.01875889115035534 2023-01-24 05:52:06.493246: step: 480/464, loss: 0.07527235895395279 2023-01-24 05:52:07.216995: step: 482/464, loss: 0.011973106302320957 2023-01-24 05:52:07.844730: step: 484/464, loss: 0.02894880622625351 2023-01-24 05:52:08.591587: step: 486/464, loss: 0.016231101006269455 2023-01-24 05:52:09.354663: step: 488/464, loss: 0.10126560181379318 2023-01-24 05:52:10.059295: step: 490/464, loss: 0.040294162929058075 2023-01-24 05:52:10.763591: step: 492/464, loss: 0.012498761527240276 2023-01-24 05:52:11.431128: step: 494/464, loss: 0.006310733500868082 2023-01-24 05:52:12.235690: step: 496/464, loss: 0.011964485980570316 2023-01-24 05:52:12.956753: step: 498/464, loss: 0.0012873383238911629 2023-01-24 05:52:13.638156: step: 500/464, loss: 0.03405240550637245 2023-01-24 05:52:14.331782: step: 502/464, loss: 0.021336054429411888 2023-01-24 05:52:15.034731: step: 504/464, loss: 0.022539552301168442 2023-01-24 05:52:15.733233: step: 506/464, loss: 0.7507018446922302 2023-01-24 05:52:16.390918: step: 508/464, loss: 0.0006076518911868334 2023-01-24 05:52:17.177827: step: 510/464, loss: 0.009058432653546333 2023-01-24 05:52:17.896872: step: 512/464, loss: 0.05086961388587952 2023-01-24 05:52:18.612807: step: 514/464, loss: 0.41207343339920044 2023-01-24 05:52:19.294640: step: 516/464, loss: 0.0023046203423291445 2023-01-24 05:52:20.043490: step: 518/464, loss: 0.020794259384274483 2023-01-24 05:52:20.766066: step: 520/464, loss: 0.022838469594717026 2023-01-24 05:52:21.475470: step: 522/464, loss: 0.05027368664741516 2023-01-24 05:52:22.194011: step: 524/464, loss: 0.006262997165322304 2023-01-24 05:52:22.917335: step: 526/464, loss: 0.003343733726069331 2023-01-24 05:52:23.699948: step: 528/464, loss: 0.008441003039479256 2023-01-24 05:52:24.345936: step: 530/464, loss: 0.00423327274620533 2023-01-24 05:52:25.124037: step: 532/464, loss: 0.05196404829621315 2023-01-24 05:52:25.855498: step: 534/464, loss: 0.016537966206669807 2023-01-24 05:52:26.581229: step: 536/464, loss: 0.023798886686563492 2023-01-24 05:52:27.326634: step: 538/464, loss: 0.02561027929186821 2023-01-24 05:52:28.107531: step: 540/464, loss: 0.013788457959890366 2023-01-24 05:52:28.899142: step: 542/464, loss: 0.0004529604921117425 2023-01-24 05:52:29.573158: step: 544/464, loss: 0.0018864155281335115 2023-01-24 05:52:30.373740: step: 546/464, loss: 0.013033518567681313 2023-01-24 05:52:31.108242: step: 548/464, loss: 0.061912618577480316 2023-01-24 05:52:31.809486: step: 550/464, loss: 0.06112837791442871 2023-01-24 05:52:32.633995: step: 552/464, loss: 0.005378578323870897 2023-01-24 05:52:33.429246: step: 554/464, loss: 0.06155424192547798 2023-01-24 05:52:34.196412: step: 556/464, loss: 0.03351220861077309 2023-01-24 05:52:34.921864: step: 558/464, loss: 0.003313565393909812 2023-01-24 05:52:35.711607: step: 560/464, loss: 0.006736339069902897 2023-01-24 05:52:36.453159: step: 562/464, loss: 0.3144710063934326 2023-01-24 05:52:37.181509: step: 564/464, loss: 0.02292277291417122 2023-01-24 05:52:37.942602: step: 566/464, loss: 0.0073122428730130196 2023-01-24 05:52:38.627531: step: 568/464, loss: 0.0016507483087480068 2023-01-24 05:52:39.391394: step: 570/464, loss: 0.016362829133868217 2023-01-24 05:52:40.083825: step: 572/464, loss: 0.027213526889681816 2023-01-24 05:52:40.786468: step: 574/464, loss: 0.0034306913148611784 2023-01-24 05:52:41.479183: step: 576/464, loss: 0.0063187661580741405 2023-01-24 05:52:42.260554: step: 578/464, loss: 0.002447170903906226 2023-01-24 05:52:43.010217: step: 580/464, loss: 0.05246298760175705 2023-01-24 05:52:43.708736: step: 582/464, loss: 0.002014671452343464 2023-01-24 05:52:44.500083: step: 584/464, loss: 0.0020586480386555195 2023-01-24 05:52:45.222015: step: 586/464, loss: 0.04257035627961159 2023-01-24 05:52:45.915396: step: 588/464, loss: 0.011674296110868454 2023-01-24 05:52:46.681571: step: 590/464, loss: 0.0045217531733214855 2023-01-24 05:52:47.395573: step: 592/464, loss: 0.03411796689033508 2023-01-24 05:52:48.143262: step: 594/464, loss: 0.002003189641982317 2023-01-24 05:52:48.882220: step: 596/464, loss: 0.009588696993887424 2023-01-24 05:52:49.595392: step: 598/464, loss: 0.02322070114314556 2023-01-24 05:52:50.261497: step: 600/464, loss: 0.0009125259821303189 2023-01-24 05:52:51.011455: step: 602/464, loss: 0.0006416767719201744 2023-01-24 05:52:51.924411: step: 604/464, loss: 0.0335293784737587 2023-01-24 05:52:52.654140: step: 606/464, loss: 0.004608626943081617 2023-01-24 05:52:53.432316: step: 608/464, loss: 0.06503929942846298 2023-01-24 05:52:54.169440: step: 610/464, loss: 0.0037166120018810034 2023-01-24 05:52:54.925443: step: 612/464, loss: 0.002145248232409358 2023-01-24 05:52:55.683913: step: 614/464, loss: 0.012959728017449379 2023-01-24 05:52:56.425804: step: 616/464, loss: 0.008085524663329124 2023-01-24 05:52:57.033248: step: 618/464, loss: 0.005036820657551289 2023-01-24 05:52:57.819460: step: 620/464, loss: 0.08978661894798279 2023-01-24 05:52:58.514721: step: 622/464, loss: 0.0007188359159044921 2023-01-24 05:52:59.277248: step: 624/464, loss: 0.002841709880158305 2023-01-24 05:52:59.969647: step: 626/464, loss: 0.00509470934048295 2023-01-24 05:53:00.738893: step: 628/464, loss: 0.005124698393046856 2023-01-24 05:53:01.458136: step: 630/464, loss: 0.09914972633123398 2023-01-24 05:53:02.134914: step: 632/464, loss: 0.007541782688349485 2023-01-24 05:53:02.884206: step: 634/464, loss: 0.004804661963135004 2023-01-24 05:53:03.657599: step: 636/464, loss: 0.20996464788913727 2023-01-24 05:53:04.445068: step: 638/464, loss: 9.226617839885876e-05 2023-01-24 05:53:05.247563: step: 640/464, loss: 0.0010231471387669444 2023-01-24 05:53:05.962833: step: 642/464, loss: 0.007877390831708908 2023-01-24 05:53:06.700423: step: 644/464, loss: 0.020709160715341568 2023-01-24 05:53:07.401178: step: 646/464, loss: 0.18257126212120056 2023-01-24 05:53:08.118655: step: 648/464, loss: 0.005651933141052723 2023-01-24 05:53:08.830723: step: 650/464, loss: 0.0015648715198040009 2023-01-24 05:53:09.523996: step: 652/464, loss: 0.03712617978453636 2023-01-24 05:53:10.179147: step: 654/464, loss: 0.3934994637966156 2023-01-24 05:53:10.940605: step: 656/464, loss: 0.053740598261356354 2023-01-24 05:53:11.726719: step: 658/464, loss: 0.005652363412082195 2023-01-24 05:53:12.564948: step: 660/464, loss: 0.04853597655892372 2023-01-24 05:53:13.320078: step: 662/464, loss: 0.045159582048654556 2023-01-24 05:53:14.083047: step: 664/464, loss: 0.08590636402368546 2023-01-24 05:53:14.718006: step: 666/464, loss: 0.03339572995901108 2023-01-24 05:53:15.450519: step: 668/464, loss: 0.005777023267000914 2023-01-24 05:53:16.191755: step: 670/464, loss: 0.0036592998076230288 2023-01-24 05:53:16.874494: step: 672/464, loss: 0.004354698583483696 2023-01-24 05:53:17.590718: step: 674/464, loss: 0.0054754349403083324 2023-01-24 05:53:18.280951: step: 676/464, loss: 0.002870019059628248 2023-01-24 05:53:19.034226: step: 678/464, loss: 0.04529373720288277 2023-01-24 05:53:19.800819: step: 680/464, loss: 0.0007131235906854272 2023-01-24 05:53:20.543480: step: 682/464, loss: 0.049643710255622864 2023-01-24 05:53:21.332143: step: 684/464, loss: 0.00527068879455328 2023-01-24 05:53:22.032840: step: 686/464, loss: 0.015814781188964844 2023-01-24 05:53:22.701573: step: 688/464, loss: 0.017705701291561127 2023-01-24 05:53:23.410034: step: 690/464, loss: 0.020682422444224358 2023-01-24 05:53:24.208540: step: 692/464, loss: 0.0031925418879836798 2023-01-24 05:53:24.938736: step: 694/464, loss: 0.008852974511682987 2023-01-24 05:53:25.696531: step: 696/464, loss: 0.0342039093375206 2023-01-24 05:53:26.412374: step: 698/464, loss: 0.1069117933511734 2023-01-24 05:53:27.238565: step: 700/464, loss: 0.04989028722047806 2023-01-24 05:53:27.908889: step: 702/464, loss: 0.0005538988625630736 2023-01-24 05:53:28.671537: step: 704/464, loss: 0.004752847366034985 2023-01-24 05:53:29.471668: step: 706/464, loss: 0.00048479001270607114 2023-01-24 05:53:30.244617: step: 708/464, loss: 0.011142611503601074 2023-01-24 05:53:30.990254: step: 710/464, loss: 0.0004893583245575428 2023-01-24 05:53:31.656480: step: 712/464, loss: 0.0023483308032155037 2023-01-24 05:53:32.385341: step: 714/464, loss: 0.009256926365196705 2023-01-24 05:53:33.145139: step: 716/464, loss: 0.007569923531264067 2023-01-24 05:53:33.800441: step: 718/464, loss: 0.000603106280323118 2023-01-24 05:53:34.570788: step: 720/464, loss: 0.2925114631652832 2023-01-24 05:53:35.290523: step: 722/464, loss: 0.01142470259219408 2023-01-24 05:53:35.990708: step: 724/464, loss: 0.0077668167650699615 2023-01-24 05:53:36.719046: step: 726/464, loss: 0.011982940137386322 2023-01-24 05:53:37.395932: step: 728/464, loss: 0.03843516856431961 2023-01-24 05:53:38.122572: step: 730/464, loss: 0.0030147875659167767 2023-01-24 05:53:38.849496: step: 732/464, loss: 0.010897437110543251 2023-01-24 05:53:39.496425: step: 734/464, loss: 0.0892910584807396 2023-01-24 05:53:40.254827: step: 736/464, loss: 0.026463089510798454 2023-01-24 05:53:41.035784: step: 738/464, loss: 0.007182304281741381 2023-01-24 05:53:41.832956: step: 740/464, loss: 0.08904621750116348 2023-01-24 05:53:42.608278: step: 742/464, loss: 0.02075066789984703 2023-01-24 05:53:43.346545: step: 744/464, loss: 0.05011540278792381 2023-01-24 05:53:44.156784: step: 746/464, loss: 0.0033906898461282253 2023-01-24 05:53:45.017096: step: 748/464, loss: 0.02135995402932167 2023-01-24 05:53:45.821529: step: 750/464, loss: 0.03749980032444 2023-01-24 05:53:46.545188: step: 752/464, loss: 0.013032837770879269 2023-01-24 05:53:47.304650: step: 754/464, loss: 0.03569572791457176 2023-01-24 05:53:48.021703: step: 756/464, loss: 0.028139611706137657 2023-01-24 05:53:48.754935: step: 758/464, loss: 0.0023053884506225586 2023-01-24 05:53:49.524477: step: 760/464, loss: 0.037611622363328934 2023-01-24 05:53:50.186499: step: 762/464, loss: 0.1201668381690979 2023-01-24 05:53:50.969936: step: 764/464, loss: 0.18216541409492493 2023-01-24 05:53:51.695853: step: 766/464, loss: 0.006229735910892487 2023-01-24 05:53:52.395913: step: 768/464, loss: 0.004708054009824991 2023-01-24 05:53:53.137038: step: 770/464, loss: 0.06688012182712555 2023-01-24 05:53:53.900841: step: 772/464, loss: 0.023829631507396698 2023-01-24 05:53:54.705951: step: 774/464, loss: 0.0007440192857757211 2023-01-24 05:53:55.548421: step: 776/464, loss: 0.0322493351995945 2023-01-24 05:53:56.358358: step: 778/464, loss: 0.013235216960310936 2023-01-24 05:53:57.154239: step: 780/464, loss: 0.001265358179807663 2023-01-24 05:53:57.849689: step: 782/464, loss: 0.017159154638648033 2023-01-24 05:53:58.638165: step: 784/464, loss: 0.0022521738428622484 2023-01-24 05:53:59.420104: step: 786/464, loss: 0.003812970593571663 2023-01-24 05:54:00.075759: step: 788/464, loss: 0.0006713416078127921 2023-01-24 05:54:00.820543: step: 790/464, loss: 0.04698442295193672 2023-01-24 05:54:01.561954: step: 792/464, loss: 0.23842373490333557 2023-01-24 05:54:02.264449: step: 794/464, loss: 0.03917529806494713 2023-01-24 05:54:02.930006: step: 796/464, loss: 0.018516667187213898 2023-01-24 05:54:03.678791: step: 798/464, loss: 0.07548412680625916 2023-01-24 05:54:04.415431: step: 800/464, loss: 0.021237986162304878 2023-01-24 05:54:05.134964: step: 802/464, loss: 0.01646965742111206 2023-01-24 05:54:05.846753: step: 804/464, loss: 0.01056175772100687 2023-01-24 05:54:06.497310: step: 806/464, loss: 0.024984072893857956 2023-01-24 05:54:07.214122: step: 808/464, loss: 0.31603842973709106 2023-01-24 05:54:07.964424: step: 810/464, loss: 0.012879881076514721 2023-01-24 05:54:08.714881: step: 812/464, loss: 0.024146106094121933 2023-01-24 05:54:09.425802: step: 814/464, loss: 0.03637917712330818 2023-01-24 05:54:10.167330: step: 816/464, loss: 0.04843752458691597 2023-01-24 05:54:10.799313: step: 818/464, loss: 0.013010178692638874 2023-01-24 05:54:11.600238: step: 820/464, loss: 0.008544894866645336 2023-01-24 05:54:12.302883: step: 822/464, loss: 0.2635175883769989 2023-01-24 05:54:12.982604: step: 824/464, loss: 0.13282142579555511 2023-01-24 05:54:13.656987: step: 826/464, loss: 0.002384813968092203 2023-01-24 05:54:14.409972: step: 828/464, loss: 0.04145323857665062 2023-01-24 05:54:15.035385: step: 830/464, loss: 0.10565589368343353 2023-01-24 05:54:15.750971: step: 832/464, loss: 0.001017981325276196 2023-01-24 05:54:16.463518: step: 834/464, loss: 0.0007453096332028508 2023-01-24 05:54:17.188517: step: 836/464, loss: 0.022320443764328957 2023-01-24 05:54:17.921899: step: 838/464, loss: 0.0025967489928007126 2023-01-24 05:54:18.588187: step: 840/464, loss: 0.035556502640247345 2023-01-24 05:54:19.342756: step: 842/464, loss: 0.013183936476707458 2023-01-24 05:54:20.102675: step: 844/464, loss: 0.0010444383369758725 2023-01-24 05:54:20.926242: step: 846/464, loss: 0.03276212140917778 2023-01-24 05:54:21.667227: step: 848/464, loss: 0.003905899589881301 2023-01-24 05:54:22.434298: step: 850/464, loss: 0.01465561706572771 2023-01-24 05:54:23.183786: step: 852/464, loss: 0.023813139647245407 2023-01-24 05:54:23.837267: step: 854/464, loss: 0.0006879746215417981 2023-01-24 05:54:24.607200: step: 856/464, loss: 0.03710510954260826 2023-01-24 05:54:25.243743: step: 858/464, loss: 0.003056836314499378 2023-01-24 05:54:25.959784: step: 860/464, loss: 0.06689032912254333 2023-01-24 05:54:26.597640: step: 862/464, loss: 0.030993498861789703 2023-01-24 05:54:27.363489: step: 864/464, loss: 0.44617322087287903 2023-01-24 05:54:28.044789: step: 866/464, loss: 0.040697306394577026 2023-01-24 05:54:28.738355: step: 868/464, loss: 0.0204264298081398 2023-01-24 05:54:29.419685: step: 870/464, loss: 0.012933028861880302 2023-01-24 05:54:30.160020: step: 872/464, loss: 0.0016678719548508525 2023-01-24 05:54:30.985969: step: 874/464, loss: 0.05385873094201088 2023-01-24 05:54:31.797435: step: 876/464, loss: 0.022039365023374557 2023-01-24 05:54:32.564546: step: 878/464, loss: 0.007482157554477453 2023-01-24 05:54:33.309257: step: 880/464, loss: 1.6804180145263672 2023-01-24 05:54:34.069893: step: 882/464, loss: 0.02266129106283188 2023-01-24 05:54:34.868109: step: 884/464, loss: 0.01153059396892786 2023-01-24 05:54:35.576078: step: 886/464, loss: 0.2091490924358368 2023-01-24 05:54:36.297615: step: 888/464, loss: 0.015897979959845543 2023-01-24 05:54:37.041433: step: 890/464, loss: 0.0003611915453802794 2023-01-24 05:54:37.865801: step: 892/464, loss: 0.4301433265209198 2023-01-24 05:54:38.624130: step: 894/464, loss: 6.273853068705648e-05 2023-01-24 05:54:39.399382: step: 896/464, loss: 0.05613543838262558 2023-01-24 05:54:40.179913: step: 898/464, loss: 0.02761615626513958 2023-01-24 05:54:40.837553: step: 900/464, loss: 0.0015836823731660843 2023-01-24 05:54:41.508055: step: 902/464, loss: 0.047353874891996384 2023-01-24 05:54:42.232510: step: 904/464, loss: 0.05324863642454147 2023-01-24 05:54:42.979249: step: 906/464, loss: 0.03325873240828514 2023-01-24 05:54:43.669263: step: 908/464, loss: 0.1007314920425415 2023-01-24 05:54:44.534719: step: 910/464, loss: 0.051610033959150314 2023-01-24 05:54:45.221689: step: 912/464, loss: 0.001794341136701405 2023-01-24 05:54:45.928220: step: 914/464, loss: 0.009611096233129501 2023-01-24 05:54:46.636594: step: 916/464, loss: 0.014779017306864262 2023-01-24 05:54:47.285055: step: 918/464, loss: 0.019373752176761627 2023-01-24 05:54:48.033155: step: 920/464, loss: 0.0994553416967392 2023-01-24 05:54:48.814594: step: 922/464, loss: 0.0182206928730011 2023-01-24 05:54:49.546623: step: 924/464, loss: 0.04314351826906204 2023-01-24 05:54:50.236558: step: 926/464, loss: 0.0043644472025334835 2023-01-24 05:54:51.016921: step: 928/464, loss: 0.021911775693297386 2023-01-24 05:54:51.598667: step: 930/464, loss: 0.001663040486164391 ================================================== Loss: 0.053 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3387041896459705, 'r': 0.3284209504916336, 'f1': 0.33348331581713087}, 'combined': 0.24572454849683326, 'epoch': 30} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3207861494357296, 'r': 0.2725571734572618, 'f1': 0.2947115533318628}, 'combined': 0.1830313857534727, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31856211848549604, 'r': 0.31614418969243724, 'f1': 0.3173485485103132}, 'combined': 0.23383577258654656, 'epoch': 30} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3230130962565176, 'r': 0.26875200301184565, 'f1': 0.2933948511844529}, 'combined': 0.18221364441981813, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33681452144127527, 'r': 0.33042335405149775, 'f1': 0.33358832870716343}, 'combined': 0.24580192641580462, 'epoch': 30} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3370516401286407, 'r': 0.27937613692166263, 'f1': 0.30551571057631255}, 'combined': 0.18974133604213098, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.25, 'f1': 0.2777777777777778}, 'combined': 0.18518518518518517, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2903225806451613, 'r': 0.391304347826087, 'f1': 0.33333333333333337}, 'combined': 0.16666666666666669, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5673076923076923, 'r': 0.2543103448275862, 'f1': 0.3511904761904762}, 'combined': 0.23412698412698413, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 05:57:30.866469: step: 2/464, loss: 0.007450570352375507 2023-01-24 05:57:31.554834: step: 4/464, loss: 0.10737699270248413 2023-01-24 05:57:32.311409: step: 6/464, loss: 0.002511220285668969 2023-01-24 05:57:32.970905: step: 8/464, loss: 0.004273401573300362 2023-01-24 05:57:33.731230: step: 10/464, loss: 0.002521744230762124 2023-01-24 05:57:34.491231: step: 12/464, loss: 0.00019417490693740547 2023-01-24 05:57:35.205043: step: 14/464, loss: 0.002275019185617566 2023-01-24 05:57:35.938124: step: 16/464, loss: 0.12431767582893372 2023-01-24 05:57:36.648580: step: 18/464, loss: 0.005227986723184586 2023-01-24 05:57:37.416994: step: 20/464, loss: 0.056336916983127594 2023-01-24 05:57:38.209375: step: 22/464, loss: 0.0018891810905188322 2023-01-24 05:57:38.951652: step: 24/464, loss: 0.03804721310734749 2023-01-24 05:57:39.854729: step: 26/464, loss: 0.009079745039343834 2023-01-24 05:57:40.533474: step: 28/464, loss: 0.006622061599045992 2023-01-24 05:57:41.389639: step: 30/464, loss: 0.048646826297044754 2023-01-24 05:57:42.167221: step: 32/464, loss: 0.01823214814066887 2023-01-24 05:57:42.909379: step: 34/464, loss: 0.08220667392015457 2023-01-24 05:57:43.614996: step: 36/464, loss: 0.010882941074669361 2023-01-24 05:57:44.380203: step: 38/464, loss: 0.013139336369931698 2023-01-24 05:57:45.114897: step: 40/464, loss: 0.006461418699473143 2023-01-24 05:57:45.848387: step: 42/464, loss: 0.0024266575928777456 2023-01-24 05:57:46.539380: step: 44/464, loss: 0.5456418991088867 2023-01-24 05:57:47.210602: step: 46/464, loss: 0.00018644030205905437 2023-01-24 05:57:47.979138: step: 48/464, loss: 4.954357791575603e-05 2023-01-24 05:57:48.648839: step: 50/464, loss: 0.006846207659691572 2023-01-24 05:57:49.443533: step: 52/464, loss: 0.0027074338868260384 2023-01-24 05:57:50.152786: step: 54/464, loss: 0.01821725256741047 2023-01-24 05:57:50.813303: step: 56/464, loss: 0.007177872117608786 2023-01-24 05:57:51.589769: step: 58/464, loss: 0.01788950525224209 2023-01-24 05:57:52.356877: step: 60/464, loss: 0.0005401679081842303 2023-01-24 05:57:53.182008: step: 62/464, loss: 0.05340760201215744 2023-01-24 05:57:53.813495: step: 64/464, loss: 4.108704160898924e-05 2023-01-24 05:57:54.472374: step: 66/464, loss: 0.001280111842788756 2023-01-24 05:57:55.124755: step: 68/464, loss: 0.002295039128512144 2023-01-24 05:57:55.844501: step: 70/464, loss: 0.00215263688005507 2023-01-24 05:57:56.669581: step: 72/464, loss: 0.06053628399968147 2023-01-24 05:57:57.380923: step: 74/464, loss: 0.005914163775742054 2023-01-24 05:57:58.123919: step: 76/464, loss: 2.9299533367156982 2023-01-24 05:57:58.940700: step: 78/464, loss: 0.03084677644073963 2023-01-24 05:57:59.704223: step: 80/464, loss: 0.0009321445832028985 2023-01-24 05:58:00.471354: step: 82/464, loss: 0.0003534965217113495 2023-01-24 05:58:01.254681: step: 84/464, loss: 0.004339384380728006 2023-01-24 05:58:01.931575: step: 86/464, loss: 2.9442649974953383e-05 2023-01-24 05:58:02.698843: step: 88/464, loss: 0.0012765272986143827 2023-01-24 05:58:03.464973: step: 90/464, loss: 0.008331461809575558 2023-01-24 05:58:04.263184: step: 92/464, loss: 0.016410961747169495 2023-01-24 05:58:04.958399: step: 94/464, loss: 0.003537172917276621 2023-01-24 05:58:05.633553: step: 96/464, loss: 0.01527501456439495 2023-01-24 05:58:06.368922: step: 98/464, loss: 0.0031059093307703733 2023-01-24 05:58:07.045797: step: 100/464, loss: 0.001596541958861053 2023-01-24 05:58:07.812254: step: 102/464, loss: 0.02693060413002968 2023-01-24 05:58:08.502693: step: 104/464, loss: 0.02274322509765625 2023-01-24 05:58:09.202587: step: 106/464, loss: 0.0003036792913917452 2023-01-24 05:58:09.892475: step: 108/464, loss: 0.002669687382876873 2023-01-24 05:58:10.621909: step: 110/464, loss: 0.006395821925252676 2023-01-24 05:58:11.354149: step: 112/464, loss: 0.0009263441315852106 2023-01-24 05:58:12.195350: step: 114/464, loss: 0.19540373980998993 2023-01-24 05:58:12.885514: step: 116/464, loss: 0.019305413588881493 2023-01-24 05:58:13.646947: step: 118/464, loss: 0.01990027353167534 2023-01-24 05:58:14.366381: step: 120/464, loss: 0.3651050925254822 2023-01-24 05:58:15.059247: step: 122/464, loss: 0.0071814339607954025 2023-01-24 05:58:15.746199: step: 124/464, loss: 0.04094426706433296 2023-01-24 05:58:16.451784: step: 126/464, loss: 0.026430925354361534 2023-01-24 05:58:17.178243: step: 128/464, loss: 0.028471818193793297 2023-01-24 05:58:17.936111: step: 130/464, loss: 0.0014034638879820704 2023-01-24 05:58:18.703936: step: 132/464, loss: 0.026817435398697853 2023-01-24 05:58:19.430650: step: 134/464, loss: 0.011616945266723633 2023-01-24 05:58:20.062713: step: 136/464, loss: 0.013456545770168304 2023-01-24 05:58:20.745634: step: 138/464, loss: 0.00027974642580375075 2023-01-24 05:58:21.444742: step: 140/464, loss: 0.03479605168104172 2023-01-24 05:58:22.104645: step: 142/464, loss: 2.8743435905198567e-05 2023-01-24 05:58:22.983274: step: 144/464, loss: 0.014272580854594707 2023-01-24 05:58:23.768528: step: 146/464, loss: 0.009507148526608944 2023-01-24 05:58:24.501530: step: 148/464, loss: 0.11540583521127701 2023-01-24 05:58:25.285873: step: 150/464, loss: 0.03117658942937851 2023-01-24 05:58:25.960724: step: 152/464, loss: 0.05746026337146759 2023-01-24 05:58:26.705206: step: 154/464, loss: 0.03753349557518959 2023-01-24 05:58:27.445706: step: 156/464, loss: 0.030995476990938187 2023-01-24 05:58:28.247595: step: 158/464, loss: 0.020567825064063072 2023-01-24 05:58:28.936841: step: 160/464, loss: 0.008550383150577545 2023-01-24 05:58:29.687800: step: 162/464, loss: 0.032725293189287186 2023-01-24 05:58:30.368365: step: 164/464, loss: 0.05792173370718956 2023-01-24 05:58:31.080950: step: 166/464, loss: 0.015494300983846188 2023-01-24 05:58:31.778172: step: 168/464, loss: 0.016035746783018112 2023-01-24 05:58:32.504416: step: 170/464, loss: 0.017190538346767426 2023-01-24 05:58:33.194670: step: 172/464, loss: 0.10798454284667969 2023-01-24 05:58:33.969396: step: 174/464, loss: 0.019811222329735756 2023-01-24 05:58:34.662819: step: 176/464, loss: 0.010970844887197018 2023-01-24 05:58:35.463460: step: 178/464, loss: 0.02815398946404457 2023-01-24 05:58:36.282326: step: 180/464, loss: 0.6478611826896667 2023-01-24 05:58:37.024749: step: 182/464, loss: 0.1307576447725296 2023-01-24 05:58:37.747609: step: 184/464, loss: 0.015528128482401371 2023-01-24 05:58:38.467127: step: 186/464, loss: 0.003247783752158284 2023-01-24 05:58:39.176501: step: 188/464, loss: 0.03964921832084656 2023-01-24 05:58:39.867704: step: 190/464, loss: 0.004766841884702444 2023-01-24 05:58:40.636712: step: 192/464, loss: 0.03177197650074959 2023-01-24 05:58:41.400940: step: 194/464, loss: 0.01902329921722412 2023-01-24 05:58:42.186847: step: 196/464, loss: 0.00788768008351326 2023-01-24 05:58:42.867054: step: 198/464, loss: 0.0015454021049663424 2023-01-24 05:58:43.583924: step: 200/464, loss: 0.01335904560983181 2023-01-24 05:58:44.265541: step: 202/464, loss: 0.012268331833183765 2023-01-24 05:58:45.014277: step: 204/464, loss: 0.03662366420030594 2023-01-24 05:58:45.746202: step: 206/464, loss: 0.02568664960563183 2023-01-24 05:58:46.393984: step: 208/464, loss: 0.029208391904830933 2023-01-24 05:58:47.123480: step: 210/464, loss: 0.005202494096010923 2023-01-24 05:58:47.802502: step: 212/464, loss: 0.022604580968618393 2023-01-24 05:58:48.494611: step: 214/464, loss: 0.04248643293976784 2023-01-24 05:58:49.304130: step: 216/464, loss: 0.05426526442170143 2023-01-24 05:58:50.051203: step: 218/464, loss: 0.171112060546875 2023-01-24 05:58:50.758118: step: 220/464, loss: 0.00690581975504756 2023-01-24 05:58:51.415095: step: 222/464, loss: 0.0018096556887030602 2023-01-24 05:58:52.113896: step: 224/464, loss: 0.013162568211555481 2023-01-24 05:58:52.835738: step: 226/464, loss: 0.03787326067686081 2023-01-24 05:58:53.592801: step: 228/464, loss: 19.952999114990234 2023-01-24 05:58:54.387369: step: 230/464, loss: 0.010027474723756313 2023-01-24 05:58:55.127516: step: 232/464, loss: 0.0030535452533513308 2023-01-24 05:58:55.943604: step: 234/464, loss: 0.010311653837561607 2023-01-24 05:58:56.703984: step: 236/464, loss: 0.03960442915558815 2023-01-24 05:58:57.500985: step: 238/464, loss: 0.01425941288471222 2023-01-24 05:58:58.242891: step: 240/464, loss: 0.016366029158234596 2023-01-24 05:58:58.957821: step: 242/464, loss: 0.1724376380443573 2023-01-24 05:58:59.712525: step: 244/464, loss: 0.02939794212579727 2023-01-24 05:59:00.402843: step: 246/464, loss: 0.0029553743079304695 2023-01-24 05:59:01.063724: step: 248/464, loss: 0.00352324265986681 2023-01-24 05:59:01.815922: step: 250/464, loss: 0.012227809056639671 2023-01-24 05:59:02.570555: step: 252/464, loss: 0.046687014400959015 2023-01-24 05:59:03.233741: step: 254/464, loss: 0.0011639392469078302 2023-01-24 05:59:03.956221: step: 256/464, loss: 0.0010756740812212229 2023-01-24 05:59:04.592518: step: 258/464, loss: 0.026980912312865257 2023-01-24 05:59:05.332531: step: 260/464, loss: 0.03663994371891022 2023-01-24 05:59:06.017454: step: 262/464, loss: 0.17038607597351074 2023-01-24 05:59:06.776843: step: 264/464, loss: 0.01435135118663311 2023-01-24 05:59:07.464210: step: 266/464, loss: 0.008019298315048218 2023-01-24 05:59:08.230630: step: 268/464, loss: 0.0016009538667276502 2023-01-24 05:59:08.991207: step: 270/464, loss: 0.04047354310750961 2023-01-24 05:59:09.752465: step: 272/464, loss: 0.011036979034543037 2023-01-24 05:59:10.575137: step: 274/464, loss: 0.13029997050762177 2023-01-24 05:59:11.378544: step: 276/464, loss: 0.023806972429156303 2023-01-24 05:59:12.169851: step: 278/464, loss: 0.0013524888781830668 2023-01-24 05:59:12.947174: step: 280/464, loss: 0.0006496798596344888 2023-01-24 05:59:13.599557: step: 282/464, loss: 0.0014774593291804194 2023-01-24 05:59:14.306266: step: 284/464, loss: 0.20750577747821808 2023-01-24 05:59:15.023707: step: 286/464, loss: 0.042337559163570404 2023-01-24 05:59:15.832362: step: 288/464, loss: 0.004411226604133844 2023-01-24 05:59:16.472558: step: 290/464, loss: 0.1359243392944336 2023-01-24 05:59:17.255123: step: 292/464, loss: 0.015041140839457512 2023-01-24 05:59:18.153027: step: 294/464, loss: 0.0005121452268213034 2023-01-24 05:59:18.834462: step: 296/464, loss: 0.013008585199713707 2023-01-24 05:59:19.581231: step: 298/464, loss: 0.04285898804664612 2023-01-24 05:59:20.353062: step: 300/464, loss: 0.02849295549094677 2023-01-24 05:59:21.071938: step: 302/464, loss: 0.018777921795845032 2023-01-24 05:59:21.866999: step: 304/464, loss: 0.07624068111181259 2023-01-24 05:59:22.600109: step: 306/464, loss: 0.00042426210711710155 2023-01-24 05:59:23.317133: step: 308/464, loss: 0.0076195960864424706 2023-01-24 05:59:24.019037: step: 310/464, loss: 0.0007941815420053899 2023-01-24 05:59:24.737631: step: 312/464, loss: 0.005111439619213343 2023-01-24 05:59:25.492512: step: 314/464, loss: 0.07643471658229828 2023-01-24 05:59:26.207209: step: 316/464, loss: 0.21657128632068634 2023-01-24 05:59:26.912551: step: 318/464, loss: 0.009065191261470318 2023-01-24 05:59:27.592159: step: 320/464, loss: 0.0010730769718065858 2023-01-24 05:59:28.353310: step: 322/464, loss: 0.09378761798143387 2023-01-24 05:59:29.056245: step: 324/464, loss: 0.005632663611322641 2023-01-24 05:59:29.711826: step: 326/464, loss: 0.3768630921840668 2023-01-24 05:59:30.495644: step: 328/464, loss: 0.6099423766136169 2023-01-24 05:59:31.179434: step: 330/464, loss: 0.022345110774040222 2023-01-24 05:59:31.991453: step: 332/464, loss: 0.0025050854310393333 2023-01-24 05:59:32.665133: step: 334/464, loss: 0.0053538125939667225 2023-01-24 05:59:33.405943: step: 336/464, loss: 0.009213737212121487 2023-01-24 05:59:34.093483: step: 338/464, loss: 0.00515262084081769 2023-01-24 05:59:34.792024: step: 340/464, loss: 0.000878259539604187 2023-01-24 05:59:35.474136: step: 342/464, loss: 0.003352927975356579 2023-01-24 05:59:36.123910: step: 344/464, loss: 0.0005760050262324512 2023-01-24 05:59:36.811315: step: 346/464, loss: 0.010878819040954113 2023-01-24 05:59:37.548828: step: 348/464, loss: 0.0002599854487925768 2023-01-24 05:59:38.423096: step: 350/464, loss: 0.0049443976022303104 2023-01-24 05:59:39.095243: step: 352/464, loss: 0.003746855305507779 2023-01-24 05:59:39.764021: step: 354/464, loss: 0.22560137510299683 2023-01-24 05:59:40.503220: step: 356/464, loss: 0.006116420961916447 2023-01-24 05:59:41.180781: step: 358/464, loss: 4.0767979953670874e-05 2023-01-24 05:59:41.884873: step: 360/464, loss: 0.0025461202021688223 2023-01-24 05:59:42.692927: step: 362/464, loss: 0.06831776350736618 2023-01-24 05:59:43.496134: step: 364/464, loss: 0.00244891457259655 2023-01-24 05:59:44.267649: step: 366/464, loss: 0.008710219524800777 2023-01-24 05:59:44.957332: step: 368/464, loss: 0.008920171298086643 2023-01-24 05:59:45.774822: step: 370/464, loss: 0.023013954982161522 2023-01-24 05:59:46.519926: step: 372/464, loss: 0.028991742059588432 2023-01-24 05:59:47.262767: step: 374/464, loss: 0.018598882481455803 2023-01-24 05:59:48.022469: step: 376/464, loss: 0.0032753553241491318 2023-01-24 05:59:48.685820: step: 378/464, loss: 0.0008013547048904002 2023-01-24 05:59:49.418423: step: 380/464, loss: 0.0356551855802536 2023-01-24 05:59:50.204909: step: 382/464, loss: 0.019524503499269485 2023-01-24 05:59:50.935609: step: 384/464, loss: 0.008192269131541252 2023-01-24 05:59:51.669692: step: 386/464, loss: 0.0015428874175995588 2023-01-24 05:59:52.340607: step: 388/464, loss: 0.02081419713795185 2023-01-24 05:59:53.175640: step: 390/464, loss: 0.01860181801021099 2023-01-24 05:59:53.931529: step: 392/464, loss: 2.680257603060454e-05 2023-01-24 05:59:54.579006: step: 394/464, loss: 0.015773866325616837 2023-01-24 05:59:55.302777: step: 396/464, loss: 0.021631063893437386 2023-01-24 05:59:56.008456: step: 398/464, loss: 0.0528394915163517 2023-01-24 05:59:56.810529: step: 400/464, loss: 0.11525987088680267 2023-01-24 05:59:57.500443: step: 402/464, loss: 0.0013505503302440047 2023-01-24 05:59:58.219216: step: 404/464, loss: 0.05220959335565567 2023-01-24 05:59:58.965196: step: 406/464, loss: 0.3575024902820587 2023-01-24 05:59:59.717960: step: 408/464, loss: 0.15189459919929504 2023-01-24 06:00:00.419347: step: 410/464, loss: 0.030411798506975174 2023-01-24 06:00:01.172033: step: 412/464, loss: 0.0368804894387722 2023-01-24 06:00:01.991741: step: 414/464, loss: 0.011386717669665813 2023-01-24 06:00:02.768526: step: 416/464, loss: 0.027711808681488037 2023-01-24 06:00:03.447739: step: 418/464, loss: 0.00030285576940514147 2023-01-24 06:00:04.168902: step: 420/464, loss: 0.02967562898993492 2023-01-24 06:00:04.930739: step: 422/464, loss: 0.004899161402136087 2023-01-24 06:00:05.588759: step: 424/464, loss: 0.005439385771751404 2023-01-24 06:00:06.390576: step: 426/464, loss: 0.00611588079482317 2023-01-24 06:00:07.154694: step: 428/464, loss: 0.009580517187714577 2023-01-24 06:00:07.911810: step: 430/464, loss: 0.10024572908878326 2023-01-24 06:00:08.580738: step: 432/464, loss: 0.0046290247701108456 2023-01-24 06:00:09.268247: step: 434/464, loss: 0.004417914431542158 2023-01-24 06:00:10.038237: step: 436/464, loss: 0.014274193905293941 2023-01-24 06:00:10.718434: step: 438/464, loss: 0.0033748922869563103 2023-01-24 06:00:11.498653: step: 440/464, loss: 0.006222773343324661 2023-01-24 06:00:12.231101: step: 442/464, loss: 0.025498919188976288 2023-01-24 06:00:12.965881: step: 444/464, loss: 0.017806345596909523 2023-01-24 06:00:13.751890: step: 446/464, loss: 0.003274332731962204 2023-01-24 06:00:14.410940: step: 448/464, loss: 0.0009049939690157771 2023-01-24 06:00:15.237697: step: 450/464, loss: 0.005761545151472092 2023-01-24 06:00:15.982271: step: 452/464, loss: 0.013424933888018131 2023-01-24 06:00:16.717156: step: 454/464, loss: 0.009402374736964703 2023-01-24 06:00:17.391856: step: 456/464, loss: 0.0019368845969438553 2023-01-24 06:00:18.165765: step: 458/464, loss: 0.029811447486281395 2023-01-24 06:00:18.941744: step: 460/464, loss: 0.00637458823621273 2023-01-24 06:00:19.705548: step: 462/464, loss: 0.035835716873407364 2023-01-24 06:00:20.443035: step: 464/464, loss: 0.08940885215997696 2023-01-24 06:00:21.197560: step: 466/464, loss: 0.055901557207107544 2023-01-24 06:00:22.022697: step: 468/464, loss: 0.001355905318632722 2023-01-24 06:00:22.685859: step: 470/464, loss: 0.0005913148052059114 2023-01-24 06:00:23.336994: step: 472/464, loss: 0.004807848483324051 2023-01-24 06:00:24.042779: step: 474/464, loss: 0.01984637789428234 2023-01-24 06:00:24.851155: step: 476/464, loss: 0.16941934823989868 2023-01-24 06:00:25.558899: step: 478/464, loss: 0.0008034526836127043 2023-01-24 06:00:26.347500: step: 480/464, loss: 0.019079819321632385 2023-01-24 06:00:27.172750: step: 482/464, loss: 0.06966307759284973 2023-01-24 06:00:27.885539: step: 484/464, loss: 0.11122968792915344 2023-01-24 06:00:28.677189: step: 486/464, loss: 0.05611448734998703 2023-01-24 06:00:29.455015: step: 488/464, loss: 0.05052759125828743 2023-01-24 06:00:30.194131: step: 490/464, loss: 0.309314489364624 2023-01-24 06:00:30.960522: step: 492/464, loss: 0.023992551490664482 2023-01-24 06:00:31.649258: step: 494/464, loss: 0.051077570766210556 2023-01-24 06:00:32.487931: step: 496/464, loss: 0.004689997062087059 2023-01-24 06:00:33.323581: step: 498/464, loss: 0.014873744919896126 2023-01-24 06:00:34.037198: step: 500/464, loss: 0.030518092215061188 2023-01-24 06:00:34.804113: step: 502/464, loss: 0.00024345805286429822 2023-01-24 06:00:35.518436: step: 504/464, loss: 0.02177843451499939 2023-01-24 06:00:36.244992: step: 506/464, loss: 0.011750375851988792 2023-01-24 06:00:36.992284: step: 508/464, loss: 0.024676991626620293 2023-01-24 06:00:37.733223: step: 510/464, loss: 0.018763341009616852 2023-01-24 06:00:38.529559: step: 512/464, loss: 0.014819656498730183 2023-01-24 06:00:39.221197: step: 514/464, loss: 0.0508464016020298 2023-01-24 06:00:40.007723: step: 516/464, loss: 0.11383599042892456 2023-01-24 06:00:40.756338: step: 518/464, loss: 0.009834877215325832 2023-01-24 06:00:41.564800: step: 520/464, loss: 0.004556347616016865 2023-01-24 06:00:42.379687: step: 522/464, loss: 0.0045471033081412315 2023-01-24 06:00:43.091865: step: 524/464, loss: 0.010128447785973549 2023-01-24 06:00:43.789384: step: 526/464, loss: 0.01945709064602852 2023-01-24 06:00:44.586208: step: 528/464, loss: 0.03735598549246788 2023-01-24 06:00:45.286831: step: 530/464, loss: 0.009542558342218399 2023-01-24 06:00:46.046011: step: 532/464, loss: 0.020316550508141518 2023-01-24 06:00:46.770179: step: 534/464, loss: 0.0042798384092748165 2023-01-24 06:00:47.483805: step: 536/464, loss: 0.027576670050621033 2023-01-24 06:00:48.247231: step: 538/464, loss: 0.005680317524820566 2023-01-24 06:00:48.959078: step: 540/464, loss: 0.005088218487799168 2023-01-24 06:00:49.636685: step: 542/464, loss: 0.0036994877737015486 2023-01-24 06:00:50.306317: step: 544/464, loss: 0.08250491321086884 2023-01-24 06:00:51.051663: step: 546/464, loss: 0.003032066859304905 2023-01-24 06:00:51.723215: step: 548/464, loss: 0.006267304066568613 2023-01-24 06:00:52.505508: step: 550/464, loss: 0.012118152342736721 2023-01-24 06:00:53.274029: step: 552/464, loss: 0.3567858040332794 2023-01-24 06:00:53.999999: step: 554/464, loss: 0.06748120486736298 2023-01-24 06:00:54.729482: step: 556/464, loss: 0.0005088126054033637 2023-01-24 06:00:55.423560: step: 558/464, loss: 0.05795755609869957 2023-01-24 06:00:56.121788: step: 560/464, loss: 0.0013993033207952976 2023-01-24 06:00:56.866037: step: 562/464, loss: 0.002470789710059762 2023-01-24 06:00:57.597866: step: 564/464, loss: 0.02224491722881794 2023-01-24 06:00:58.349652: step: 566/464, loss: 0.02115313708782196 2023-01-24 06:00:59.130061: step: 568/464, loss: 0.010152964852750301 2023-01-24 06:00:59.915450: step: 570/464, loss: 0.016146540641784668 2023-01-24 06:01:00.656689: step: 572/464, loss: 0.0042912825010716915 2023-01-24 06:01:01.430160: step: 574/464, loss: 0.0017807194963097572 2023-01-24 06:01:02.220401: step: 576/464, loss: 0.04278545081615448 2023-01-24 06:01:02.990201: step: 578/464, loss: 0.006922352127730846 2023-01-24 06:01:03.732102: step: 580/464, loss: 0.006656852085143328 2023-01-24 06:01:04.475682: step: 582/464, loss: 0.0019682818092405796 2023-01-24 06:01:05.157321: step: 584/464, loss: 0.00039459459367208183 2023-01-24 06:01:05.927232: step: 586/464, loss: 0.0036324732936918736 2023-01-24 06:01:06.741213: step: 588/464, loss: 0.05086535960435867 2023-01-24 06:01:07.449464: step: 590/464, loss: 0.023409342393279076 2023-01-24 06:01:08.170861: step: 592/464, loss: 0.9159653186798096 2023-01-24 06:01:08.810853: step: 594/464, loss: 0.009631011635065079 2023-01-24 06:01:09.532371: step: 596/464, loss: 0.0059930006973445415 2023-01-24 06:01:10.262649: step: 598/464, loss: 0.010481104254722595 2023-01-24 06:01:11.014609: step: 600/464, loss: 0.2565106749534607 2023-01-24 06:01:11.699223: step: 602/464, loss: 0.003442235291004181 2023-01-24 06:01:12.455404: step: 604/464, loss: 0.01562698930501938 2023-01-24 06:01:13.093729: step: 606/464, loss: 0.012435711920261383 2023-01-24 06:01:13.817298: step: 608/464, loss: 0.00962899997830391 2023-01-24 06:01:14.572242: step: 610/464, loss: 0.007187160197645426 2023-01-24 06:01:15.207060: step: 612/464, loss: 0.008403594605624676 2023-01-24 06:01:15.877529: step: 614/464, loss: 0.001437154714949429 2023-01-24 06:01:16.659172: step: 616/464, loss: 0.013526865281164646 2023-01-24 06:01:17.320789: step: 618/464, loss: 0.00013178416702430695 2023-01-24 06:01:18.036682: step: 620/464, loss: 0.016230305656790733 2023-01-24 06:01:18.729815: step: 622/464, loss: 0.04665118828415871 2023-01-24 06:01:19.421903: step: 624/464, loss: 0.012508937157690525 2023-01-24 06:01:20.305762: step: 626/464, loss: 0.040033601224422455 2023-01-24 06:01:20.990940: step: 628/464, loss: 0.00021456902322825044 2023-01-24 06:01:21.726766: step: 630/464, loss: 0.013318480923771858 2023-01-24 06:01:22.383802: step: 632/464, loss: 0.05723757669329643 2023-01-24 06:01:23.097647: step: 634/464, loss: 0.005295167677104473 2023-01-24 06:01:23.867314: step: 636/464, loss: 0.3125 2023-01-24 06:01:24.645068: step: 638/464, loss: 0.07070460170507431 2023-01-24 06:01:25.353302: step: 640/464, loss: 0.0036590429954230785 2023-01-24 06:01:26.051665: step: 642/464, loss: 0.00417109951376915 2023-01-24 06:01:26.783405: step: 644/464, loss: 0.0002276741579407826 2023-01-24 06:01:27.532173: step: 646/464, loss: 0.025444338098168373 2023-01-24 06:01:28.327365: step: 648/464, loss: 0.006258037872612476 2023-01-24 06:01:29.100795: step: 650/464, loss: 0.006045197602361441 2023-01-24 06:01:29.822754: step: 652/464, loss: 0.0074656312353909016 2023-01-24 06:01:30.460557: step: 654/464, loss: 0.003545462852343917 2023-01-24 06:01:31.247785: step: 656/464, loss: 0.09607963263988495 2023-01-24 06:01:31.958504: step: 658/464, loss: 0.014723972417414188 2023-01-24 06:01:32.633140: step: 660/464, loss: 0.025644179433584213 2023-01-24 06:01:33.307146: step: 662/464, loss: 0.11263712495565414 2023-01-24 06:01:34.000907: step: 664/464, loss: 0.001962358597666025 2023-01-24 06:01:34.764554: step: 666/464, loss: 0.004253838676959276 2023-01-24 06:01:35.414666: step: 668/464, loss: 0.0016341455047950149 2023-01-24 06:01:36.125409: step: 670/464, loss: 0.0016103158704936504 2023-01-24 06:01:36.838937: step: 672/464, loss: 0.005251705646514893 2023-01-24 06:01:37.526134: step: 674/464, loss: 0.1602022647857666 2023-01-24 06:01:38.202337: step: 676/464, loss: 0.007221348118036985 2023-01-24 06:01:38.904355: step: 678/464, loss: 0.03209817409515381 2023-01-24 06:01:39.589354: step: 680/464, loss: 0.06724380701780319 2023-01-24 06:01:40.338321: step: 682/464, loss: 0.0058819022960960865 2023-01-24 06:01:41.165553: step: 684/464, loss: 0.009005686268210411 2023-01-24 06:01:41.877550: step: 686/464, loss: 0.012575240805745125 2023-01-24 06:01:42.574340: step: 688/464, loss: 0.008312860503792763 2023-01-24 06:01:43.403941: step: 690/464, loss: 0.08882316201925278 2023-01-24 06:01:44.285588: step: 692/464, loss: 0.011073197238147259 2023-01-24 06:01:44.955522: step: 694/464, loss: 0.07950153946876526 2023-01-24 06:01:45.560612: step: 696/464, loss: 0.011889828369021416 2023-01-24 06:01:46.323999: step: 698/464, loss: 0.23003815114498138 2023-01-24 06:01:47.037208: step: 700/464, loss: 0.00033969481592066586 2023-01-24 06:01:47.820816: step: 702/464, loss: 0.013609882444143295 2023-01-24 06:01:48.550535: step: 704/464, loss: 0.046654343605041504 2023-01-24 06:01:49.240804: step: 706/464, loss: 0.040340326726436615 2023-01-24 06:01:49.926010: step: 708/464, loss: 0.022635284811258316 2023-01-24 06:01:50.625692: step: 710/464, loss: 0.009701196104288101 2023-01-24 06:01:51.426664: step: 712/464, loss: 0.019521228969097137 2023-01-24 06:01:52.126594: step: 714/464, loss: 0.0022151279263198376 2023-01-24 06:01:52.901913: step: 716/464, loss: 0.008278511464595795 2023-01-24 06:01:53.602628: step: 718/464, loss: 0.025777986273169518 2023-01-24 06:01:54.334196: step: 720/464, loss: 0.01621449738740921 2023-01-24 06:01:55.098623: step: 722/464, loss: 0.004078149329870939 2023-01-24 06:01:55.772178: step: 724/464, loss: 0.0037733963690698147 2023-01-24 06:01:56.516534: step: 726/464, loss: 0.019777603447437286 2023-01-24 06:01:57.324255: step: 728/464, loss: 0.009691568091511726 2023-01-24 06:01:57.956161: step: 730/464, loss: 0.0017670301022008061 2023-01-24 06:01:58.661963: step: 732/464, loss: 0.0014202864840626717 2023-01-24 06:01:59.389343: step: 734/464, loss: 0.004550382494926453 2023-01-24 06:02:00.128430: step: 736/464, loss: 0.01193520799279213 2023-01-24 06:02:00.796075: step: 738/464, loss: 0.009102024137973785 2023-01-24 06:02:01.568636: step: 740/464, loss: 0.022574957460165024 2023-01-24 06:02:02.352667: step: 742/464, loss: 0.017012832686305046 2023-01-24 06:02:03.111453: step: 744/464, loss: 0.07949044555425644 2023-01-24 06:02:03.843204: step: 746/464, loss: 0.03681956231594086 2023-01-24 06:02:04.563269: step: 748/464, loss: 0.0013021467020735145 2023-01-24 06:02:05.280723: step: 750/464, loss: 0.02038952335715294 2023-01-24 06:02:06.095911: step: 752/464, loss: 0.16530972719192505 2023-01-24 06:02:06.824662: step: 754/464, loss: 0.014058690518140793 2023-01-24 06:02:07.548268: step: 756/464, loss: 0.016745617613196373 2023-01-24 06:02:08.277084: step: 758/464, loss: 0.017155256122350693 2023-01-24 06:02:08.998781: step: 760/464, loss: 0.7648048400878906 2023-01-24 06:02:09.728337: step: 762/464, loss: 0.003873082809150219 2023-01-24 06:02:10.462462: step: 764/464, loss: 0.0006463755271397531 2023-01-24 06:02:11.323543: step: 766/464, loss: 0.014763910323381424 2023-01-24 06:02:12.077228: step: 768/464, loss: 0.02320656180381775 2023-01-24 06:02:12.807326: step: 770/464, loss: 0.04300972819328308 2023-01-24 06:02:13.503556: step: 772/464, loss: 0.012654591351747513 2023-01-24 06:02:14.306306: step: 774/464, loss: 0.004290216602385044 2023-01-24 06:02:15.118220: step: 776/464, loss: 0.007781681604683399 2023-01-24 06:02:15.854463: step: 778/464, loss: 0.006587921176105738 2023-01-24 06:02:16.634222: step: 780/464, loss: 0.0022355031687766314 2023-01-24 06:02:17.356936: step: 782/464, loss: 0.011383858509361744 2023-01-24 06:02:18.154201: step: 784/464, loss: 0.00821562111377716 2023-01-24 06:02:18.937980: step: 786/464, loss: 0.015217304229736328 2023-01-24 06:02:19.672995: step: 788/464, loss: 0.001672367099672556 2023-01-24 06:02:20.393889: step: 790/464, loss: 0.02738807536661625 2023-01-24 06:02:21.107458: step: 792/464, loss: 0.23105153441429138 2023-01-24 06:02:21.829872: step: 794/464, loss: 0.025443637743592262 2023-01-24 06:02:22.576943: step: 796/464, loss: 0.046076126396656036 2023-01-24 06:02:23.322630: step: 798/464, loss: 0.022370098158717155 2023-01-24 06:02:24.029021: step: 800/464, loss: 0.0020557662937790155 2023-01-24 06:02:24.713684: step: 802/464, loss: 0.0009072026587091386 2023-01-24 06:02:25.443874: step: 804/464, loss: 0.002057021716609597 2023-01-24 06:02:26.192952: step: 806/464, loss: 0.05425307899713516 2023-01-24 06:02:26.953023: step: 808/464, loss: 0.082684725522995 2023-01-24 06:02:27.677830: step: 810/464, loss: 0.05836813524365425 2023-01-24 06:02:28.359974: step: 812/464, loss: 0.35637176036834717 2023-01-24 06:02:29.195481: step: 814/464, loss: 0.0005271787522360682 2023-01-24 06:02:29.884650: step: 816/464, loss: 0.0050505283288657665 2023-01-24 06:02:30.709040: step: 818/464, loss: 0.05915964022278786 2023-01-24 06:02:31.510229: step: 820/464, loss: 0.14169052243232727 2023-01-24 06:02:32.228599: step: 822/464, loss: 0.27492451667785645 2023-01-24 06:02:32.929651: step: 824/464, loss: 0.0031870307866483927 2023-01-24 06:02:33.651158: step: 826/464, loss: 0.04295702278614044 2023-01-24 06:02:34.369407: step: 828/464, loss: 0.28713443875312805 2023-01-24 06:02:35.086930: step: 830/464, loss: 0.00047704242751933634 2023-01-24 06:02:35.789463: step: 832/464, loss: 0.009593743830919266 2023-01-24 06:02:36.436761: step: 834/464, loss: 0.02768618054687977 2023-01-24 06:02:37.129982: step: 836/464, loss: 0.01568949967622757 2023-01-24 06:02:37.859954: step: 838/464, loss: 0.002845700131729245 2023-01-24 06:02:38.840235: step: 840/464, loss: 0.0011342305224388838 2023-01-24 06:02:39.550066: step: 842/464, loss: 0.0013543365057557821 2023-01-24 06:02:40.271714: step: 844/464, loss: 0.02245929092168808 2023-01-24 06:02:41.020866: step: 846/464, loss: 0.03169582411646843 2023-01-24 06:02:41.842175: step: 848/464, loss: 0.05935420095920563 2023-01-24 06:02:42.531357: step: 850/464, loss: 0.017007581889629364 2023-01-24 06:02:43.260386: step: 852/464, loss: 0.010829615406692028 2023-01-24 06:02:43.896417: step: 854/464, loss: 0.0015771217877045274 2023-01-24 06:02:44.567568: step: 856/464, loss: 0.05150710418820381 2023-01-24 06:02:45.274840: step: 858/464, loss: 0.020031724125146866 2023-01-24 06:02:46.151825: step: 860/464, loss: 0.004909739829599857 2023-01-24 06:02:46.900279: step: 862/464, loss: 0.017233915627002716 2023-01-24 06:02:47.607327: step: 864/464, loss: 0.01338171400129795 2023-01-24 06:02:48.297601: step: 866/464, loss: 0.007492452394217253 2023-01-24 06:02:49.077110: step: 868/464, loss: 0.001295449328608811 2023-01-24 06:02:49.831812: step: 870/464, loss: 0.003779294900596142 2023-01-24 06:02:50.538619: step: 872/464, loss: 0.023648492991924286 2023-01-24 06:02:51.209679: step: 874/464, loss: 0.01128461305052042 2023-01-24 06:02:51.946540: step: 876/464, loss: 0.007469322998076677 2023-01-24 06:02:52.646573: step: 878/464, loss: 0.0035442670341581106 2023-01-24 06:02:53.418210: step: 880/464, loss: 0.024421758949756622 2023-01-24 06:02:54.194837: step: 882/464, loss: 0.0779152438044548 2023-01-24 06:02:55.043835: step: 884/464, loss: 0.03339240700006485 2023-01-24 06:02:55.748364: step: 886/464, loss: 4.4183776481077075e-05 2023-01-24 06:02:56.474287: step: 888/464, loss: 0.004640334751456976 2023-01-24 06:02:57.123291: step: 890/464, loss: 0.13504371047019958 2023-01-24 06:02:57.893934: step: 892/464, loss: 0.012566328048706055 2023-01-24 06:02:58.614613: step: 894/464, loss: 0.005399944726377726 2023-01-24 06:02:59.352740: step: 896/464, loss: 0.0016925518866628408 2023-01-24 06:03:00.093993: step: 898/464, loss: 0.007467552553862333 2023-01-24 06:03:00.915159: step: 900/464, loss: 0.15433648228645325 2023-01-24 06:03:01.723702: step: 902/464, loss: 0.024352390319108963 2023-01-24 06:03:02.569597: step: 904/464, loss: 0.009650146588683128 2023-01-24 06:03:03.307053: step: 906/464, loss: 0.03369938209652901 2023-01-24 06:03:04.091233: step: 908/464, loss: 0.001058822381310165 2023-01-24 06:03:04.820278: step: 910/464, loss: 0.010992859490215778 2023-01-24 06:03:05.555609: step: 912/464, loss: 0.0026332309935241938 2023-01-24 06:03:06.207246: step: 914/464, loss: 0.005801694467663765 2023-01-24 06:03:07.008675: step: 916/464, loss: 0.017871178686618805 2023-01-24 06:03:07.735200: step: 918/464, loss: 0.04245564341545105 2023-01-24 06:03:08.631022: step: 920/464, loss: 0.26371610164642334 2023-01-24 06:03:09.390658: step: 922/464, loss: 0.027379389852285385 2023-01-24 06:03:10.180908: step: 924/464, loss: 0.025049660354852676 2023-01-24 06:03:10.940823: step: 926/464, loss: 0.015954000875353813 2023-01-24 06:03:11.637499: step: 928/464, loss: 0.0003321287513244897 2023-01-24 06:03:12.248930: step: 930/464, loss: 1.5560624888166785e-05 ================================================== Loss: 0.089 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3296310750492652, 'r': 0.3421407932674536, 'f1': 0.3357694563350988}, 'combined': 0.24740907308902016, 'epoch': 31} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3066112076581451, 'r': 0.2672243924451423, 'f1': 0.2855660878083252}, 'combined': 0.17735157032306514, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3108123335329218, 'r': 0.32732608180412065, 'f1': 0.3188555362491157}, 'combined': 0.23494618460461159, 'epoch': 31} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.30444780673186617, 'r': 0.2638347099840382, 'f1': 0.28269002276743954}, 'combined': 0.17556538256083087, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32950922811895256, 'r': 0.33638702984439556, 'f1': 0.33291260981783377}, 'combined': 0.24530402828682488, 'epoch': 31} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3195668373453204, 'r': 0.27386940991428343, 'f1': 0.29495866346583477}, 'combined': 0.18318485415246583, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.33653846153846156, 'r': 0.25, 'f1': 0.28688524590163933}, 'combined': 0.1912568306010929, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.275, 'r': 0.358695652173913, 'f1': 0.3113207547169812}, 'combined': 0.1556603773584906, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5454545454545454, 'r': 0.20689655172413793, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:05:52.288242: step: 2/464, loss: 0.006672237068414688 2023-01-24 06:05:53.055747: step: 4/464, loss: 0.00039798574289307 2023-01-24 06:05:53.715370: step: 6/464, loss: 0.005223429761826992 2023-01-24 06:05:54.389327: step: 8/464, loss: 0.0015860287239775062 2023-01-24 06:05:55.058544: step: 10/464, loss: 9.945368219632655e-05 2023-01-24 06:05:55.746125: step: 12/464, loss: 0.06839819252490997 2023-01-24 06:05:56.508326: step: 14/464, loss: 0.0004776528512593359 2023-01-24 06:05:57.224162: step: 16/464, loss: 0.003025172045454383 2023-01-24 06:05:58.083216: step: 18/464, loss: 0.07096771895885468 2023-01-24 06:05:58.855828: step: 20/464, loss: 0.0016611287137493491 2023-01-24 06:05:59.543011: step: 22/464, loss: 0.004521335940808058 2023-01-24 06:06:00.268252: step: 24/464, loss: 0.012017553672194481 2023-01-24 06:06:01.031499: step: 26/464, loss: 0.01229409035295248 2023-01-24 06:06:01.957984: step: 28/464, loss: 0.04657362774014473 2023-01-24 06:06:02.718651: step: 30/464, loss: 0.06783688068389893 2023-01-24 06:06:03.375059: step: 32/464, loss: 0.0014619616558775306 2023-01-24 06:06:04.118249: step: 34/464, loss: 0.021388240158557892 2023-01-24 06:06:04.791486: step: 36/464, loss: 0.009178460575640202 2023-01-24 06:06:05.515911: step: 38/464, loss: 0.008435037918388844 2023-01-24 06:06:06.238258: step: 40/464, loss: 0.0066080521792173386 2023-01-24 06:06:06.956583: step: 42/464, loss: 0.0008859040099196136 2023-01-24 06:06:07.674345: step: 44/464, loss: 0.020066630095243454 2023-01-24 06:06:08.474464: step: 46/464, loss: 0.003385049058124423 2023-01-24 06:06:09.244949: step: 48/464, loss: 0.00659538246691227 2023-01-24 06:06:10.030014: step: 50/464, loss: 0.0007390569080598652 2023-01-24 06:06:10.743916: step: 52/464, loss: 0.03373732045292854 2023-01-24 06:06:11.492332: step: 54/464, loss: 0.09182209521532059 2023-01-24 06:06:12.235489: step: 56/464, loss: 0.007950812578201294 2023-01-24 06:06:12.915642: step: 58/464, loss: 0.03541431576013565 2023-01-24 06:06:13.577092: step: 60/464, loss: 1.0369291305541992 2023-01-24 06:06:14.325340: step: 62/464, loss: 0.008863094262778759 2023-01-24 06:06:15.049520: step: 64/464, loss: 0.06318230926990509 2023-01-24 06:06:15.908918: step: 66/464, loss: 0.0228151585906744 2023-01-24 06:06:16.634645: step: 68/464, loss: 0.0006504048360511661 2023-01-24 06:06:17.377354: step: 70/464, loss: 0.030889738351106644 2023-01-24 06:06:18.052008: step: 72/464, loss: 0.011693434789776802 2023-01-24 06:06:18.784526: step: 74/464, loss: 0.01864388957619667 2023-01-24 06:06:19.528814: step: 76/464, loss: 0.005030644591897726 2023-01-24 06:06:20.313029: step: 78/464, loss: 0.0004858130414504558 2023-01-24 06:06:20.961626: step: 80/464, loss: 0.7136760950088501 2023-01-24 06:06:21.773389: step: 82/464, loss: 0.13197855651378632 2023-01-24 06:06:22.562874: step: 84/464, loss: 0.08248142898082733 2023-01-24 06:06:23.302280: step: 86/464, loss: 0.07017794251441956 2023-01-24 06:06:24.051615: step: 88/464, loss: 0.0020354515872895718 2023-01-24 06:06:24.842247: step: 90/464, loss: 0.015875235199928284 2023-01-24 06:06:25.681740: step: 92/464, loss: 0.05482667312026024 2023-01-24 06:06:26.387116: step: 94/464, loss: 0.005427805706858635 2023-01-24 06:06:27.058599: step: 96/464, loss: 0.03023233264684677 2023-01-24 06:06:27.752967: step: 98/464, loss: 0.005275893025100231 2023-01-24 06:06:28.631394: step: 100/464, loss: 0.7011404633522034 2023-01-24 06:06:29.411635: step: 102/464, loss: 0.0805116519331932 2023-01-24 06:06:30.204591: step: 104/464, loss: 0.0021324814297258854 2023-01-24 06:06:30.932497: step: 106/464, loss: 0.016958115622401237 2023-01-24 06:06:31.624192: step: 108/464, loss: 0.0028949484694749117 2023-01-24 06:06:32.361297: step: 110/464, loss: 0.01721436157822609 2023-01-24 06:06:33.119491: step: 112/464, loss: 0.001554527203552425 2023-01-24 06:06:33.834596: step: 114/464, loss: 0.0034987321123480797 2023-01-24 06:06:34.596163: step: 116/464, loss: 0.021231235936284065 2023-01-24 06:06:35.288701: step: 118/464, loss: 0.05176122859120369 2023-01-24 06:06:36.081635: step: 120/464, loss: 0.010055532678961754 2023-01-24 06:06:36.842654: step: 122/464, loss: 0.005222069099545479 2023-01-24 06:06:37.512125: step: 124/464, loss: 0.0016218442469835281 2023-01-24 06:06:38.291968: step: 126/464, loss: 0.0006480618030764163 2023-01-24 06:06:39.059907: step: 128/464, loss: 0.006303100846707821 2023-01-24 06:06:39.811463: step: 130/464, loss: 0.12113538384437561 2023-01-24 06:06:40.501586: step: 132/464, loss: 0.0008748953696340322 2023-01-24 06:06:41.247248: step: 134/464, loss: 0.03531118482351303 2023-01-24 06:06:42.097152: step: 136/464, loss: 0.015936831012368202 2023-01-24 06:06:42.818413: step: 138/464, loss: 0.05206011235713959 2023-01-24 06:06:43.605840: step: 140/464, loss: 0.052800193428993225 2023-01-24 06:06:44.333738: step: 142/464, loss: 0.019787365570664406 2023-01-24 06:06:45.020275: step: 144/464, loss: 0.0025979802012443542 2023-01-24 06:06:45.774069: step: 146/464, loss: 0.006643963512033224 2023-01-24 06:06:46.440350: step: 148/464, loss: 0.001538788783363998 2023-01-24 06:06:47.199551: step: 150/464, loss: 0.007603897247463465 2023-01-24 06:06:47.963534: step: 152/464, loss: 0.023585248738527298 2023-01-24 06:06:48.738401: step: 154/464, loss: 0.019093260169029236 2023-01-24 06:06:49.399272: step: 156/464, loss: 0.005717174615710974 2023-01-24 06:06:50.086087: step: 158/464, loss: 0.009188220836222172 2023-01-24 06:06:50.800987: step: 160/464, loss: 0.0014259631279855967 2023-01-24 06:06:51.586709: step: 162/464, loss: 0.028234383091330528 2023-01-24 06:06:52.300484: step: 164/464, loss: 3.184197339578532e-05 2023-01-24 06:06:53.118204: step: 166/464, loss: 0.04168025776743889 2023-01-24 06:06:53.836312: step: 168/464, loss: 0.06091833859682083 2023-01-24 06:06:54.517505: step: 170/464, loss: 0.003078239969909191 2023-01-24 06:06:55.296289: step: 172/464, loss: 0.005105409771203995 2023-01-24 06:06:56.009102: step: 174/464, loss: 0.01845010742545128 2023-01-24 06:06:56.752405: step: 176/464, loss: 0.004629744682461023 2023-01-24 06:06:57.489386: step: 178/464, loss: 0.018534662202000618 2023-01-24 06:06:58.218960: step: 180/464, loss: 0.02453276515007019 2023-01-24 06:06:59.041114: step: 182/464, loss: 0.0069121792912483215 2023-01-24 06:06:59.739128: step: 184/464, loss: 0.012893215753138065 2023-01-24 06:07:00.480982: step: 186/464, loss: 0.007988216355443 2023-01-24 06:07:01.185196: step: 188/464, loss: 0.008411542512476444 2023-01-24 06:07:01.839331: step: 190/464, loss: 0.008808481507003307 2023-01-24 06:07:02.499365: step: 192/464, loss: 0.019918840378522873 2023-01-24 06:07:03.243309: step: 194/464, loss: 0.009231162257492542 2023-01-24 06:07:03.909777: step: 196/464, loss: 0.003246739273890853 2023-01-24 06:07:04.611983: step: 198/464, loss: 0.027443023398518562 2023-01-24 06:07:05.302094: step: 200/464, loss: 0.02603556402027607 2023-01-24 06:07:06.023063: step: 202/464, loss: 0.014729096554219723 2023-01-24 06:07:06.710338: step: 204/464, loss: 0.02216481789946556 2023-01-24 06:07:07.455821: step: 206/464, loss: 0.017174361273646355 2023-01-24 06:07:08.159272: step: 208/464, loss: 0.017632676288485527 2023-01-24 06:07:08.873930: step: 210/464, loss: 0.000630052643828094 2023-01-24 06:07:09.576481: step: 212/464, loss: 0.0018898368580266833 2023-01-24 06:07:10.319398: step: 214/464, loss: 0.0033350172452628613 2023-01-24 06:07:11.127726: step: 216/464, loss: 0.014642687514424324 2023-01-24 06:07:11.856972: step: 218/464, loss: 0.0010271642822772264 2023-01-24 06:07:12.610534: step: 220/464, loss: 0.01610853709280491 2023-01-24 06:07:13.338693: step: 222/464, loss: 0.018197592347860336 2023-01-24 06:07:14.069746: step: 224/464, loss: 0.0016287442995235324 2023-01-24 06:07:14.761536: step: 226/464, loss: 0.001314380788244307 2023-01-24 06:07:15.491754: step: 228/464, loss: 0.004783174954354763 2023-01-24 06:07:16.219427: step: 230/464, loss: 0.0020701689645648003 2023-01-24 06:07:17.021246: step: 232/464, loss: 0.008220354095101357 2023-01-24 06:07:17.739076: step: 234/464, loss: 0.013796810060739517 2023-01-24 06:07:18.461552: step: 236/464, loss: 0.00972803309559822 2023-01-24 06:07:19.177834: step: 238/464, loss: 0.0003476463898550719 2023-01-24 06:07:19.967788: step: 240/464, loss: 0.029232390224933624 2023-01-24 06:07:20.638472: step: 242/464, loss: 0.007535560987889767 2023-01-24 06:07:21.342488: step: 244/464, loss: 0.038384776562452316 2023-01-24 06:07:22.028516: step: 246/464, loss: 0.00025465062935836613 2023-01-24 06:07:22.696721: step: 248/464, loss: 0.01523551158607006 2023-01-24 06:07:23.384565: step: 250/464, loss: 0.008116251789033413 2023-01-24 06:07:24.121923: step: 252/464, loss: 0.3224211037158966 2023-01-24 06:07:24.783940: step: 254/464, loss: 0.0007047428516671062 2023-01-24 06:07:25.513064: step: 256/464, loss: 0.024528512731194496 2023-01-24 06:07:26.222330: step: 258/464, loss: 0.10475929081439972 2023-01-24 06:07:26.889885: step: 260/464, loss: 0.002466861391440034 2023-01-24 06:07:27.663383: step: 262/464, loss: 0.013222538866102695 2023-01-24 06:07:28.393932: step: 264/464, loss: 0.020091462880373 2023-01-24 06:07:29.153094: step: 266/464, loss: 0.008655213750898838 2023-01-24 06:07:29.819043: step: 268/464, loss: 0.4579889178276062 2023-01-24 06:07:30.624550: step: 270/464, loss: 0.01043748389929533 2023-01-24 06:07:31.362911: step: 272/464, loss: 0.002390442881733179 2023-01-24 06:07:32.133973: step: 274/464, loss: 0.01000374648720026 2023-01-24 06:07:32.852313: step: 276/464, loss: 0.07795630395412445 2023-01-24 06:07:33.530849: step: 278/464, loss: 0.13697943091392517 2023-01-24 06:07:34.304372: step: 280/464, loss: 0.025146078318357468 2023-01-24 06:07:35.033079: step: 282/464, loss: 0.015179144218564034 2023-01-24 06:07:35.694350: step: 284/464, loss: 0.0067111230455338955 2023-01-24 06:07:36.499492: step: 286/464, loss: 0.009315651841461658 2023-01-24 06:07:37.252537: step: 288/464, loss: 0.001091480371542275 2023-01-24 06:07:37.982969: step: 290/464, loss: 0.1570238173007965 2023-01-24 06:07:38.714456: step: 292/464, loss: 0.00019482392235659063 2023-01-24 06:07:39.444908: step: 294/464, loss: 0.017355773597955704 2023-01-24 06:07:40.066985: step: 296/464, loss: 0.009909744374454021 2023-01-24 06:07:40.839692: step: 298/464, loss: 0.11022976040840149 2023-01-24 06:07:41.679785: step: 300/464, loss: 0.032275114208459854 2023-01-24 06:07:42.432353: step: 302/464, loss: 0.09890097379684448 2023-01-24 06:07:43.197694: step: 304/464, loss: 0.030709289014339447 2023-01-24 06:07:43.986985: step: 306/464, loss: 0.011800462380051613 2023-01-24 06:07:44.760306: step: 308/464, loss: 0.024882722645998 2023-01-24 06:07:45.466497: step: 310/464, loss: 0.00911180954426527 2023-01-24 06:07:46.152876: step: 312/464, loss: 0.2190168797969818 2023-01-24 06:07:47.004824: step: 314/464, loss: 1.5261512994766235 2023-01-24 06:07:47.806325: step: 316/464, loss: 0.002229273086413741 2023-01-24 06:07:48.571892: step: 318/464, loss: 0.025895819067955017 2023-01-24 06:07:49.269563: step: 320/464, loss: 0.0013171464670449495 2023-01-24 06:07:50.040324: step: 322/464, loss: 0.012127063237130642 2023-01-24 06:07:50.777059: step: 324/464, loss: 0.03169773146510124 2023-01-24 06:07:51.527160: step: 326/464, loss: 0.08925698697566986 2023-01-24 06:07:52.241515: step: 328/464, loss: 0.003148272167891264 2023-01-24 06:07:53.030852: step: 330/464, loss: 0.17901629209518433 2023-01-24 06:07:53.733258: step: 332/464, loss: 0.011668531224131584 2023-01-24 06:07:54.472239: step: 334/464, loss: 0.003586321836337447 2023-01-24 06:07:55.191858: step: 336/464, loss: 0.003102678107097745 2023-01-24 06:07:55.870212: step: 338/464, loss: 8.600712317274883e-05 2023-01-24 06:07:56.539540: step: 340/464, loss: 0.02279592677950859 2023-01-24 06:07:57.229920: step: 342/464, loss: 0.008252730593085289 2023-01-24 06:07:57.900879: step: 344/464, loss: 0.0008808693382889032 2023-01-24 06:07:58.629393: step: 346/464, loss: 0.0013259114930406213 2023-01-24 06:07:59.376584: step: 348/464, loss: 0.2572783827781677 2023-01-24 06:08:00.109358: step: 350/464, loss: 0.00201928592287004 2023-01-24 06:08:00.778936: step: 352/464, loss: 0.015110744163393974 2023-01-24 06:08:01.485052: step: 354/464, loss: 0.007167731411755085 2023-01-24 06:08:02.274418: step: 356/464, loss: 0.011743845418095589 2023-01-24 06:08:03.011978: step: 358/464, loss: 0.20097900927066803 2023-01-24 06:08:03.714117: step: 360/464, loss: 0.007075333036482334 2023-01-24 06:08:04.392552: step: 362/464, loss: 0.0007429586839862168 2023-01-24 06:08:05.109784: step: 364/464, loss: 0.0007310786750167608 2023-01-24 06:08:05.881024: step: 366/464, loss: 0.001109412987716496 2023-01-24 06:08:06.483987: step: 368/464, loss: 0.0006234599859453738 2023-01-24 06:08:07.184439: step: 370/464, loss: 0.0329633466899395 2023-01-24 06:08:07.834593: step: 372/464, loss: 0.00012382878048811108 2023-01-24 06:08:08.494726: step: 374/464, loss: 0.01997263915836811 2023-01-24 06:08:09.305424: step: 376/464, loss: 0.0039342851378023624 2023-01-24 06:08:09.989161: step: 378/464, loss: 0.009925339370965958 2023-01-24 06:08:10.851829: step: 380/464, loss: 0.024046069011092186 2023-01-24 06:08:11.605801: step: 382/464, loss: 0.003556904848664999 2023-01-24 06:08:12.307944: step: 384/464, loss: 0.00487151462584734 2023-01-24 06:08:13.140225: step: 386/464, loss: 0.40563297271728516 2023-01-24 06:08:13.842118: step: 388/464, loss: 0.12907062470912933 2023-01-24 06:08:14.518068: step: 390/464, loss: 0.0023276321589946747 2023-01-24 06:08:15.280051: step: 392/464, loss: 0.027979984879493713 2023-01-24 06:08:16.000641: step: 394/464, loss: 0.00020647967176046222 2023-01-24 06:08:16.817348: step: 396/464, loss: 0.10996302962303162 2023-01-24 06:08:17.550259: step: 398/464, loss: 0.005133859347552061 2023-01-24 06:08:18.216663: step: 400/464, loss: 0.0011490934994071722 2023-01-24 06:08:19.077304: step: 402/464, loss: 0.04221979156136513 2023-01-24 06:08:19.885430: step: 404/464, loss: 0.021809222176671028 2023-01-24 06:08:20.670844: step: 406/464, loss: 7.231834888458252 2023-01-24 06:08:21.419072: step: 408/464, loss: 0.1555778831243515 2023-01-24 06:08:22.143765: step: 410/464, loss: 0.032972101122140884 2023-01-24 06:08:22.894267: step: 412/464, loss: 0.01077666599303484 2023-01-24 06:08:23.709552: step: 414/464, loss: 0.025415778160095215 2023-01-24 06:08:24.399191: step: 416/464, loss: 0.0023834318853914738 2023-01-24 06:08:25.206168: step: 418/464, loss: 0.020479848608374596 2023-01-24 06:08:25.877074: step: 420/464, loss: 0.02557503432035446 2023-01-24 06:08:26.577760: step: 422/464, loss: 0.005967243108898401 2023-01-24 06:08:27.357897: step: 424/464, loss: 0.007172387093305588 2023-01-24 06:08:28.124504: step: 426/464, loss: 0.029743533581495285 2023-01-24 06:08:28.831627: step: 428/464, loss: 0.019332105293869972 2023-01-24 06:08:29.602052: step: 430/464, loss: 5.191092895984184e-06 2023-01-24 06:08:30.377363: step: 432/464, loss: 0.06320548802614212 2023-01-24 06:08:31.101588: step: 434/464, loss: 0.008165508508682251 2023-01-24 06:08:31.767172: step: 436/464, loss: 0.0007337440620176494 2023-01-24 06:08:32.540184: step: 438/464, loss: 0.019695475697517395 2023-01-24 06:08:33.217504: step: 440/464, loss: 0.010529866442084312 2023-01-24 06:08:33.978587: step: 442/464, loss: 0.0012097518192604184 2023-01-24 06:08:34.695000: step: 444/464, loss: 0.039149560034275055 2023-01-24 06:08:35.377023: step: 446/464, loss: 0.00028673012275248766 2023-01-24 06:08:36.182859: step: 448/464, loss: 0.01631099171936512 2023-01-24 06:08:36.949347: step: 450/464, loss: 0.01560910977423191 2023-01-24 06:08:37.655064: step: 452/464, loss: 0.01941799744963646 2023-01-24 06:08:38.416632: step: 454/464, loss: 0.03485127538442612 2023-01-24 06:08:39.138668: step: 456/464, loss: 0.025263575837016106 2023-01-24 06:08:39.891215: step: 458/464, loss: 0.02511150948703289 2023-01-24 06:08:40.597111: step: 460/464, loss: 0.08032690733671188 2023-01-24 06:08:41.389769: step: 462/464, loss: 0.0008171582594513893 2023-01-24 06:08:42.119021: step: 464/464, loss: 0.001809613429941237 2023-01-24 06:08:42.872742: step: 466/464, loss: 0.0789065957069397 2023-01-24 06:08:43.588869: step: 468/464, loss: 0.10419817268848419 2023-01-24 06:08:44.349433: step: 470/464, loss: 0.009127458557486534 2023-01-24 06:08:45.035322: step: 472/464, loss: 0.13789282739162445 2023-01-24 06:08:45.737371: step: 474/464, loss: 0.01613314263522625 2023-01-24 06:08:46.494268: step: 476/464, loss: 0.0026753416750580072 2023-01-24 06:08:47.331462: step: 478/464, loss: 0.035527680069208145 2023-01-24 06:08:48.173899: step: 480/464, loss: 0.005595831666141748 2023-01-24 06:08:48.870517: step: 482/464, loss: 0.013181515969336033 2023-01-24 06:08:49.686060: step: 484/464, loss: 5.290010452270508 2023-01-24 06:08:50.372607: step: 486/464, loss: 0.00702305743470788 2023-01-24 06:08:51.047087: step: 488/464, loss: 0.014755690470337868 2023-01-24 06:08:51.815263: step: 490/464, loss: 0.016292473301291466 2023-01-24 06:08:52.511914: step: 492/464, loss: 0.003482257016003132 2023-01-24 06:08:53.315545: step: 494/464, loss: 0.006003714632242918 2023-01-24 06:08:54.034690: step: 496/464, loss: 0.006600044202059507 2023-01-24 06:08:54.768989: step: 498/464, loss: 0.0008444880368188024 2023-01-24 06:08:55.413129: step: 500/464, loss: 0.003154363017529249 2023-01-24 06:08:56.160169: step: 502/464, loss: 0.006443787831813097 2023-01-24 06:08:56.900475: step: 504/464, loss: 0.01576988771557808 2023-01-24 06:08:57.662776: step: 506/464, loss: 0.058171436190605164 2023-01-24 06:08:58.343693: step: 508/464, loss: 0.06349492818117142 2023-01-24 06:08:59.077904: step: 510/464, loss: 0.0003439192078076303 2023-01-24 06:08:59.704974: step: 512/464, loss: 3.413219747017138e-05 2023-01-24 06:09:00.403143: step: 514/464, loss: 0.007753042504191399 2023-01-24 06:09:01.123013: step: 516/464, loss: 0.0002126079925801605 2023-01-24 06:09:01.891017: step: 518/464, loss: 0.22025060653686523 2023-01-24 06:09:02.703772: step: 520/464, loss: 0.03245438635349274 2023-01-24 06:09:03.478308: step: 522/464, loss: 0.00424537668004632 2023-01-24 06:09:04.237037: step: 524/464, loss: 0.22938542068004608 2023-01-24 06:09:04.982528: step: 526/464, loss: 0.0029693772085011005 2023-01-24 06:09:05.682473: step: 528/464, loss: 0.043420515954494476 2023-01-24 06:09:06.367387: step: 530/464, loss: 0.003392632585018873 2023-01-24 06:09:07.095979: step: 532/464, loss: 0.007097211200743914 2023-01-24 06:09:07.815337: step: 534/464, loss: 0.3268764019012451 2023-01-24 06:09:08.519287: step: 536/464, loss: 0.013606131076812744 2023-01-24 06:09:09.247302: step: 538/464, loss: 0.0022379527799785137 2023-01-24 06:09:09.968421: step: 540/464, loss: 0.010322199203073978 2023-01-24 06:09:10.672601: step: 542/464, loss: 0.143593430519104 2023-01-24 06:09:11.337822: step: 544/464, loss: 0.006676128134131432 2023-01-24 06:09:12.026286: step: 546/464, loss: 0.10117863118648529 2023-01-24 06:09:12.776266: step: 548/464, loss: 0.0023868491407483816 2023-01-24 06:09:13.535865: step: 550/464, loss: 0.0019674180075526237 2023-01-24 06:09:14.235277: step: 552/464, loss: 0.0002450496540404856 2023-01-24 06:09:15.033529: step: 554/464, loss: 0.03220265731215477 2023-01-24 06:09:15.709510: step: 556/464, loss: 0.012878494337201118 2023-01-24 06:09:16.484657: step: 558/464, loss: 0.014794738963246346 2023-01-24 06:09:17.239517: step: 560/464, loss: 0.005773617420345545 2023-01-24 06:09:17.990525: step: 562/464, loss: 0.005836400203406811 2023-01-24 06:09:18.804118: step: 564/464, loss: 0.011275751516222954 2023-01-24 06:09:19.549636: step: 566/464, loss: 0.3081274628639221 2023-01-24 06:09:20.279813: step: 568/464, loss: 0.00132565398234874 2023-01-24 06:09:20.991420: step: 570/464, loss: 0.02459624595940113 2023-01-24 06:09:21.756904: step: 572/464, loss: 0.0017288029193878174 2023-01-24 06:09:22.527214: step: 574/464, loss: 0.029550323262810707 2023-01-24 06:09:23.333723: step: 576/464, loss: 0.0034652845934033394 2023-01-24 06:09:24.009354: step: 578/464, loss: 0.016499284654855728 2023-01-24 06:09:24.714264: step: 580/464, loss: 0.025307010859251022 2023-01-24 06:09:25.460540: step: 582/464, loss: 0.0004522954404819757 2023-01-24 06:09:26.264061: step: 584/464, loss: 0.04433143511414528 2023-01-24 06:09:27.067953: step: 586/464, loss: 0.029701635241508484 2023-01-24 06:09:27.790449: step: 588/464, loss: 0.0012609551195055246 2023-01-24 06:09:28.650565: step: 590/464, loss: 0.005977481137961149 2023-01-24 06:09:29.364196: step: 592/464, loss: 0.022268379107117653 2023-01-24 06:09:30.095293: step: 594/464, loss: 0.007316329050809145 2023-01-24 06:09:30.811096: step: 596/464, loss: 0.0010635869111865759 2023-01-24 06:09:31.472075: step: 598/464, loss: 0.004664260894060135 2023-01-24 06:09:32.192430: step: 600/464, loss: 0.013532687909901142 2023-01-24 06:09:32.954245: step: 602/464, loss: 0.09427149593830109 2023-01-24 06:09:33.696735: step: 604/464, loss: 0.05134844407439232 2023-01-24 06:09:34.418362: step: 606/464, loss: 9.73513160715811e-05 2023-01-24 06:09:35.164330: step: 608/464, loss: 0.20104339718818665 2023-01-24 06:09:35.862453: step: 610/464, loss: 9.715192794799805 2023-01-24 06:09:36.540081: step: 612/464, loss: 0.5705389976501465 2023-01-24 06:09:37.264493: step: 614/464, loss: 0.01592349261045456 2023-01-24 06:09:37.982838: step: 616/464, loss: 0.04646773636341095 2023-01-24 06:09:38.667374: step: 618/464, loss: 0.11873049288988113 2023-01-24 06:09:39.355859: step: 620/464, loss: 0.24818703532218933 2023-01-24 06:09:40.058283: step: 622/464, loss: 0.09224333614110947 2023-01-24 06:09:40.834839: step: 624/464, loss: 0.052292946726083755 2023-01-24 06:09:41.613841: step: 626/464, loss: 0.03228820115327835 2023-01-24 06:09:42.416161: step: 628/464, loss: 0.004404714331030846 2023-01-24 06:09:43.146159: step: 630/464, loss: 0.32245051860809326 2023-01-24 06:09:43.829925: step: 632/464, loss: 0.016344305127859116 2023-01-24 06:09:44.592025: step: 634/464, loss: 0.0008270391263067722 2023-01-24 06:09:45.267702: step: 636/464, loss: 0.002304938854649663 2023-01-24 06:09:45.979274: step: 638/464, loss: 0.04042618349194527 2023-01-24 06:09:46.777738: step: 640/464, loss: 0.04226240888237953 2023-01-24 06:09:47.479990: step: 642/464, loss: 0.004248725716024637 2023-01-24 06:09:48.223900: step: 644/464, loss: 0.003116982989013195 2023-01-24 06:09:49.074095: step: 646/464, loss: 0.001212684321217239 2023-01-24 06:09:49.895365: step: 648/464, loss: 0.010079382918775082 2023-01-24 06:09:50.579322: step: 650/464, loss: 0.022457556799054146 2023-01-24 06:09:51.227318: step: 652/464, loss: 0.00903982575982809 2023-01-24 06:09:51.982932: step: 654/464, loss: 0.0014533146750181913 2023-01-24 06:09:52.671820: step: 656/464, loss: 0.07138672471046448 2023-01-24 06:09:53.369737: step: 658/464, loss: 0.00844450481235981 2023-01-24 06:09:54.117216: step: 660/464, loss: 0.010615388862788677 2023-01-24 06:09:54.831143: step: 662/464, loss: 0.1854914128780365 2023-01-24 06:09:55.509610: step: 664/464, loss: 0.015586788766086102 2023-01-24 06:09:56.214175: step: 666/464, loss: 0.00446612574160099 2023-01-24 06:09:56.894625: step: 668/464, loss: 9.343422425445169e-05 2023-01-24 06:09:57.691869: step: 670/464, loss: 0.0018104618648067117 2023-01-24 06:09:58.542708: step: 672/464, loss: 0.03854462876915932 2023-01-24 06:09:59.234517: step: 674/464, loss: 0.009988157078623772 2023-01-24 06:10:00.029135: step: 676/464, loss: 0.054880864918231964 2023-01-24 06:10:00.731353: step: 678/464, loss: 0.09408848732709885 2023-01-24 06:10:01.484366: step: 680/464, loss: 0.0537644661962986 2023-01-24 06:10:02.341746: step: 682/464, loss: 0.005208852235227823 2023-01-24 06:10:03.069782: step: 684/464, loss: 0.002501447219401598 2023-01-24 06:10:03.838149: step: 686/464, loss: 0.05998269096016884 2023-01-24 06:10:04.609829: step: 688/464, loss: 0.15073977410793304 2023-01-24 06:10:05.350282: step: 690/464, loss: 0.001690528355538845 2023-01-24 06:10:06.087445: step: 692/464, loss: 0.0025427769869565964 2023-01-24 06:10:06.878081: step: 694/464, loss: 0.035794906318187714 2023-01-24 06:10:07.603259: step: 696/464, loss: 0.0347069650888443 2023-01-24 06:10:08.340530: step: 698/464, loss: 0.04867216572165489 2023-01-24 06:10:09.057338: step: 700/464, loss: 0.028771717101335526 2023-01-24 06:10:09.799881: step: 702/464, loss: 0.017442874610424042 2023-01-24 06:10:10.550349: step: 704/464, loss: 0.05742257088422775 2023-01-24 06:10:11.316078: step: 706/464, loss: 0.02653212659060955 2023-01-24 06:10:12.119739: step: 708/464, loss: 0.0015883547021076083 2023-01-24 06:10:12.830593: step: 710/464, loss: 0.012720917351543903 2023-01-24 06:10:13.575226: step: 712/464, loss: 0.0412566177546978 2023-01-24 06:10:14.319852: step: 714/464, loss: 0.00924015324562788 2023-01-24 06:10:15.118655: step: 716/464, loss: 0.0015413612127304077 2023-01-24 06:10:15.845442: step: 718/464, loss: 0.17302869260311127 2023-01-24 06:10:16.537668: step: 720/464, loss: 0.2881391644477844 2023-01-24 06:10:17.302366: step: 722/464, loss: 0.007352608256042004 2023-01-24 06:10:18.082042: step: 724/464, loss: 0.011552570387721062 2023-01-24 06:10:18.827703: step: 726/464, loss: 0.03673321753740311 2023-01-24 06:10:19.565324: step: 728/464, loss: 0.002769971964880824 2023-01-24 06:10:20.367243: step: 730/464, loss: 0.0043175918981432915 2023-01-24 06:10:21.036988: step: 732/464, loss: 0.02598220482468605 2023-01-24 06:10:21.873910: step: 734/464, loss: 0.0896417498588562 2023-01-24 06:10:22.642678: step: 736/464, loss: 0.07240792363882065 2023-01-24 06:10:23.428571: step: 738/464, loss: 0.004663995932787657 2023-01-24 06:10:24.175456: step: 740/464, loss: 0.007267426233738661 2023-01-24 06:10:24.949709: step: 742/464, loss: 0.026949474588036537 2023-01-24 06:10:25.591683: step: 744/464, loss: 0.02133816108107567 2023-01-24 06:10:26.334007: step: 746/464, loss: 0.0009054642869159579 2023-01-24 06:10:27.123415: step: 748/464, loss: 0.027352290228009224 2023-01-24 06:10:27.854247: step: 750/464, loss: 0.031512752175331116 2023-01-24 06:10:28.558129: step: 752/464, loss: 0.001044303411617875 2023-01-24 06:10:29.309601: step: 754/464, loss: 0.0032093217596411705 2023-01-24 06:10:30.132356: step: 756/464, loss: 0.0123143857344985 2023-01-24 06:10:30.828116: step: 758/464, loss: 0.0051696086302399635 2023-01-24 06:10:31.519153: step: 760/464, loss: 0.04265246167778969 2023-01-24 06:10:32.240417: step: 762/464, loss: 0.03394169732928276 2023-01-24 06:10:32.979710: step: 764/464, loss: 0.009011897258460522 2023-01-24 06:10:33.665473: step: 766/464, loss: 0.009601665660738945 2023-01-24 06:10:34.422655: step: 768/464, loss: 0.001267896848730743 2023-01-24 06:10:35.189448: step: 770/464, loss: 0.002758424961939454 2023-01-24 06:10:35.911797: step: 772/464, loss: 0.000714236288331449 2023-01-24 06:10:36.774000: step: 774/464, loss: 0.01285460963845253 2023-01-24 06:10:37.548963: step: 776/464, loss: 0.03342447802424431 2023-01-24 06:10:38.198968: step: 778/464, loss: 0.013974891044199467 2023-01-24 06:10:38.903963: step: 780/464, loss: 0.021319733932614326 2023-01-24 06:10:39.631483: step: 782/464, loss: 0.0034458094742149115 2023-01-24 06:10:40.364836: step: 784/464, loss: 0.005771171301603317 2023-01-24 06:10:41.124462: step: 786/464, loss: 0.1669687032699585 2023-01-24 06:10:41.831918: step: 788/464, loss: 0.01793059892952442 2023-01-24 06:10:42.588390: step: 790/464, loss: 0.1300530731678009 2023-01-24 06:10:43.363345: step: 792/464, loss: 0.02867552451789379 2023-01-24 06:10:44.121043: step: 794/464, loss: 0.007103492971509695 2023-01-24 06:10:44.793816: step: 796/464, loss: 0.00393635593354702 2023-01-24 06:10:45.417104: step: 798/464, loss: 0.0006900137523189187 2023-01-24 06:10:46.134275: step: 800/464, loss: 0.007099061738699675 2023-01-24 06:10:46.927030: step: 802/464, loss: 0.09573821723461151 2023-01-24 06:10:47.651327: step: 804/464, loss: 0.05650990083813667 2023-01-24 06:10:48.318705: step: 806/464, loss: 0.01625436171889305 2023-01-24 06:10:49.083536: step: 808/464, loss: 0.006412264425307512 2023-01-24 06:10:49.796765: step: 810/464, loss: 0.011279560625553131 2023-01-24 06:10:50.602404: step: 812/464, loss: 0.004043412860482931 2023-01-24 06:10:51.325295: step: 814/464, loss: 0.022079112008213997 2023-01-24 06:10:52.019990: step: 816/464, loss: 0.024804426357150078 2023-01-24 06:10:52.679833: step: 818/464, loss: 0.0019032611744478345 2023-01-24 06:10:53.464974: step: 820/464, loss: 0.01402607373893261 2023-01-24 06:10:54.217033: step: 822/464, loss: 0.010601145215332508 2023-01-24 06:10:54.924254: step: 824/464, loss: 0.005030548200011253 2023-01-24 06:10:55.735731: step: 826/464, loss: 0.01895087957382202 2023-01-24 06:10:56.528363: step: 828/464, loss: 0.016737395897507668 2023-01-24 06:10:57.328138: step: 830/464, loss: 0.009662316180765629 2023-01-24 06:10:58.147778: step: 832/464, loss: 0.13119490444660187 2023-01-24 06:10:58.940291: step: 834/464, loss: 0.03237944841384888 2023-01-24 06:10:59.648486: step: 836/464, loss: 0.07424513250589371 2023-01-24 06:11:00.433725: step: 838/464, loss: 0.00170231016818434 2023-01-24 06:11:01.215794: step: 840/464, loss: 0.024623574689030647 2023-01-24 06:11:01.965613: step: 842/464, loss: 0.005516501143574715 2023-01-24 06:11:02.727192: step: 844/464, loss: 0.00010877756722038612 2023-01-24 06:11:03.455705: step: 846/464, loss: 0.004584172740578651 2023-01-24 06:11:04.188855: step: 848/464, loss: 0.03591621667146683 2023-01-24 06:11:04.870949: step: 850/464, loss: 0.046530336141586304 2023-01-24 06:11:05.608641: step: 852/464, loss: 0.005217722151428461 2023-01-24 06:11:06.314469: step: 854/464, loss: 0.0305581483989954 2023-01-24 06:11:07.105136: step: 856/464, loss: 0.0009626333485357463 2023-01-24 06:11:07.873714: step: 858/464, loss: 0.05425672233104706 2023-01-24 06:11:08.606744: step: 860/464, loss: 0.0038780109025537968 2023-01-24 06:11:09.280930: step: 862/464, loss: 0.02388712950050831 2023-01-24 06:11:10.023531: step: 864/464, loss: 0.033917948603630066 2023-01-24 06:11:10.718736: step: 866/464, loss: 0.00023149006301537156 2023-01-24 06:11:11.577650: step: 868/464, loss: 0.039608728140592575 2023-01-24 06:11:12.387105: step: 870/464, loss: 0.015074286609888077 2023-01-24 06:11:13.061357: step: 872/464, loss: 0.0005526712047867477 2023-01-24 06:11:13.797875: step: 874/464, loss: 0.0229922104626894 2023-01-24 06:11:14.531148: step: 876/464, loss: 0.008480170741677284 2023-01-24 06:11:15.341660: step: 878/464, loss: 0.0006979976897127926 2023-01-24 06:11:16.034215: step: 880/464, loss: 0.03483644500374794 2023-01-24 06:11:16.757541: step: 882/464, loss: 0.1716504991054535 2023-01-24 06:11:17.423002: step: 884/464, loss: 0.0076842340640723705 2023-01-24 06:11:18.175698: step: 886/464, loss: 0.02543296478688717 2023-01-24 06:11:18.959782: step: 888/464, loss: 0.0012912801466882229 2023-01-24 06:11:19.669000: step: 890/464, loss: 0.10062738507986069 2023-01-24 06:11:20.315672: step: 892/464, loss: 0.002663268242031336 2023-01-24 06:11:21.002006: step: 894/464, loss: 0.0084781963378191 2023-01-24 06:11:21.755821: step: 896/464, loss: 0.013621035031974316 2023-01-24 06:11:22.498702: step: 898/464, loss: 0.005025189369916916 2023-01-24 06:11:23.192500: step: 900/464, loss: 0.6844657063484192 2023-01-24 06:11:23.921294: step: 902/464, loss: 0.05140011012554169 2023-01-24 06:11:24.762759: step: 904/464, loss: 0.2284938097000122 2023-01-24 06:11:25.666335: step: 906/464, loss: 0.033565670251846313 2023-01-24 06:11:26.422222: step: 908/464, loss: 0.011310895904898643 2023-01-24 06:11:27.134155: step: 910/464, loss: 0.01956539787352085 2023-01-24 06:11:27.867550: step: 912/464, loss: 0.0008775049936957657 2023-01-24 06:11:28.648646: step: 914/464, loss: 0.0022613483015447855 2023-01-24 06:11:29.479307: step: 916/464, loss: 0.02137225866317749 2023-01-24 06:11:30.143071: step: 918/464, loss: 0.012067358009517193 2023-01-24 06:11:30.919064: step: 920/464, loss: 0.04696095362305641 2023-01-24 06:11:31.680987: step: 922/464, loss: 0.020845679566264153 2023-01-24 06:11:32.438177: step: 924/464, loss: 0.023146284744143486 2023-01-24 06:11:33.195279: step: 926/464, loss: 0.004508962854743004 2023-01-24 06:11:33.918851: step: 928/464, loss: 0.003973621409386396 2023-01-24 06:11:34.619907: step: 930/464, loss: 0.006215892732143402 ================================================== Loss: 0.091 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34180601566795693, 'r': 0.338563074342834, 'f1': 0.3401768163559076}, 'combined': 0.2506566015254056, 'epoch': 32} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3204615038418997, 'r': 0.2634624221308899, 'f1': 0.28918001214366656}, 'combined': 0.1795960075418561, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.316319191108517, 'r': 0.32052077429212156, 'f1': 0.3184061226238418}, 'combined': 0.2346150377228308, 'epoch': 32} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31809290249674765, 'r': 0.26219898675694403, 'f1': 0.2874540839662168}, 'combined': 0.17852411530533466, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3423474905463267, 'r': 0.33195363884093537, 'f1': 0.3370704579367494}, 'combined': 0.24836770584813112, 'epoch': 32} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.334663070958697, 'r': 0.27017759779965955, 'f1': 0.2989827544814166}, 'combined': 0.18568402646740612, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3472222222222222, 'r': 0.26785714285714285, 'f1': 0.30241935483870963}, 'combined': 0.2016129032258064, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3046875, 'r': 0.42391304347826086, 'f1': 0.3545454545454545}, 'combined': 0.17727272727272725, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5096153846153846, 'r': 0.22844827586206898, 'f1': 0.31547619047619047}, 'combined': 0.2103174603174603, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:14:13.326971: step: 2/464, loss: 0.00039098167326301336 2023-01-24 06:14:13.996006: step: 4/464, loss: 0.0019515041494742036 2023-01-24 06:14:14.689669: step: 6/464, loss: 0.00439158221706748 2023-01-24 06:14:15.329095: step: 8/464, loss: 0.001543243182823062 2023-01-24 06:14:16.101199: step: 10/464, loss: 0.012657595798373222 2023-01-24 06:14:16.843597: step: 12/464, loss: 0.012029886245727539 2023-01-24 06:14:17.550655: step: 14/464, loss: 0.0003889836370944977 2023-01-24 06:14:18.358464: step: 16/464, loss: 0.03859444707632065 2023-01-24 06:14:19.068467: step: 18/464, loss: 0.005608262028545141 2023-01-24 06:14:19.853459: step: 20/464, loss: 0.029072171077132225 2023-01-24 06:14:20.687906: step: 22/464, loss: 0.002594446297734976 2023-01-24 06:14:21.465116: step: 24/464, loss: 0.03561032935976982 2023-01-24 06:14:22.179206: step: 26/464, loss: 0.008360390551388264 2023-01-24 06:14:22.945252: step: 28/464, loss: 0.02092169225215912 2023-01-24 06:14:23.647153: step: 30/464, loss: 0.01666991412639618 2023-01-24 06:14:24.395073: step: 32/464, loss: 0.0022188760340213776 2023-01-24 06:14:25.141133: step: 34/464, loss: 0.03185074031352997 2023-01-24 06:14:25.889132: step: 36/464, loss: 0.027600795030593872 2023-01-24 06:14:26.570632: step: 38/464, loss: 0.16664382815361023 2023-01-24 06:14:27.293086: step: 40/464, loss: 0.0018672705627977848 2023-01-24 06:14:27.985653: step: 42/464, loss: 0.002315365942195058 2023-01-24 06:14:28.745482: step: 44/464, loss: 0.0025608143769204617 2023-01-24 06:14:29.413902: step: 46/464, loss: 0.025399038568139076 2023-01-24 06:14:30.186023: step: 48/464, loss: 0.008031615987420082 2023-01-24 06:14:30.946059: step: 50/464, loss: 0.13993428647518158 2023-01-24 06:14:31.610770: step: 52/464, loss: 0.02097419835627079 2023-01-24 06:14:32.405919: step: 54/464, loss: 0.00789231713861227 2023-01-24 06:14:33.087724: step: 56/464, loss: 0.0020788833498954773 2023-01-24 06:14:33.822626: step: 58/464, loss: 0.0011773493606597185 2023-01-24 06:14:34.532487: step: 60/464, loss: 0.0006447955383919179 2023-01-24 06:14:35.276700: step: 62/464, loss: 0.010896679945290089 2023-01-24 06:14:36.068827: step: 64/464, loss: 0.005736788734793663 2023-01-24 06:14:36.881470: step: 66/464, loss: 0.0026948293671011925 2023-01-24 06:14:37.635451: step: 68/464, loss: 0.03583534061908722 2023-01-24 06:14:38.375685: step: 70/464, loss: 0.0006530498503707349 2023-01-24 06:14:39.156865: step: 72/464, loss: 0.1562230885028839 2023-01-24 06:14:39.842007: step: 74/464, loss: 0.0003401939757168293 2023-01-24 06:14:40.565577: step: 76/464, loss: 0.015257839113473892 2023-01-24 06:14:41.300815: step: 78/464, loss: 0.06005620211362839 2023-01-24 06:14:42.025808: step: 80/464, loss: 0.003746387083083391 2023-01-24 06:14:42.731964: step: 82/464, loss: 0.015048716217279434 2023-01-24 06:14:43.451218: step: 84/464, loss: 0.003595659276470542 2023-01-24 06:14:44.095782: step: 86/464, loss: 0.008061950094997883 2023-01-24 06:14:44.900882: step: 88/464, loss: 0.0037724985741078854 2023-01-24 06:14:45.592167: step: 90/464, loss: 0.00019585739937610924 2023-01-24 06:14:46.563905: step: 92/464, loss: 0.0028360611759126186 2023-01-24 06:14:47.307905: step: 94/464, loss: 0.0010991257149726152 2023-01-24 06:14:48.088215: step: 96/464, loss: 0.01256584469228983 2023-01-24 06:14:48.861501: step: 98/464, loss: 0.012041222304105759 2023-01-24 06:14:49.538400: step: 100/464, loss: 0.23359690606594086 2023-01-24 06:14:50.256274: step: 102/464, loss: 0.0024997605942189693 2023-01-24 06:14:51.023011: step: 104/464, loss: 0.005596070550382137 2023-01-24 06:14:51.772296: step: 106/464, loss: 0.026922399178147316 2023-01-24 06:14:52.488551: step: 108/464, loss: 0.00506148487329483 2023-01-24 06:14:53.335185: step: 110/464, loss: 0.09594239294528961 2023-01-24 06:14:54.039495: step: 112/464, loss: 0.017567602917551994 2023-01-24 06:14:54.728529: step: 114/464, loss: 0.04349970072507858 2023-01-24 06:14:55.392711: step: 116/464, loss: 0.0021890331991016865 2023-01-24 06:14:56.134439: step: 118/464, loss: 0.005069403443485498 2023-01-24 06:14:56.914535: step: 120/464, loss: 0.08780425041913986 2023-01-24 06:14:57.691482: step: 122/464, loss: 0.2537309229373932 2023-01-24 06:14:58.396889: step: 124/464, loss: 2.541379690170288 2023-01-24 06:14:59.074799: step: 126/464, loss: 0.01147314440459013 2023-01-24 06:14:59.868699: step: 128/464, loss: 0.017582839354872704 2023-01-24 06:15:00.704749: step: 130/464, loss: 0.1737164407968521 2023-01-24 06:15:01.442235: step: 132/464, loss: 0.002044209511950612 2023-01-24 06:15:02.133414: step: 134/464, loss: 0.0007453494472429156 2023-01-24 06:15:02.866395: step: 136/464, loss: 0.0845332145690918 2023-01-24 06:15:03.629986: step: 138/464, loss: 0.04407544061541557 2023-01-24 06:15:04.373426: step: 140/464, loss: 0.09098831564188004 2023-01-24 06:15:05.111886: step: 142/464, loss: 0.011893196031451225 2023-01-24 06:15:05.962824: step: 144/464, loss: 0.009819947183132172 2023-01-24 06:15:06.725459: step: 146/464, loss: 0.009369265288114548 2023-01-24 06:15:07.382203: step: 148/464, loss: 0.008498580195009708 2023-01-24 06:15:08.042696: step: 150/464, loss: 0.0017171023646369576 2023-01-24 06:15:08.737588: step: 152/464, loss: 0.037098441272974014 2023-01-24 06:15:09.445836: step: 154/464, loss: 0.005075047258287668 2023-01-24 06:15:10.143354: step: 156/464, loss: 0.014594484120607376 2023-01-24 06:15:10.885806: step: 158/464, loss: 0.0008824823307804763 2023-01-24 06:15:11.564761: step: 160/464, loss: 1.8132606744766235 2023-01-24 06:15:12.327360: step: 162/464, loss: 0.002179122529923916 2023-01-24 06:15:12.966263: step: 164/464, loss: 0.033111296594142914 2023-01-24 06:15:13.686973: step: 166/464, loss: 0.008549502119421959 2023-01-24 06:15:14.356350: step: 168/464, loss: 0.011830088682472706 2023-01-24 06:15:15.051637: step: 170/464, loss: 0.0008865146664902568 2023-01-24 06:15:15.727664: step: 172/464, loss: 0.0002516806125640869 2023-01-24 06:15:16.419736: step: 174/464, loss: 0.0003081305476371199 2023-01-24 06:15:17.149038: step: 176/464, loss: 0.03597329556941986 2023-01-24 06:15:17.902605: step: 178/464, loss: 0.08819016069173813 2023-01-24 06:15:18.676058: step: 180/464, loss: 0.023465489968657494 2023-01-24 06:15:19.466274: step: 182/464, loss: 0.010968365706503391 2023-01-24 06:15:20.119369: step: 184/464, loss: 0.3380451500415802 2023-01-24 06:15:20.819941: step: 186/464, loss: 0.0003763749555218965 2023-01-24 06:15:21.479190: step: 188/464, loss: 0.011835026554763317 2023-01-24 06:15:22.390773: step: 190/464, loss: 0.0027454986702650785 2023-01-24 06:15:23.123451: step: 192/464, loss: 0.055424366146326065 2023-01-24 06:15:23.853149: step: 194/464, loss: 0.03973700851202011 2023-01-24 06:15:24.608686: step: 196/464, loss: 0.023937562480568886 2023-01-24 06:15:25.292576: step: 198/464, loss: 0.006588765420019627 2023-01-24 06:15:25.955818: step: 200/464, loss: 0.04747241362929344 2023-01-24 06:15:26.607042: step: 202/464, loss: 0.04055408015847206 2023-01-24 06:15:27.374720: step: 204/464, loss: 0.010046295821666718 2023-01-24 06:15:28.085191: step: 206/464, loss: 0.005254621617496014 2023-01-24 06:15:28.769219: step: 208/464, loss: 0.06961327791213989 2023-01-24 06:15:29.548170: step: 210/464, loss: 2.223823503300082e-05 2023-01-24 06:15:30.283808: step: 212/464, loss: 0.008146322332322598 2023-01-24 06:15:30.983941: step: 214/464, loss: 0.37217724323272705 2023-01-24 06:15:31.606778: step: 216/464, loss: 0.005295636132359505 2023-01-24 06:15:32.267047: step: 218/464, loss: 0.012367482297122478 2023-01-24 06:15:33.045618: step: 220/464, loss: 0.047387026250362396 2023-01-24 06:15:33.745989: step: 222/464, loss: 0.01395697146654129 2023-01-24 06:15:34.452290: step: 224/464, loss: 0.014371508732438087 2023-01-24 06:15:35.141067: step: 226/464, loss: 0.012212062254548073 2023-01-24 06:15:35.833790: step: 228/464, loss: 0.1137467548251152 2023-01-24 06:15:36.568537: step: 230/464, loss: 0.028719283640384674 2023-01-24 06:15:37.316510: step: 232/464, loss: 0.16531309485435486 2023-01-24 06:15:38.042838: step: 234/464, loss: 0.00018524908227846026 2023-01-24 06:15:38.803691: step: 236/464, loss: 0.008265960030257702 2023-01-24 06:15:39.652702: step: 238/464, loss: 0.015571714378893375 2023-01-24 06:15:40.373684: step: 240/464, loss: 0.09191140532493591 2023-01-24 06:15:41.069878: step: 242/464, loss: 0.01177075132727623 2023-01-24 06:15:41.805153: step: 244/464, loss: 0.005926132667809725 2023-01-24 06:15:42.434651: step: 246/464, loss: 0.27264249324798584 2023-01-24 06:15:43.127044: step: 248/464, loss: 0.09546758979558945 2023-01-24 06:15:43.939466: step: 250/464, loss: 0.01481780968606472 2023-01-24 06:15:44.690131: step: 252/464, loss: 0.17518430948257446 2023-01-24 06:15:45.556926: step: 254/464, loss: 0.09268547594547272 2023-01-24 06:15:46.296216: step: 256/464, loss: 0.04955311119556427 2023-01-24 06:15:46.990388: step: 258/464, loss: 0.008020931854844093 2023-01-24 06:15:47.711533: step: 260/464, loss: 0.0050893924199044704 2023-01-24 06:15:48.418554: step: 262/464, loss: 0.0004801676550414413 2023-01-24 06:15:49.106187: step: 264/464, loss: 0.012764999642968178 2023-01-24 06:15:49.845265: step: 266/464, loss: 0.008500345051288605 2023-01-24 06:15:50.692331: step: 268/464, loss: 0.007304369006305933 2023-01-24 06:15:51.425748: step: 270/464, loss: 0.03284875303506851 2023-01-24 06:15:52.106602: step: 272/464, loss: 0.007013040129095316 2023-01-24 06:15:52.911321: step: 274/464, loss: 0.029180755838751793 2023-01-24 06:15:53.597250: step: 276/464, loss: 0.011627051047980785 2023-01-24 06:15:54.278615: step: 278/464, loss: 0.0007608159794472158 2023-01-24 06:15:54.917040: step: 280/464, loss: 0.004587231669574976 2023-01-24 06:15:55.626781: step: 282/464, loss: 0.002692803042009473 2023-01-24 06:15:56.471510: step: 284/464, loss: 0.008794981054961681 2023-01-24 06:15:57.168579: step: 286/464, loss: 0.00858909823000431 2023-01-24 06:15:58.042504: step: 288/464, loss: 0.0035707568749785423 2023-01-24 06:15:58.729916: step: 290/464, loss: 0.00039932093932293355 2023-01-24 06:15:59.532442: step: 292/464, loss: 0.016323773190379143 2023-01-24 06:16:00.329743: step: 294/464, loss: 0.003390966448932886 2023-01-24 06:16:01.034486: step: 296/464, loss: 0.0008662844775244594 2023-01-24 06:16:01.760678: step: 298/464, loss: 0.00046012995881028473 2023-01-24 06:16:02.570742: step: 300/464, loss: 0.007354297209531069 2023-01-24 06:16:03.266722: step: 302/464, loss: 0.0022028302773833275 2023-01-24 06:16:03.994869: step: 304/464, loss: 0.07865099608898163 2023-01-24 06:16:04.707136: step: 306/464, loss: 0.03541542962193489 2023-01-24 06:16:05.497574: step: 308/464, loss: 0.03597771376371384 2023-01-24 06:16:06.241217: step: 310/464, loss: 0.0018971062963828444 2023-01-24 06:16:06.922174: step: 312/464, loss: 0.004431482870131731 2023-01-24 06:16:07.682745: step: 314/464, loss: 0.045061685144901276 2023-01-24 06:16:08.424894: step: 316/464, loss: 0.0004633663047570735 2023-01-24 06:16:09.149886: step: 318/464, loss: 0.014325146563351154 2023-01-24 06:16:09.910437: step: 320/464, loss: 0.016019832342863083 2023-01-24 06:16:10.598232: step: 322/464, loss: 0.005721123423427343 2023-01-24 06:16:11.385871: step: 324/464, loss: 0.0054297856986522675 2023-01-24 06:16:12.075415: step: 326/464, loss: 0.013479417189955711 2023-01-24 06:16:13.040564: step: 328/464, loss: 0.07097252458333969 2023-01-24 06:16:13.804774: step: 330/464, loss: 0.005652104038745165 2023-01-24 06:16:14.564044: step: 332/464, loss: 0.002012834884226322 2023-01-24 06:16:15.259968: step: 334/464, loss: 0.03151766210794449 2023-01-24 06:16:15.906944: step: 336/464, loss: 0.08759447932243347 2023-01-24 06:16:16.682229: step: 338/464, loss: 0.0022924193181097507 2023-01-24 06:16:17.423468: step: 340/464, loss: 0.0057031805627048016 2023-01-24 06:16:18.143126: step: 342/464, loss: 0.016630645841360092 2023-01-24 06:16:18.812525: step: 344/464, loss: 0.00012650784628931433 2023-01-24 06:16:19.496947: step: 346/464, loss: 0.052899766713380814 2023-01-24 06:16:20.198505: step: 348/464, loss: 0.055778276175260544 2023-01-24 06:16:20.841784: step: 350/464, loss: 0.003215274540707469 2023-01-24 06:16:21.530355: step: 352/464, loss: 0.12934081256389618 2023-01-24 06:16:22.274111: step: 354/464, loss: 0.053472839295864105 2023-01-24 06:16:22.929344: step: 356/464, loss: 0.0236610546708107 2023-01-24 06:16:23.676326: step: 358/464, loss: 0.010390755720436573 2023-01-24 06:16:24.396159: step: 360/464, loss: 0.0007524581160396338 2023-01-24 06:16:25.160425: step: 362/464, loss: 0.0035021156072616577 2023-01-24 06:16:25.825035: step: 364/464, loss: 0.0011628000065684319 2023-01-24 06:16:26.543046: step: 366/464, loss: 0.00948494952172041 2023-01-24 06:16:27.281981: step: 368/464, loss: 0.09849855303764343 2023-01-24 06:16:28.008500: step: 370/464, loss: 0.03865564614534378 2023-01-24 06:16:28.758474: step: 372/464, loss: 0.04403558745980263 2023-01-24 06:16:29.575729: step: 374/464, loss: 0.05303948000073433 2023-01-24 06:16:30.298958: step: 376/464, loss: 0.01641235314309597 2023-01-24 06:16:31.068586: step: 378/464, loss: 0.4176364839076996 2023-01-24 06:16:31.844357: step: 380/464, loss: 0.03687671944499016 2023-01-24 06:16:32.601495: step: 382/464, loss: 0.007635573390871286 2023-01-24 06:16:33.306756: step: 384/464, loss: 0.03301868215203285 2023-01-24 06:16:34.062678: step: 386/464, loss: 0.03237801417708397 2023-01-24 06:16:34.791076: step: 388/464, loss: 0.0010952817974612117 2023-01-24 06:16:35.509173: step: 390/464, loss: 0.013851060532033443 2023-01-24 06:16:36.195573: step: 392/464, loss: 0.004478788003325462 2023-01-24 06:16:36.992697: step: 394/464, loss: 0.3380841612815857 2023-01-24 06:16:37.745544: step: 396/464, loss: 0.004648469388484955 2023-01-24 06:16:38.468057: step: 398/464, loss: 0.035101454704999924 2023-01-24 06:16:39.170784: step: 400/464, loss: 0.01252185832709074 2023-01-24 06:16:39.841870: step: 402/464, loss: 0.003056521527469158 2023-01-24 06:16:40.490571: step: 404/464, loss: 0.002402772894129157 2023-01-24 06:16:41.323781: step: 406/464, loss: 0.1006186380982399 2023-01-24 06:16:42.103487: step: 408/464, loss: 0.02391218952834606 2023-01-24 06:16:42.882086: step: 410/464, loss: 0.055983979254961014 2023-01-24 06:16:43.582563: step: 412/464, loss: 0.013008703477680683 2023-01-24 06:16:44.310904: step: 414/464, loss: 0.23352257907390594 2023-01-24 06:16:45.012526: step: 416/464, loss: 0.07463035732507706 2023-01-24 06:16:45.671346: step: 418/464, loss: 0.002670279471203685 2023-01-24 06:16:46.433422: step: 420/464, loss: 0.013034089468419552 2023-01-24 06:16:47.190864: step: 422/464, loss: 0.06503226608037949 2023-01-24 06:16:48.018226: step: 424/464, loss: 0.0711727887392044 2023-01-24 06:16:48.818501: step: 426/464, loss: 0.016943490132689476 2023-01-24 06:16:49.546340: step: 428/464, loss: 0.017905110493302345 2023-01-24 06:16:50.274430: step: 430/464, loss: 0.010580254718661308 2023-01-24 06:16:51.025774: step: 432/464, loss: 0.0017730090767145157 2023-01-24 06:16:51.774542: step: 434/464, loss: 0.012521494179964066 2023-01-24 06:16:52.471227: step: 436/464, loss: 0.037310272455215454 2023-01-24 06:16:53.184430: step: 438/464, loss: 0.3737095296382904 2023-01-24 06:16:53.888743: step: 440/464, loss: 0.0009822368156164885 2023-01-24 06:16:54.668449: step: 442/464, loss: 0.034751612693071365 2023-01-24 06:16:55.322431: step: 444/464, loss: 0.18611420691013336 2023-01-24 06:16:56.011838: step: 446/464, loss: 0.004463686607778072 2023-01-24 06:16:56.719232: step: 448/464, loss: 0.014982398599386215 2023-01-24 06:16:57.419348: step: 450/464, loss: 0.025062285363674164 2023-01-24 06:16:58.155833: step: 452/464, loss: 0.02551797218620777 2023-01-24 06:16:58.878784: step: 454/464, loss: 0.010809944942593575 2023-01-24 06:16:59.627135: step: 456/464, loss: 0.019055066630244255 2023-01-24 06:17:00.326814: step: 458/464, loss: 0.00012230365246068686 2023-01-24 06:17:00.996539: step: 460/464, loss: 0.007807716727256775 2023-01-24 06:17:01.663718: step: 462/464, loss: 0.0007277731783688068 2023-01-24 06:17:02.435611: step: 464/464, loss: 0.00027377461083233356 2023-01-24 06:17:03.161274: step: 466/464, loss: 0.0009555912110954523 2023-01-24 06:17:03.934036: step: 468/464, loss: 0.0014475013595074415 2023-01-24 06:17:04.724712: step: 470/464, loss: 0.019103819504380226 2023-01-24 06:17:05.380897: step: 472/464, loss: 0.006632484495639801 2023-01-24 06:17:06.180496: step: 474/464, loss: 0.015860576182603836 2023-01-24 06:17:07.076161: step: 476/464, loss: 0.005315141286700964 2023-01-24 06:17:07.777720: step: 478/464, loss: 0.0025896148290485144 2023-01-24 06:17:08.535914: step: 480/464, loss: 0.004105839412659407 2023-01-24 06:17:09.248617: step: 482/464, loss: 0.30062341690063477 2023-01-24 06:17:09.999737: step: 484/464, loss: 0.004436559975147247 2023-01-24 06:17:10.694490: step: 486/464, loss: 0.006108072120696306 2023-01-24 06:17:11.374274: step: 488/464, loss: 0.003864583559334278 2023-01-24 06:17:12.187756: step: 490/464, loss: 0.007178569212555885 2023-01-24 06:17:13.025889: step: 492/464, loss: 0.018330058082938194 2023-01-24 06:17:13.721364: step: 494/464, loss: 0.005468042101711035 2023-01-24 06:17:14.533112: step: 496/464, loss: 0.0365530401468277 2023-01-24 06:17:15.157659: step: 498/464, loss: 0.0017041832907125354 2023-01-24 06:17:15.884021: step: 500/464, loss: 0.03681609407067299 2023-01-24 06:17:16.615069: step: 502/464, loss: 0.19347411394119263 2023-01-24 06:17:17.504824: step: 504/464, loss: 0.01480608619749546 2023-01-24 06:17:18.267980: step: 506/464, loss: 0.0002922680287156254 2023-01-24 06:17:19.029083: step: 508/464, loss: 0.014807172119617462 2023-01-24 06:17:19.798146: step: 510/464, loss: 0.019604379311203957 2023-01-24 06:17:20.536160: step: 512/464, loss: 0.5452590584754944 2023-01-24 06:17:21.306585: step: 514/464, loss: 0.04370192810893059 2023-01-24 06:17:21.967807: step: 516/464, loss: 0.0015719156945124269 2023-01-24 06:17:22.693944: step: 518/464, loss: 0.017483972012996674 2023-01-24 06:17:23.400703: step: 520/464, loss: 0.026621662080287933 2023-01-24 06:17:24.130032: step: 522/464, loss: 0.02510252222418785 2023-01-24 06:17:24.851162: step: 524/464, loss: 0.0642847940325737 2023-01-24 06:17:25.609393: step: 526/464, loss: 0.009867520071566105 2023-01-24 06:17:26.333447: step: 528/464, loss: 0.0023654180113226175 2023-01-24 06:17:27.168530: step: 530/464, loss: 0.0015580817125737667 2023-01-24 06:17:27.898021: step: 532/464, loss: 0.04824097454547882 2023-01-24 06:17:28.607360: step: 534/464, loss: 0.020770171657204628 2023-01-24 06:17:29.286033: step: 536/464, loss: 0.015186947770416737 2023-01-24 06:17:30.093227: step: 538/464, loss: 0.0014887871220707893 2023-01-24 06:17:30.819046: step: 540/464, loss: 0.0006223876844160259 2023-01-24 06:17:31.487729: step: 542/464, loss: 0.008479096926748753 2023-01-24 06:17:32.190223: step: 544/464, loss: 0.00052025041077286 2023-01-24 06:17:33.017935: step: 546/464, loss: 0.014327477663755417 2023-01-24 06:17:33.762188: step: 548/464, loss: 0.054649192839860916 2023-01-24 06:17:34.505752: step: 550/464, loss: 0.01268855668604374 2023-01-24 06:17:35.245618: step: 552/464, loss: 0.12560972571372986 2023-01-24 06:17:35.988858: step: 554/464, loss: 0.050108108669519424 2023-01-24 06:17:36.776472: step: 556/464, loss: 0.15157826244831085 2023-01-24 06:17:37.485008: step: 558/464, loss: 0.0012913616374135017 2023-01-24 06:17:38.253258: step: 560/464, loss: 0.02061178907752037 2023-01-24 06:17:39.020033: step: 562/464, loss: 0.0076378644444048405 2023-01-24 06:17:39.798006: step: 564/464, loss: 0.01353029441088438 2023-01-24 06:17:40.523055: step: 566/464, loss: 0.0009691324084997177 2023-01-24 06:17:41.256669: step: 568/464, loss: 0.29185059666633606 2023-01-24 06:17:42.075283: step: 570/464, loss: 0.0015059993602335453 2023-01-24 06:17:42.828702: step: 572/464, loss: 0.005179137922823429 2023-01-24 06:17:43.493268: step: 574/464, loss: 4.8018511733971536e-05 2023-01-24 06:17:44.300707: step: 576/464, loss: 0.17213767766952515 2023-01-24 06:17:45.125824: step: 578/464, loss: 0.06760312616825104 2023-01-24 06:17:45.836441: step: 580/464, loss: 0.06528377532958984 2023-01-24 06:17:46.601987: step: 582/464, loss: 0.006387208588421345 2023-01-24 06:17:47.356477: step: 584/464, loss: 0.011440436355769634 2023-01-24 06:17:48.110965: step: 586/464, loss: 0.003430222626775503 2023-01-24 06:17:48.925625: step: 588/464, loss: 0.0009344254503957927 2023-01-24 06:17:49.704484: step: 590/464, loss: 0.002713944064453244 2023-01-24 06:17:50.422604: step: 592/464, loss: 0.002663680585101247 2023-01-24 06:17:51.185961: step: 594/464, loss: 0.001129120122641325 2023-01-24 06:17:51.937451: step: 596/464, loss: 0.0009066627826541662 2023-01-24 06:17:52.735167: step: 598/464, loss: 0.016030069440603256 2023-01-24 06:17:53.411911: step: 600/464, loss: 0.0125338826328516 2023-01-24 06:17:54.180872: step: 602/464, loss: 0.006337358150631189 2023-01-24 06:17:54.904824: step: 604/464, loss: 0.0018472378142178059 2023-01-24 06:17:55.594490: step: 606/464, loss: 0.0031487769447267056 2023-01-24 06:17:56.363632: step: 608/464, loss: 0.005827262531965971 2023-01-24 06:17:57.067921: step: 610/464, loss: 0.014079701155424118 2023-01-24 06:17:57.766680: step: 612/464, loss: 0.0038269374053925276 2023-01-24 06:17:58.582432: step: 614/464, loss: 0.0021537039428949356 2023-01-24 06:17:59.335166: step: 616/464, loss: 0.002285576891154051 2023-01-24 06:18:00.041061: step: 618/464, loss: 0.0056445905938744545 2023-01-24 06:18:00.765460: step: 620/464, loss: 0.06387317180633545 2023-01-24 06:18:01.459353: step: 622/464, loss: 0.0125731797888875 2023-01-24 06:18:02.259276: step: 624/464, loss: 0.04404031112790108 2023-01-24 06:18:02.947211: step: 626/464, loss: 0.006039231084287167 2023-01-24 06:18:03.705642: step: 628/464, loss: 0.018473368138074875 2023-01-24 06:18:04.464870: step: 630/464, loss: 0.000345776294125244 2023-01-24 06:18:05.314644: step: 632/464, loss: 0.048042964190244675 2023-01-24 06:18:06.015279: step: 634/464, loss: 0.008211801759898663 2023-01-24 06:18:06.720995: step: 636/464, loss: 0.00013828724331688136 2023-01-24 06:18:07.409889: step: 638/464, loss: 0.02018279954791069 2023-01-24 06:18:08.128247: step: 640/464, loss: 0.029835864901542664 2023-01-24 06:18:08.772413: step: 642/464, loss: 0.004973770119249821 2023-01-24 06:18:09.504388: step: 644/464, loss: 0.002753573004156351 2023-01-24 06:18:10.182947: step: 646/464, loss: 0.006737233605235815 2023-01-24 06:18:10.854805: step: 648/464, loss: 0.05646848306059837 2023-01-24 06:18:11.530806: step: 650/464, loss: 0.01825702004134655 2023-01-24 06:18:12.227310: step: 652/464, loss: 0.10580414533615112 2023-01-24 06:18:12.986794: step: 654/464, loss: 0.00877810176461935 2023-01-24 06:18:13.706271: step: 656/464, loss: 0.004991421941667795 2023-01-24 06:18:14.441877: step: 658/464, loss: 0.00306524196639657 2023-01-24 06:18:15.222974: step: 660/464, loss: 0.011912654154002666 2023-01-24 06:18:15.999103: step: 662/464, loss: 0.0008568924968130887 2023-01-24 06:18:16.714004: step: 664/464, loss: 0.015382496640086174 2023-01-24 06:18:17.431057: step: 666/464, loss: 0.016268685460090637 2023-01-24 06:18:18.158905: step: 668/464, loss: 0.04140138998627663 2023-01-24 06:18:18.935203: step: 670/464, loss: 0.024702560156583786 2023-01-24 06:18:19.604490: step: 672/464, loss: 0.007844786159694195 2023-01-24 06:18:20.287840: step: 674/464, loss: 0.03346068784594536 2023-01-24 06:18:21.062661: step: 676/464, loss: 0.01021251454949379 2023-01-24 06:18:21.834704: step: 678/464, loss: 0.005769534967839718 2023-01-24 06:18:22.535227: step: 680/464, loss: 0.0011539680417627096 2023-01-24 06:18:23.289179: step: 682/464, loss: 0.010150428861379623 2023-01-24 06:18:24.065957: step: 684/464, loss: 0.013346421532332897 2023-01-24 06:18:24.877229: step: 686/464, loss: 0.014792312867939472 2023-01-24 06:18:25.705214: step: 688/464, loss: 0.0027319081127643585 2023-01-24 06:18:26.475738: step: 690/464, loss: 0.015660211443901062 2023-01-24 06:18:27.313235: step: 692/464, loss: 0.006680083926767111 2023-01-24 06:18:28.056727: step: 694/464, loss: 0.012355759739875793 2023-01-24 06:18:28.788198: step: 696/464, loss: 0.002599672181531787 2023-01-24 06:18:29.478769: step: 698/464, loss: 0.0004135113849770278 2023-01-24 06:18:30.210619: step: 700/464, loss: 0.053030844777822495 2023-01-24 06:18:30.978903: step: 702/464, loss: 0.07157432287931442 2023-01-24 06:18:31.731311: step: 704/464, loss: 0.01568608172237873 2023-01-24 06:18:32.445848: step: 706/464, loss: 0.04196842387318611 2023-01-24 06:18:33.173285: step: 708/464, loss: 0.00017093642964027822 2023-01-24 06:18:33.839046: step: 710/464, loss: 0.048214443027973175 2023-01-24 06:18:34.529086: step: 712/464, loss: 0.005283031612634659 2023-01-24 06:18:35.233190: step: 714/464, loss: 0.0061831907369196415 2023-01-24 06:18:36.026894: step: 716/464, loss: 0.0009360117837786674 2023-01-24 06:18:36.763734: step: 718/464, loss: 0.018874213099479675 2023-01-24 06:18:37.627169: step: 720/464, loss: 0.0016444490756839514 2023-01-24 06:18:38.307122: step: 722/464, loss: 0.023046409711241722 2023-01-24 06:18:39.023150: step: 724/464, loss: 0.010292811319231987 2023-01-24 06:18:39.880940: step: 726/464, loss: 0.0054589444771409035 2023-01-24 06:18:40.556270: step: 728/464, loss: 0.10302948206663132 2023-01-24 06:18:41.264779: step: 730/464, loss: 0.0001261269935639575 2023-01-24 06:18:42.036875: step: 732/464, loss: 0.027284221723675728 2023-01-24 06:18:42.783789: step: 734/464, loss: 0.03829146549105644 2023-01-24 06:18:43.489129: step: 736/464, loss: 0.005423355381935835 2023-01-24 06:18:44.265981: step: 738/464, loss: 0.039719391614198685 2023-01-24 06:18:45.023479: step: 740/464, loss: 0.0002570571086835116 2023-01-24 06:18:45.828372: step: 742/464, loss: 0.0417335107922554 2023-01-24 06:18:46.519119: step: 744/464, loss: 0.0009967689402401447 2023-01-24 06:18:47.262567: step: 746/464, loss: 0.013086330145597458 2023-01-24 06:18:48.023668: step: 748/464, loss: 0.0364522710442543 2023-01-24 06:18:48.783675: step: 750/464, loss: 0.024580299854278564 2023-01-24 06:18:49.470261: step: 752/464, loss: 0.00044813542626798153 2023-01-24 06:18:50.181518: step: 754/464, loss: 0.005226737353950739 2023-01-24 06:18:51.020291: step: 756/464, loss: 0.012509177438914776 2023-01-24 06:18:51.712370: step: 758/464, loss: 0.004933376796543598 2023-01-24 06:18:52.418857: step: 760/464, loss: 0.0008160553989000618 2023-01-24 06:18:53.092644: step: 762/464, loss: 0.025479281321167946 2023-01-24 06:18:53.749684: step: 764/464, loss: 0.04945717751979828 2023-01-24 06:18:54.514265: step: 766/464, loss: 0.000553000601939857 2023-01-24 06:18:55.176545: step: 768/464, loss: 0.0039887516759335995 2023-01-24 06:18:55.887323: step: 770/464, loss: 0.004526576027274132 2023-01-24 06:18:56.709622: step: 772/464, loss: 0.006515008397400379 2023-01-24 06:18:57.463137: step: 774/464, loss: 0.004391736816614866 2023-01-24 06:18:58.156191: step: 776/464, loss: 0.0011453385232016444 2023-01-24 06:18:58.865461: step: 778/464, loss: 0.048640068620443344 2023-01-24 06:18:59.582487: step: 780/464, loss: 0.004788265563547611 2023-01-24 06:19:00.375637: step: 782/464, loss: 0.004345182795077562 2023-01-24 06:19:01.123408: step: 784/464, loss: 0.07326582074165344 2023-01-24 06:19:01.793514: step: 786/464, loss: 0.000605506356805563 2023-01-24 06:19:02.523167: step: 788/464, loss: 0.00305646238848567 2023-01-24 06:19:03.337907: step: 790/464, loss: 0.008802256546914577 2023-01-24 06:19:04.103997: step: 792/464, loss: 0.025788472965359688 2023-01-24 06:19:04.816377: step: 794/464, loss: 0.2637900114059448 2023-01-24 06:19:05.499545: step: 796/464, loss: 0.0015494396211579442 2023-01-24 06:19:06.179770: step: 798/464, loss: 0.00048792597954161465 2023-01-24 06:19:06.936189: step: 800/464, loss: 0.005383032839745283 2023-01-24 06:19:07.703749: step: 802/464, loss: 0.2706470191478729 2023-01-24 06:19:08.427171: step: 804/464, loss: 0.002237283391878009 2023-01-24 06:19:09.287262: step: 806/464, loss: 0.23974576592445374 2023-01-24 06:19:10.053817: step: 808/464, loss: 0.00011458772496553138 2023-01-24 06:19:10.782972: step: 810/464, loss: 0.046538788825273514 2023-01-24 06:19:11.500821: step: 812/464, loss: 0.024802470579743385 2023-01-24 06:19:12.291707: step: 814/464, loss: 0.02206752821803093 2023-01-24 06:19:13.005343: step: 816/464, loss: 0.01053948700428009 2023-01-24 06:19:13.715111: step: 818/464, loss: 0.02718799002468586 2023-01-24 06:19:14.473726: step: 820/464, loss: 0.03310324624180794 2023-01-24 06:19:15.232579: step: 822/464, loss: 0.6390102505683899 2023-01-24 06:19:15.915014: step: 824/464, loss: 0.004658107180148363 2023-01-24 06:19:16.591877: step: 826/464, loss: 0.013531827367842197 2023-01-24 06:19:17.306811: step: 828/464, loss: 0.014665245078504086 2023-01-24 06:19:18.041309: step: 830/464, loss: 0.021113982424139977 2023-01-24 06:19:18.741843: step: 832/464, loss: 0.011678681708872318 2023-01-24 06:19:19.476618: step: 834/464, loss: 0.06600571423768997 2023-01-24 06:19:20.216007: step: 836/464, loss: 0.0012176345335319638 2023-01-24 06:19:21.036292: step: 838/464, loss: 0.04618173465132713 2023-01-24 06:19:21.812840: step: 840/464, loss: 0.013476877473294735 2023-01-24 06:19:22.547137: step: 842/464, loss: 0.03788414224982262 2023-01-24 06:19:23.154277: step: 844/464, loss: 0.005714490544050932 2023-01-24 06:19:23.901340: step: 846/464, loss: 0.0042811487801373005 2023-01-24 06:19:24.715400: step: 848/464, loss: 0.028491834178566933 2023-01-24 06:19:25.420990: step: 850/464, loss: 0.027468910440802574 2023-01-24 06:19:26.105442: step: 852/464, loss: 0.004805763252079487 2023-01-24 06:19:26.822175: step: 854/464, loss: 0.011337238363921642 2023-01-24 06:19:27.503225: step: 856/464, loss: 0.0019541652873158455 2023-01-24 06:19:28.222786: step: 858/464, loss: 0.000762142997700721 2023-01-24 06:19:28.972455: step: 860/464, loss: 0.0006989953690208495 2023-01-24 06:19:29.687749: step: 862/464, loss: 0.056273747235536575 2023-01-24 06:19:30.433226: step: 864/464, loss: 0.015350564382970333 2023-01-24 06:19:31.246723: step: 866/464, loss: 0.004561841022223234 2023-01-24 06:19:31.937481: step: 868/464, loss: 0.022886553779244423 2023-01-24 06:19:32.668713: step: 870/464, loss: 0.003614415880292654 2023-01-24 06:19:33.419421: step: 872/464, loss: 0.06976622343063354 2023-01-24 06:19:34.170706: step: 874/464, loss: 0.014863256365060806 2023-01-24 06:19:34.876765: step: 876/464, loss: 0.04626696929335594 2023-01-24 06:19:35.637891: step: 878/464, loss: 0.02423112466931343 2023-01-24 06:19:36.404408: step: 880/464, loss: 1.710673213005066 2023-01-24 06:19:37.167886: step: 882/464, loss: 0.6897745132446289 2023-01-24 06:19:37.914638: step: 884/464, loss: 0.042005181312561035 2023-01-24 06:19:38.677798: step: 886/464, loss: 0.002239571651443839 2023-01-24 06:19:39.396795: step: 888/464, loss: 0.012205242179334164 2023-01-24 06:19:40.134798: step: 890/464, loss: 0.01918407529592514 2023-01-24 06:19:40.921411: step: 892/464, loss: 0.0025333764497190714 2023-01-24 06:19:41.725792: step: 894/464, loss: 0.008899757638573647 2023-01-24 06:19:42.488043: step: 896/464, loss: 0.0008298902539536357 2023-01-24 06:19:43.264500: step: 898/464, loss: 0.0368281826376915 2023-01-24 06:19:43.962097: step: 900/464, loss: 0.0003198850608896464 2023-01-24 06:19:44.746073: step: 902/464, loss: 0.0575554184615612 2023-01-24 06:19:45.563064: step: 904/464, loss: 0.01885542832314968 2023-01-24 06:19:46.280824: step: 906/464, loss: 0.09064995497465134 2023-01-24 06:19:47.128436: step: 908/464, loss: 0.047099050134420395 2023-01-24 06:19:47.872927: step: 910/464, loss: 0.027352536097168922 2023-01-24 06:19:48.606637: step: 912/464, loss: 0.008324525319039822 2023-01-24 06:19:49.313732: step: 914/464, loss: 0.006355096586048603 2023-01-24 06:19:49.973941: step: 916/464, loss: 0.002466668142005801 2023-01-24 06:19:50.628566: step: 918/464, loss: 0.003562645521014929 2023-01-24 06:19:51.349927: step: 920/464, loss: 0.01138092577457428 2023-01-24 06:19:52.116369: step: 922/464, loss: 0.0006305737770162523 2023-01-24 06:19:52.819776: step: 924/464, loss: 0.09922892600297928 2023-01-24 06:19:53.556957: step: 926/464, loss: 0.026989903301000595 2023-01-24 06:19:54.328657: step: 928/464, loss: 0.004400115460157394 2023-01-24 06:19:54.951693: step: 930/464, loss: 0.013907156884670258 ================================================== Loss: 0.048 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3438144580717226, 'r': 0.33663806520874545, 'f1': 0.3401884187248492}, 'combined': 0.2506651506393625, 'epoch': 33} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31963776499663243, 'r': 0.26631221486392676, 'f1': 0.290548490745458}, 'combined': 0.18044590477875816, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3214905788235219, 'r': 0.317830344529516, 'f1': 0.3196499839065933}, 'combined': 0.23553156708906875, 'epoch': 33} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31848432618806055, 'r': 0.2618928368357892, 'f1': 0.28742951648391596}, 'combined': 0.17850885760580046, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34111937251418384, 'r': 0.33011552178791986, 'f1': 0.3355272516532956}, 'combined': 0.2472306064813757, 'epoch': 33} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3308505792458912, 'r': 0.2685919973385363, 'f1': 0.29648817541991773}, 'combined': 0.18413476157658049, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3706896551724138, 'r': 0.30714285714285716, 'f1': 0.3359375}, 'combined': 0.22395833333333331, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3103448275862069, 'r': 0.391304347826087, 'f1': 0.34615384615384615}, 'combined': 0.17307692307692307, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.23275862068965517, 'f1': 0.3292682926829268}, 'combined': 0.2195121951219512, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:22:34.962469: step: 2/464, loss: 0.004507106263190508 2023-01-24 06:22:35.707601: step: 4/464, loss: 0.0138564957305789 2023-01-24 06:22:36.394699: step: 6/464, loss: 0.012533614411950111 2023-01-24 06:22:37.129834: step: 8/464, loss: 0.028478601947426796 2023-01-24 06:22:37.860240: step: 10/464, loss: 0.023900024592876434 2023-01-24 06:22:38.622105: step: 12/464, loss: 0.03197980672121048 2023-01-24 06:22:39.362851: step: 14/464, loss: 0.0026648296043276787 2023-01-24 06:22:40.060931: step: 16/464, loss: 0.0032839514315128326 2023-01-24 06:22:40.985364: step: 18/464, loss: 0.13947302103042603 2023-01-24 06:22:41.799960: step: 20/464, loss: 0.004450374748557806 2023-01-24 06:22:42.506377: step: 22/464, loss: 0.00821272749453783 2023-01-24 06:22:43.306776: step: 24/464, loss: 0.05076976493000984 2023-01-24 06:22:44.080763: step: 26/464, loss: 0.0003773788339458406 2023-01-24 06:22:44.730976: step: 28/464, loss: 0.00023975061776582152 2023-01-24 06:22:45.470033: step: 30/464, loss: 0.0016178319929167628 2023-01-24 06:22:46.285898: step: 32/464, loss: 0.012625518254935741 2023-01-24 06:22:47.011790: step: 34/464, loss: 0.2688717544078827 2023-01-24 06:22:47.738232: step: 36/464, loss: 0.008189328014850616 2023-01-24 06:22:48.467560: step: 38/464, loss: 0.020980000495910645 2023-01-24 06:22:49.143168: step: 40/464, loss: 0.002992508001625538 2023-01-24 06:22:49.894699: step: 42/464, loss: 0.025903644040226936 2023-01-24 06:22:50.627450: step: 44/464, loss: 0.030666600912809372 2023-01-24 06:22:51.309166: step: 46/464, loss: 0.0001177599624497816 2023-01-24 06:22:52.047442: step: 48/464, loss: 0.006271448452025652 2023-01-24 06:22:52.778677: step: 50/464, loss: 0.0019224463030695915 2023-01-24 06:22:53.522651: step: 52/464, loss: 0.519623875617981 2023-01-24 06:22:54.239965: step: 54/464, loss: 0.029549848288297653 2023-01-24 06:22:54.969259: step: 56/464, loss: 0.008546078577637672 2023-01-24 06:22:55.764280: step: 58/464, loss: 0.0016022090567275882 2023-01-24 06:22:56.443069: step: 60/464, loss: 0.008452469483017921 2023-01-24 06:22:57.127316: step: 62/464, loss: 0.0006270703161135316 2023-01-24 06:22:57.808324: step: 64/464, loss: 0.03125409036874771 2023-01-24 06:22:58.544469: step: 66/464, loss: 0.00010930612916126847 2023-01-24 06:22:59.307422: step: 68/464, loss: 0.06612391024827957 2023-01-24 06:23:00.060019: step: 70/464, loss: 0.00494617223739624 2023-01-24 06:23:00.765614: step: 72/464, loss: 0.01734868995845318 2023-01-24 06:23:01.530927: step: 74/464, loss: 0.4363549053668976 2023-01-24 06:23:02.245079: step: 76/464, loss: 0.01373057346791029 2023-01-24 06:23:02.954678: step: 78/464, loss: 0.008064981549978256 2023-01-24 06:23:03.703105: step: 80/464, loss: 0.03656484931707382 2023-01-24 06:23:04.498286: step: 82/464, loss: 0.13750436902046204 2023-01-24 06:23:05.163937: step: 84/464, loss: 0.00047446993994526565 2023-01-24 06:23:05.944542: step: 86/464, loss: 0.006199462339282036 2023-01-24 06:23:06.722216: step: 88/464, loss: 0.0008233002736233175 2023-01-24 06:23:07.482494: step: 90/464, loss: 0.0014060763642191887 2023-01-24 06:23:08.186407: step: 92/464, loss: 0.009109342470765114 2023-01-24 06:23:08.963444: step: 94/464, loss: 0.0016980544896796346 2023-01-24 06:23:09.652499: step: 96/464, loss: 0.0004116365744266659 2023-01-24 06:23:10.392485: step: 98/464, loss: 0.011986698023974895 2023-01-24 06:23:11.350547: step: 100/464, loss: 0.007710412610322237 2023-01-24 06:23:12.138746: step: 102/464, loss: 0.0005119118723087013 2023-01-24 06:23:12.925224: step: 104/464, loss: 0.020808612927794456 2023-01-24 06:23:13.732410: step: 106/464, loss: 0.007151258178055286 2023-01-24 06:23:14.485183: step: 108/464, loss: 0.009474542923271656 2023-01-24 06:23:15.319319: step: 110/464, loss: 4.1004888771567494e-05 2023-01-24 06:23:16.025938: step: 112/464, loss: 0.0347786508500576 2023-01-24 06:23:16.805593: step: 114/464, loss: 0.0015426678583025932 2023-01-24 06:23:17.656114: step: 116/464, loss: 0.0017722718184813857 2023-01-24 06:23:18.350208: step: 118/464, loss: 0.014240579679608345 2023-01-24 06:23:19.059703: step: 120/464, loss: 0.09115071594715118 2023-01-24 06:23:20.013026: step: 122/464, loss: 0.008522900752723217 2023-01-24 06:23:20.750313: step: 124/464, loss: 0.0022267361637204885 2023-01-24 06:23:21.571542: step: 126/464, loss: 0.0030100380536168814 2023-01-24 06:23:22.285166: step: 128/464, loss: 0.0011554791126400232 2023-01-24 06:23:23.127705: step: 130/464, loss: 0.0004891178687103093 2023-01-24 06:23:23.831318: step: 132/464, loss: 0.004247648175805807 2023-01-24 06:23:24.522168: step: 134/464, loss: 0.0036355378106236458 2023-01-24 06:23:25.265770: step: 136/464, loss: 0.14950382709503174 2023-01-24 06:23:26.006454: step: 138/464, loss: 0.006487012840807438 2023-01-24 06:23:26.731531: step: 140/464, loss: 0.0005286363302730024 2023-01-24 06:23:27.448403: step: 142/464, loss: 0.0003259664517827332 2023-01-24 06:23:28.187399: step: 144/464, loss: 0.0174573864787817 2023-01-24 06:23:28.897968: step: 146/464, loss: 0.0023650091607123613 2023-01-24 06:23:29.736634: step: 148/464, loss: 0.002246855990961194 2023-01-24 06:23:30.608600: step: 150/464, loss: 0.010467849671840668 2023-01-24 06:23:31.308577: step: 152/464, loss: 0.0027248847763985395 2023-01-24 06:23:31.985385: step: 154/464, loss: 0.004155377391725779 2023-01-24 06:23:32.744226: step: 156/464, loss: 0.0012879250571131706 2023-01-24 06:23:33.458475: step: 158/464, loss: 0.035379212349653244 2023-01-24 06:23:34.149892: step: 160/464, loss: 0.005478814709931612 2023-01-24 06:23:34.816473: step: 162/464, loss: 0.0020441957749426365 2023-01-24 06:23:35.540353: step: 164/464, loss: 0.027075497433543205 2023-01-24 06:23:36.295650: step: 166/464, loss: 0.014531458728015423 2023-01-24 06:23:37.144785: step: 168/464, loss: 0.029720718041062355 2023-01-24 06:23:37.917828: step: 170/464, loss: 0.0023813534062355757 2023-01-24 06:23:38.714604: step: 172/464, loss: 0.8960830569267273 2023-01-24 06:23:39.431571: step: 174/464, loss: 0.04537244141101837 2023-01-24 06:23:40.167361: step: 176/464, loss: 0.08595781773328781 2023-01-24 06:23:40.935749: step: 178/464, loss: 0.022702278569340706 2023-01-24 06:23:41.669960: step: 180/464, loss: 0.0006193833542056382 2023-01-24 06:23:42.398450: step: 182/464, loss: 0.06762062013149261 2023-01-24 06:23:43.176053: step: 184/464, loss: 0.06323347240686417 2023-01-24 06:23:43.863601: step: 186/464, loss: 0.001289387815631926 2023-01-24 06:23:44.550047: step: 188/464, loss: 0.015292837284505367 2023-01-24 06:23:45.294992: step: 190/464, loss: 0.005967683624476194 2023-01-24 06:23:46.027601: step: 192/464, loss: 0.19376680254936218 2023-01-24 06:23:46.705358: step: 194/464, loss: 0.009574813768267632 2023-01-24 06:23:47.417254: step: 196/464, loss: 0.006247211713343859 2023-01-24 06:23:48.175959: step: 198/464, loss: 0.049863290041685104 2023-01-24 06:23:48.891370: step: 200/464, loss: 0.0009026709012687206 2023-01-24 06:23:49.629808: step: 202/464, loss: 0.009047025814652443 2023-01-24 06:23:50.290268: step: 204/464, loss: 0.003018538001924753 2023-01-24 06:23:51.025192: step: 206/464, loss: 0.06789993494749069 2023-01-24 06:23:51.771517: step: 208/464, loss: 0.014406219124794006 2023-01-24 06:23:52.509850: step: 210/464, loss: 0.009798445738852024 2023-01-24 06:23:53.176308: step: 212/464, loss: 0.005850615445524454 2023-01-24 06:23:53.878945: step: 214/464, loss: 0.0489523746073246 2023-01-24 06:23:54.631368: step: 216/464, loss: 0.005099345929920673 2023-01-24 06:23:55.404948: step: 218/464, loss: 0.0015978205483406782 2023-01-24 06:23:56.072770: step: 220/464, loss: 0.04420878738164902 2023-01-24 06:23:56.852824: step: 222/464, loss: 0.020301537588238716 2023-01-24 06:23:57.540660: step: 224/464, loss: 0.0011024517007172108 2023-01-24 06:23:58.299494: step: 226/464, loss: 0.0043716104701161385 2023-01-24 06:23:58.968295: step: 228/464, loss: 0.0008457738440483809 2023-01-24 06:23:59.694022: step: 230/464, loss: 0.04437427967786789 2023-01-24 06:24:00.378597: step: 232/464, loss: 0.004700162913650274 2023-01-24 06:24:01.054565: step: 234/464, loss: 0.007807662710547447 2023-01-24 06:24:01.800209: step: 236/464, loss: 0.005388321820646524 2023-01-24 06:24:02.515973: step: 238/464, loss: 0.03318406641483307 2023-01-24 06:24:03.249317: step: 240/464, loss: 0.003423970425501466 2023-01-24 06:24:03.944620: step: 242/464, loss: 0.007497243583202362 2023-01-24 06:24:04.605088: step: 244/464, loss: 0.01163523830473423 2023-01-24 06:24:05.368506: step: 246/464, loss: 0.018005944788455963 2023-01-24 06:24:06.188372: step: 248/464, loss: 0.05900489538908005 2023-01-24 06:24:06.871349: step: 250/464, loss: 0.005102918948978186 2023-01-24 06:24:07.591628: step: 252/464, loss: 0.004294229205697775 2023-01-24 06:24:08.300778: step: 254/464, loss: 0.004841707646846771 2023-01-24 06:24:08.935973: step: 256/464, loss: 0.0008235534187406301 2023-01-24 06:24:09.634665: step: 258/464, loss: 1.809576315281447e-05 2023-01-24 06:24:10.382351: step: 260/464, loss: 0.0009198468178510666 2023-01-24 06:24:11.068474: step: 262/464, loss: 0.01337486132979393 2023-01-24 06:24:11.861526: step: 264/464, loss: 0.012098829261958599 2023-01-24 06:24:12.669900: step: 266/464, loss: 0.007691856473684311 2023-01-24 06:24:13.364760: step: 268/464, loss: 0.18738287687301636 2023-01-24 06:24:14.131236: step: 270/464, loss: 0.01887785643339157 2023-01-24 06:24:14.907271: step: 272/464, loss: 0.03846096247434616 2023-01-24 06:24:15.629441: step: 274/464, loss: 0.012068726122379303 2023-01-24 06:24:16.307203: step: 276/464, loss: 0.029271041974425316 2023-01-24 06:24:17.046084: step: 278/464, loss: 0.00018030410865321755 2023-01-24 06:24:17.738801: step: 280/464, loss: 0.0576542504131794 2023-01-24 06:24:18.534815: step: 282/464, loss: 0.019819024950265884 2023-01-24 06:24:19.380907: step: 284/464, loss: 0.036222707480192184 2023-01-24 06:24:20.088488: step: 286/464, loss: 0.03119976446032524 2023-01-24 06:24:20.830118: step: 288/464, loss: 0.008279160596430302 2023-01-24 06:24:21.593120: step: 290/464, loss: 0.0022513007279485464 2023-01-24 06:24:22.297898: step: 292/464, loss: 1.0045217095466796e-05 2023-01-24 06:24:22.996705: step: 294/464, loss: 6.919210136402398e-05 2023-01-24 06:24:23.761618: step: 296/464, loss: 0.008782978169620037 2023-01-24 06:24:24.483067: step: 298/464, loss: 0.005144828464835882 2023-01-24 06:24:25.171170: step: 300/464, loss: 0.004423404578119516 2023-01-24 06:24:25.952498: step: 302/464, loss: 0.02502557262778282 2023-01-24 06:24:26.709673: step: 304/464, loss: 0.03563341870903969 2023-01-24 06:24:27.436764: step: 306/464, loss: 0.011802875436842442 2023-01-24 06:24:28.074802: step: 308/464, loss: 0.00023112045892048627 2023-01-24 06:24:28.744687: step: 310/464, loss: 0.07555259764194489 2023-01-24 06:24:29.440729: step: 312/464, loss: 0.011428655125200748 2023-01-24 06:24:30.231220: step: 314/464, loss: 0.004951578099280596 2023-01-24 06:24:31.002133: step: 316/464, loss: 0.282394677400589 2023-01-24 06:24:31.711901: step: 318/464, loss: 0.002324760193005204 2023-01-24 06:24:32.435895: step: 320/464, loss: 0.061712756752967834 2023-01-24 06:24:33.126333: step: 322/464, loss: 0.0064103505574166775 2023-01-24 06:24:33.900427: step: 324/464, loss: 0.025882260873913765 2023-01-24 06:24:34.682857: step: 326/464, loss: 0.06493687629699707 2023-01-24 06:24:35.499648: step: 328/464, loss: 0.059337176382541656 2023-01-24 06:24:36.320531: step: 330/464, loss: 0.05586615949869156 2023-01-24 06:24:37.103884: step: 332/464, loss: 0.0006606185343116522 2023-01-24 06:24:37.808247: step: 334/464, loss: 0.019652051851153374 2023-01-24 06:24:38.610123: step: 336/464, loss: 0.06849510222673416 2023-01-24 06:24:39.394046: step: 338/464, loss: 0.027316365391016006 2023-01-24 06:24:40.049773: step: 340/464, loss: 0.008591653779149055 2023-01-24 06:24:40.842800: step: 342/464, loss: 0.02025693655014038 2023-01-24 06:24:41.519524: step: 344/464, loss: 0.02047627419233322 2023-01-24 06:24:42.220322: step: 346/464, loss: 0.003447972470894456 2023-01-24 06:24:42.947548: step: 348/464, loss: 0.020870212465524673 2023-01-24 06:24:43.673961: step: 350/464, loss: 0.049200639128685 2023-01-24 06:24:44.467221: step: 352/464, loss: 0.013158795423805714 2023-01-24 06:24:45.309395: step: 354/464, loss: 0.0025647184811532497 2023-01-24 06:24:46.027454: step: 356/464, loss: 0.028932079672813416 2023-01-24 06:24:46.712468: step: 358/464, loss: 0.0025970793794840574 2023-01-24 06:24:47.426271: step: 360/464, loss: 0.03821602091193199 2023-01-24 06:24:48.107119: step: 362/464, loss: 0.0049066864885389805 2023-01-24 06:24:48.821536: step: 364/464, loss: 0.0007283754530362785 2023-01-24 06:24:49.561220: step: 366/464, loss: 0.011494730599224567 2023-01-24 06:24:50.244719: step: 368/464, loss: 0.01839861087501049 2023-01-24 06:24:50.928725: step: 370/464, loss: 0.07493963837623596 2023-01-24 06:24:51.763661: step: 372/464, loss: 0.0021905205212533474 2023-01-24 06:24:52.494156: step: 374/464, loss: 0.0001082076778402552 2023-01-24 06:24:53.193613: step: 376/464, loss: 0.001402409397996962 2023-01-24 06:24:53.906439: step: 378/464, loss: 0.013940723612904549 2023-01-24 06:24:54.618338: step: 380/464, loss: 0.0044783600606024265 2023-01-24 06:24:55.324231: step: 382/464, loss: 0.006135037634521723 2023-01-24 06:24:56.097356: step: 384/464, loss: 0.01858883909881115 2023-01-24 06:24:56.873562: step: 386/464, loss: 0.011167095974087715 2023-01-24 06:24:57.634179: step: 388/464, loss: 0.0011094283545389771 2023-01-24 06:24:58.396754: step: 390/464, loss: 0.003803882747888565 2023-01-24 06:24:59.221206: step: 392/464, loss: 0.0013371568638831377 2023-01-24 06:24:59.991637: step: 394/464, loss: 0.05424215644598007 2023-01-24 06:25:00.746199: step: 396/464, loss: 0.0014253269182518125 2023-01-24 06:25:01.553760: step: 398/464, loss: 0.0028973803855478764 2023-01-24 06:25:02.230650: step: 400/464, loss: 0.0002119986165780574 2023-01-24 06:25:02.994070: step: 402/464, loss: 0.005772217642515898 2023-01-24 06:25:03.782411: step: 404/464, loss: 0.19733327627182007 2023-01-24 06:25:04.459598: step: 406/464, loss: 0.005514085758477449 2023-01-24 06:25:05.167074: step: 408/464, loss: 0.005787344183772802 2023-01-24 06:25:05.860568: step: 410/464, loss: 0.007726243697106838 2023-01-24 06:25:06.664177: step: 412/464, loss: 0.811470091342926 2023-01-24 06:25:07.382044: step: 414/464, loss: 0.0013876872835680842 2023-01-24 06:25:08.109414: step: 416/464, loss: 0.002277060877531767 2023-01-24 06:25:08.829059: step: 418/464, loss: 0.00730202067643404 2023-01-24 06:25:09.610487: step: 420/464, loss: 0.007765179965645075 2023-01-24 06:25:10.329753: step: 422/464, loss: 0.00024765662965364754 2023-01-24 06:25:11.120231: step: 424/464, loss: 0.009965931065380573 2023-01-24 06:25:11.885639: step: 426/464, loss: 0.0009719174704514444 2023-01-24 06:25:12.508954: step: 428/464, loss: 0.015123143792152405 2023-01-24 06:25:13.265529: step: 430/464, loss: 0.008549573831260204 2023-01-24 06:25:14.042028: step: 432/464, loss: 0.009291200898587704 2023-01-24 06:25:14.737927: step: 434/464, loss: 0.0007830564863979816 2023-01-24 06:25:15.497383: step: 436/464, loss: 0.00584413344040513 2023-01-24 06:25:16.197541: step: 438/464, loss: 0.01247811783105135 2023-01-24 06:25:16.986716: step: 440/464, loss: 0.0721527636051178 2023-01-24 06:25:17.694507: step: 442/464, loss: 0.011251426301896572 2023-01-24 06:25:18.514664: step: 444/464, loss: 0.04375053942203522 2023-01-24 06:25:19.255795: step: 446/464, loss: 0.0027558563742786646 2023-01-24 06:25:19.989387: step: 448/464, loss: 0.00021523504983633757 2023-01-24 06:25:20.737723: step: 450/464, loss: 0.014090361073613167 2023-01-24 06:25:21.565922: step: 452/464, loss: 0.022461047396063805 2023-01-24 06:25:22.315336: step: 454/464, loss: 0.015601426362991333 2023-01-24 06:25:23.035949: step: 456/464, loss: 0.006117715500295162 2023-01-24 06:25:23.729543: step: 458/464, loss: 0.00012083905312465504 2023-01-24 06:25:24.488386: step: 460/464, loss: 0.0008911213371902704 2023-01-24 06:25:25.266876: step: 462/464, loss: 0.011780163273215294 2023-01-24 06:25:25.967464: step: 464/464, loss: 0.00481388159096241 2023-01-24 06:25:26.652862: step: 466/464, loss: 0.0182182714343071 2023-01-24 06:25:27.320212: step: 468/464, loss: 0.007232617121189833 2023-01-24 06:25:28.066487: step: 470/464, loss: 0.00524465087801218 2023-01-24 06:25:28.751105: step: 472/464, loss: 0.03622736409306526 2023-01-24 06:25:29.493643: step: 474/464, loss: 0.003495218697935343 2023-01-24 06:25:30.252350: step: 476/464, loss: 0.0007282923324964941 2023-01-24 06:25:30.990312: step: 478/464, loss: 0.018887333571910858 2023-01-24 06:25:31.715699: step: 480/464, loss: 0.03536510467529297 2023-01-24 06:25:32.419364: step: 482/464, loss: 0.0029719527810811996 2023-01-24 06:25:33.147844: step: 484/464, loss: 0.013268392533063889 2023-01-24 06:25:33.798336: step: 486/464, loss: 0.0023780071642249823 2023-01-24 06:25:34.557117: step: 488/464, loss: 0.00029961683321744204 2023-01-24 06:25:35.254152: step: 490/464, loss: 0.00504639558494091 2023-01-24 06:25:36.025984: step: 492/464, loss: 0.0035301174502819777 2023-01-24 06:25:36.759840: step: 494/464, loss: 0.027179397642612457 2023-01-24 06:25:37.552660: step: 496/464, loss: 0.01423187181353569 2023-01-24 06:25:38.285479: step: 498/464, loss: 0.006988744717091322 2023-01-24 06:25:39.117273: step: 500/464, loss: 0.0017694245325401425 2023-01-24 06:25:39.896687: step: 502/464, loss: 0.0043494729325175285 2023-01-24 06:25:40.588035: step: 504/464, loss: 0.02056262083351612 2023-01-24 06:25:41.271740: step: 506/464, loss: 0.0001409807737218216 2023-01-24 06:25:42.044933: step: 508/464, loss: 0.007760221604257822 2023-01-24 06:25:42.737017: step: 510/464, loss: 0.04806235432624817 2023-01-24 06:25:43.512361: step: 512/464, loss: 0.00937352143228054 2023-01-24 06:25:44.189433: step: 514/464, loss: 0.0003969741228502244 2023-01-24 06:25:44.897325: step: 516/464, loss: 0.002457494381815195 2023-01-24 06:25:45.653959: step: 518/464, loss: 5.364713433664292e-05 2023-01-24 06:25:46.344793: step: 520/464, loss: 0.016665572300553322 2023-01-24 06:25:47.169979: step: 522/464, loss: 0.0065728141926229 2023-01-24 06:25:47.928997: step: 524/464, loss: 0.03412323817610741 2023-01-24 06:25:48.682132: step: 526/464, loss: 0.02081715129315853 2023-01-24 06:25:49.381599: step: 528/464, loss: 0.01984614133834839 2023-01-24 06:25:50.056346: step: 530/464, loss: 0.0027193299029022455 2023-01-24 06:25:50.744019: step: 532/464, loss: 0.06476839631795883 2023-01-24 06:25:51.448719: step: 534/464, loss: 0.023456497117877007 2023-01-24 06:25:52.219493: step: 536/464, loss: 0.014758966863155365 2023-01-24 06:25:53.009176: step: 538/464, loss: 0.018996067345142365 2023-01-24 06:25:53.683439: step: 540/464, loss: 0.001212551025673747 2023-01-24 06:25:54.347962: step: 542/464, loss: 0.015158602967858315 2023-01-24 06:25:55.212997: step: 544/464, loss: 0.051817577332258224 2023-01-24 06:25:55.891314: step: 546/464, loss: 0.08023293316364288 2023-01-24 06:25:56.569513: step: 548/464, loss: 0.006792579777538776 2023-01-24 06:25:57.335295: step: 550/464, loss: 0.0006085671484470367 2023-01-24 06:25:58.094790: step: 552/464, loss: 0.0005508697358891368 2023-01-24 06:25:58.919410: step: 554/464, loss: 0.09090903401374817 2023-01-24 06:25:59.727269: step: 556/464, loss: 0.01320156641304493 2023-01-24 06:26:00.540994: step: 558/464, loss: 0.027111440896987915 2023-01-24 06:26:01.374779: step: 560/464, loss: 0.0351184718310833 2023-01-24 06:26:02.140309: step: 562/464, loss: 0.025485580787062645 2023-01-24 06:26:02.834003: step: 564/464, loss: 0.010311855003237724 2023-01-24 06:26:03.572376: step: 566/464, loss: 0.04128720611333847 2023-01-24 06:26:04.303417: step: 568/464, loss: 0.03193189948797226 2023-01-24 06:26:05.103485: step: 570/464, loss: 0.29782480001449585 2023-01-24 06:26:05.867840: step: 572/464, loss: 0.03099055029451847 2023-01-24 06:26:06.604828: step: 574/464, loss: 0.009467075578868389 2023-01-24 06:26:07.351797: step: 576/464, loss: 0.11119036376476288 2023-01-24 06:26:08.093285: step: 578/464, loss: 0.04529016092419624 2023-01-24 06:26:08.891356: step: 580/464, loss: 0.0063427952118217945 2023-01-24 06:26:09.635178: step: 582/464, loss: 0.006485740188509226 2023-01-24 06:26:10.354874: step: 584/464, loss: 0.007228881120681763 2023-01-24 06:26:11.109074: step: 586/464, loss: 0.021861482411623 2023-01-24 06:26:11.878065: step: 588/464, loss: 0.003522490616887808 2023-01-24 06:26:12.686061: step: 590/464, loss: 0.055823516100645065 2023-01-24 06:26:13.452960: step: 592/464, loss: 0.026966070756316185 2023-01-24 06:26:14.214205: step: 594/464, loss: 1.141110897064209 2023-01-24 06:26:14.984275: step: 596/464, loss: 0.01313886046409607 2023-01-24 06:26:15.720735: step: 598/464, loss: 0.009779131039977074 2023-01-24 06:26:16.500064: step: 600/464, loss: 0.0019635711796581745 2023-01-24 06:26:17.254653: step: 602/464, loss: 0.009792122058570385 2023-01-24 06:26:17.934810: step: 604/464, loss: 0.0413961298763752 2023-01-24 06:26:18.683998: step: 606/464, loss: 0.11280696839094162 2023-01-24 06:26:19.443834: step: 608/464, loss: 0.044669028371572495 2023-01-24 06:26:20.197946: step: 610/464, loss: 0.009040174074470997 2023-01-24 06:26:21.042640: step: 612/464, loss: 0.03160270303487778 2023-01-24 06:26:21.739293: step: 614/464, loss: 0.014951037243008614 2023-01-24 06:26:22.386343: step: 616/464, loss: 0.01582406647503376 2023-01-24 06:26:23.204903: step: 618/464, loss: 0.010089858435094357 2023-01-24 06:26:23.920441: step: 620/464, loss: 0.05757782980799675 2023-01-24 06:26:24.643335: step: 622/464, loss: 0.002146498067304492 2023-01-24 06:26:25.365049: step: 624/464, loss: 0.008466691710054874 2023-01-24 06:26:26.275106: step: 626/464, loss: 0.016295647248625755 2023-01-24 06:26:26.970385: step: 628/464, loss: 0.04091161862015724 2023-01-24 06:26:27.709356: step: 630/464, loss: 0.0015703781973570585 2023-01-24 06:26:28.404108: step: 632/464, loss: 0.011625193059444427 2023-01-24 06:26:29.134717: step: 634/464, loss: 0.016939258202910423 2023-01-24 06:26:29.845455: step: 636/464, loss: 0.0007383174379356205 2023-01-24 06:26:30.690579: step: 638/464, loss: 0.00040101975901052356 2023-01-24 06:26:31.461211: step: 640/464, loss: 0.0008818531641736627 2023-01-24 06:26:32.257879: step: 642/464, loss: 0.024727830663323402 2023-01-24 06:26:32.941193: step: 644/464, loss: 0.03569096326828003 2023-01-24 06:26:33.719801: step: 646/464, loss: 0.0017474403139203787 2023-01-24 06:26:34.430279: step: 648/464, loss: 0.01313767209649086 2023-01-24 06:26:35.253978: step: 650/464, loss: 0.030891088768839836 2023-01-24 06:26:35.896607: step: 652/464, loss: 0.01417345367372036 2023-01-24 06:26:36.605188: step: 654/464, loss: 0.02216888591647148 2023-01-24 06:26:37.343957: step: 656/464, loss: 0.0041503203101456165 2023-01-24 06:26:38.021059: step: 658/464, loss: 0.024956727400422096 2023-01-24 06:26:38.744949: step: 660/464, loss: 0.03885156288743019 2023-01-24 06:26:39.460723: step: 662/464, loss: 0.034349534660577774 2023-01-24 06:26:40.158330: step: 664/464, loss: 0.005326046142727137 2023-01-24 06:26:40.868517: step: 666/464, loss: 0.0045426227152347565 2023-01-24 06:26:41.604886: step: 668/464, loss: 0.007938658818602562 2023-01-24 06:26:42.280182: step: 670/464, loss: 0.0032222664449363947 2023-01-24 06:26:42.972227: step: 672/464, loss: 0.02680307626724243 2023-01-24 06:26:43.652234: step: 674/464, loss: 0.008386366069316864 2023-01-24 06:26:44.386738: step: 676/464, loss: 0.010037817060947418 2023-01-24 06:26:45.140073: step: 678/464, loss: 0.40986937284469604 2023-01-24 06:26:45.819120: step: 680/464, loss: 0.0359659306704998 2023-01-24 06:26:46.602414: step: 682/464, loss: 0.0025637580547481775 2023-01-24 06:26:47.309284: step: 684/464, loss: 0.0163812804967165 2023-01-24 06:26:48.017313: step: 686/464, loss: 0.0011752874124795198 2023-01-24 06:26:48.745013: step: 688/464, loss: 0.011594103649258614 2023-01-24 06:26:49.478121: step: 690/464, loss: 0.003088256809860468 2023-01-24 06:26:50.147936: step: 692/464, loss: 0.0018205085070803761 2023-01-24 06:26:50.894888: step: 694/464, loss: 0.005875764414668083 2023-01-24 06:26:51.739855: step: 696/464, loss: 0.0025446880608797073 2023-01-24 06:26:52.372381: step: 698/464, loss: 6.435057002818212e-05 2023-01-24 06:26:53.031616: step: 700/464, loss: 0.006112277507781982 2023-01-24 06:26:53.748796: step: 702/464, loss: 0.03258458524942398 2023-01-24 06:26:54.461864: step: 704/464, loss: 0.0015139655442908406 2023-01-24 06:26:55.202194: step: 706/464, loss: 0.0433700829744339 2023-01-24 06:26:55.928726: step: 708/464, loss: 0.04399729147553444 2023-01-24 06:26:56.669998: step: 710/464, loss: 0.01889374852180481 2023-01-24 06:26:57.409106: step: 712/464, loss: 0.0007756176055409014 2023-01-24 06:26:58.178868: step: 714/464, loss: 0.011662695556879044 2023-01-24 06:26:58.863287: step: 716/464, loss: 0.0009218156919814646 2023-01-24 06:26:59.576400: step: 718/464, loss: 0.01887955144047737 2023-01-24 06:27:00.355803: step: 720/464, loss: 0.1750974804162979 2023-01-24 06:27:01.015178: step: 722/464, loss: 0.0001728379138512537 2023-01-24 06:27:01.696359: step: 724/464, loss: 0.009151075035333633 2023-01-24 06:27:02.439582: step: 726/464, loss: 0.0006785045843571424 2023-01-24 06:27:03.197338: step: 728/464, loss: 0.0013461982598528266 2023-01-24 06:27:03.997466: step: 730/464, loss: 0.010900832712650299 2023-01-24 06:27:04.755758: step: 732/464, loss: 0.01641363464295864 2023-01-24 06:27:05.535316: step: 734/464, loss: 0.010521006770431995 2023-01-24 06:27:06.297548: step: 736/464, loss: 0.0008467060979455709 2023-01-24 06:27:06.956389: step: 738/464, loss: 0.015393110923469067 2023-01-24 06:27:07.643241: step: 740/464, loss: 0.0008853751933202147 2023-01-24 06:27:08.428686: step: 742/464, loss: 0.0014580088900402188 2023-01-24 06:27:09.156923: step: 744/464, loss: 0.22433538734912872 2023-01-24 06:27:09.902418: step: 746/464, loss: 0.0008134776726365089 2023-01-24 06:27:10.617219: step: 748/464, loss: 0.007577679585665464 2023-01-24 06:27:11.431783: step: 750/464, loss: 0.0440809428691864 2023-01-24 06:27:12.137755: step: 752/464, loss: 0.010974534787237644 2023-01-24 06:27:12.892392: step: 754/464, loss: 0.036295562982559204 2023-01-24 06:27:13.578973: step: 756/464, loss: 0.00034559096093289554 2023-01-24 06:27:14.272239: step: 758/464, loss: 0.006786028388887644 2023-01-24 06:27:15.045871: step: 760/464, loss: 0.43013352155685425 2023-01-24 06:27:15.784273: step: 762/464, loss: 0.0012140395119786263 2023-01-24 06:27:16.555262: step: 764/464, loss: 0.020293693989515305 2023-01-24 06:27:17.401618: step: 766/464, loss: 0.0025471991393715143 2023-01-24 06:27:18.090945: step: 768/464, loss: 0.0038294994737952948 2023-01-24 06:27:18.780308: step: 770/464, loss: 0.00017575285164639354 2023-01-24 06:27:19.515907: step: 772/464, loss: 0.0003553438582457602 2023-01-24 06:27:20.253474: step: 774/464, loss: 0.06080649048089981 2023-01-24 06:27:20.993600: step: 776/464, loss: 8.546032040612772e-05 2023-01-24 06:27:21.750458: step: 778/464, loss: 0.06992447376251221 2023-01-24 06:27:22.415399: step: 780/464, loss: 0.004821065813302994 2023-01-24 06:27:23.135335: step: 782/464, loss: 0.03713594749569893 2023-01-24 06:27:23.922497: step: 784/464, loss: 0.006182161625474691 2023-01-24 06:27:24.657486: step: 786/464, loss: 0.0001664771552896127 2023-01-24 06:27:25.342279: step: 788/464, loss: 0.029279792681336403 2023-01-24 06:27:26.035300: step: 790/464, loss: 0.08578027039766312 2023-01-24 06:27:26.671254: step: 792/464, loss: 0.0020249513909220695 2023-01-24 06:27:27.389532: step: 794/464, loss: 0.013059469871222973 2023-01-24 06:27:28.075970: step: 796/464, loss: 0.0051474785432219505 2023-01-24 06:27:28.794426: step: 798/464, loss: 0.03641364723443985 2023-01-24 06:27:29.500923: step: 800/464, loss: 0.0007527320994995534 2023-01-24 06:27:30.237949: step: 802/464, loss: 0.012699021026492119 2023-01-24 06:27:30.982497: step: 804/464, loss: 0.00019273671205155551 2023-01-24 06:27:31.779068: step: 806/464, loss: 0.01091878954321146 2023-01-24 06:27:32.480259: step: 808/464, loss: 0.019247926771640778 2023-01-24 06:27:33.177311: step: 810/464, loss: 0.012168015353381634 2023-01-24 06:27:34.004162: step: 812/464, loss: 0.01567387580871582 2023-01-24 06:27:34.754992: step: 814/464, loss: 0.0009724770206958055 2023-01-24 06:27:35.431555: step: 816/464, loss: 0.0037853403482586145 2023-01-24 06:27:36.161883: step: 818/464, loss: 0.0049398913979530334 2023-01-24 06:27:36.863159: step: 820/464, loss: 0.006806234363466501 2023-01-24 06:27:37.609116: step: 822/464, loss: 0.005008451174944639 2023-01-24 06:27:38.429779: step: 824/464, loss: 0.16014228761196136 2023-01-24 06:27:39.246687: step: 826/464, loss: 0.0002244079951196909 2023-01-24 06:27:40.016571: step: 828/464, loss: 0.005260965786874294 2023-01-24 06:27:40.786839: step: 830/464, loss: 0.012760087847709656 2023-01-24 06:27:41.514327: step: 832/464, loss: 0.0007453107973560691 2023-01-24 06:27:42.261521: step: 834/464, loss: 0.02452758140861988 2023-01-24 06:27:42.985478: step: 836/464, loss: 0.058134328573942184 2023-01-24 06:27:43.686692: step: 838/464, loss: 0.02890494465827942 2023-01-24 06:27:44.433125: step: 840/464, loss: 0.008339189924299717 2023-01-24 06:27:45.201281: step: 842/464, loss: 0.00544707989320159 2023-01-24 06:27:45.888078: step: 844/464, loss: 9.86708837444894e-05 2023-01-24 06:27:46.623718: step: 846/464, loss: 0.00672255689278245 2023-01-24 06:27:47.254947: step: 848/464, loss: 0.006709387991577387 2023-01-24 06:27:48.008493: step: 850/464, loss: 0.022981248795986176 2023-01-24 06:27:48.744188: step: 852/464, loss: 0.2067977786064148 2023-01-24 06:27:49.519272: step: 854/464, loss: 0.2143319994211197 2023-01-24 06:27:50.222469: step: 856/464, loss: 0.004001646768301725 2023-01-24 06:27:51.019511: step: 858/464, loss: 0.08282311260700226 2023-01-24 06:27:51.668048: step: 860/464, loss: 0.006189326755702496 2023-01-24 06:27:52.346956: step: 862/464, loss: 0.00050537777133286 2023-01-24 06:27:53.053350: step: 864/464, loss: 0.002543987240642309 2023-01-24 06:27:53.736337: step: 866/464, loss: 0.0006222067750059068 2023-01-24 06:27:54.455012: step: 868/464, loss: 0.0864039734005928 2023-01-24 06:27:55.167993: step: 870/464, loss: 0.009794293902814388 2023-01-24 06:27:55.924605: step: 872/464, loss: 0.0035909328144043684 2023-01-24 06:27:56.642408: step: 874/464, loss: 0.15241585671901703 2023-01-24 06:27:57.389097: step: 876/464, loss: 0.0015292530879378319 2023-01-24 06:27:58.066758: step: 878/464, loss: 0.017026687040925026 2023-01-24 06:27:58.805471: step: 880/464, loss: 0.011374955996870995 2023-01-24 06:27:59.550524: step: 882/464, loss: 0.01445054356008768 2023-01-24 06:28:00.220405: step: 884/464, loss: 0.01035305205732584 2023-01-24 06:28:00.959080: step: 886/464, loss: 0.003956570755690336 2023-01-24 06:28:01.686036: step: 888/464, loss: 0.1221495121717453 2023-01-24 06:28:02.498814: step: 890/464, loss: 0.013920644298195839 2023-01-24 06:28:03.281689: step: 892/464, loss: 0.002684567356482148 2023-01-24 06:28:04.044326: step: 894/464, loss: 0.07813320308923721 2023-01-24 06:28:04.789068: step: 896/464, loss: 0.01626390591263771 2023-01-24 06:28:05.542211: step: 898/464, loss: 0.5564307570457458 2023-01-24 06:28:06.261587: step: 900/464, loss: 0.0021616253070533276 2023-01-24 06:28:06.953041: step: 902/464, loss: 0.06270702183246613 2023-01-24 06:28:07.611487: step: 904/464, loss: 0.0008776888716965914 2023-01-24 06:28:08.340257: step: 906/464, loss: 0.004115441348403692 2023-01-24 06:28:09.093597: step: 908/464, loss: 0.004244999028742313 2023-01-24 06:28:09.844070: step: 910/464, loss: 0.035991959273815155 2023-01-24 06:28:10.573529: step: 912/464, loss: 0.005663156975060701 2023-01-24 06:28:11.289628: step: 914/464, loss: 0.03923436626791954 2023-01-24 06:28:12.017573: step: 916/464, loss: 0.0590827614068985 2023-01-24 06:28:12.718579: step: 918/464, loss: 0.019325165078043938 2023-01-24 06:28:13.419474: step: 920/464, loss: 0.009129509329795837 2023-01-24 06:28:14.156608: step: 922/464, loss: 0.13903824985027313 2023-01-24 06:28:14.903388: step: 924/464, loss: 0.0009124143980443478 2023-01-24 06:28:15.660069: step: 926/464, loss: 0.005049745086580515 2023-01-24 06:28:16.407737: step: 928/464, loss: 0.0027745855040848255 2023-01-24 06:28:17.096222: step: 930/464, loss: 0.04152434319257736 ================================================== Loss: 0.034 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35167552241471695, 'r': 0.33966383474210804, 'f1': 0.34556532994033}, 'combined': 0.25462708521919053, 'epoch': 34} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.311644194325731, 'r': 0.26694745033242634, 'f1': 0.28756938975608204}, 'combined': 0.17859572626956674, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3302688678053464, 'r': 0.3221218179353853, 'f1': 0.32614447272228253}, 'combined': 0.24031697990062922, 'epoch': 34} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3018503282010279, 'r': 0.26184247065509547, 'f1': 0.28042662905964144}, 'combined': 0.17415969594230366, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34856663533834586, 'r': 0.3307083826739524, 'f1': 0.3394027607968314}, 'combined': 0.2500862447976652, 'epoch': 34} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3126564180424864, 'r': 0.2685203195429054, 'f1': 0.28891246276894655}, 'combined': 0.17942984529860892, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.43, 'r': 0.30714285714285716, 'f1': 0.3583333333333334}, 'combined': 0.23888888888888893, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27586206896551724, 'r': 0.34782608695652173, 'f1': 0.3076923076923077}, 'combined': 0.15384615384615385, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5197368421052632, 'r': 0.21506352087114336, 'f1': 0.3042362002567394}, 'combined': 0.20282413350449294, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:30:56.410832: step: 2/464, loss: 0.00206485646776855 2023-01-24 06:30:57.169188: step: 4/464, loss: 0.0008505339501425624 2023-01-24 06:30:57.811063: step: 6/464, loss: 0.021608030423521996 2023-01-24 06:30:58.515066: step: 8/464, loss: 0.010798071511089802 2023-01-24 06:30:59.225722: step: 10/464, loss: 0.024295778945088387 2023-01-24 06:30:59.902258: step: 12/464, loss: 0.003499263199046254 2023-01-24 06:31:00.675367: step: 14/464, loss: 0.03136338293552399 2023-01-24 06:31:01.381572: step: 16/464, loss: 0.00037076350417919457 2023-01-24 06:31:02.076174: step: 18/464, loss: 0.06392989307641983 2023-01-24 06:31:02.822740: step: 20/464, loss: 0.021260160952806473 2023-01-24 06:31:03.492283: step: 22/464, loss: 0.0022600204683840275 2023-01-24 06:31:04.163183: step: 24/464, loss: 0.0006521603208966553 2023-01-24 06:31:04.869353: step: 26/464, loss: 0.0018833683570846915 2023-01-24 06:31:05.808817: step: 28/464, loss: 0.004336627200245857 2023-01-24 06:31:06.586479: step: 30/464, loss: 0.11404263228178024 2023-01-24 06:31:07.284342: step: 32/464, loss: 0.0011401284718886018 2023-01-24 06:31:07.963771: step: 34/464, loss: 3.995103179477155e-05 2023-01-24 06:31:08.719434: step: 36/464, loss: 0.004084045998752117 2023-01-24 06:31:09.598835: step: 38/464, loss: 0.00012493340182118118 2023-01-24 06:31:10.327050: step: 40/464, loss: 0.03752082213759422 2023-01-24 06:31:11.045000: step: 42/464, loss: 0.002574938116595149 2023-01-24 06:31:11.716973: step: 44/464, loss: 0.00015688323765061796 2023-01-24 06:31:12.410612: step: 46/464, loss: 0.00012026409240206704 2023-01-24 06:31:13.218755: step: 48/464, loss: 0.0011076332302764058 2023-01-24 06:31:13.944528: step: 50/464, loss: 0.0035281500313431025 2023-01-24 06:31:14.733298: step: 52/464, loss: 0.0036910888738930225 2023-01-24 06:31:15.477531: step: 54/464, loss: 0.001720602624118328 2023-01-24 06:31:16.150164: step: 56/464, loss: 0.389969140291214 2023-01-24 06:31:16.926059: step: 58/464, loss: 0.003773375414311886 2023-01-24 06:31:17.679403: step: 60/464, loss: 0.0032824124209582806 2023-01-24 06:31:18.462954: step: 62/464, loss: 0.0008738187025301158 2023-01-24 06:31:19.176768: step: 64/464, loss: 8.97213103598915e-05 2023-01-24 06:31:19.875901: step: 66/464, loss: 0.00034595775650814176 2023-01-24 06:31:20.593582: step: 68/464, loss: 0.0012479635188356042 2023-01-24 06:31:21.328977: step: 70/464, loss: 0.012148449197411537 2023-01-24 06:31:22.007202: step: 72/464, loss: 0.0016343119787052274 2023-01-24 06:31:22.746635: step: 74/464, loss: 0.012638113461434841 2023-01-24 06:31:23.496098: step: 76/464, loss: 9.724879055283964e-05 2023-01-24 06:31:24.189105: step: 78/464, loss: 0.07232683897018433 2023-01-24 06:31:24.927971: step: 80/464, loss: 0.0045994920656085014 2023-01-24 06:31:25.590639: step: 82/464, loss: 0.0067751286551356316 2023-01-24 06:31:26.334894: step: 84/464, loss: 0.004917151760309935 2023-01-24 06:31:27.076230: step: 86/464, loss: 0.0031492365524172783 2023-01-24 06:31:27.765498: step: 88/464, loss: 0.14419697225093842 2023-01-24 06:31:28.507026: step: 90/464, loss: 0.010157251730561256 2023-01-24 06:31:29.206565: step: 92/464, loss: 0.028758902102708817 2023-01-24 06:31:29.917093: step: 94/464, loss: 0.015158405527472496 2023-01-24 06:31:30.681111: step: 96/464, loss: 0.08756659179925919 2023-01-24 06:31:31.401186: step: 98/464, loss: 0.0077830045484006405 2023-01-24 06:31:32.138078: step: 100/464, loss: 0.11185619980096817 2023-01-24 06:31:32.890051: step: 102/464, loss: 0.18486009538173676 2023-01-24 06:31:33.680694: step: 104/464, loss: 0.011137974448502064 2023-01-24 06:31:34.420394: step: 106/464, loss: 0.0011875235941261053 2023-01-24 06:31:35.082970: step: 108/464, loss: 0.003935209009796381 2023-01-24 06:31:35.849533: step: 110/464, loss: 0.0009032696834765375 2023-01-24 06:31:36.516949: step: 112/464, loss: 0.0010450384579598904 2023-01-24 06:31:37.189144: step: 114/464, loss: 0.006690158508718014 2023-01-24 06:31:37.896327: step: 116/464, loss: 0.001348541583865881 2023-01-24 06:31:38.687418: step: 118/464, loss: 0.024258917197585106 2023-01-24 06:31:39.419936: step: 120/464, loss: 0.0007817841251380742 2023-01-24 06:31:40.105796: step: 122/464, loss: 0.000877066922839731 2023-01-24 06:31:40.775681: step: 124/464, loss: 0.002365712309256196 2023-01-24 06:31:41.600035: step: 126/464, loss: 0.004180488176643848 2023-01-24 06:31:42.371771: step: 128/464, loss: 0.013827762566506863 2023-01-24 06:31:43.059354: step: 130/464, loss: 0.000862470711581409 2023-01-24 06:31:43.745045: step: 132/464, loss: 0.10408184677362442 2023-01-24 06:31:44.490827: step: 134/464, loss: 0.017195992171764374 2023-01-24 06:31:45.196908: step: 136/464, loss: 0.01907893270254135 2023-01-24 06:31:45.931525: step: 138/464, loss: 0.15260766446590424 2023-01-24 06:31:46.662936: step: 140/464, loss: 6.318865780485794e-05 2023-01-24 06:31:47.360317: step: 142/464, loss: 0.00449691666290164 2023-01-24 06:31:48.058779: step: 144/464, loss: 0.00045366917038336396 2023-01-24 06:31:48.823155: step: 146/464, loss: 1.1969776096520945e-05 2023-01-24 06:31:49.540098: step: 148/464, loss: 0.0013482400681823492 2023-01-24 06:31:50.375210: step: 150/464, loss: 0.03972357511520386 2023-01-24 06:31:51.062225: step: 152/464, loss: 0.004650574177503586 2023-01-24 06:31:51.729669: step: 154/464, loss: 0.0019915217999368906 2023-01-24 06:31:52.445295: step: 156/464, loss: 0.00637625390663743 2023-01-24 06:31:53.218955: step: 158/464, loss: 0.007026030216366053 2023-01-24 06:31:53.999193: step: 160/464, loss: 0.0008738907054066658 2023-01-24 06:31:54.820344: step: 162/464, loss: 0.003001864068210125 2023-01-24 06:31:55.573482: step: 164/464, loss: 0.0010084062814712524 2023-01-24 06:31:56.339130: step: 166/464, loss: 0.014043791219592094 2023-01-24 06:31:56.991972: step: 168/464, loss: 0.009049778804183006 2023-01-24 06:31:57.756387: step: 170/464, loss: 0.025290049612522125 2023-01-24 06:31:58.501612: step: 172/464, loss: 0.017468422651290894 2023-01-24 06:31:59.246608: step: 174/464, loss: 0.3379783034324646 2023-01-24 06:31:59.961823: step: 176/464, loss: 0.009229674004018307 2023-01-24 06:32:00.806303: step: 178/464, loss: 0.08187200129032135 2023-01-24 06:32:01.543929: step: 180/464, loss: 0.0059152403846383095 2023-01-24 06:32:02.244569: step: 182/464, loss: 0.0016771698137745261 2023-01-24 06:32:02.968701: step: 184/464, loss: 0.0017269115196540952 2023-01-24 06:32:03.649417: step: 186/464, loss: 0.06236407905817032 2023-01-24 06:32:04.354484: step: 188/464, loss: 0.06140582635998726 2023-01-24 06:32:05.131506: step: 190/464, loss: 0.0031047058291733265 2023-01-24 06:32:05.815221: step: 192/464, loss: 0.006041200365871191 2023-01-24 06:32:06.518952: step: 194/464, loss: 0.00044914300087839365 2023-01-24 06:32:07.213172: step: 196/464, loss: 0.0009885894833132625 2023-01-24 06:32:07.858976: step: 198/464, loss: 0.009171642363071442 2023-01-24 06:32:08.524617: step: 200/464, loss: 0.0006789682083763182 2023-01-24 06:32:09.385990: step: 202/464, loss: 0.17516861855983734 2023-01-24 06:32:10.182087: step: 204/464, loss: 0.006859530229121447 2023-01-24 06:32:10.969346: step: 206/464, loss: 0.0049885171465575695 2023-01-24 06:32:11.649670: step: 208/464, loss: 0.0389438234269619 2023-01-24 06:32:12.484997: step: 210/464, loss: 0.004782094154506922 2023-01-24 06:32:13.263802: step: 212/464, loss: 0.00029921767418272793 2023-01-24 06:32:13.967665: step: 214/464, loss: 0.0008353688172064722 2023-01-24 06:32:14.742003: step: 216/464, loss: 0.0006163629586808383 2023-01-24 06:32:15.446223: step: 218/464, loss: 0.009952775202691555 2023-01-24 06:32:16.174279: step: 220/464, loss: 0.004259512759745121 2023-01-24 06:32:16.908219: step: 222/464, loss: 0.1413278877735138 2023-01-24 06:32:17.666647: step: 224/464, loss: 0.006144764833152294 2023-01-24 06:32:18.408806: step: 226/464, loss: 0.35328689217567444 2023-01-24 06:32:19.195274: step: 228/464, loss: 0.001853810390457511 2023-01-24 06:32:19.982114: step: 230/464, loss: 0.016084423288702965 2023-01-24 06:32:20.652671: step: 232/464, loss: 0.005866493564099073 2023-01-24 06:32:21.281615: step: 234/464, loss: 0.0011610686779022217 2023-01-24 06:32:22.012286: step: 236/464, loss: 0.0002700358454603702 2023-01-24 06:32:22.721081: step: 238/464, loss: 0.0011712840059772134 2023-01-24 06:32:23.425029: step: 240/464, loss: 0.03383626788854599 2023-01-24 06:32:24.142389: step: 242/464, loss: 0.0025248515885323286 2023-01-24 06:32:24.904708: step: 244/464, loss: 0.007510875351727009 2023-01-24 06:32:25.632917: step: 246/464, loss: 0.02027537114918232 2023-01-24 06:32:26.373082: step: 248/464, loss: 0.0294361412525177 2023-01-24 06:32:27.039078: step: 250/464, loss: 0.14149460196495056 2023-01-24 06:32:27.814798: step: 252/464, loss: 0.002963610924780369 2023-01-24 06:32:28.648504: step: 254/464, loss: 0.004055642522871494 2023-01-24 06:32:29.398052: step: 256/464, loss: 0.004812467377632856 2023-01-24 06:32:30.138418: step: 258/464, loss: 0.009900541044771671 2023-01-24 06:32:30.828126: step: 260/464, loss: 0.007619260810315609 2023-01-24 06:32:31.521590: step: 262/464, loss: 0.0006539294845424592 2023-01-24 06:32:32.342016: step: 264/464, loss: 0.005958850029855967 2023-01-24 06:32:33.095651: step: 266/464, loss: 0.006719652563333511 2023-01-24 06:32:33.806733: step: 268/464, loss: 0.004569544456899166 2023-01-24 06:32:34.493167: step: 270/464, loss: 0.0017016378697007895 2023-01-24 06:32:35.212201: step: 272/464, loss: 0.005704191979020834 2023-01-24 06:32:36.008004: step: 274/464, loss: 0.002754301531240344 2023-01-24 06:32:36.713451: step: 276/464, loss: 0.0028462556656450033 2023-01-24 06:32:37.466509: step: 278/464, loss: 0.0028985131066292524 2023-01-24 06:32:38.211335: step: 280/464, loss: 0.019997483119368553 2023-01-24 06:32:38.904602: step: 282/464, loss: 0.0039010359905660152 2023-01-24 06:32:39.630644: step: 284/464, loss: 8.691203402122483e-05 2023-01-24 06:32:40.269925: step: 286/464, loss: 0.0056920647621154785 2023-01-24 06:32:41.006128: step: 288/464, loss: 0.009940583258867264 2023-01-24 06:32:41.819981: step: 290/464, loss: 0.26928314566612244 2023-01-24 06:32:42.522540: step: 292/464, loss: 0.00306792207993567 2023-01-24 06:32:43.224676: step: 294/464, loss: 0.0039005670696496964 2023-01-24 06:32:43.912069: step: 296/464, loss: 0.0011833877069875598 2023-01-24 06:32:44.728596: step: 298/464, loss: 0.0015655200695618987 2023-01-24 06:32:45.428092: step: 300/464, loss: 0.1414550095796585 2023-01-24 06:32:46.106065: step: 302/464, loss: 0.00019204954151064157 2023-01-24 06:32:46.878551: step: 304/464, loss: 0.2077431082725525 2023-01-24 06:32:47.588442: step: 306/464, loss: 0.010231670923531055 2023-01-24 06:32:48.290872: step: 308/464, loss: 0.000895325792953372 2023-01-24 06:32:49.078877: step: 310/464, loss: 2.6799411898537073e-06 2023-01-24 06:32:49.823784: step: 312/464, loss: 0.04067198559641838 2023-01-24 06:32:50.512178: step: 314/464, loss: 0.002498132176697254 2023-01-24 06:32:51.259266: step: 316/464, loss: 0.015202999114990234 2023-01-24 06:32:52.005623: step: 318/464, loss: 0.0015499275177717209 2023-01-24 06:32:52.717824: step: 320/464, loss: 0.019704598933458328 2023-01-24 06:32:53.366659: step: 322/464, loss: 0.006374839227646589 2023-01-24 06:32:54.093505: step: 324/464, loss: 0.007268859073519707 2023-01-24 06:32:54.789298: step: 326/464, loss: 0.0003769928589463234 2023-01-24 06:32:55.566501: step: 328/464, loss: 0.30836057662963867 2023-01-24 06:32:56.358771: step: 330/464, loss: 0.005678210873156786 2023-01-24 06:32:57.047993: step: 332/464, loss: 0.025175319984555244 2023-01-24 06:32:57.700643: step: 334/464, loss: 0.00025786174228414893 2023-01-24 06:32:58.375828: step: 336/464, loss: 0.002200294751673937 2023-01-24 06:32:59.148516: step: 338/464, loss: 0.009255696088075638 2023-01-24 06:32:59.849798: step: 340/464, loss: 0.0015450895298272371 2023-01-24 06:33:00.679895: step: 342/464, loss: 0.0051421429961919785 2023-01-24 06:33:01.373175: step: 344/464, loss: 0.010502039454877377 2023-01-24 06:33:02.139696: step: 346/464, loss: 0.033631693571805954 2023-01-24 06:33:02.847433: step: 348/464, loss: 0.06362398713827133 2023-01-24 06:33:03.698456: step: 350/464, loss: 0.012094840407371521 2023-01-24 06:33:04.435671: step: 352/464, loss: 0.06655651330947876 2023-01-24 06:33:05.107465: step: 354/464, loss: 0.048538390547037125 2023-01-24 06:33:05.824315: step: 356/464, loss: 0.0003698621585499495 2023-01-24 06:33:06.592361: step: 358/464, loss: 0.3159915804862976 2023-01-24 06:33:07.289365: step: 360/464, loss: 0.0037905359640717506 2023-01-24 06:33:08.037121: step: 362/464, loss: 0.0015841275453567505 2023-01-24 06:33:08.767066: step: 364/464, loss: 0.0017936860676854849 2023-01-24 06:33:09.523309: step: 366/464, loss: 0.0035860745701938868 2023-01-24 06:33:10.232855: step: 368/464, loss: 0.0042134555988013744 2023-01-24 06:33:10.964850: step: 370/464, loss: 0.02218709886074066 2023-01-24 06:33:11.685495: step: 372/464, loss: 1.334579348564148 2023-01-24 06:33:12.459363: step: 374/464, loss: 0.009962356649339199 2023-01-24 06:33:13.243654: step: 376/464, loss: 0.02284334972500801 2023-01-24 06:33:13.917472: step: 378/464, loss: 0.003422256326302886 2023-01-24 06:33:14.638719: step: 380/464, loss: 0.007594207767397165 2023-01-24 06:33:15.387018: step: 382/464, loss: 0.008857734501361847 2023-01-24 06:33:16.114369: step: 384/464, loss: 0.01347698550671339 2023-01-24 06:33:16.825764: step: 386/464, loss: 0.03327897563576698 2023-01-24 06:33:17.625342: step: 388/464, loss: 9.90178159554489e-05 2023-01-24 06:33:18.360353: step: 390/464, loss: 0.0015657603507861495 2023-01-24 06:33:19.119466: step: 392/464, loss: 0.01993647962808609 2023-01-24 06:33:19.815395: step: 394/464, loss: 3.237658893340267e-05 2023-01-24 06:33:20.578807: step: 396/464, loss: 0.00817184243351221 2023-01-24 06:33:21.276496: step: 398/464, loss: 0.0779031291604042 2023-01-24 06:33:22.002462: step: 400/464, loss: 0.0011360801290720701 2023-01-24 06:33:22.724628: step: 402/464, loss: 0.005148367024958134 2023-01-24 06:33:23.513675: step: 404/464, loss: 0.008539358153939247 2023-01-24 06:33:24.250218: step: 406/464, loss: 0.09758865833282471 2023-01-24 06:33:24.944317: step: 408/464, loss: 0.0017494140192866325 2023-01-24 06:33:25.602325: step: 410/464, loss: 0.1648520678281784 2023-01-24 06:33:26.240686: step: 412/464, loss: 0.014005640521645546 2023-01-24 06:33:26.965325: step: 414/464, loss: 0.002599345985800028 2023-01-24 06:33:27.700057: step: 416/464, loss: 0.00011764621740439907 2023-01-24 06:33:28.459510: step: 418/464, loss: 0.0003326554433442652 2023-01-24 06:33:29.200248: step: 420/464, loss: 0.03486809507012367 2023-01-24 06:33:29.961270: step: 422/464, loss: 0.016408352181315422 2023-01-24 06:33:30.639837: step: 424/464, loss: 0.003920829389244318 2023-01-24 06:33:31.362309: step: 426/464, loss: 0.02062283083796501 2023-01-24 06:33:32.023019: step: 428/464, loss: 0.009015236981213093 2023-01-24 06:33:32.690932: step: 430/464, loss: 0.003978882450610399 2023-01-24 06:33:33.439074: step: 432/464, loss: 0.00880517903715372 2023-01-24 06:33:34.192518: step: 434/464, loss: 0.011073540896177292 2023-01-24 06:33:34.924975: step: 436/464, loss: 0.03089657984673977 2023-01-24 06:33:35.612560: step: 438/464, loss: 0.0012413024669513106 2023-01-24 06:33:36.322757: step: 440/464, loss: 0.01738613471388817 2023-01-24 06:33:37.033528: step: 442/464, loss: 0.0015985603677108884 2023-01-24 06:33:37.844278: step: 444/464, loss: 0.0005925216828472912 2023-01-24 06:33:38.592150: step: 446/464, loss: 0.17484651505947113 2023-01-24 06:33:39.357238: step: 448/464, loss: 0.0009653688175603747 2023-01-24 06:33:40.220219: step: 450/464, loss: 0.007736225612461567 2023-01-24 06:33:40.894439: step: 452/464, loss: 0.0001321414310950786 2023-01-24 06:33:41.595113: step: 454/464, loss: 0.11280196905136108 2023-01-24 06:33:42.346950: step: 456/464, loss: 0.07258933782577515 2023-01-24 06:33:42.982538: step: 458/464, loss: 0.0474422313272953 2023-01-24 06:33:43.765216: step: 460/464, loss: 0.024412963539361954 2023-01-24 06:33:44.560493: step: 462/464, loss: 0.028620421886444092 2023-01-24 06:33:45.279818: step: 464/464, loss: 0.010022708214819431 2023-01-24 06:33:46.120344: step: 466/464, loss: 0.04155333712697029 2023-01-24 06:33:46.848586: step: 468/464, loss: 0.004220477305352688 2023-01-24 06:33:47.635293: step: 470/464, loss: 0.017753012478351593 2023-01-24 06:33:48.357685: step: 472/464, loss: 0.0043309456668794155 2023-01-24 06:33:49.139421: step: 474/464, loss: 0.0008676517754793167 2023-01-24 06:33:49.878836: step: 476/464, loss: 0.011539888568222523 2023-01-24 06:33:50.547150: step: 478/464, loss: 0.0016300047282129526 2023-01-24 06:33:51.295981: step: 480/464, loss: 0.0011122695868834853 2023-01-24 06:33:52.103764: step: 482/464, loss: 0.009475810453295708 2023-01-24 06:33:52.871795: step: 484/464, loss: 0.013957416638731956 2023-01-24 06:33:53.599758: step: 486/464, loss: 0.022875269874930382 2023-01-24 06:33:54.383383: step: 488/464, loss: 0.0001867082464741543 2023-01-24 06:33:55.073073: step: 490/464, loss: 0.5683570504188538 2023-01-24 06:33:55.770453: step: 492/464, loss: 0.0029813034925609827 2023-01-24 06:33:56.560887: step: 494/464, loss: 0.041646551340818405 2023-01-24 06:33:57.269271: step: 496/464, loss: 0.14465898275375366 2023-01-24 06:33:57.966055: step: 498/464, loss: 0.008280070498585701 2023-01-24 06:33:58.775544: step: 500/464, loss: 0.008318918757140636 2023-01-24 06:33:59.517176: step: 502/464, loss: 0.009130694903433323 2023-01-24 06:34:00.223099: step: 504/464, loss: 6.741621473338455e-05 2023-01-24 06:34:01.036471: step: 506/464, loss: 0.0035972786135971546 2023-01-24 06:34:01.738903: step: 508/464, loss: 0.022480538114905357 2023-01-24 06:34:02.466900: step: 510/464, loss: 0.07424750179052353 2023-01-24 06:34:03.246614: step: 512/464, loss: 0.15917329490184784 2023-01-24 06:34:03.958275: step: 514/464, loss: 0.0038534412160515785 2023-01-24 06:34:04.677393: step: 516/464, loss: 0.013208887539803982 2023-01-24 06:34:05.319854: step: 518/464, loss: 0.038714699447155 2023-01-24 06:34:06.050237: step: 520/464, loss: 0.0006091871182434261 2023-01-24 06:34:06.803270: step: 522/464, loss: 0.01111649814993143 2023-01-24 06:34:07.518692: step: 524/464, loss: 0.0007218350074253976 2023-01-24 06:34:08.247907: step: 526/464, loss: 0.031764477491378784 2023-01-24 06:34:08.923630: step: 528/464, loss: 0.0005717077874578536 2023-01-24 06:34:09.706944: step: 530/464, loss: 0.048164039850234985 2023-01-24 06:34:10.319634: step: 532/464, loss: 0.001963170012459159 2023-01-24 06:34:11.029298: step: 534/464, loss: 0.008921549655497074 2023-01-24 06:34:11.702141: step: 536/464, loss: 0.00019145449914503843 2023-01-24 06:34:12.524166: step: 538/464, loss: 0.01980605162680149 2023-01-24 06:34:13.223761: step: 540/464, loss: 0.000405008060624823 2023-01-24 06:34:13.948373: step: 542/464, loss: 0.003804337466135621 2023-01-24 06:34:14.618226: step: 544/464, loss: 0.00018756087229121476 2023-01-24 06:34:15.373093: step: 546/464, loss: 0.012008019722998142 2023-01-24 06:34:16.101786: step: 548/464, loss: 0.0032865030225366354 2023-01-24 06:34:16.780044: step: 550/464, loss: 0.05710751563310623 2023-01-24 06:34:17.545004: step: 552/464, loss: 0.04066885635256767 2023-01-24 06:34:18.440018: step: 554/464, loss: 0.0033198478631675243 2023-01-24 06:34:19.131420: step: 556/464, loss: 0.00019103632075712085 2023-01-24 06:34:19.879769: step: 558/464, loss: 0.007582390680909157 2023-01-24 06:34:20.606911: step: 560/464, loss: 0.013868369162082672 2023-01-24 06:34:21.359680: step: 562/464, loss: 0.09755430370569229 2023-01-24 06:34:22.186617: step: 564/464, loss: 0.017535768449306488 2023-01-24 06:34:22.897189: step: 566/464, loss: 0.0010590796591714025 2023-01-24 06:34:23.633851: step: 568/464, loss: 0.032498300075531006 2023-01-24 06:34:24.418190: step: 570/464, loss: 0.055956095457077026 2023-01-24 06:34:25.231034: step: 572/464, loss: 0.010634099133312702 2023-01-24 06:34:25.993788: step: 574/464, loss: 0.015558160841464996 2023-01-24 06:34:26.678656: step: 576/464, loss: 0.010191281326115131 2023-01-24 06:34:27.402363: step: 578/464, loss: 0.0025147004052996635 2023-01-24 06:34:28.093878: step: 580/464, loss: 0.00410130200907588 2023-01-24 06:34:28.875702: step: 582/464, loss: 0.09718958288431168 2023-01-24 06:34:29.577069: step: 584/464, loss: 0.04974781349301338 2023-01-24 06:34:30.389757: step: 586/464, loss: 0.018057700246572495 2023-01-24 06:34:31.106462: step: 588/464, loss: 0.7479690313339233 2023-01-24 06:34:31.845550: step: 590/464, loss: 0.01689426228404045 2023-01-24 06:34:32.552832: step: 592/464, loss: 0.00427657924592495 2023-01-24 06:34:33.368142: step: 594/464, loss: 0.0009600772173143923 2023-01-24 06:34:34.048210: step: 596/464, loss: 0.005607732106000185 2023-01-24 06:34:34.671606: step: 598/464, loss: 0.0006944190827198327 2023-01-24 06:34:35.370745: step: 600/464, loss: 0.022029733285307884 2023-01-24 06:34:36.199548: step: 602/464, loss: 0.07722200453281403 2023-01-24 06:34:36.974071: step: 604/464, loss: 0.011648462153971195 2023-01-24 06:34:37.717279: step: 606/464, loss: 0.051798105239868164 2023-01-24 06:34:38.440727: step: 608/464, loss: 0.10283471643924713 2023-01-24 06:34:39.201426: step: 610/464, loss: 0.03725217655301094 2023-01-24 06:34:39.945020: step: 612/464, loss: 0.017023751512169838 2023-01-24 06:34:40.648628: step: 614/464, loss: 0.00312893302179873 2023-01-24 06:34:41.358705: step: 616/464, loss: 0.22248615324497223 2023-01-24 06:34:42.275000: step: 618/464, loss: 0.0312360692769289 2023-01-24 06:34:42.966985: step: 620/464, loss: 0.0007553557516075671 2023-01-24 06:34:43.705323: step: 622/464, loss: 0.0033220225013792515 2023-01-24 06:34:44.386387: step: 624/464, loss: 0.021085388958454132 2023-01-24 06:34:45.081502: step: 626/464, loss: 0.009414348751306534 2023-01-24 06:34:45.779245: step: 628/464, loss: 0.0015694421017542481 2023-01-24 06:34:46.496762: step: 630/464, loss: 0.0058471160009503365 2023-01-24 06:34:47.140297: step: 632/464, loss: 0.00471019372344017 2023-01-24 06:34:47.803837: step: 634/464, loss: 0.0036878592800348997 2023-01-24 06:34:48.485341: step: 636/464, loss: 0.0015276795020326972 2023-01-24 06:34:49.167420: step: 638/464, loss: 0.004554287530481815 2023-01-24 06:34:49.864770: step: 640/464, loss: 0.1497090756893158 2023-01-24 06:34:50.580403: step: 642/464, loss: 0.02090488001704216 2023-01-24 06:34:51.325707: step: 644/464, loss: 0.0030281986109912395 2023-01-24 06:34:52.031753: step: 646/464, loss: 0.04247652739286423 2023-01-24 06:34:52.789547: step: 648/464, loss: 0.006784561090171337 2023-01-24 06:34:53.506815: step: 650/464, loss: 0.00543493265286088 2023-01-24 06:34:54.149618: step: 652/464, loss: 0.004617628175765276 2023-01-24 06:34:54.864820: step: 654/464, loss: 0.005777742248028517 2023-01-24 06:34:55.565861: step: 656/464, loss: 0.02436165325343609 2023-01-24 06:34:56.268028: step: 658/464, loss: 0.005392088089138269 2023-01-24 06:34:56.975223: step: 660/464, loss: 1.3804744867229601e-06 2023-01-24 06:34:57.677773: step: 662/464, loss: 8.009708108147606e-05 2023-01-24 06:34:58.367595: step: 664/464, loss: 0.008129194378852844 2023-01-24 06:34:59.140307: step: 666/464, loss: 0.028133362531661987 2023-01-24 06:34:59.828129: step: 668/464, loss: 0.001996027771383524 2023-01-24 06:35:00.551867: step: 670/464, loss: 0.0009242978994734585 2023-01-24 06:35:01.285610: step: 672/464, loss: 0.09116656333208084 2023-01-24 06:35:02.147605: step: 674/464, loss: 0.008167148567736149 2023-01-24 06:35:02.855510: step: 676/464, loss: 0.00443984242156148 2023-01-24 06:35:03.640850: step: 678/464, loss: 0.0008679300080984831 2023-01-24 06:35:04.303312: step: 680/464, loss: 0.00025120421196334064 2023-01-24 06:35:05.013572: step: 682/464, loss: 0.00019450573017820716 2023-01-24 06:35:05.796503: step: 684/464, loss: 0.028216423466801643 2023-01-24 06:35:06.611319: step: 686/464, loss: 0.013654750771820545 2023-01-24 06:35:07.392455: step: 688/464, loss: 0.03024298883974552 2023-01-24 06:35:08.203934: step: 690/464, loss: 0.8305896520614624 2023-01-24 06:35:08.906358: step: 692/464, loss: 0.012249198742210865 2023-01-24 06:35:09.680540: step: 694/464, loss: 0.0007778406143188477 2023-01-24 06:35:10.441505: step: 696/464, loss: 0.00023288335069082677 2023-01-24 06:35:11.221607: step: 698/464, loss: 0.01683759316802025 2023-01-24 06:35:11.938075: step: 700/464, loss: 0.0005743993096984923 2023-01-24 06:35:12.590989: step: 702/464, loss: 0.0045036799274384975 2023-01-24 06:35:13.335403: step: 704/464, loss: 0.0013135545887053013 2023-01-24 06:35:14.028178: step: 706/464, loss: 0.0026535000652074814 2023-01-24 06:35:14.746478: step: 708/464, loss: 0.0027274100575596094 2023-01-24 06:35:15.501462: step: 710/464, loss: 0.002692986512556672 2023-01-24 06:35:16.265733: step: 712/464, loss: 0.0008930904441513121 2023-01-24 06:35:17.017810: step: 714/464, loss: 0.015236853621900082 2023-01-24 06:35:17.767783: step: 716/464, loss: 0.02877141535282135 2023-01-24 06:35:18.565396: step: 718/464, loss: 0.004633526783436537 2023-01-24 06:35:19.341498: step: 720/464, loss: 0.14433535933494568 2023-01-24 06:35:20.036272: step: 722/464, loss: 0.0099337138235569 2023-01-24 06:35:20.727869: step: 724/464, loss: 0.03656081482768059 2023-01-24 06:35:21.393707: step: 726/464, loss: 0.0005096670356579125 2023-01-24 06:35:22.153589: step: 728/464, loss: 0.0008024107082746923 2023-01-24 06:35:22.992023: step: 730/464, loss: 0.0012601092457771301 2023-01-24 06:35:23.779645: step: 732/464, loss: 0.019106421619653702 2023-01-24 06:35:24.417978: step: 734/464, loss: 0.007535758428275585 2023-01-24 06:35:25.111997: step: 736/464, loss: 0.002676013857126236 2023-01-24 06:35:25.742318: step: 738/464, loss: 2.7624613721854985e-05 2023-01-24 06:35:26.517372: step: 740/464, loss: 0.0021485830657184124 2023-01-24 06:35:27.188314: step: 742/464, loss: 0.007855184376239777 2023-01-24 06:35:27.938159: step: 744/464, loss: 0.0017511058831587434 2023-01-24 06:35:28.655341: step: 746/464, loss: 0.0011880536330863833 2023-01-24 06:35:29.341521: step: 748/464, loss: 0.01896476000547409 2023-01-24 06:35:30.031951: step: 750/464, loss: 0.003101673675701022 2023-01-24 06:35:30.829275: step: 752/464, loss: 0.12919455766677856 2023-01-24 06:35:31.554816: step: 754/464, loss: 0.030971094965934753 2023-01-24 06:35:32.300874: step: 756/464, loss: 0.009586269967257977 2023-01-24 06:35:33.028022: step: 758/464, loss: 0.011086368933320045 2023-01-24 06:35:33.736384: step: 760/464, loss: 0.015494439750909805 2023-01-24 06:35:34.430068: step: 762/464, loss: 0.004420689307153225 2023-01-24 06:35:35.316454: step: 764/464, loss: 0.02614568918943405 2023-01-24 06:35:36.225474: step: 766/464, loss: 0.0012165152002125978 2023-01-24 06:35:36.921336: step: 768/464, loss: 0.0009944859193637967 2023-01-24 06:35:37.645491: step: 770/464, loss: 0.008068513125181198 2023-01-24 06:35:38.343600: step: 772/464, loss: 0.0010496607283130288 2023-01-24 06:35:39.103612: step: 774/464, loss: 0.37772834300994873 2023-01-24 06:35:39.816571: step: 776/464, loss: 0.012436087243258953 2023-01-24 06:35:40.648503: step: 778/464, loss: 0.0002699866017792374 2023-01-24 06:35:41.297512: step: 780/464, loss: 3.1445986678591e-05 2023-01-24 06:35:42.135231: step: 782/464, loss: 0.006023346912115812 2023-01-24 06:35:42.830416: step: 784/464, loss: 0.006444807164371014 2023-01-24 06:35:43.616862: step: 786/464, loss: 0.04231029376387596 2023-01-24 06:35:44.359966: step: 788/464, loss: 0.0011864429106935859 2023-01-24 06:35:45.066095: step: 790/464, loss: 0.05042388290166855 2023-01-24 06:35:45.889857: step: 792/464, loss: 0.019405636936426163 2023-01-24 06:35:46.635971: step: 794/464, loss: 0.03940063714981079 2023-01-24 06:35:47.412650: step: 796/464, loss: 0.021257543936371803 2023-01-24 06:35:48.120382: step: 798/464, loss: 0.10211928188800812 2023-01-24 06:35:48.881218: step: 800/464, loss: 0.09499234706163406 2023-01-24 06:35:49.645557: step: 802/464, loss: 0.000208395067602396 2023-01-24 06:35:50.339506: step: 804/464, loss: 0.002811941783875227 2023-01-24 06:35:51.069917: step: 806/464, loss: 0.0010172117035835981 2023-01-24 06:35:51.783919: step: 808/464, loss: 0.02037464641034603 2023-01-24 06:35:52.532765: step: 810/464, loss: 0.016519617289304733 2023-01-24 06:35:53.186849: step: 812/464, loss: 0.004088247194886208 2023-01-24 06:35:53.969967: step: 814/464, loss: 0.0362807996571064 2023-01-24 06:35:54.738872: step: 816/464, loss: 0.022565070539712906 2023-01-24 06:35:55.457892: step: 818/464, loss: 0.0001856112648965791 2023-01-24 06:35:56.164129: step: 820/464, loss: 0.0030008764006197453 2023-01-24 06:35:56.860656: step: 822/464, loss: 0.00351099600084126 2023-01-24 06:35:57.574724: step: 824/464, loss: 0.19104412198066711 2023-01-24 06:35:58.315223: step: 826/464, loss: 0.026989376172423363 2023-01-24 06:35:59.069631: step: 828/464, loss: 0.003458687337115407 2023-01-24 06:35:59.810990: step: 830/464, loss: 0.026364948600530624 2023-01-24 06:36:00.480735: step: 832/464, loss: 0.007490881253033876 2023-01-24 06:36:01.198042: step: 834/464, loss: 0.0003644174139481038 2023-01-24 06:36:01.864897: step: 836/464, loss: 0.012916052713990211 2023-01-24 06:36:02.549311: step: 838/464, loss: 0.045503780245780945 2023-01-24 06:36:03.295709: step: 840/464, loss: 0.012989304959774017 2023-01-24 06:36:04.021246: step: 842/464, loss: 0.021143782883882523 2023-01-24 06:36:04.780787: step: 844/464, loss: 0.0042087603360414505 2023-01-24 06:36:05.511838: step: 846/464, loss: 0.0018162406049668789 2023-01-24 06:36:06.325145: step: 848/464, loss: 0.0046364981681108475 2023-01-24 06:36:07.092205: step: 850/464, loss: 0.005943454336374998 2023-01-24 06:36:07.885640: step: 852/464, loss: 0.030280398204922676 2023-01-24 06:36:08.708130: step: 854/464, loss: 0.05643213912844658 2023-01-24 06:36:09.492066: step: 856/464, loss: 0.00012839706323575228 2023-01-24 06:36:10.181629: step: 858/464, loss: 7.575419294880703e-05 2023-01-24 06:36:10.943457: step: 860/464, loss: 0.3615998923778534 2023-01-24 06:36:11.663067: step: 862/464, loss: 0.006690404377877712 2023-01-24 06:36:12.447348: step: 864/464, loss: 1.8872606754302979 2023-01-24 06:36:13.180129: step: 866/464, loss: 0.00042271538404747844 2023-01-24 06:36:13.946361: step: 868/464, loss: 0.006247181911021471 2023-01-24 06:36:14.696926: step: 870/464, loss: 0.06341104209423065 2023-01-24 06:36:15.422878: step: 872/464, loss: 0.2562231719493866 2023-01-24 06:36:16.105954: step: 874/464, loss: 0.002793203806504607 2023-01-24 06:36:16.821812: step: 876/464, loss: 0.008603735826909542 2023-01-24 06:36:17.589847: step: 878/464, loss: 1.310736894607544 2023-01-24 06:36:18.373936: step: 880/464, loss: 0.007669101003557444 2023-01-24 06:36:19.125740: step: 882/464, loss: 0.007660615257918835 2023-01-24 06:36:19.870717: step: 884/464, loss: 0.0009276815690100193 2023-01-24 06:36:20.653760: step: 886/464, loss: 0.006773849483579397 2023-01-24 06:36:21.391885: step: 888/464, loss: 0.009978453628718853 2023-01-24 06:36:22.129210: step: 890/464, loss: 0.01657634600996971 2023-01-24 06:36:22.934608: step: 892/464, loss: 0.011022034101188183 2023-01-24 06:36:23.670297: step: 894/464, loss: 0.00021206651581451297 2023-01-24 06:36:24.535197: step: 896/464, loss: 0.023269668221473694 2023-01-24 06:36:25.185073: step: 898/464, loss: 0.013293704949319363 2023-01-24 06:36:25.900638: step: 900/464, loss: 0.0005787741974927485 2023-01-24 06:36:26.602377: step: 902/464, loss: 0.056386083364486694 2023-01-24 06:36:27.358555: step: 904/464, loss: 0.007417832501232624 2023-01-24 06:36:28.070374: step: 906/464, loss: 0.00048767743282951415 2023-01-24 06:36:28.858934: step: 908/464, loss: 0.16414684057235718 2023-01-24 06:36:29.575343: step: 910/464, loss: 0.0373823456466198 2023-01-24 06:36:30.296814: step: 912/464, loss: 0.030118806287646294 2023-01-24 06:36:31.130533: step: 914/464, loss: 0.012832955457270145 2023-01-24 06:36:31.879471: step: 916/464, loss: 0.0005492176860570908 2023-01-24 06:36:32.603008: step: 918/464, loss: 0.00959594827145338 2023-01-24 06:36:33.343868: step: 920/464, loss: 0.00033759669167920947 2023-01-24 06:36:34.124678: step: 922/464, loss: 0.03864290192723274 2023-01-24 06:36:34.782131: step: 924/464, loss: 0.0018645358504727483 2023-01-24 06:36:35.601779: step: 926/464, loss: 0.006714235059916973 2023-01-24 06:36:36.466171: step: 928/464, loss: 0.008423232473433018 2023-01-24 06:36:37.114269: step: 930/464, loss: 0.02039126679301262 ================================================== Loss: 0.040 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3508092889919573, 'r': 0.34348689396555965, 'f1': 0.3471094786574304}, 'combined': 0.2557648790107382, 'epoch': 35} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3146845094558001, 'r': 0.260942732421394, 'f1': 0.28530489794157804}, 'combined': 0.17718935766898006, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3243203671328671, 'r': 0.32001250646886314, 'f1': 0.32215203612051746}, 'combined': 0.23737518450985495, 'epoch': 35} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3149461693126098, 'r': 0.2602269928081485, 'f1': 0.2849837229347615}, 'combined': 0.17698989108579924, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34181461621998993, 'r': 0.3333827566168403, 'f1': 0.3375460379194521}, 'combined': 0.24871813320380678, 'epoch': 35} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3312280322377851, 'r': 0.268836953187078, 'f1': 0.29678897172748736}, 'combined': 0.18432157191496584, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29276315789473684, 'r': 0.31785714285714284, 'f1': 0.3047945205479452}, 'combined': 0.20319634703196346, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24166666666666667, 'r': 0.31521739130434784, 'f1': 0.27358490566037735}, 'combined': 0.13679245283018868, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4326923076923077, 'r': 0.1939655172413793, 'f1': 0.26785714285714285}, 'combined': 0.17857142857142855, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:39:16.406974: step: 2/464, loss: 0.25695911049842834 2023-01-24 06:39:17.209564: step: 4/464, loss: 0.007319469936192036 2023-01-24 06:39:17.959805: step: 6/464, loss: 0.002419233787804842 2023-01-24 06:39:18.696037: step: 8/464, loss: 0.005410829558968544 2023-01-24 06:39:19.457749: step: 10/464, loss: 0.01767798513174057 2023-01-24 06:39:20.229991: step: 12/464, loss: 0.0055710808373987675 2023-01-24 06:39:20.963191: step: 14/464, loss: 0.0014307210221886635 2023-01-24 06:39:21.621621: step: 16/464, loss: 0.01295340247452259 2023-01-24 06:39:22.289075: step: 18/464, loss: 0.003472167532891035 2023-01-24 06:39:22.991717: step: 20/464, loss: 0.012799971736967564 2023-01-24 06:39:23.699728: step: 22/464, loss: 0.0007676775567233562 2023-01-24 06:39:24.432324: step: 24/464, loss: 0.000440617382992059 2023-01-24 06:39:25.250343: step: 26/464, loss: 0.0155528225004673 2023-01-24 06:39:25.996557: step: 28/464, loss: 0.010228335857391357 2023-01-24 06:39:26.752190: step: 30/464, loss: 0.00015586575318593532 2023-01-24 06:39:27.405826: step: 32/464, loss: 0.0031636471394449472 2023-01-24 06:39:28.142632: step: 34/464, loss: 0.015593956224620342 2023-01-24 06:39:28.882548: step: 36/464, loss: 0.027952920645475388 2023-01-24 06:39:29.635349: step: 38/464, loss: 0.032278873026371 2023-01-24 06:39:30.382024: step: 40/464, loss: 0.004284294322133064 2023-01-24 06:39:31.119468: step: 42/464, loss: 0.009568164125084877 2023-01-24 06:39:31.892465: step: 44/464, loss: 0.023043537512421608 2023-01-24 06:39:32.667616: step: 46/464, loss: 0.0008782692020758986 2023-01-24 06:39:33.442841: step: 48/464, loss: 0.001116393250413239 2023-01-24 06:39:34.189302: step: 50/464, loss: 0.00017436177586205304 2023-01-24 06:39:34.929929: step: 52/464, loss: 0.013959270901978016 2023-01-24 06:39:35.658621: step: 54/464, loss: 0.00013729554484598339 2023-01-24 06:39:36.398588: step: 56/464, loss: 0.03546988219022751 2023-01-24 06:39:37.103651: step: 58/464, loss: 0.02767900936305523 2023-01-24 06:39:37.845416: step: 60/464, loss: 0.000629195652436465 2023-01-24 06:39:38.593577: step: 62/464, loss: 0.014760047197341919 2023-01-24 06:39:39.364095: step: 64/464, loss: 0.01793184131383896 2023-01-24 06:39:40.096919: step: 66/464, loss: 0.038921162486076355 2023-01-24 06:39:40.968319: step: 68/464, loss: 0.0007313909009099007 2023-01-24 06:39:41.740721: step: 70/464, loss: 0.00042105818283744156 2023-01-24 06:39:42.490938: step: 72/464, loss: 0.0014784769155085087 2023-01-24 06:39:43.205379: step: 74/464, loss: 0.10685709118843079 2023-01-24 06:39:43.918679: step: 76/464, loss: 0.004993189591914415 2023-01-24 06:39:44.708182: step: 78/464, loss: 0.005299483425915241 2023-01-24 06:39:45.522376: step: 80/464, loss: 1.9907414753106423e-05 2023-01-24 06:39:46.401586: step: 82/464, loss: 0.04119177907705307 2023-01-24 06:39:47.082320: step: 84/464, loss: 0.02733980491757393 2023-01-24 06:39:47.834903: step: 86/464, loss: 0.0033457595854997635 2023-01-24 06:39:48.697331: step: 88/464, loss: 0.0589422769844532 2023-01-24 06:39:49.475115: step: 90/464, loss: 0.006607845425605774 2023-01-24 06:39:50.211889: step: 92/464, loss: 0.004729326348751783 2023-01-24 06:39:50.991298: step: 94/464, loss: 0.22634100914001465 2023-01-24 06:39:51.712968: step: 96/464, loss: 0.005008341744542122 2023-01-24 06:39:52.442946: step: 98/464, loss: 7.832510164007545e-05 2023-01-24 06:39:53.215754: step: 100/464, loss: 0.01266252901405096 2023-01-24 06:39:53.936760: step: 102/464, loss: 0.0002192519896198064 2023-01-24 06:39:54.620169: step: 104/464, loss: 0.02024812437593937 2023-01-24 06:39:55.389155: step: 106/464, loss: 0.010026028379797935 2023-01-24 06:39:56.145404: step: 108/464, loss: 0.005464911926537752 2023-01-24 06:39:56.836200: step: 110/464, loss: 0.00029571220511570573 2023-01-24 06:39:57.610046: step: 112/464, loss: 0.0034745652228593826 2023-01-24 06:39:58.286085: step: 114/464, loss: 0.0015284789260476828 2023-01-24 06:39:59.092615: step: 116/464, loss: 0.014450756832957268 2023-01-24 06:39:59.839197: step: 118/464, loss: 0.06091814860701561 2023-01-24 06:40:00.597758: step: 120/464, loss: 0.004064726177603006 2023-01-24 06:40:01.295429: step: 122/464, loss: 0.002943573519587517 2023-01-24 06:40:02.036349: step: 124/464, loss: 0.0004433517169672996 2023-01-24 06:40:02.770932: step: 126/464, loss: 0.04658803343772888 2023-01-24 06:40:03.488401: step: 128/464, loss: 0.0030392904300242662 2023-01-24 06:40:04.177298: step: 130/464, loss: 0.001116839237511158 2023-01-24 06:40:04.846689: step: 132/464, loss: 0.0006100613973103464 2023-01-24 06:40:05.515612: step: 134/464, loss: 3.7895260902587324e-06 2023-01-24 06:40:06.220772: step: 136/464, loss: 0.012914041988551617 2023-01-24 06:40:06.944232: step: 138/464, loss: 0.0003613363951444626 2023-01-24 06:40:07.641165: step: 140/464, loss: 0.0018395596416667104 2023-01-24 06:40:08.336248: step: 142/464, loss: 0.02837635949254036 2023-01-24 06:40:09.036449: step: 144/464, loss: 0.016525769606232643 2023-01-24 06:40:09.730440: step: 146/464, loss: 0.004591218661516905 2023-01-24 06:40:10.505865: step: 148/464, loss: 0.29191404581069946 2023-01-24 06:40:11.394317: step: 150/464, loss: 0.006326992064714432 2023-01-24 06:40:12.180451: step: 152/464, loss: 0.023135047405958176 2023-01-24 06:40:12.920033: step: 154/464, loss: 0.0006658299244008958 2023-01-24 06:40:13.618975: step: 156/464, loss: 0.0024545418564230204 2023-01-24 06:40:14.408981: step: 158/464, loss: 0.0008211713866330683 2023-01-24 06:40:15.184847: step: 160/464, loss: 0.000625859247520566 2023-01-24 06:40:15.940144: step: 162/464, loss: 0.009707236662507057 2023-01-24 06:40:16.687359: step: 164/464, loss: 0.0004584605630952865 2023-01-24 06:40:17.468051: step: 166/464, loss: 0.09940136224031448 2023-01-24 06:40:18.230169: step: 168/464, loss: 0.06070875748991966 2023-01-24 06:40:18.962169: step: 170/464, loss: 0.005534209311008453 2023-01-24 06:40:19.768931: step: 172/464, loss: 0.011373227462172508 2023-01-24 06:40:20.516533: step: 174/464, loss: 0.03093552775681019 2023-01-24 06:40:21.308179: step: 176/464, loss: 0.026455167680978775 2023-01-24 06:40:22.096345: step: 178/464, loss: 0.016621779650449753 2023-01-24 06:40:22.820977: step: 180/464, loss: 0.01870632730424404 2023-01-24 06:40:23.581621: step: 182/464, loss: 0.00010793719411594793 2023-01-24 06:40:24.374733: step: 184/464, loss: 0.0004974519833922386 2023-01-24 06:40:25.124031: step: 186/464, loss: 0.028530167415738106 2023-01-24 06:40:25.804572: step: 188/464, loss: 0.01475331000983715 2023-01-24 06:40:26.578664: step: 190/464, loss: 0.026886673644185066 2023-01-24 06:40:27.284764: step: 192/464, loss: 0.006785533856600523 2023-01-24 06:40:28.062305: step: 194/464, loss: 0.04471007362008095 2023-01-24 06:40:28.822945: step: 196/464, loss: 0.026023143902420998 2023-01-24 06:40:29.496805: step: 198/464, loss: 0.009699026122689247 2023-01-24 06:40:30.190186: step: 200/464, loss: 0.011059535667300224 2023-01-24 06:40:30.939001: step: 202/464, loss: 0.002562645822763443 2023-01-24 06:40:31.672623: step: 204/464, loss: 0.0005993549129925668 2023-01-24 06:40:32.379579: step: 206/464, loss: 0.0010731748770922422 2023-01-24 06:40:33.097168: step: 208/464, loss: 0.008336545899510384 2023-01-24 06:40:33.869333: step: 210/464, loss: 0.04709343984723091 2023-01-24 06:40:34.741245: step: 212/464, loss: 0.00975609477609396 2023-01-24 06:40:35.454425: step: 214/464, loss: 0.020269447937607765 2023-01-24 06:40:36.249099: step: 216/464, loss: 0.05424981191754341 2023-01-24 06:40:36.999816: step: 218/464, loss: 0.11086882650852203 2023-01-24 06:40:37.732162: step: 220/464, loss: 0.16429883241653442 2023-01-24 06:40:38.408540: step: 222/464, loss: 0.0005940513801760972 2023-01-24 06:40:39.119007: step: 224/464, loss: 0.008422630839049816 2023-01-24 06:40:39.843567: step: 226/464, loss: 0.0005509871407411993 2023-01-24 06:40:40.521596: step: 228/464, loss: 0.009248113259673119 2023-01-24 06:40:41.251050: step: 230/464, loss: 0.06530479341745377 2023-01-24 06:40:41.955696: step: 232/464, loss: 0.014671806246042252 2023-01-24 06:40:42.682830: step: 234/464, loss: 0.006998498924076557 2023-01-24 06:40:43.412292: step: 236/464, loss: 0.0015183803625404835 2023-01-24 06:40:44.100480: step: 238/464, loss: 0.004076081793755293 2023-01-24 06:40:44.810471: step: 240/464, loss: 0.08578099310398102 2023-01-24 06:40:45.612861: step: 242/464, loss: 0.0002759688359219581 2023-01-24 06:40:46.411277: step: 244/464, loss: 0.06304151564836502 2023-01-24 06:40:47.115230: step: 246/464, loss: 0.0003053829132113606 2023-01-24 06:40:47.823013: step: 248/464, loss: 0.013026222586631775 2023-01-24 06:40:48.568356: step: 250/464, loss: 0.04093734547495842 2023-01-24 06:40:49.313932: step: 252/464, loss: 0.0015117143047973514 2023-01-24 06:40:50.049706: step: 254/464, loss: 0.006387445144355297 2023-01-24 06:40:50.738630: step: 256/464, loss: 0.00026626704493537545 2023-01-24 06:40:51.455876: step: 258/464, loss: 0.005855533294379711 2023-01-24 06:40:52.131121: step: 260/464, loss: 0.03732157498598099 2023-01-24 06:40:52.942204: step: 262/464, loss: 0.000551620963960886 2023-01-24 06:40:53.789599: step: 264/464, loss: 0.017255334183573723 2023-01-24 06:40:54.469776: step: 266/464, loss: 0.01849391497671604 2023-01-24 06:40:55.208118: step: 268/464, loss: 0.0038914477918297052 2023-01-24 06:40:55.982691: step: 270/464, loss: 0.018955865874886513 2023-01-24 06:40:56.807561: step: 272/464, loss: 0.01634865067899227 2023-01-24 06:40:57.418409: step: 274/464, loss: 0.0004928586422465742 2023-01-24 06:40:58.247744: step: 276/464, loss: 0.00017035173368640244 2023-01-24 06:40:58.951757: step: 278/464, loss: 0.02748502977192402 2023-01-24 06:40:59.676013: step: 280/464, loss: 0.004082882311195135 2023-01-24 06:41:00.392595: step: 282/464, loss: 0.0005174506222829223 2023-01-24 06:41:01.116678: step: 284/464, loss: 0.006498162169009447 2023-01-24 06:41:01.815506: step: 286/464, loss: 0.003868537489324808 2023-01-24 06:41:02.643901: step: 288/464, loss: 0.25979575514793396 2023-01-24 06:41:03.354637: step: 290/464, loss: 9.816534293349832e-05 2023-01-24 06:41:04.145017: step: 292/464, loss: 0.0020555928349494934 2023-01-24 06:41:04.946042: step: 294/464, loss: 0.04013385251164436 2023-01-24 06:41:05.671282: step: 296/464, loss: 0.12664145231246948 2023-01-24 06:41:06.395580: step: 298/464, loss: 0.0008692051051184535 2023-01-24 06:41:07.102983: step: 300/464, loss: 0.11506323516368866 2023-01-24 06:41:07.910612: step: 302/464, loss: 0.052584897726774216 2023-01-24 06:41:08.606056: step: 304/464, loss: 0.036980610340833664 2023-01-24 06:41:09.358656: step: 306/464, loss: 0.00859944149851799 2023-01-24 06:41:10.204230: step: 308/464, loss: 0.013367211446166039 2023-01-24 06:41:10.972334: step: 310/464, loss: 0.0016901890048757195 2023-01-24 06:41:11.817937: step: 312/464, loss: 0.004952017683535814 2023-01-24 06:41:12.638275: step: 314/464, loss: 0.005257464945316315 2023-01-24 06:41:13.365070: step: 316/464, loss: 0.06977822631597519 2023-01-24 06:41:14.173154: step: 318/464, loss: 0.03525172919034958 2023-01-24 06:41:14.918867: step: 320/464, loss: 0.0061018988490104675 2023-01-24 06:41:15.630518: step: 322/464, loss: 0.0011195632396265864 2023-01-24 06:41:16.368284: step: 324/464, loss: 0.001332466141320765 2023-01-24 06:41:17.121470: step: 326/464, loss: 0.1197722926735878 2023-01-24 06:41:17.852532: step: 328/464, loss: 0.01823013462126255 2023-01-24 06:41:18.556182: step: 330/464, loss: 0.00781966932117939 2023-01-24 06:41:19.276490: step: 332/464, loss: 0.01433896366506815 2023-01-24 06:41:19.975236: step: 334/464, loss: 0.00025075027951970696 2023-01-24 06:41:20.736387: step: 336/464, loss: 0.01363467425107956 2023-01-24 06:41:21.457903: step: 338/464, loss: 0.08750608563423157 2023-01-24 06:41:22.206255: step: 340/464, loss: 0.009655867703258991 2023-01-24 06:41:22.872762: step: 342/464, loss: 0.0014510346809402108 2023-01-24 06:41:23.631358: step: 344/464, loss: 0.04058638960123062 2023-01-24 06:41:24.342932: step: 346/464, loss: 2.510811827960424e-05 2023-01-24 06:41:25.115745: step: 348/464, loss: 0.00536707928404212 2023-01-24 06:41:25.942890: step: 350/464, loss: 0.006009410135447979 2023-01-24 06:41:26.720014: step: 352/464, loss: 0.0005397546919994056 2023-01-24 06:41:27.542010: step: 354/464, loss: 0.026657378301024437 2023-01-24 06:41:28.282357: step: 356/464, loss: 0.022915314882993698 2023-01-24 06:41:29.149604: step: 358/464, loss: 0.008625758811831474 2023-01-24 06:41:29.906988: step: 360/464, loss: 0.05162311717867851 2023-01-24 06:41:30.692889: step: 362/464, loss: 0.010607045143842697 2023-01-24 06:41:31.452169: step: 364/464, loss: 0.0035178023390471935 2023-01-24 06:41:32.198642: step: 366/464, loss: 0.0001674180821282789 2023-01-24 06:41:32.964659: step: 368/464, loss: 0.01395686436444521 2023-01-24 06:41:33.763192: step: 370/464, loss: 0.134748175740242 2023-01-24 06:41:34.484981: step: 372/464, loss: 0.010596811771392822 2023-01-24 06:41:35.165606: step: 374/464, loss: 0.006897300481796265 2023-01-24 06:41:35.946584: step: 376/464, loss: 0.010236544534564018 2023-01-24 06:41:36.646730: step: 378/464, loss: 0.014542913995683193 2023-01-24 06:41:37.382742: step: 380/464, loss: 0.029503915458917618 2023-01-24 06:41:38.223957: step: 382/464, loss: 0.0007390844402834773 2023-01-24 06:41:38.979604: step: 384/464, loss: 0.055452119559049606 2023-01-24 06:41:39.715821: step: 386/464, loss: 0.04913180321455002 2023-01-24 06:41:40.429595: step: 388/464, loss: 0.009322376921772957 2023-01-24 06:41:41.139867: step: 390/464, loss: 0.040665339678525925 2023-01-24 06:41:41.881239: step: 392/464, loss: 0.0012931600213050842 2023-01-24 06:41:42.589433: step: 394/464, loss: 0.012746023014187813 2023-01-24 06:41:43.428497: step: 396/464, loss: 0.022761639207601547 2023-01-24 06:41:44.112833: step: 398/464, loss: 0.002250086283311248 2023-01-24 06:41:44.818531: step: 400/464, loss: 0.10658414661884308 2023-01-24 06:41:45.450338: step: 402/464, loss: 1.0466003004694358e-05 2023-01-24 06:41:46.135875: step: 404/464, loss: 0.007765035144984722 2023-01-24 06:41:46.886850: step: 406/464, loss: 0.023086359724402428 2023-01-24 06:41:47.678394: step: 408/464, loss: 0.0032354777213186026 2023-01-24 06:41:48.388728: step: 410/464, loss: 8.746585808694363e-05 2023-01-24 06:41:49.166213: step: 412/464, loss: 0.011903539299964905 2023-01-24 06:41:49.850457: step: 414/464, loss: 0.00037599849747493863 2023-01-24 06:41:50.592645: step: 416/464, loss: 0.009331165812909603 2023-01-24 06:41:51.317216: step: 418/464, loss: 0.009211680851876736 2023-01-24 06:41:52.033279: step: 420/464, loss: 0.0010665275622159243 2023-01-24 06:41:52.857745: step: 422/464, loss: 0.025540944188833237 2023-01-24 06:41:53.559113: step: 424/464, loss: 0.003281003562733531 2023-01-24 06:41:54.299876: step: 426/464, loss: 0.000535392202436924 2023-01-24 06:41:55.043848: step: 428/464, loss: 0.0040351953357458115 2023-01-24 06:41:55.751379: step: 430/464, loss: 8.87652004166739e-06 2023-01-24 06:41:56.526457: step: 432/464, loss: 0.015301528386771679 2023-01-24 06:41:57.262601: step: 434/464, loss: 0.0045889755710959435 2023-01-24 06:41:58.013547: step: 436/464, loss: 0.0021863903384655714 2023-01-24 06:41:58.722110: step: 438/464, loss: 0.0011723927455022931 2023-01-24 06:41:59.595498: step: 440/464, loss: 0.6480519771575928 2023-01-24 06:42:00.311376: step: 442/464, loss: 0.003675105283036828 2023-01-24 06:42:01.088175: step: 444/464, loss: 0.01643628627061844 2023-01-24 06:42:01.805172: step: 446/464, loss: 0.08712334930896759 2023-01-24 06:42:02.541635: step: 448/464, loss: 0.0016254110960289836 2023-01-24 06:42:03.238888: step: 450/464, loss: 0.013469139114022255 2023-01-24 06:42:04.048472: step: 452/464, loss: 0.01300358772277832 2023-01-24 06:42:04.696697: step: 454/464, loss: 0.00330551341176033 2023-01-24 06:42:05.440629: step: 456/464, loss: 0.058972395956516266 2023-01-24 06:42:06.144300: step: 458/464, loss: 0.6161903738975525 2023-01-24 06:42:06.926631: step: 460/464, loss: 0.00022773313685320318 2023-01-24 06:42:07.640463: step: 462/464, loss: 3.4806244373321533 2023-01-24 06:42:08.381007: step: 464/464, loss: 0.0018051972147077322 2023-01-24 06:42:09.115238: step: 466/464, loss: 0.001051753293722868 2023-01-24 06:42:09.878335: step: 468/464, loss: 0.0013541424414142966 2023-01-24 06:42:10.534235: step: 470/464, loss: 0.0036550683435052633 2023-01-24 06:42:11.236245: step: 472/464, loss: 0.04064280539751053 2023-01-24 06:42:12.023022: step: 474/464, loss: 0.000147580387420021 2023-01-24 06:42:12.680816: step: 476/464, loss: 3.274055416113697e-05 2023-01-24 06:42:13.338252: step: 478/464, loss: 0.05929414555430412 2023-01-24 06:42:14.054291: step: 480/464, loss: 0.0007548134890384972 2023-01-24 06:42:14.759677: step: 482/464, loss: 0.004415807314217091 2023-01-24 06:42:15.520080: step: 484/464, loss: 0.017077995464205742 2023-01-24 06:42:16.354522: step: 486/464, loss: 0.43001672625541687 2023-01-24 06:42:17.057790: step: 488/464, loss: 0.03174535930156708 2023-01-24 06:42:17.785841: step: 490/464, loss: 0.006033504381775856 2023-01-24 06:42:18.509596: step: 492/464, loss: 0.0073009757325053215 2023-01-24 06:42:19.175207: step: 494/464, loss: 0.0071230302564799786 2023-01-24 06:42:19.835165: step: 496/464, loss: 0.00764810387045145 2023-01-24 06:42:20.552525: step: 498/464, loss: 0.00485795084387064 2023-01-24 06:42:21.236449: step: 500/464, loss: 0.0005850406014360487 2023-01-24 06:42:21.959783: step: 502/464, loss: 6.588442920474336e-05 2023-01-24 06:42:22.632043: step: 504/464, loss: 0.004987492226064205 2023-01-24 06:42:23.335681: step: 506/464, loss: 0.01008535549044609 2023-01-24 06:42:24.008204: step: 508/464, loss: 0.0010731680085882545 2023-01-24 06:42:24.726429: step: 510/464, loss: 0.0017684295307844877 2023-01-24 06:42:25.507913: step: 512/464, loss: 0.0022716608364135027 2023-01-24 06:42:26.324155: step: 514/464, loss: 0.012158623896539211 2023-01-24 06:42:26.992738: step: 516/464, loss: 0.018801629543304443 2023-01-24 06:42:27.784021: step: 518/464, loss: 0.03261919319629669 2023-01-24 06:42:28.521275: step: 520/464, loss: 0.00035173961077816784 2023-01-24 06:42:29.221509: step: 522/464, loss: 4.745915430248715e-05 2023-01-24 06:42:29.996639: step: 524/464, loss: 0.0011711184633895755 2023-01-24 06:42:30.745172: step: 526/464, loss: 0.05389797315001488 2023-01-24 06:42:31.485428: step: 528/464, loss: 0.0376632958650589 2023-01-24 06:42:32.228804: step: 530/464, loss: 0.002463065553456545 2023-01-24 06:42:32.934612: step: 532/464, loss: 0.01789242774248123 2023-01-24 06:42:33.591917: step: 534/464, loss: 0.02504398301243782 2023-01-24 06:42:34.234166: step: 536/464, loss: 0.0015117475995793939 2023-01-24 06:42:34.990111: step: 538/464, loss: 0.00411178357899189 2023-01-24 06:42:35.699569: step: 540/464, loss: 0.024579649791121483 2023-01-24 06:42:36.542753: step: 542/464, loss: 0.016789443790912628 2023-01-24 06:42:37.298158: step: 544/464, loss: 0.13599908351898193 2023-01-24 06:42:38.006868: step: 546/464, loss: 7.640109834028408e-05 2023-01-24 06:42:38.850229: step: 548/464, loss: 0.013117525726556778 2023-01-24 06:42:39.548188: step: 550/464, loss: 0.00526499655097723 2023-01-24 06:42:40.280428: step: 552/464, loss: 0.0007741588051430881 2023-01-24 06:42:41.009680: step: 554/464, loss: 0.004953427240252495 2023-01-24 06:42:41.719018: step: 556/464, loss: 0.03301551938056946 2023-01-24 06:42:42.507152: step: 558/464, loss: 0.10400167852640152 2023-01-24 06:42:43.264768: step: 560/464, loss: 0.03694292530417442 2023-01-24 06:42:44.252948: step: 562/464, loss: 0.02041666954755783 2023-01-24 06:42:44.978446: step: 564/464, loss: 0.026419784873723984 2023-01-24 06:42:45.653737: step: 566/464, loss: 0.005885733757168055 2023-01-24 06:42:46.318197: step: 568/464, loss: 0.002397893462330103 2023-01-24 06:42:46.943111: step: 570/464, loss: 0.0011069076135754585 2023-01-24 06:42:47.665334: step: 572/464, loss: 0.007068040315061808 2023-01-24 06:42:48.399121: step: 574/464, loss: 0.022324377670884132 2023-01-24 06:42:49.120141: step: 576/464, loss: 0.007453723344951868 2023-01-24 06:42:49.853876: step: 578/464, loss: 0.0008849214063957334 2023-01-24 06:42:50.563610: step: 580/464, loss: 0.015994714573025703 2023-01-24 06:42:51.382738: step: 582/464, loss: 0.09704574197530746 2023-01-24 06:42:52.161739: step: 584/464, loss: 0.01191841159015894 2023-01-24 06:42:52.808188: step: 586/464, loss: 0.00012223394878674299 2023-01-24 06:42:53.555589: step: 588/464, loss: 0.004012149292975664 2023-01-24 06:42:54.294634: step: 590/464, loss: 0.006843290291726589 2023-01-24 06:42:54.990858: step: 592/464, loss: 0.0001453045551897958 2023-01-24 06:42:55.754485: step: 594/464, loss: 0.0007951535517349839 2023-01-24 06:42:56.412369: step: 596/464, loss: 0.00012174924631835893 2023-01-24 06:42:57.052696: step: 598/464, loss: 0.017362039536237717 2023-01-24 06:42:57.797725: step: 600/464, loss: 0.009249621070921421 2023-01-24 06:42:58.543116: step: 602/464, loss: 0.08007968962192535 2023-01-24 06:42:59.295876: step: 604/464, loss: 0.001187682501040399 2023-01-24 06:43:00.020205: step: 606/464, loss: 0.0017568308394402266 2023-01-24 06:43:00.776656: step: 608/464, loss: 0.03269410505890846 2023-01-24 06:43:01.522945: step: 610/464, loss: 0.0011200553271919489 2023-01-24 06:43:02.236783: step: 612/464, loss: 0.060264717787504196 2023-01-24 06:43:02.967366: step: 614/464, loss: 0.0024317821953445673 2023-01-24 06:43:04.391708: step: 616/464, loss: 0.02085116133093834 2023-01-24 06:43:05.155609: step: 618/464, loss: 0.004324778914451599 2023-01-24 06:43:05.827231: step: 620/464, loss: 0.0014203897444531322 2023-01-24 06:43:06.536703: step: 622/464, loss: 0.0027653370052576065 2023-01-24 06:43:07.341234: step: 624/464, loss: 0.04954336956143379 2023-01-24 06:43:08.076429: step: 626/464, loss: 0.008402707986533642 2023-01-24 06:43:08.911518: step: 628/464, loss: 0.0007335083209909499 2023-01-24 06:43:09.610567: step: 630/464, loss: 0.002768837846815586 2023-01-24 06:43:10.347602: step: 632/464, loss: 0.0957309752702713 2023-01-24 06:43:11.063909: step: 634/464, loss: 0.003592605469748378 2023-01-24 06:43:11.910414: step: 636/464, loss: 0.017020680010318756 2023-01-24 06:43:12.715597: step: 638/464, loss: 0.006964333821088076 2023-01-24 06:43:13.423168: step: 640/464, loss: 0.008871006779372692 2023-01-24 06:43:14.156324: step: 642/464, loss: 0.00032925105188041925 2023-01-24 06:43:14.924232: step: 644/464, loss: 0.15270043909549713 2023-01-24 06:43:15.673144: step: 646/464, loss: 0.011862103827297688 2023-01-24 06:43:16.446335: step: 648/464, loss: 0.004576689563691616 2023-01-24 06:43:17.209949: step: 650/464, loss: 0.01853887178003788 2023-01-24 06:43:17.961599: step: 652/464, loss: 0.0003241170779801905 2023-01-24 06:43:18.560687: step: 654/464, loss: 0.005045429803431034 2023-01-24 06:43:19.267202: step: 656/464, loss: 0.0013937974581494927 2023-01-24 06:43:19.980465: step: 658/464, loss: 0.012597468681633472 2023-01-24 06:43:20.657332: step: 660/464, loss: 0.006145347375422716 2023-01-24 06:43:21.363918: step: 662/464, loss: 0.0005045664729550481 2023-01-24 06:43:22.073949: step: 664/464, loss: 0.013134666718542576 2023-01-24 06:43:22.791511: step: 666/464, loss: 0.019119717180728912 2023-01-24 06:43:23.529151: step: 668/464, loss: 0.0007407998782582581 2023-01-24 06:43:24.192135: step: 670/464, loss: 0.011953119188547134 2023-01-24 06:43:24.864940: step: 672/464, loss: 0.004008065443485975 2023-01-24 06:43:25.534980: step: 674/464, loss: 0.009490004740655422 2023-01-24 06:43:26.173621: step: 676/464, loss: 0.02649562619626522 2023-01-24 06:43:26.930898: step: 678/464, loss: 0.057214513421058655 2023-01-24 06:43:27.728206: step: 680/464, loss: 0.05446144938468933 2023-01-24 06:43:28.479259: step: 682/464, loss: 0.014377056621015072 2023-01-24 06:43:29.351936: step: 684/464, loss: 0.0008804807439446449 2023-01-24 06:43:30.060012: step: 686/464, loss: 0.019741732627153397 2023-01-24 06:43:30.796919: step: 688/464, loss: 0.0017235928680747747 2023-01-24 06:43:31.559307: step: 690/464, loss: 0.013048955239355564 2023-01-24 06:43:32.349150: step: 692/464, loss: 0.027585506439208984 2023-01-24 06:43:33.006436: step: 694/464, loss: 0.0010335876140743494 2023-01-24 06:43:33.802667: step: 696/464, loss: 0.0006243172683753073 2023-01-24 06:43:34.504729: step: 698/464, loss: 0.0284845232963562 2023-01-24 06:43:35.256050: step: 700/464, loss: 0.0001900457573356107 2023-01-24 06:43:36.009824: step: 702/464, loss: 9.985039469029289e-06 2023-01-24 06:43:36.757073: step: 704/464, loss: 0.9770488142967224 2023-01-24 06:43:37.469890: step: 706/464, loss: 0.005254943389445543 2023-01-24 06:43:38.160744: step: 708/464, loss: 0.006244266871362925 2023-01-24 06:43:38.923672: step: 710/464, loss: 0.0006113231065683067 2023-01-24 06:43:39.603680: step: 712/464, loss: 0.0013864204520359635 2023-01-24 06:43:40.320778: step: 714/464, loss: 0.060835305601358414 2023-01-24 06:43:41.021910: step: 716/464, loss: 0.02436334826052189 2023-01-24 06:43:41.807372: step: 718/464, loss: 0.04365500807762146 2023-01-24 06:43:42.613235: step: 720/464, loss: 0.036159999668598175 2023-01-24 06:43:43.359825: step: 722/464, loss: 0.017852721735835075 2023-01-24 06:43:44.128631: step: 724/464, loss: 0.005818805657327175 2023-01-24 06:43:44.762273: step: 726/464, loss: 0.006242651026695967 2023-01-24 06:43:45.482218: step: 728/464, loss: 0.002634809585288167 2023-01-24 06:43:46.219649: step: 730/464, loss: 0.007493005599826574 2023-01-24 06:43:46.979882: step: 732/464, loss: 0.000106641418824438 2023-01-24 06:43:47.740609: step: 734/464, loss: 0.0031143163796514273 2023-01-24 06:43:48.405576: step: 736/464, loss: 0.00782071053981781 2023-01-24 06:43:49.113209: step: 738/464, loss: 0.006812175270169973 2023-01-24 06:43:49.826443: step: 740/464, loss: 0.0028683652635663748 2023-01-24 06:43:50.513976: step: 742/464, loss: 0.006989945657551289 2023-01-24 06:43:51.199949: step: 744/464, loss: 0.018487777560949326 2023-01-24 06:43:51.971021: step: 746/464, loss: 0.0024340515956282616 2023-01-24 06:43:52.646738: step: 748/464, loss: 0.02723161317408085 2023-01-24 06:43:53.331864: step: 750/464, loss: 0.005367112345993519 2023-01-24 06:43:54.055763: step: 752/464, loss: 0.0003777325327973813 2023-01-24 06:43:54.818922: step: 754/464, loss: 0.005238216836005449 2023-01-24 06:43:55.496716: step: 756/464, loss: 0.0005308697000145912 2023-01-24 06:43:56.267192: step: 758/464, loss: 0.04467277601361275 2023-01-24 06:43:56.997135: step: 760/464, loss: 0.0029819693882018328 2023-01-24 06:43:57.829341: step: 762/464, loss: 0.006541172508150339 2023-01-24 06:43:58.548455: step: 764/464, loss: 0.012404871173202991 2023-01-24 06:43:59.209932: step: 766/464, loss: 0.0012938773725181818 2023-01-24 06:43:59.963236: step: 768/464, loss: 0.05380694568157196 2023-01-24 06:44:00.742483: step: 770/464, loss: 0.011378041468560696 2023-01-24 06:44:01.400339: step: 772/464, loss: 0.00021820772963110358 2023-01-24 06:44:02.117259: step: 774/464, loss: 0.01853141188621521 2023-01-24 06:44:02.834478: step: 776/464, loss: 0.15019969642162323 2023-01-24 06:44:03.596186: step: 778/464, loss: 0.028686635196208954 2023-01-24 06:44:04.350370: step: 780/464, loss: 0.006159959360957146 2023-01-24 06:44:05.060826: step: 782/464, loss: 0.03424028679728508 2023-01-24 06:44:05.804715: step: 784/464, loss: 0.006407311651855707 2023-01-24 06:44:06.467793: step: 786/464, loss: 0.0034531159326434135 2023-01-24 06:44:07.291299: step: 788/464, loss: 0.0063906945288181305 2023-01-24 06:44:07.974300: step: 790/464, loss: 0.0017129798652604222 2023-01-24 06:44:08.716596: step: 792/464, loss: 0.03176811337471008 2023-01-24 06:44:09.521395: step: 794/464, loss: 0.004839983303099871 2023-01-24 06:44:10.191052: step: 796/464, loss: 0.0023881683591753244 2023-01-24 06:44:10.954052: step: 798/464, loss: 0.0019653679337352514 2023-01-24 06:44:11.650775: step: 800/464, loss: 0.025102870538830757 2023-01-24 06:44:12.370806: step: 802/464, loss: 0.07787781953811646 2023-01-24 06:44:13.056075: step: 804/464, loss: 0.0024989210069179535 2023-01-24 06:44:13.743953: step: 806/464, loss: 0.00024524348555132747 2023-01-24 06:44:14.380016: step: 808/464, loss: 0.026250962167978287 2023-01-24 06:44:15.180527: step: 810/464, loss: 0.009458690881729126 2023-01-24 06:44:15.822400: step: 812/464, loss: 0.0012966262875124812 2023-01-24 06:44:16.565476: step: 814/464, loss: 0.008395758457481861 2023-01-24 06:44:17.279447: step: 816/464, loss: 0.00576248113065958 2023-01-24 06:44:17.958677: step: 818/464, loss: 0.06546097248792648 2023-01-24 06:44:18.617373: step: 820/464, loss: 0.0005249642999842763 2023-01-24 06:44:19.363132: step: 822/464, loss: 0.011017367243766785 2023-01-24 06:44:20.094474: step: 824/464, loss: 0.0021830948535352945 2023-01-24 06:44:20.803289: step: 826/464, loss: 0.022510824725031853 2023-01-24 06:44:21.533473: step: 828/464, loss: 0.002732113003730774 2023-01-24 06:44:22.291410: step: 830/464, loss: 0.06168021261692047 2023-01-24 06:44:22.970737: step: 832/464, loss: 0.0061044213362038136 2023-01-24 06:44:23.665558: step: 834/464, loss: 0.004234203137457371 2023-01-24 06:44:24.373129: step: 836/464, loss: 0.0001307379425270483 2023-01-24 06:44:25.120062: step: 838/464, loss: 0.0038768108934164047 2023-01-24 06:44:25.849804: step: 840/464, loss: 0.00010593992192298174 2023-01-24 06:44:26.598900: step: 842/464, loss: 0.00027795901405625045 2023-01-24 06:44:27.300014: step: 844/464, loss: 0.03145487606525421 2023-01-24 06:44:28.025649: step: 846/464, loss: 1.2001028060913086 2023-01-24 06:44:28.756057: step: 848/464, loss: 0.001613723929040134 2023-01-24 06:44:29.507847: step: 850/464, loss: 0.004396263509988785 2023-01-24 06:44:30.354944: step: 852/464, loss: 0.05548679083585739 2023-01-24 06:44:31.160262: step: 854/464, loss: 0.0015070741064846516 2023-01-24 06:44:31.866414: step: 856/464, loss: 0.00996997207403183 2023-01-24 06:44:32.585470: step: 858/464, loss: 0.04742727428674698 2023-01-24 06:44:33.357694: step: 860/464, loss: 0.036565475165843964 2023-01-24 06:44:34.078703: step: 862/464, loss: 0.00024810165632516146 2023-01-24 06:44:34.862640: step: 864/464, loss: 0.0038507478311657906 2023-01-24 06:44:35.625269: step: 866/464, loss: 0.018209144473075867 2023-01-24 06:44:36.322683: step: 868/464, loss: 0.06962604075670242 2023-01-24 06:44:36.947303: step: 870/464, loss: 0.0011270270915701985 2023-01-24 06:44:37.754570: step: 872/464, loss: 0.03728864714503288 2023-01-24 06:44:38.433633: step: 874/464, loss: 0.0010363421170040965 2023-01-24 06:44:39.144670: step: 876/464, loss: 0.05309408903121948 2023-01-24 06:44:39.807082: step: 878/464, loss: 0.05263727530837059 2023-01-24 06:44:40.587110: step: 880/464, loss: 1.1782157116613234e-06 2023-01-24 06:44:41.323331: step: 882/464, loss: 0.3160185217857361 2023-01-24 06:44:42.048842: step: 884/464, loss: 0.00017775286687538028 2023-01-24 06:44:42.738556: step: 886/464, loss: 0.0008852652972564101 2023-01-24 06:44:43.385927: step: 888/464, loss: 0.002755802357569337 2023-01-24 06:44:44.062847: step: 890/464, loss: 0.0003568526590242982 2023-01-24 06:44:44.832085: step: 892/464, loss: 0.005164716858416796 2023-01-24 06:44:45.516505: step: 894/464, loss: 0.021540869027376175 2023-01-24 06:44:46.312581: step: 896/464, loss: 0.0017362685175612569 2023-01-24 06:44:47.026159: step: 898/464, loss: 0.001434554811567068 2023-01-24 06:44:47.793012: step: 900/464, loss: 0.010029935277998447 2023-01-24 06:44:48.571488: step: 902/464, loss: 0.005941364914178848 2023-01-24 06:44:49.309492: step: 904/464, loss: 0.0255038533359766 2023-01-24 06:44:49.980405: step: 906/464, loss: 0.022550631314516068 2023-01-24 06:44:50.681221: step: 908/464, loss: 0.024902237579226494 2023-01-24 06:44:51.289043: step: 910/464, loss: 0.14438927173614502 2023-01-24 06:44:51.924051: step: 912/464, loss: 0.0119105763733387 2023-01-24 06:44:52.660647: step: 914/464, loss: 0.014482015743851662 2023-01-24 06:44:53.428949: step: 916/464, loss: 0.04747198522090912 2023-01-24 06:44:54.204360: step: 918/464, loss: 0.025189634412527084 2023-01-24 06:44:54.887088: step: 920/464, loss: 0.0019587657880038023 2023-01-24 06:44:55.612095: step: 922/464, loss: 0.022820036858320236 2023-01-24 06:44:56.322158: step: 924/464, loss: 0.022515999153256416 2023-01-24 06:44:57.019036: step: 926/464, loss: 0.0030971961095929146 2023-01-24 06:44:57.872402: step: 928/464, loss: 0.06827942281961441 2023-01-24 06:44:58.505340: step: 930/464, loss: 4.529808575171046e-05 ================================================== Loss: 0.036 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3487816113211683, 'r': 0.3137049027822273, 'f1': 0.330314652879588}, 'combined': 0.24338974422706483, 'epoch': 36} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32478517267214757, 'r': 0.2679236735989823, 'f1': 0.2936269203344944}, 'combined': 0.18235777157615968, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32716817918389923, 'r': 0.3060605547204219, 'f1': 0.31626257321110257}, 'combined': 0.23303558026081242, 'epoch': 36} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31789381231956015, 'r': 0.2638644291980539, 'f1': 0.2883701969205513}, 'combined': 0.17909306966644767, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3437450070488721, 'r': 0.3130884314676634, 'f1': 0.3277012976831353}, 'combined': 0.2414641140823102, 'epoch': 36} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.330544991535196, 'r': 0.27023845954899156, 'f1': 0.29736494626443566}, 'combined': 0.18467928241686005, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35597826086956524, 'r': 0.23392857142857143, 'f1': 0.2823275862068966}, 'combined': 0.1882183908045977, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3225806451612903, 'r': 0.43478260869565216, 'f1': 0.37037037037037035}, 'combined': 0.18518518518518517, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.49880382775119614, 'r': 0.18920145190562612, 'f1': 0.2743421052631579}, 'combined': 0.18289473684210525, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:47:37.940543: step: 2/464, loss: 0.280551552772522 2023-01-24 06:47:38.663309: step: 4/464, loss: 0.008939560502767563 2023-01-24 06:47:39.489790: step: 6/464, loss: 0.002099171746522188 2023-01-24 06:47:40.204332: step: 8/464, loss: 0.0003868904896080494 2023-01-24 06:47:40.961813: step: 10/464, loss: 0.06558524817228317 2023-01-24 06:47:41.656505: step: 12/464, loss: 0.0013958881609141827 2023-01-24 06:47:42.354958: step: 14/464, loss: 0.0008803207892924547 2023-01-24 06:47:43.054771: step: 16/464, loss: 0.021780746057629585 2023-01-24 06:47:43.750565: step: 18/464, loss: 0.0008355473983101547 2023-01-24 06:47:44.523410: step: 20/464, loss: 0.7923649549484253 2023-01-24 06:47:45.184541: step: 22/464, loss: 0.0001320467854384333 2023-01-24 06:47:45.977503: step: 24/464, loss: 0.0014507113955914974 2023-01-24 06:47:46.689784: step: 26/464, loss: 0.011154986917972565 2023-01-24 06:47:47.465818: step: 28/464, loss: 0.07527535408735275 2023-01-24 06:47:48.143214: step: 30/464, loss: 0.006951568648219109 2023-01-24 06:47:48.833592: step: 32/464, loss: 9.29707384784706e-05 2023-01-24 06:47:49.576034: step: 34/464, loss: 0.005324409808963537 2023-01-24 06:47:50.296451: step: 36/464, loss: 0.5303267240524292 2023-01-24 06:47:50.938599: step: 38/464, loss: 4.769072256749496e-05 2023-01-24 06:47:51.795818: step: 40/464, loss: 0.014213241636753082 2023-01-24 06:47:52.572209: step: 42/464, loss: 0.011028922162950039 2023-01-24 06:47:53.378332: step: 44/464, loss: 0.002563275396823883 2023-01-24 06:47:54.178310: step: 46/464, loss: 1.1958390474319458 2023-01-24 06:47:54.937920: step: 48/464, loss: 0.3304753005504608 2023-01-24 06:47:55.672502: step: 50/464, loss: 0.0014656719285994768 2023-01-24 06:47:56.404961: step: 52/464, loss: 0.0022754385136067867 2023-01-24 06:47:57.123008: step: 54/464, loss: 0.0007055680034682155 2023-01-24 06:47:57.805004: step: 56/464, loss: 0.005791050381958485 2023-01-24 06:47:58.459322: step: 58/464, loss: 0.02034652605652809 2023-01-24 06:47:59.154052: step: 60/464, loss: 0.015613360330462456 2023-01-24 06:47:59.899328: step: 62/464, loss: 0.004705897066742182 2023-01-24 06:48:00.694649: step: 64/464, loss: 0.008632996119558811 2023-01-24 06:48:01.377594: step: 66/464, loss: 0.011131679639220238 2023-01-24 06:48:02.141388: step: 68/464, loss: 0.009376917965710163 2023-01-24 06:48:02.899642: step: 70/464, loss: 0.008288220502436161 2023-01-24 06:48:03.690429: step: 72/464, loss: 0.001611040672287345 2023-01-24 06:48:04.477197: step: 74/464, loss: 0.01145377941429615 2023-01-24 06:48:05.240537: step: 76/464, loss: 0.0019228061428293586 2023-01-24 06:48:05.991843: step: 78/464, loss: 0.0053077698685228825 2023-01-24 06:48:06.733927: step: 80/464, loss: 0.0004243594885338098 2023-01-24 06:48:07.434204: step: 82/464, loss: 0.0004092931339982897 2023-01-24 06:48:08.111045: step: 84/464, loss: 0.0032144596334546804 2023-01-24 06:48:08.847005: step: 86/464, loss: 0.001705450122244656 2023-01-24 06:48:09.555559: step: 88/464, loss: 0.0032923638354986906 2023-01-24 06:48:10.211859: step: 90/464, loss: 0.00027621854678727686 2023-01-24 06:48:11.136586: step: 92/464, loss: 0.009215369820594788 2023-01-24 06:48:11.847670: step: 94/464, loss: 1.0244518307445105e-05 2023-01-24 06:48:12.598807: step: 96/464, loss: 0.0014645474730059505 2023-01-24 06:48:13.364088: step: 98/464, loss: 0.007156630512326956 2023-01-24 06:48:14.095318: step: 100/464, loss: 0.006801954470574856 2023-01-24 06:48:14.883481: step: 102/464, loss: 0.00554502522572875 2023-01-24 06:48:15.667075: step: 104/464, loss: 0.03765822947025299 2023-01-24 06:48:16.373035: step: 106/464, loss: 0.0032080982346087694 2023-01-24 06:48:17.016952: step: 108/464, loss: 0.00014240032760426402 2023-01-24 06:48:17.717206: step: 110/464, loss: 1.0253015756607056 2023-01-24 06:48:18.453239: step: 112/464, loss: 0.0023225920740514994 2023-01-24 06:48:19.225145: step: 114/464, loss: 0.45400118827819824 2023-01-24 06:48:19.928873: step: 116/464, loss: 0.008975445292890072 2023-01-24 06:48:20.650858: step: 118/464, loss: 0.011956234462559223 2023-01-24 06:48:21.461777: step: 120/464, loss: 0.011662127450108528 2023-01-24 06:48:22.249058: step: 122/464, loss: 0.02039065584540367 2023-01-24 06:48:23.008571: step: 124/464, loss: 0.03815082460641861 2023-01-24 06:48:23.783285: step: 126/464, loss: 5.76664533582516e-05 2023-01-24 06:48:24.523030: step: 128/464, loss: 0.0028054367285221815 2023-01-24 06:48:25.337732: step: 130/464, loss: 0.45062610507011414 2023-01-24 06:48:26.002099: step: 132/464, loss: 0.028487809002399445 2023-01-24 06:48:26.723683: step: 134/464, loss: 0.7996045351028442 2023-01-24 06:48:27.541035: step: 136/464, loss: 0.00445803627371788 2023-01-24 06:48:28.295365: step: 138/464, loss: 0.001006808946840465 2023-01-24 06:48:28.993920: step: 140/464, loss: 0.10681931674480438 2023-01-24 06:48:29.637119: step: 142/464, loss: 0.002509431215003133 2023-01-24 06:48:30.348925: step: 144/464, loss: 0.0034187352284789085 2023-01-24 06:48:31.061136: step: 146/464, loss: 0.013594591058790684 2023-01-24 06:48:31.752106: step: 148/464, loss: 0.001105702482163906 2023-01-24 06:48:32.501224: step: 150/464, loss: 0.009012116119265556 2023-01-24 06:48:33.160216: step: 152/464, loss: 0.0008563753799535334 2023-01-24 06:48:33.893569: step: 154/464, loss: 0.013175604864954948 2023-01-24 06:48:34.673913: step: 156/464, loss: 0.014561844058334827 2023-01-24 06:48:35.448395: step: 158/464, loss: 0.05313951522111893 2023-01-24 06:48:36.323403: step: 160/464, loss: 0.0025458012241870165 2023-01-24 06:48:37.163313: step: 162/464, loss: 0.0032371277920901775 2023-01-24 06:48:37.911410: step: 164/464, loss: 0.34664544463157654 2023-01-24 06:48:38.711883: step: 166/464, loss: 0.01839899830520153 2023-01-24 06:48:39.453841: step: 168/464, loss: 0.031566642224788666 2023-01-24 06:48:40.071444: step: 170/464, loss: 0.01217776257544756 2023-01-24 06:48:40.796803: step: 172/464, loss: 1.7539546489715576 2023-01-24 06:48:41.569224: step: 174/464, loss: 0.029823634773492813 2023-01-24 06:48:42.320915: step: 176/464, loss: 0.005956373643130064 2023-01-24 06:48:43.110124: step: 178/464, loss: 0.0004605269932653755 2023-01-24 06:48:43.907296: step: 180/464, loss: 0.01578412391245365 2023-01-24 06:48:44.593044: step: 182/464, loss: 0.007628194522112608 2023-01-24 06:48:45.330939: step: 184/464, loss: 0.0007106433040462434 2023-01-24 06:48:46.113422: step: 186/464, loss: 0.00034200208028778434 2023-01-24 06:48:46.794631: step: 188/464, loss: 0.003999216482043266 2023-01-24 06:48:47.503488: step: 190/464, loss: 0.0010963481618091464 2023-01-24 06:48:48.202393: step: 192/464, loss: 0.0002236723667010665 2023-01-24 06:48:48.898381: step: 194/464, loss: 0.0016451469855383039 2023-01-24 06:48:49.572041: step: 196/464, loss: 0.008220840245485306 2023-01-24 06:48:50.327143: step: 198/464, loss: 0.0009095626883208752 2023-01-24 06:48:50.997839: step: 200/464, loss: 0.4115036725997925 2023-01-24 06:48:51.745123: step: 202/464, loss: 0.002638082019984722 2023-01-24 06:48:52.449327: step: 204/464, loss: 4.80562994198408e-05 2023-01-24 06:48:53.281779: step: 206/464, loss: 0.01674598455429077 2023-01-24 06:48:53.981233: step: 208/464, loss: 0.0022681746631860733 2023-01-24 06:48:54.727428: step: 210/464, loss: 0.0005993362283334136 2023-01-24 06:48:55.414335: step: 212/464, loss: 0.0013340349541977048 2023-01-24 06:48:56.087295: step: 214/464, loss: 0.0249335877597332 2023-01-24 06:48:56.846730: step: 216/464, loss: 0.005155049730092287 2023-01-24 06:48:57.594767: step: 218/464, loss: 0.005427779629826546 2023-01-24 06:48:58.399769: step: 220/464, loss: 0.0010236561065539718 2023-01-24 06:48:59.073616: step: 222/464, loss: 2.951405076601077e-05 2023-01-24 06:48:59.824477: step: 224/464, loss: 0.00332543533295393 2023-01-24 06:49:00.508518: step: 226/464, loss: 0.003271919209510088 2023-01-24 06:49:01.200458: step: 228/464, loss: 0.05662386491894722 2023-01-24 06:49:02.031588: step: 230/464, loss: 0.005633444990962744 2023-01-24 06:49:02.826025: step: 232/464, loss: 0.0012898902641609311 2023-01-24 06:49:03.575822: step: 234/464, loss: 0.0004988862201571465 2023-01-24 06:49:04.291733: step: 236/464, loss: 0.000257426465395838 2023-01-24 06:49:04.981357: step: 238/464, loss: 0.017902931198477745 2023-01-24 06:49:05.712119: step: 240/464, loss: 0.002358401892706752 2023-01-24 06:49:06.386157: step: 242/464, loss: 0.05138232186436653 2023-01-24 06:49:07.163252: step: 244/464, loss: 0.0016939010238274932 2023-01-24 06:49:07.909051: step: 246/464, loss: 0.011640515178442001 2023-01-24 06:49:08.614694: step: 248/464, loss: 0.000751245825085789 2023-01-24 06:49:09.464354: step: 250/464, loss: 0.0003486153727862984 2023-01-24 06:49:10.202840: step: 252/464, loss: 0.045683737844228745 2023-01-24 06:49:10.974316: step: 254/464, loss: 0.0002169935469282791 2023-01-24 06:49:11.640768: step: 256/464, loss: 0.012211199849843979 2023-01-24 06:49:12.464430: step: 258/464, loss: 0.008998665027320385 2023-01-24 06:49:13.253493: step: 260/464, loss: 0.04509185254573822 2023-01-24 06:49:13.973971: step: 262/464, loss: 0.016031892970204353 2023-01-24 06:49:14.702210: step: 264/464, loss: 0.00014143930457066745 2023-01-24 06:49:15.446866: step: 266/464, loss: 0.0026746096555143595 2023-01-24 06:49:16.152467: step: 268/464, loss: 0.06285201013088226 2023-01-24 06:49:16.815392: step: 270/464, loss: 0.025904759764671326 2023-01-24 06:49:17.535377: step: 272/464, loss: 0.0293950904160738 2023-01-24 06:49:18.220578: step: 274/464, loss: 0.00917926337569952 2023-01-24 06:49:18.984844: step: 276/464, loss: 0.00245084916241467 2023-01-24 06:49:19.762884: step: 278/464, loss: 0.06213583052158356 2023-01-24 06:49:20.457836: step: 280/464, loss: 0.00697915768250823 2023-01-24 06:49:21.207664: step: 282/464, loss: 0.026562105864286423 2023-01-24 06:49:21.897626: step: 284/464, loss: 0.017584990710020065 2023-01-24 06:49:22.597735: step: 286/464, loss: 0.00643016304820776 2023-01-24 06:49:23.329870: step: 288/464, loss: 0.004855574574321508 2023-01-24 06:49:24.063836: step: 290/464, loss: 0.0016783819301053882 2023-01-24 06:49:24.811045: step: 292/464, loss: 0.0005006135324947536 2023-01-24 06:49:25.541833: step: 294/464, loss: 0.0228210911154747 2023-01-24 06:49:26.281213: step: 296/464, loss: 0.0024767278227955103 2023-01-24 06:49:26.964884: step: 298/464, loss: 0.00437591876834631 2023-01-24 06:49:27.660876: step: 300/464, loss: 0.28448402881622314 2023-01-24 06:49:28.310296: step: 302/464, loss: 0.00031451816903427243 2023-01-24 06:49:29.051758: step: 304/464, loss: 0.05789633467793465 2023-01-24 06:49:29.816176: step: 306/464, loss: 0.029583610594272614 2023-01-24 06:49:30.528492: step: 308/464, loss: 0.0015377000672742724 2023-01-24 06:49:31.294017: step: 310/464, loss: 0.029524048790335655 2023-01-24 06:49:32.071434: step: 312/464, loss: 0.012191911228001118 2023-01-24 06:49:32.768750: step: 314/464, loss: 0.0005353660089895129 2023-01-24 06:49:33.561672: step: 316/464, loss: 0.03353969007730484 2023-01-24 06:49:34.303634: step: 318/464, loss: 0.28003543615341187 2023-01-24 06:49:35.031141: step: 320/464, loss: 6.730011955369264e-05 2023-01-24 06:49:35.782215: step: 322/464, loss: 0.0003083897172473371 2023-01-24 06:49:36.518503: step: 324/464, loss: 0.0009723399416543543 2023-01-24 06:49:37.196108: step: 326/464, loss: 0.0031207154970616102 2023-01-24 06:49:37.883204: step: 328/464, loss: 0.004871489480137825 2023-01-24 06:49:38.584493: step: 330/464, loss: 0.006424302235245705 2023-01-24 06:49:39.324238: step: 332/464, loss: 0.028175652027130127 2023-01-24 06:49:40.029061: step: 334/464, loss: 0.00297021446749568 2023-01-24 06:49:40.699440: step: 336/464, loss: 0.012575359083712101 2023-01-24 06:49:41.382408: step: 338/464, loss: 0.0013873311690986156 2023-01-24 06:49:42.094526: step: 340/464, loss: 0.012714538723230362 2023-01-24 06:49:42.828942: step: 342/464, loss: 0.0001900491479318589 2023-01-24 06:49:43.566572: step: 344/464, loss: 0.013001831248402596 2023-01-24 06:49:44.329198: step: 346/464, loss: 0.00460166297852993 2023-01-24 06:49:45.051227: step: 348/464, loss: 0.004661817103624344 2023-01-24 06:49:45.778652: step: 350/464, loss: 0.03515256196260452 2023-01-24 06:49:46.486556: step: 352/464, loss: 0.01803845725953579 2023-01-24 06:49:47.150732: step: 354/464, loss: 0.014347223564982414 2023-01-24 06:49:47.833005: step: 356/464, loss: 0.023120839148759842 2023-01-24 06:49:48.537604: step: 358/464, loss: 0.030444692820310593 2023-01-24 06:49:49.284803: step: 360/464, loss: 0.000849287782330066 2023-01-24 06:49:50.051687: step: 362/464, loss: 0.0013305057073011994 2023-01-24 06:49:50.798374: step: 364/464, loss: 0.0025766007602214813 2023-01-24 06:49:51.475386: step: 366/464, loss: 0.00023264545598067343 2023-01-24 06:49:52.267804: step: 368/464, loss: 0.008877119980752468 2023-01-24 06:49:53.021700: step: 370/464, loss: 0.03428717330098152 2023-01-24 06:49:53.697352: step: 372/464, loss: 0.05827299878001213 2023-01-24 06:49:54.459035: step: 374/464, loss: 0.00013529513671528548 2023-01-24 06:49:55.136364: step: 376/464, loss: 0.0038425836246460676 2023-01-24 06:49:55.886063: step: 378/464, loss: 0.11871061474084854 2023-01-24 06:49:56.611907: step: 380/464, loss: 0.0017034834017977118 2023-01-24 06:49:57.296886: step: 382/464, loss: 0.011208605021238327 2023-01-24 06:49:58.115322: step: 384/464, loss: 0.002513043349608779 2023-01-24 06:49:58.867577: step: 386/464, loss: 0.004687040578573942 2023-01-24 06:49:59.579040: step: 388/464, loss: 4.855592123931274e-05 2023-01-24 06:50:00.345028: step: 390/464, loss: 0.004560593515634537 2023-01-24 06:50:01.109134: step: 392/464, loss: 0.043515417724847794 2023-01-24 06:50:01.844170: step: 394/464, loss: 0.42079973220825195 2023-01-24 06:50:02.680299: step: 396/464, loss: 0.0009142745402641594 2023-01-24 06:50:03.382722: step: 398/464, loss: 0.0012250650906935334 2023-01-24 06:50:04.191774: step: 400/464, loss: 0.0012005030876025558 2023-01-24 06:50:04.986588: step: 402/464, loss: 0.03129834681749344 2023-01-24 06:50:05.681781: step: 404/464, loss: 0.00040537992026656866 2023-01-24 06:50:06.430535: step: 406/464, loss: 0.052299920469522476 2023-01-24 06:50:07.206397: step: 408/464, loss: 0.00806864257901907 2023-01-24 06:50:07.989264: step: 410/464, loss: 0.004822226706892252 2023-01-24 06:50:08.742902: step: 412/464, loss: 0.13479790091514587 2023-01-24 06:50:09.384182: step: 414/464, loss: 0.00043789477786049247 2023-01-24 06:50:10.127330: step: 416/464, loss: 0.016873696818947792 2023-01-24 06:50:10.877418: step: 418/464, loss: 0.0004592374316416681 2023-01-24 06:50:11.624664: step: 420/464, loss: 4.474152774491813e-06 2023-01-24 06:50:12.420814: step: 422/464, loss: 0.020167384296655655 2023-01-24 06:50:13.159271: step: 424/464, loss: 0.0105759147554636 2023-01-24 06:50:13.876552: step: 426/464, loss: 0.03869227319955826 2023-01-24 06:50:14.533624: step: 428/464, loss: 0.024213694036006927 2023-01-24 06:50:15.212209: step: 430/464, loss: 0.005004123318940401 2023-01-24 06:50:15.894538: step: 432/464, loss: 0.012551484629511833 2023-01-24 06:50:16.651602: step: 434/464, loss: 0.03439558297395706 2023-01-24 06:50:17.353202: step: 436/464, loss: 0.0023460935335606337 2023-01-24 06:50:18.086729: step: 438/464, loss: 0.0003566384839359671 2023-01-24 06:50:18.809910: step: 440/464, loss: 0.007467083632946014 2023-01-24 06:50:19.565723: step: 442/464, loss: 0.00042208057129755616 2023-01-24 06:50:20.266146: step: 444/464, loss: 0.0004894437151961029 2023-01-24 06:50:20.974716: step: 446/464, loss: 0.0005053331260569394 2023-01-24 06:50:21.729224: step: 448/464, loss: 0.0006129793473519385 2023-01-24 06:50:22.403470: step: 450/464, loss: 0.004137951415032148 2023-01-24 06:50:23.178868: step: 452/464, loss: 0.02457266114652157 2023-01-24 06:50:23.850835: step: 454/464, loss: 0.010156502947211266 2023-01-24 06:50:24.628742: step: 456/464, loss: 0.007887723855674267 2023-01-24 06:50:25.295862: step: 458/464, loss: 0.0018748701550066471 2023-01-24 06:50:25.969569: step: 460/464, loss: 0.0013414479326456785 2023-01-24 06:50:26.769937: step: 462/464, loss: 0.0009742419933900237 2023-01-24 06:50:27.611581: step: 464/464, loss: 0.013239771127700806 2023-01-24 06:50:28.370689: step: 466/464, loss: 0.023154204711318016 2023-01-24 06:50:29.060697: step: 468/464, loss: 0.0389837771654129 2023-01-24 06:50:29.752937: step: 470/464, loss: 0.005595343187451363 2023-01-24 06:50:30.453595: step: 472/464, loss: 0.04169272258877754 2023-01-24 06:50:31.187779: step: 474/464, loss: 0.033552419394254684 2023-01-24 06:50:31.902323: step: 476/464, loss: 0.01440391968935728 2023-01-24 06:50:32.675851: step: 478/464, loss: 1.0795247554779053 2023-01-24 06:50:33.415604: step: 480/464, loss: 0.019383076578378677 2023-01-24 06:50:34.142245: step: 482/464, loss: 0.006699536461383104 2023-01-24 06:50:34.839955: step: 484/464, loss: 0.006788499187678099 2023-01-24 06:50:35.604207: step: 486/464, loss: 0.004629552364349365 2023-01-24 06:50:36.368588: step: 488/464, loss: 0.0003677864442579448 2023-01-24 06:50:37.090245: step: 490/464, loss: 0.014383114874362946 2023-01-24 06:50:37.816294: step: 492/464, loss: 0.036328598856925964 2023-01-24 06:50:38.543295: step: 494/464, loss: 0.02989009954035282 2023-01-24 06:50:39.327184: step: 496/464, loss: 0.008138212375342846 2023-01-24 06:50:40.067012: step: 498/464, loss: 0.0007988435099832714 2023-01-24 06:50:40.768800: step: 500/464, loss: 0.006192335858941078 2023-01-24 06:50:41.471109: step: 502/464, loss: 0.009122030809521675 2023-01-24 06:50:42.234050: step: 504/464, loss: 0.0024027577601373196 2023-01-24 06:50:42.951297: step: 506/464, loss: 0.0008780999341979623 2023-01-24 06:50:43.679485: step: 508/464, loss: 0.05414319410920143 2023-01-24 06:50:44.411985: step: 510/464, loss: 0.15176241099834442 2023-01-24 06:50:45.088146: step: 512/464, loss: 0.0005607667262665927 2023-01-24 06:50:45.724390: step: 514/464, loss: 1.1074645954067819e-05 2023-01-24 06:50:46.521913: step: 516/464, loss: 0.004496861714869738 2023-01-24 06:50:47.267782: step: 518/464, loss: 0.0012348692398518324 2023-01-24 06:50:47.995081: step: 520/464, loss: 0.0013506343821063638 2023-01-24 06:50:48.691489: step: 522/464, loss: 0.0007885704399086535 2023-01-24 06:50:49.454100: step: 524/464, loss: 0.003030086401849985 2023-01-24 06:50:50.184167: step: 526/464, loss: 0.001310868770815432 2023-01-24 06:50:50.850948: step: 528/464, loss: 0.00021499492868315428 2023-01-24 06:50:51.592169: step: 530/464, loss: 0.0947151705622673 2023-01-24 06:50:52.228270: step: 532/464, loss: 1.7647675122134387e-05 2023-01-24 06:50:52.972306: step: 534/464, loss: 0.0041124713607132435 2023-01-24 06:50:53.673001: step: 536/464, loss: 0.0023744269274175167 2023-01-24 06:50:54.408983: step: 538/464, loss: 0.02584262192249298 2023-01-24 06:50:55.126057: step: 540/464, loss: 0.003794726449996233 2023-01-24 06:50:55.853929: step: 542/464, loss: 0.007657110691070557 2023-01-24 06:50:56.538626: step: 544/464, loss: 0.0011781043140217662 2023-01-24 06:50:57.219024: step: 546/464, loss: 0.016155825927853584 2023-01-24 06:50:57.946200: step: 548/464, loss: 0.009724811650812626 2023-01-24 06:50:58.696799: step: 550/464, loss: 0.0010752100497484207 2023-01-24 06:50:59.524093: step: 552/464, loss: 0.00013952278823126107 2023-01-24 06:51:00.217064: step: 554/464, loss: 0.02432950958609581 2023-01-24 06:51:00.927046: step: 556/464, loss: 0.11144956946372986 2023-01-24 06:51:01.744082: step: 558/464, loss: 0.0349210649728775 2023-01-24 06:51:02.510978: step: 560/464, loss: 0.0006711905589327216 2023-01-24 06:51:03.297502: step: 562/464, loss: 0.0016934089362621307 2023-01-24 06:51:03.974067: step: 564/464, loss: 0.0023588351905345917 2023-01-24 06:51:04.691998: step: 566/464, loss: 0.003813401563093066 2023-01-24 06:51:05.375248: step: 568/464, loss: 0.07490542531013489 2023-01-24 06:51:06.088744: step: 570/464, loss: 0.007735707797110081 2023-01-24 06:51:06.823772: step: 572/464, loss: 0.0013534906320273876 2023-01-24 06:51:07.614968: step: 574/464, loss: 0.008723358623683453 2023-01-24 06:51:08.329500: step: 576/464, loss: 0.027125995606184006 2023-01-24 06:51:09.033827: step: 578/464, loss: 0.00965013075619936 2023-01-24 06:51:09.796773: step: 580/464, loss: 0.010939667001366615 2023-01-24 06:51:10.515623: step: 582/464, loss: 0.00047597894445061684 2023-01-24 06:51:11.325421: step: 584/464, loss: 0.03781023249030113 2023-01-24 06:51:12.215559: step: 586/464, loss: 0.023593248799443245 2023-01-24 06:51:12.927700: step: 588/464, loss: 0.054472390562295914 2023-01-24 06:51:13.640647: step: 590/464, loss: 0.02335720881819725 2023-01-24 06:51:14.439627: step: 592/464, loss: 0.2685994505882263 2023-01-24 06:51:15.177036: step: 594/464, loss: 0.007727600634098053 2023-01-24 06:51:15.920056: step: 596/464, loss: 0.03737568482756615 2023-01-24 06:51:16.715595: step: 598/464, loss: 0.004133549984544516 2023-01-24 06:51:17.386652: step: 600/464, loss: 0.0037972447462379932 2023-01-24 06:51:18.123949: step: 602/464, loss: 0.012717491947114468 2023-01-24 06:51:18.877814: step: 604/464, loss: 0.002593568991869688 2023-01-24 06:51:19.617652: step: 606/464, loss: 0.00020642158051487058 2023-01-24 06:51:20.364528: step: 608/464, loss: 0.029949229210615158 2023-01-24 06:51:21.122472: step: 610/464, loss: 0.012447851710021496 2023-01-24 06:51:21.754520: step: 612/464, loss: 0.0029245391488075256 2023-01-24 06:51:22.496337: step: 614/464, loss: 0.014462887309491634 2023-01-24 06:51:23.320941: step: 616/464, loss: 0.026355883106589317 2023-01-24 06:51:23.968713: step: 618/464, loss: 0.007805028930306435 2023-01-24 06:51:24.661949: step: 620/464, loss: 0.012717099860310555 2023-01-24 06:51:25.348615: step: 622/464, loss: 0.007876298390328884 2023-01-24 06:51:26.113394: step: 624/464, loss: 0.00024364175624214113 2023-01-24 06:51:26.828149: step: 626/464, loss: 7.127138087525964e-05 2023-01-24 06:51:27.588110: step: 628/464, loss: 0.001971708843484521 2023-01-24 06:51:28.321667: step: 630/464, loss: 7.846429070923477e-05 2023-01-24 06:51:29.045871: step: 632/464, loss: 0.0007255128002725542 2023-01-24 06:51:29.808038: step: 634/464, loss: 0.014832047745585442 2023-01-24 06:51:30.469759: step: 636/464, loss: 0.0014034698251634836 2023-01-24 06:51:31.357121: step: 638/464, loss: 0.011184017173945904 2023-01-24 06:51:32.117578: step: 640/464, loss: 0.001698363688774407 2023-01-24 06:51:32.962382: step: 642/464, loss: 0.018939031288027763 2023-01-24 06:51:33.698175: step: 644/464, loss: 0.03413274884223938 2023-01-24 06:51:34.452041: step: 646/464, loss: 0.008116284385323524 2023-01-24 06:51:35.244188: step: 648/464, loss: 0.0332346111536026 2023-01-24 06:51:35.965448: step: 650/464, loss: 0.17691725492477417 2023-01-24 06:51:36.678022: step: 652/464, loss: 0.007915996946394444 2023-01-24 06:51:37.393613: step: 654/464, loss: 0.001967285992577672 2023-01-24 06:51:38.106146: step: 656/464, loss: 0.003613883862271905 2023-01-24 06:51:38.950160: step: 658/464, loss: 0.034955792129039764 2023-01-24 06:51:39.630522: step: 660/464, loss: 0.003080249996855855 2023-01-24 06:51:40.357165: step: 662/464, loss: 0.0010708275949582458 2023-01-24 06:51:41.113750: step: 664/464, loss: 0.013194491155445576 2023-01-24 06:51:42.026344: step: 666/464, loss: 0.009825510904192924 2023-01-24 06:51:42.767620: step: 668/464, loss: 0.009700184687972069 2023-01-24 06:51:43.484935: step: 670/464, loss: 0.0038985528517514467 2023-01-24 06:51:44.205021: step: 672/464, loss: 0.002727124374359846 2023-01-24 06:51:44.897414: step: 674/464, loss: 0.002421292709186673 2023-01-24 06:51:45.545697: step: 676/464, loss: 0.002174343913793564 2023-01-24 06:51:46.298645: step: 678/464, loss: 0.03671526908874512 2023-01-24 06:51:47.030426: step: 680/464, loss: 0.00047577309305779636 2023-01-24 06:51:47.993822: step: 682/464, loss: 0.006250323727726936 2023-01-24 06:51:48.728349: step: 684/464, loss: 0.0016291936626657844 2023-01-24 06:51:49.520025: step: 686/464, loss: 0.09297700971364975 2023-01-24 06:51:50.266381: step: 688/464, loss: 0.001678445260040462 2023-01-24 06:51:51.042836: step: 690/464, loss: 0.011845313012599945 2023-01-24 06:51:51.784944: step: 692/464, loss: 0.024060700088739395 2023-01-24 06:51:52.500990: step: 694/464, loss: 0.47590959072113037 2023-01-24 06:51:53.219634: step: 696/464, loss: 0.0004216399975121021 2023-01-24 06:51:53.929043: step: 698/464, loss: 0.002506182761862874 2023-01-24 06:51:54.698706: step: 700/464, loss: 0.008632753044366837 2023-01-24 06:51:55.346915: step: 702/464, loss: 0.020507795736193657 2023-01-24 06:51:56.019141: step: 704/464, loss: 0.23401466012001038 2023-01-24 06:51:56.758811: step: 706/464, loss: 0.0024203970097005367 2023-01-24 06:51:57.543947: step: 708/464, loss: 0.007572733331471682 2023-01-24 06:51:58.276724: step: 710/464, loss: 0.018590884283185005 2023-01-24 06:51:58.988552: step: 712/464, loss: 0.019573379307985306 2023-01-24 06:51:59.790781: step: 714/464, loss: 0.0003258471260778606 2023-01-24 06:52:00.528730: step: 716/464, loss: 0.003308718092739582 2023-01-24 06:52:01.297805: step: 718/464, loss: 0.025799686089158058 2023-01-24 06:52:02.240640: step: 720/464, loss: 0.0014121445128694177 2023-01-24 06:52:02.964860: step: 722/464, loss: 0.0010850804392248392 2023-01-24 06:52:03.681851: step: 724/464, loss: 0.027522243559360504 2023-01-24 06:52:04.481516: step: 726/464, loss: 0.015951845794916153 2023-01-24 06:52:05.204331: step: 728/464, loss: 0.006579286884516478 2023-01-24 06:52:05.952141: step: 730/464, loss: 0.021987447515130043 2023-01-24 06:52:06.714263: step: 732/464, loss: 0.009621849283576012 2023-01-24 06:52:07.363541: step: 734/464, loss: 0.006305316463112831 2023-01-24 06:52:08.114530: step: 736/464, loss: 0.0016307436162605882 2023-01-24 06:52:08.884921: step: 738/464, loss: 0.011331386864185333 2023-01-24 06:52:09.648197: step: 740/464, loss: 0.00675381300970912 2023-01-24 06:52:10.361234: step: 742/464, loss: 0.0013096178881824017 2023-01-24 06:52:11.113434: step: 744/464, loss: 0.007577427197247744 2023-01-24 06:52:11.873941: step: 746/464, loss: 0.022213483229279518 2023-01-24 06:52:12.615967: step: 748/464, loss: 0.02859911136329174 2023-01-24 06:52:13.337776: step: 750/464, loss: 0.00373831856995821 2023-01-24 06:52:14.041582: step: 752/464, loss: 0.0947578027844429 2023-01-24 06:52:14.710582: step: 754/464, loss: 0.0009563664207234979 2023-01-24 06:52:15.386540: step: 756/464, loss: 0.022929368540644646 2023-01-24 06:52:16.132319: step: 758/464, loss: 0.005303644575178623 2023-01-24 06:52:17.012939: step: 760/464, loss: 0.002270778641104698 2023-01-24 06:52:17.739209: step: 762/464, loss: 0.04593295603990555 2023-01-24 06:52:18.471246: step: 764/464, loss: 0.00035218169796280563 2023-01-24 06:52:19.196150: step: 766/464, loss: 0.00043932118569500744 2023-01-24 06:52:19.928452: step: 768/464, loss: 0.01299199927598238 2023-01-24 06:52:20.717598: step: 770/464, loss: 0.00485680066049099 2023-01-24 06:52:21.404843: step: 772/464, loss: 0.005588888190686703 2023-01-24 06:52:22.183829: step: 774/464, loss: 6.823728654126171e-06 2023-01-24 06:52:22.954298: step: 776/464, loss: 0.007791449781507254 2023-01-24 06:52:23.697652: step: 778/464, loss: 0.0028351175133138895 2023-01-24 06:52:24.385387: step: 780/464, loss: 0.001013401080854237 2023-01-24 06:52:25.252614: step: 782/464, loss: 0.00967742782086134 2023-01-24 06:52:25.902986: step: 784/464, loss: 0.002139471471309662 2023-01-24 06:52:26.597071: step: 786/464, loss: 0.014759652316570282 2023-01-24 06:52:27.362733: step: 788/464, loss: 0.024528315290808678 2023-01-24 06:52:28.108914: step: 790/464, loss: 0.0002210488310083747 2023-01-24 06:52:28.829496: step: 792/464, loss: 0.07229010760784149 2023-01-24 06:52:29.616378: step: 794/464, loss: 0.005594093352556229 2023-01-24 06:52:30.362731: step: 796/464, loss: 0.00963997095823288 2023-01-24 06:52:31.073850: step: 798/464, loss: 0.00913211889564991 2023-01-24 06:52:31.897971: step: 800/464, loss: 0.014080382883548737 2023-01-24 06:52:32.743085: step: 802/464, loss: 0.010862361639738083 2023-01-24 06:52:33.501749: step: 804/464, loss: 0.15599840879440308 2023-01-24 06:52:34.272278: step: 806/464, loss: 0.05338187515735626 2023-01-24 06:52:34.917939: step: 808/464, loss: 0.00022713113867212087 2023-01-24 06:52:35.634370: step: 810/464, loss: 0.007019891869276762 2023-01-24 06:52:36.462363: step: 812/464, loss: 0.033112138509750366 2023-01-24 06:52:37.309389: step: 814/464, loss: 0.021351516246795654 2023-01-24 06:52:38.008188: step: 816/464, loss: 0.004479506053030491 2023-01-24 06:52:38.714795: step: 818/464, loss: 0.0241513904184103 2023-01-24 06:52:39.356797: step: 820/464, loss: 0.001121489447541535 2023-01-24 06:52:40.090501: step: 822/464, loss: 0.0023313446436077356 2023-01-24 06:52:40.939833: step: 824/464, loss: 0.009491167962551117 2023-01-24 06:52:41.631431: step: 826/464, loss: 0.002323998138308525 2023-01-24 06:52:42.395123: step: 828/464, loss: 0.012958469800651073 2023-01-24 06:52:43.076642: step: 830/464, loss: 0.008748810738325119 2023-01-24 06:52:43.926198: step: 832/464, loss: 0.09143657237291336 2023-01-24 06:52:44.615757: step: 834/464, loss: 8.623718895250931e-05 2023-01-24 06:52:45.444272: step: 836/464, loss: 0.044922150671482086 2023-01-24 06:52:46.135173: step: 838/464, loss: 0.0027205843944102526 2023-01-24 06:52:46.906179: step: 840/464, loss: 0.0037673949263989925 2023-01-24 06:52:47.687286: step: 842/464, loss: 0.014279971830546856 2023-01-24 06:52:48.399573: step: 844/464, loss: 0.005875707138329744 2023-01-24 06:52:49.282481: step: 846/464, loss: 1.09738028049469 2023-01-24 06:52:49.979922: step: 848/464, loss: 0.001458100276067853 2023-01-24 06:52:50.745723: step: 850/464, loss: 0.004742420744150877 2023-01-24 06:52:51.573263: step: 852/464, loss: 0.015593930147588253 2023-01-24 06:52:52.335004: step: 854/464, loss: 0.0016010843683034182 2023-01-24 06:52:53.089558: step: 856/464, loss: 0.004820770584046841 2023-01-24 06:52:53.796930: step: 858/464, loss: 0.0033338917419314384 2023-01-24 06:52:54.514794: step: 860/464, loss: 0.025021173059940338 2023-01-24 06:52:55.223653: step: 862/464, loss: 0.0010791352251544595 2023-01-24 06:52:55.925231: step: 864/464, loss: 2.1274170875549316 2023-01-24 06:52:56.591926: step: 866/464, loss: 0.0003470660303719342 2023-01-24 06:52:57.379597: step: 868/464, loss: 0.001641890499740839 2023-01-24 06:52:58.044523: step: 870/464, loss: 0.00018000038107857108 2023-01-24 06:52:58.748270: step: 872/464, loss: 0.0009995194850489497 2023-01-24 06:52:59.532062: step: 874/464, loss: 0.0012401562416926026 2023-01-24 06:53:00.244482: step: 876/464, loss: 0.0017109549371525645 2023-01-24 06:53:00.943525: step: 878/464, loss: 0.010825077071785927 2023-01-24 06:53:01.698968: step: 880/464, loss: 0.59823077917099 2023-01-24 06:53:02.366381: step: 882/464, loss: 0.0009296668577007949 2023-01-24 06:53:03.039668: step: 884/464, loss: 0.00184035359416157 2023-01-24 06:53:03.808140: step: 886/464, loss: 0.031071102246642113 2023-01-24 06:53:04.640734: step: 888/464, loss: 0.0023760192561894655 2023-01-24 06:53:05.349087: step: 890/464, loss: 0.004190961830317974 2023-01-24 06:53:06.066847: step: 892/464, loss: 0.0033965427428483963 2023-01-24 06:53:06.830330: step: 894/464, loss: 0.0021972153335809708 2023-01-24 06:53:07.533258: step: 896/464, loss: 0.00022150274890009314 2023-01-24 06:53:08.281143: step: 898/464, loss: 0.02371404506266117 2023-01-24 06:53:09.040894: step: 900/464, loss: 0.0433046817779541 2023-01-24 06:53:09.770624: step: 902/464, loss: 0.010654138401150703 2023-01-24 06:53:10.484038: step: 904/464, loss: 0.00342572876252234 2023-01-24 06:53:11.241530: step: 906/464, loss: 0.0011897621443495154 2023-01-24 06:53:12.044126: step: 908/464, loss: 0.04091949760913849 2023-01-24 06:53:12.749529: step: 910/464, loss: 0.008571179583668709 2023-01-24 06:53:13.479350: step: 912/464, loss: 0.006447421386837959 2023-01-24 06:53:14.180476: step: 914/464, loss: 0.00698639964684844 2023-01-24 06:53:14.907307: step: 916/464, loss: 0.004937224555760622 2023-01-24 06:53:15.626012: step: 918/464, loss: 0.017821161076426506 2023-01-24 06:53:16.421022: step: 920/464, loss: 0.2027270495891571 2023-01-24 06:53:17.131214: step: 922/464, loss: 0.002561023458838463 2023-01-24 06:53:17.962088: step: 924/464, loss: 0.4017066955566406 2023-01-24 06:53:18.642004: step: 926/464, loss: 0.0035937901120632887 2023-01-24 06:53:19.377371: step: 928/464, loss: 0.004983225371688604 2023-01-24 06:53:20.067710: step: 930/464, loss: 0.021204371005296707 ================================================== Loss: 0.047 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35058800011029306, 'r': 0.3133338672712486, 'f1': 0.33091572755901405}, 'combined': 0.2438326413592735, 'epoch': 37} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32484386005290083, 'r': 0.2612874526512463, 'f1': 0.28961982703511635}, 'combined': 0.1798691557375986, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3419401555088439, 'r': 0.31339107231645463, 'f1': 0.32704375269459723}, 'combined': 0.24097960724865058, 'epoch': 37} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32011187667587576, 'r': 0.2558997116904975, 'f1': 0.28442669767246953}, 'combined': 0.1766439490807969, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3520809634067722, 'r': 0.3153362708311128, 'f1': 0.3326971265825755}, 'combined': 0.24514525116610825, 'epoch': 37} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.33595157819847077, 'r': 0.26424649826678137, 'f1': 0.2958157701836092}, 'combined': 0.18371716253508363, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3804347826086957, 'r': 0.25, 'f1': 0.3017241379310345}, 'combined': 0.2011494252873563, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29838709677419356, 'r': 0.40217391304347827, 'f1': 0.34259259259259267}, 'combined': 0.17129629629629634, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5926535087719298, 'r': 0.2452359346642468, 'f1': 0.3469191270860077}, 'combined': 0.23127941805733845, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32928812535905044, 'r': 0.3017953786497559, 'f1': 0.31494290009588394}, 'combined': 0.23206318954433552, 'epoch': 8} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3148827005380212, 'r': 0.25763130044019916, 'f1': 0.2833944304842191}, 'combined': 0.17600285682704134, 'epoch': 8} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3359375, 'r': 0.4673913043478261, 'f1': 0.39090909090909093}, 'combined': 0.19545454545454546, 'epoch': 8} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3506143956254641, 'r': 0.28952249335739083, 'f1': 0.31715327073174765}, 'combined': 0.2336918836970772, 'epoch': 11} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3138933704245542, 'r': 0.26679387160468027, 'f1': 0.28843350259929684}, 'combined': 0.17913238582482646, 'epoch': 11} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6354166666666666, 'r': 0.2629310344827586, 'f1': 0.3719512195121952}, 'combined': 0.24796747967479676, 'epoch': 11} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 06:55:59.528895: step: 2/464, loss: 0.004826576914638281 2023-01-24 06:56:00.284968: step: 4/464, loss: 0.00030840112594887614 2023-01-24 06:56:01.113599: step: 6/464, loss: 0.002776885172352195 2023-01-24 06:56:01.874187: step: 8/464, loss: 0.005393106956034899 2023-01-24 06:56:02.661955: step: 10/464, loss: 0.00719247292727232 2023-01-24 06:56:03.395449: step: 12/464, loss: 0.017119480296969414 2023-01-24 06:56:04.103113: step: 14/464, loss: 0.001992583042010665 2023-01-24 06:56:04.832926: step: 16/464, loss: 0.004189030267298222 2023-01-24 06:56:05.486241: step: 18/464, loss: 0.009218255989253521 2023-01-24 06:56:06.242240: step: 20/464, loss: 0.002750575076788664 2023-01-24 06:56:06.944011: step: 22/464, loss: 0.0023512584157288074 2023-01-24 06:56:07.735902: step: 24/464, loss: 0.013374313712120056 2023-01-24 06:56:08.462196: step: 26/464, loss: 0.0004921825020574033 2023-01-24 06:56:09.212878: step: 28/464, loss: 0.02349039912223816 2023-01-24 06:56:09.975388: step: 30/464, loss: 0.004341502673923969 2023-01-24 06:56:10.661388: step: 32/464, loss: 0.010627289302647114 2023-01-24 06:56:11.379670: step: 34/464, loss: 0.00019175864872522652 2023-01-24 06:56:12.108868: step: 36/464, loss: 0.001980855828151107 2023-01-24 06:56:12.919637: step: 38/464, loss: 3.581074270186946e-05 2023-01-24 06:56:13.688655: step: 40/464, loss: 0.001287630875594914 2023-01-24 06:56:14.388784: step: 42/464, loss: 0.005695367231965065 2023-01-24 06:56:15.088146: step: 44/464, loss: 0.00015220152272377163 2023-01-24 06:56:15.834182: step: 46/464, loss: 0.0023461703676730394 2023-01-24 06:56:16.536723: step: 48/464, loss: 0.0034109558910131454 2023-01-24 06:56:17.288441: step: 50/464, loss: 3.344095966895111e-05 2023-01-24 06:56:17.983478: step: 52/464, loss: 0.0007164563285186887 2023-01-24 06:56:18.731524: step: 54/464, loss: 0.020312059670686722 2023-01-24 06:56:19.459617: step: 56/464, loss: 0.008092692121863365 2023-01-24 06:56:20.211351: step: 58/464, loss: 0.11512520164251328 2023-01-24 06:56:20.858437: step: 60/464, loss: 4.2281106289010495e-05 2023-01-24 06:56:21.551345: step: 62/464, loss: 0.015612255781888962 2023-01-24 06:56:22.262898: step: 64/464, loss: 0.01599789410829544 2023-01-24 06:56:22.953050: step: 66/464, loss: 0.0010073683224618435 2023-01-24 06:56:23.731055: step: 68/464, loss: 2.1914695025770925e-05 2023-01-24 06:56:24.397988: step: 70/464, loss: 8.730077388463542e-05 2023-01-24 06:56:25.164955: step: 72/464, loss: 8.373921446036547e-05 2023-01-24 06:56:25.913582: step: 74/464, loss: 0.004494973458349705 2023-01-24 06:56:26.619087: step: 76/464, loss: 0.0002359635109314695 2023-01-24 06:56:27.358148: step: 78/464, loss: 0.11488990485668182 2023-01-24 06:56:28.149738: step: 80/464, loss: 0.00331053021363914 2023-01-24 06:56:28.920492: step: 82/464, loss: 0.08849333971738815 2023-01-24 06:56:29.623593: step: 84/464, loss: 0.0001819575554691255 2023-01-24 06:56:30.379918: step: 86/464, loss: 0.0004114827897865325 2023-01-24 06:56:31.062474: step: 88/464, loss: 0.014233220368623734 2023-01-24 06:56:31.855154: step: 90/464, loss: 0.0013636646326631308 2023-01-24 06:56:32.571087: step: 92/464, loss: 0.0010587204014882445 2023-01-24 06:56:33.369566: step: 94/464, loss: 0.005007532890886068 2023-01-24 06:56:34.172652: step: 96/464, loss: 0.0003044283948838711 2023-01-24 06:56:34.846345: step: 98/464, loss: 0.6308871507644653 2023-01-24 06:56:35.607264: step: 100/464, loss: 0.01804986223578453 2023-01-24 06:56:36.340914: step: 102/464, loss: 0.0004457629984244704 2023-01-24 06:56:37.177966: step: 104/464, loss: 8.525677549187094e-05 2023-01-24 06:56:37.908512: step: 106/464, loss: 0.05905519053339958 2023-01-24 06:56:38.782909: step: 108/464, loss: 0.019224489107728004 2023-01-24 06:56:39.504679: step: 110/464, loss: 0.13390938937664032 2023-01-24 06:56:40.205449: step: 112/464, loss: 0.00015067239291965961 2023-01-24 06:56:40.902732: step: 114/464, loss: 0.0028867078945040703 2023-01-24 06:56:41.602373: step: 116/464, loss: 0.0004979309160262346 2023-01-24 06:56:42.390760: step: 118/464, loss: 0.0012932976242154837 2023-01-24 06:56:43.099539: step: 120/464, loss: 0.00414996175095439 2023-01-24 06:56:43.846742: step: 122/464, loss: 1.270747361559188e-05 2023-01-24 06:56:44.562485: step: 124/464, loss: 0.0025079974438995123 2023-01-24 06:56:45.403560: step: 126/464, loss: 0.3264477252960205 2023-01-24 06:56:46.097740: step: 128/464, loss: 0.01213053148239851 2023-01-24 06:56:46.734712: step: 130/464, loss: 0.0013147556455805898 2023-01-24 06:56:47.499595: step: 132/464, loss: 0.011113953776657581 2023-01-24 06:56:48.214814: step: 134/464, loss: 0.005514933727681637 2023-01-24 06:56:48.900353: step: 136/464, loss: 0.023089662194252014 2023-01-24 06:56:49.702391: step: 138/464, loss: 0.10404906421899796 2023-01-24 06:56:50.435980: step: 140/464, loss: 0.035954758524894714 2023-01-24 06:56:51.127739: step: 142/464, loss: 0.0005670940154232085 2023-01-24 06:56:51.881955: step: 144/464, loss: 0.009082665666937828 2023-01-24 06:56:52.733259: step: 146/464, loss: 0.012343711219727993 2023-01-24 06:56:53.462997: step: 148/464, loss: 6.339289029710926e-06 2023-01-24 06:56:54.157419: step: 150/464, loss: 0.0026830360293388367 2023-01-24 06:56:54.863582: step: 152/464, loss: 0.00025342742446810007 2023-01-24 06:56:55.487593: step: 154/464, loss: 0.003277807030826807 2023-01-24 06:56:56.230087: step: 156/464, loss: 0.0008476334623992443 2023-01-24 06:56:56.936479: step: 158/464, loss: 0.007485872600227594 2023-01-24 06:56:57.643567: step: 160/464, loss: 0.9153180718421936 2023-01-24 06:56:58.361006: step: 162/464, loss: 0.003973011393100023 2023-01-24 06:56:59.079692: step: 164/464, loss: 0.01418553851544857 2023-01-24 06:56:59.724132: step: 166/464, loss: 0.0006633122102357447 2023-01-24 06:57:00.548294: step: 168/464, loss: 0.12291660159826279 2023-01-24 06:57:01.263493: step: 170/464, loss: 2.9597180400742218e-05 2023-01-24 06:57:02.008468: step: 172/464, loss: 0.00031909276731312275 2023-01-24 06:57:02.808768: step: 174/464, loss: 0.0021952923852950335 2023-01-24 06:57:03.503194: step: 176/464, loss: 0.8404306769371033 2023-01-24 06:57:04.274290: step: 178/464, loss: 0.0026680396404117346 2023-01-24 06:57:05.035604: step: 180/464, loss: 0.1378488689661026 2023-01-24 06:57:05.718124: step: 182/464, loss: 0.0038573513738811016 2023-01-24 06:57:06.383233: step: 184/464, loss: 1.6524886632396374e-06 2023-01-24 06:57:07.062523: step: 186/464, loss: 0.0010226487647742033 2023-01-24 06:57:07.784542: step: 188/464, loss: 0.0053894431330263615 2023-01-24 06:57:08.480732: step: 190/464, loss: 0.0068910811096429825 2023-01-24 06:57:09.218621: step: 192/464, loss: 0.014292379841208458 2023-01-24 06:57:09.887562: step: 194/464, loss: 0.015958290547132492 2023-01-24 06:57:10.566952: step: 196/464, loss: 0.003270985558629036 2023-01-24 06:57:11.418731: step: 198/464, loss: 0.019883891567587852 2023-01-24 06:57:12.191230: step: 200/464, loss: 0.0022445889189839363 2023-01-24 06:57:12.899461: step: 202/464, loss: 0.0007975028711371124 2023-01-24 06:57:13.625173: step: 204/464, loss: 0.00023076686193235219 2023-01-24 06:57:14.321771: step: 206/464, loss: 0.11212483048439026 2023-01-24 06:57:14.961224: step: 208/464, loss: 0.0010478850454092026 2023-01-24 06:57:15.683134: step: 210/464, loss: 0.03414137288928032 2023-01-24 06:57:16.372215: step: 212/464, loss: 0.056933943182229996 2023-01-24 06:57:17.125542: step: 214/464, loss: 0.00579112721607089 2023-01-24 06:57:17.809061: step: 216/464, loss: 0.015554566867649555 2023-01-24 06:57:18.514528: step: 218/464, loss: 0.004184139892458916 2023-01-24 06:57:19.155118: step: 220/464, loss: 0.05660339072346687 2023-01-24 06:57:19.878815: step: 222/464, loss: 0.012609807774424553 2023-01-24 06:57:20.634032: step: 224/464, loss: 0.036066535860300064 2023-01-24 06:57:21.353700: step: 226/464, loss: 0.003133221063762903 2023-01-24 06:57:22.023486: step: 228/464, loss: 0.0016689972253516316 2023-01-24 06:57:22.747669: step: 230/464, loss: 0.026168620213866234 2023-01-24 06:57:23.511571: step: 232/464, loss: 0.029742280021309853 2023-01-24 06:57:24.272655: step: 234/464, loss: 0.11596041917800903 2023-01-24 06:57:25.007437: step: 236/464, loss: 0.010135364718735218 2023-01-24 06:57:25.804767: step: 238/464, loss: 0.03542346879839897 2023-01-24 06:57:26.592775: step: 240/464, loss: 0.0002479084942024201 2023-01-24 06:57:27.383689: step: 242/464, loss: 8.900500688469037e-05 2023-01-24 06:57:28.157274: step: 244/464, loss: 0.007964391261339188 2023-01-24 06:57:28.901521: step: 246/464, loss: 0.04299350827932358 2023-01-24 06:57:29.618655: step: 248/464, loss: 0.024968914687633514 2023-01-24 06:57:30.316132: step: 250/464, loss: 0.001517502241767943 2023-01-24 06:57:31.092438: step: 252/464, loss: 0.0010750810615718365 2023-01-24 06:57:31.962097: step: 254/464, loss: 0.00852601882070303 2023-01-24 06:57:32.688275: step: 256/464, loss: 0.0008214469416998327 2023-01-24 06:57:33.408999: step: 258/464, loss: 0.0049279495142400265 2023-01-24 06:57:34.180218: step: 260/464, loss: 0.03465091809630394 2023-01-24 06:57:34.916637: step: 262/464, loss: 0.000700597302056849 2023-01-24 06:57:35.627676: step: 264/464, loss: 0.00017623744497541338 2023-01-24 06:57:36.360333: step: 266/464, loss: 0.001508101588115096 2023-01-24 06:57:37.113721: step: 268/464, loss: 0.03918704390525818 2023-01-24 06:57:37.807143: step: 270/464, loss: 0.003989690914750099 2023-01-24 06:57:38.541507: step: 272/464, loss: 0.007421809248626232 2023-01-24 06:57:39.256667: step: 274/464, loss: 0.03553994372487068 2023-01-24 06:57:40.149600: step: 276/464, loss: 0.005269336514174938 2023-01-24 06:57:40.890514: step: 278/464, loss: 0.001513259601779282 2023-01-24 06:57:41.587667: step: 280/464, loss: 0.005394801031798124 2023-01-24 06:57:42.252367: step: 282/464, loss: 0.005470091011375189 2023-01-24 06:57:43.065533: step: 284/464, loss: 0.00022049256949685514 2023-01-24 06:57:43.785941: step: 286/464, loss: 1.162865555670578e-05 2023-01-24 06:57:44.494693: step: 288/464, loss: 0.00864122062921524 2023-01-24 06:57:45.224829: step: 290/464, loss: 0.03521820902824402 2023-01-24 06:57:45.940968: step: 292/464, loss: 0.007795601151883602 2023-01-24 06:57:46.696125: step: 294/464, loss: 0.04232088848948479 2023-01-24 06:57:47.431590: step: 296/464, loss: 0.0011312862625345588 2023-01-24 06:57:48.126329: step: 298/464, loss: 0.006688180845230818 2023-01-24 06:57:48.841884: step: 300/464, loss: 0.00024246759130619466 2023-01-24 06:57:49.606885: step: 302/464, loss: 0.003744281828403473 2023-01-24 06:57:50.367757: step: 304/464, loss: 0.030908262357115746 2023-01-24 06:57:51.060527: step: 306/464, loss: 0.035456642508506775 2023-01-24 06:57:51.914670: step: 308/464, loss: 0.04970543459057808 2023-01-24 06:57:52.647744: step: 310/464, loss: 0.005955439060926437 2023-01-24 06:57:53.341865: step: 312/464, loss: 0.0006838430999778211 2023-01-24 06:57:54.165052: step: 314/464, loss: 0.0007688677869737148 2023-01-24 06:57:54.935306: step: 316/464, loss: 0.0032956611830741167 2023-01-24 06:57:55.628431: step: 318/464, loss: 0.0005155724938958883 2023-01-24 06:57:56.312324: step: 320/464, loss: 0.0029878311324864626 2023-01-24 06:57:56.987843: step: 322/464, loss: 0.0009836448589339852 2023-01-24 06:57:57.738979: step: 324/464, loss: 0.0037996331229805946 2023-01-24 06:57:58.418250: step: 326/464, loss: 5.877662624698132e-05 2023-01-24 06:57:59.215055: step: 328/464, loss: 0.006655172444880009 2023-01-24 06:57:59.955018: step: 330/464, loss: 0.0013236195081844926 2023-01-24 06:58:00.750939: step: 332/464, loss: 0.00981642585247755 2023-01-24 06:58:01.439120: step: 334/464, loss: 0.0006703927647322416 2023-01-24 06:58:02.126025: step: 336/464, loss: 0.021209033206105232 2023-01-24 06:58:02.931047: step: 338/464, loss: 0.04230085760354996 2023-01-24 06:58:03.745727: step: 340/464, loss: 0.22357164323329926 2023-01-24 06:58:04.420739: step: 342/464, loss: 0.00016751704970374703 2023-01-24 06:58:05.158056: step: 344/464, loss: 0.056093987077474594 2023-01-24 06:58:05.802222: step: 346/464, loss: 0.0025848422665148973 2023-01-24 06:58:06.596606: step: 348/464, loss: 0.0039555374532938 2023-01-24 06:58:07.313497: step: 350/464, loss: 0.0008982414146885276 2023-01-24 06:58:08.070011: step: 352/464, loss: 0.06868308782577515 2023-01-24 06:58:08.760693: step: 354/464, loss: 0.00014734716387465596 2023-01-24 06:58:09.518580: step: 356/464, loss: 0.0005575288669206202 2023-01-24 06:58:10.220917: step: 358/464, loss: 0.007786013185977936 2023-01-24 06:58:10.959979: step: 360/464, loss: 0.03875568136572838 2023-01-24 06:58:11.717336: step: 362/464, loss: 0.034470029175281525 2023-01-24 06:58:12.442392: step: 364/464, loss: 0.00023473362671211362 2023-01-24 06:58:13.164339: step: 366/464, loss: 0.014822765253484249 2023-01-24 06:58:13.823287: step: 368/464, loss: 0.00023555981169920415 2023-01-24 06:58:14.622305: step: 370/464, loss: 0.0026356929447501898 2023-01-24 06:58:15.379419: step: 372/464, loss: 0.014596613124012947 2023-01-24 06:58:16.148960: step: 374/464, loss: 9.38707817113027e-05 2023-01-24 06:58:16.881256: step: 376/464, loss: 0.007376750465482473 2023-01-24 06:58:17.535429: step: 378/464, loss: 0.00013531959848478436 2023-01-24 06:58:18.344461: step: 380/464, loss: 0.009358882904052734 2023-01-24 06:58:19.072578: step: 382/464, loss: 0.046712055802345276 2023-01-24 06:58:19.826168: step: 384/464, loss: 0.004233027808368206 2023-01-24 06:58:20.523843: step: 386/464, loss: 4.672848808695562e-05 2023-01-24 06:58:21.190448: step: 388/464, loss: 0.0007691225619055331 2023-01-24 06:58:22.091357: step: 390/464, loss: 0.01412983052432537 2023-01-24 06:58:22.807508: step: 392/464, loss: 0.0025889184325933456 2023-01-24 06:58:23.525254: step: 394/464, loss: 0.0006169418338686228 2023-01-24 06:58:24.237759: step: 396/464, loss: 0.02392732910811901 2023-01-24 06:58:24.944131: step: 398/464, loss: 0.02258199453353882 2023-01-24 06:58:25.732960: step: 400/464, loss: 0.00559289800003171 2023-01-24 06:58:26.415219: step: 402/464, loss: 0.0742979645729065 2023-01-24 06:58:27.072735: step: 404/464, loss: 0.010385122150182724 2023-01-24 06:58:27.825492: step: 406/464, loss: 0.0037662286777049303 2023-01-24 06:58:28.578717: step: 408/464, loss: 0.04555836319923401 2023-01-24 06:58:29.336502: step: 410/464, loss: 0.006724359467625618 2023-01-24 06:58:30.060372: step: 412/464, loss: 0.01723787561058998 2023-01-24 06:58:30.672778: step: 414/464, loss: 0.0011205865303054452 2023-01-24 06:58:31.408629: step: 416/464, loss: 0.30172547698020935 2023-01-24 06:58:32.311299: step: 418/464, loss: 0.0027903704904019833 2023-01-24 06:58:32.998045: step: 420/464, loss: 0.0023601017892360687 2023-01-24 06:58:33.680130: step: 422/464, loss: 0.0002794855972751975 2023-01-24 06:58:34.587424: step: 424/464, loss: 0.13339070975780487 2023-01-24 06:58:35.236370: step: 426/464, loss: 0.003943042363971472 2023-01-24 06:58:35.945677: step: 428/464, loss: 0.00010437369928695261 2023-01-24 06:58:36.651568: step: 430/464, loss: 0.017912743613123894 2023-01-24 06:58:37.378147: step: 432/464, loss: 0.004407365340739489 2023-01-24 06:58:38.148448: step: 434/464, loss: 0.005560706369578838 2023-01-24 06:58:38.856422: step: 436/464, loss: 0.03185200318694115 2023-01-24 06:58:39.555053: step: 438/464, loss: 0.001232458045706153 2023-01-24 06:58:40.330632: step: 440/464, loss: 0.00033859541872516274 2023-01-24 06:58:41.135939: step: 442/464, loss: 6.870036304462701e-05 2023-01-24 06:58:41.960569: step: 444/464, loss: 0.14986322820186615 2023-01-24 06:58:42.661915: step: 446/464, loss: 0.0128685487434268 2023-01-24 06:58:43.450613: step: 448/464, loss: 0.0524282306432724 2023-01-24 06:58:44.227245: step: 450/464, loss: 0.0016600600210949779 2023-01-24 06:58:44.950232: step: 452/464, loss: 0.08632011711597443 2023-01-24 06:58:45.667400: step: 454/464, loss: 0.006322702392935753 2023-01-24 06:58:46.381925: step: 456/464, loss: 0.06686757504940033 2023-01-24 06:58:47.089786: step: 458/464, loss: 0.02065141499042511 2023-01-24 06:58:47.824585: step: 460/464, loss: 0.0003768194292206317 2023-01-24 06:58:48.539097: step: 462/464, loss: 0.002721271710470319 2023-01-24 06:58:49.243476: step: 464/464, loss: 3.4734282507997705e-06 2023-01-24 06:58:50.022460: step: 466/464, loss: 6.966947694309056e-05 2023-01-24 06:58:50.730776: step: 468/464, loss: 0.03409808874130249 2023-01-24 06:58:51.472026: step: 470/464, loss: 0.008349225856363773 2023-01-24 06:58:52.214714: step: 472/464, loss: 0.00819906685501337 2023-01-24 06:58:52.906337: step: 474/464, loss: 0.003774096257984638 2023-01-24 06:58:53.680899: step: 476/464, loss: 0.0038325139321386814 2023-01-24 06:58:54.439048: step: 478/464, loss: 0.0010969663271680474 2023-01-24 06:58:55.164761: step: 480/464, loss: 0.06739410012960434 2023-01-24 06:58:55.881598: step: 482/464, loss: 0.023230241611599922 2023-01-24 06:58:56.636243: step: 484/464, loss: 0.022559460252523422 2023-01-24 06:58:57.289904: step: 486/464, loss: 0.041261497884988785 2023-01-24 06:58:57.962827: step: 488/464, loss: 0.14989309012889862 2023-01-24 06:58:58.708630: step: 490/464, loss: 0.0018158291932195425 2023-01-24 06:58:59.375992: step: 492/464, loss: 0.0007690245984122157 2023-01-24 06:59:00.015695: step: 494/464, loss: 0.0004796649154741317 2023-01-24 06:59:00.747310: step: 496/464, loss: 0.02845880761742592 2023-01-24 06:59:01.473125: step: 498/464, loss: 0.0004121836391277611 2023-01-24 06:59:02.178263: step: 500/464, loss: 6.842023867648095e-05 2023-01-24 06:59:02.868740: step: 502/464, loss: 0.037237752228975296 2023-01-24 06:59:03.590655: step: 504/464, loss: 0.01267719455063343 2023-01-24 06:59:04.349809: step: 506/464, loss: 0.024383530020713806 2023-01-24 06:59:05.165685: step: 508/464, loss: 0.0009457824053242803 2023-01-24 06:59:05.920948: step: 510/464, loss: 0.006350579205900431 2023-01-24 06:59:06.637545: step: 512/464, loss: 0.024768246337771416 2023-01-24 06:59:07.367033: step: 514/464, loss: 0.0007438718457706273 2023-01-24 06:59:08.183876: step: 516/464, loss: 0.008646605536341667 2023-01-24 06:59:08.983421: step: 518/464, loss: 0.0017969388281926513 2023-01-24 06:59:09.737204: step: 520/464, loss: 0.008778219111263752 2023-01-24 06:59:10.462487: step: 522/464, loss: 0.00014523882418870926 2023-01-24 06:59:11.252810: step: 524/464, loss: 0.0007008722168393433 2023-01-24 06:59:12.031243: step: 526/464, loss: 0.015383994206786156 2023-01-24 06:59:12.709135: step: 528/464, loss: 0.035820845514535904 2023-01-24 06:59:13.373496: step: 530/464, loss: 0.012869827449321747 2023-01-24 06:59:14.137327: step: 532/464, loss: 0.03324505686759949 2023-01-24 06:59:14.903518: step: 534/464, loss: 0.000605732318945229 2023-01-24 06:59:15.627221: step: 536/464, loss: 0.14915016293525696 2023-01-24 06:59:16.385271: step: 538/464, loss: 0.01700645498931408 2023-01-24 06:59:17.152666: step: 540/464, loss: 0.008300750516355038 2023-01-24 06:59:17.958707: step: 542/464, loss: 0.020550068467855453 2023-01-24 06:59:18.712578: step: 544/464, loss: 0.007534760981798172 2023-01-24 06:59:19.397823: step: 546/464, loss: 0.03769280016422272 2023-01-24 06:59:20.195921: step: 548/464, loss: 0.02654329501092434 2023-01-24 06:59:20.867054: step: 550/464, loss: 0.00033084870665334165 2023-01-24 06:59:21.660253: step: 552/464, loss: 0.0725986659526825 2023-01-24 06:59:22.344565: step: 554/464, loss: 0.005369146820157766 2023-01-24 06:59:23.052833: step: 556/464, loss: 0.004675235599279404 2023-01-24 06:59:23.772766: step: 558/464, loss: 0.03285602480173111 2023-01-24 06:59:24.505001: step: 560/464, loss: 0.00970652885735035 2023-01-24 06:59:25.297309: step: 562/464, loss: 0.0004878818872384727 2023-01-24 06:59:26.067733: step: 564/464, loss: 0.003205288900062442 2023-01-24 06:59:26.687189: step: 566/464, loss: 0.0010604523122310638 2023-01-24 06:59:27.366705: step: 568/464, loss: 0.007997574284672737 2023-01-24 06:59:28.090252: step: 570/464, loss: 0.004095721058547497 2023-01-24 06:59:28.853400: step: 572/464, loss: 0.376163125038147 2023-01-24 06:59:29.547300: step: 574/464, loss: 0.004886925686150789 2023-01-24 06:59:30.305014: step: 576/464, loss: 0.0008122111321426928 2023-01-24 06:59:31.043126: step: 578/464, loss: 0.0018273144960403442 2023-01-24 06:59:31.758026: step: 580/464, loss: 0.00041048970888368785 2023-01-24 06:59:32.501424: step: 582/464, loss: 0.0002744023222476244 2023-01-24 06:59:33.204755: step: 584/464, loss: 0.000339846417773515 2023-01-24 06:59:33.878777: step: 586/464, loss: 0.05465429648756981 2023-01-24 06:59:34.567774: step: 588/464, loss: 0.006726523395627737 2023-01-24 06:59:35.277408: step: 590/464, loss: 0.005674843676388264 2023-01-24 06:59:36.116658: step: 592/464, loss: 0.0012488930951803923 2023-01-24 06:59:36.798804: step: 594/464, loss: 7.244118023663759e-05 2023-01-24 06:59:37.489958: step: 596/464, loss: 0.09878098219633102 2023-01-24 06:59:38.250823: step: 598/464, loss: 0.004160263109952211 2023-01-24 06:59:38.958499: step: 600/464, loss: 0.006494167726486921 2023-01-24 06:59:39.788890: step: 602/464, loss: 0.0021416889503598213 2023-01-24 06:59:40.564076: step: 604/464, loss: 0.030240392312407494 2023-01-24 06:59:41.183774: step: 606/464, loss: 0.0015008965274319053 2023-01-24 06:59:41.913208: step: 608/464, loss: 0.0006604917580261827 2023-01-24 06:59:42.574859: step: 610/464, loss: 0.0010907500982284546 2023-01-24 06:59:43.353156: step: 612/464, loss: 0.003240359015762806 2023-01-24 06:59:44.047546: step: 614/464, loss: 0.002071299823001027 2023-01-24 06:59:44.737768: step: 616/464, loss: 0.06142830848693848 2023-01-24 06:59:45.508641: step: 618/464, loss: 0.014102988876402378 2023-01-24 06:59:46.131444: step: 620/464, loss: 0.003527525346726179 2023-01-24 06:59:46.745329: step: 622/464, loss: 0.007172831334173679 2023-01-24 06:59:47.547024: step: 624/464, loss: 0.009524202905595303 2023-01-24 06:59:48.387023: step: 626/464, loss: 0.012650671415030956 2023-01-24 06:59:49.086650: step: 628/464, loss: 0.023050807416439056 2023-01-24 06:59:49.835900: step: 630/464, loss: 0.00029439141508191824 2023-01-24 06:59:50.554027: step: 632/464, loss: 0.0003891981323249638 2023-01-24 06:59:51.239985: step: 634/464, loss: 0.06722453981637955 2023-01-24 06:59:52.077219: step: 636/464, loss: 0.006464850623160601 2023-01-24 06:59:52.809764: step: 638/464, loss: 0.019264616072177887 2023-01-24 06:59:53.514673: step: 640/464, loss: 0.03517953306436539 2023-01-24 06:59:54.235561: step: 642/464, loss: 0.0011486627627164125 2023-01-24 06:59:54.962093: step: 644/464, loss: 0.0007427395903505385 2023-01-24 06:59:55.656696: step: 646/464, loss: 0.014897801913321018 2023-01-24 06:59:56.385029: step: 648/464, loss: 0.000844307302031666 2023-01-24 06:59:57.056951: step: 650/464, loss: 0.31130504608154297 2023-01-24 06:59:57.850698: step: 652/464, loss: 0.007045819889754057 2023-01-24 06:59:58.551037: step: 654/464, loss: 0.006329555530101061 2023-01-24 06:59:59.328756: step: 656/464, loss: 0.20530587434768677 2023-01-24 07:00:00.084219: step: 658/464, loss: 0.013408103957772255 2023-01-24 07:00:00.811972: step: 660/464, loss: 0.0012683480745181441 2023-01-24 07:00:01.546934: step: 662/464, loss: 0.0009223067318089306 2023-01-24 07:00:02.335151: step: 664/464, loss: 0.011936173774302006 2023-01-24 07:00:03.023685: step: 666/464, loss: 0.00199041492305696 2023-01-24 07:00:03.733491: step: 668/464, loss: 0.000733358261641115 2023-01-24 07:00:04.446542: step: 670/464, loss: 0.0031419494189321995 2023-01-24 07:00:05.207214: step: 672/464, loss: 0.0013263403670862317 2023-01-24 07:00:05.901563: step: 674/464, loss: 0.046067021787166595 2023-01-24 07:00:06.636943: step: 676/464, loss: 0.0020457401406019926 2023-01-24 07:00:07.415742: step: 678/464, loss: 0.02471664361655712 2023-01-24 07:00:08.195439: step: 680/464, loss: 0.022153720259666443 2023-01-24 07:00:08.913623: step: 682/464, loss: 0.0025153113529086113 2023-01-24 07:00:09.621681: step: 684/464, loss: 0.14750951528549194 2023-01-24 07:00:10.407802: step: 686/464, loss: 0.01097826100885868 2023-01-24 07:00:11.227117: step: 688/464, loss: 0.00044294664985500276 2023-01-24 07:00:12.064512: step: 690/464, loss: 0.010748908855021 2023-01-24 07:00:12.845835: step: 692/464, loss: 0.009270434267818928 2023-01-24 07:00:13.561713: step: 694/464, loss: 0.019096214324235916 2023-01-24 07:00:14.319936: step: 696/464, loss: 0.001764296437613666 2023-01-24 07:00:15.034689: step: 698/464, loss: 0.00897511001676321 2023-01-24 07:00:15.841986: step: 700/464, loss: 0.004141754005104303 2023-01-24 07:00:16.608802: step: 702/464, loss: 0.02591550722718239 2023-01-24 07:00:17.335797: step: 704/464, loss: 0.015690796077251434 2023-01-24 07:00:18.019552: step: 706/464, loss: 0.0023084969725459814 2023-01-24 07:00:18.713609: step: 708/464, loss: 0.000845626404043287 2023-01-24 07:00:19.458225: step: 710/464, loss: 0.0013910304987803102 2023-01-24 07:00:20.230973: step: 712/464, loss: 0.04212622344493866 2023-01-24 07:00:20.935730: step: 714/464, loss: 0.0007252685609273612 2023-01-24 07:00:21.728535: step: 716/464, loss: 0.012809190899133682 2023-01-24 07:00:22.510524: step: 718/464, loss: 0.004088373389095068 2023-01-24 07:00:23.184146: step: 720/464, loss: 0.01290807407349348 2023-01-24 07:00:23.945346: step: 722/464, loss: 0.09836144000291824 2023-01-24 07:00:24.739011: step: 724/464, loss: 0.012062566354870796 2023-01-24 07:00:25.463834: step: 726/464, loss: 0.01598495803773403 2023-01-24 07:00:26.207431: step: 728/464, loss: 0.03479862958192825 2023-01-24 07:00:26.920427: step: 730/464, loss: 0.013840602710843086 2023-01-24 07:00:27.627673: step: 732/464, loss: 0.006491428706794977 2023-01-24 07:00:28.330782: step: 734/464, loss: 0.00132731010671705 2023-01-24 07:00:29.038542: step: 736/464, loss: 0.006518483627587557 2023-01-24 07:00:29.778847: step: 738/464, loss: 0.0020145985763520002 2023-01-24 07:00:30.403755: step: 740/464, loss: 0.14279569685459137 2023-01-24 07:00:31.088149: step: 742/464, loss: 6.712900358252227e-05 2023-01-24 07:00:31.769296: step: 744/464, loss: 0.019915182143449783 2023-01-24 07:00:32.443617: step: 746/464, loss: 0.002811913378536701 2023-01-24 07:00:33.030605: step: 748/464, loss: 0.007623051758855581 2023-01-24 07:00:33.779713: step: 750/464, loss: 9.78571260930039e-05 2023-01-24 07:00:34.445853: step: 752/464, loss: 0.008471081033349037 2023-01-24 07:00:35.120160: step: 754/464, loss: 0.0037404377944767475 2023-01-24 07:00:35.848484: step: 756/464, loss: 0.00015219133638311177 2023-01-24 07:00:36.553816: step: 758/464, loss: 2.966730244224891e-05 2023-01-24 07:00:37.409991: step: 760/464, loss: 0.01211349293589592 2023-01-24 07:00:38.155324: step: 762/464, loss: 0.0003824532323051244 2023-01-24 07:00:38.884437: step: 764/464, loss: 0.0011139989364892244 2023-01-24 07:00:39.620016: step: 766/464, loss: 0.008691009134054184 2023-01-24 07:00:40.432131: step: 768/464, loss: 0.0024587425868958235 2023-01-24 07:00:41.186707: step: 770/464, loss: 0.0020354236476123333 2023-01-24 07:00:41.877993: step: 772/464, loss: 0.0019010152900591493 2023-01-24 07:00:42.721426: step: 774/464, loss: 0.002046798123046756 2023-01-24 07:00:43.481641: step: 776/464, loss: 0.0019110854482278228 2023-01-24 07:00:44.186031: step: 778/464, loss: 0.010291693732142448 2023-01-24 07:00:44.915400: step: 780/464, loss: 0.0003693166945595294 2023-01-24 07:00:45.596058: step: 782/464, loss: 0.002170081250369549 2023-01-24 07:00:46.311901: step: 784/464, loss: 0.05067446082830429 2023-01-24 07:00:46.992448: step: 786/464, loss: 0.006645172368735075 2023-01-24 07:00:47.681745: step: 788/464, loss: 0.01579456590116024 2023-01-24 07:00:48.344673: step: 790/464, loss: 0.0007638560491614044 2023-01-24 07:00:49.049605: step: 792/464, loss: 0.00043909618398174644 2023-01-24 07:00:49.763470: step: 794/464, loss: 0.0005379181820899248 2023-01-24 07:00:50.535743: step: 796/464, loss: 0.022501660510897636 2023-01-24 07:00:51.205293: step: 798/464, loss: 0.0009315320639871061 2023-01-24 07:00:51.949236: step: 800/464, loss: 0.1112055554986 2023-01-24 07:00:52.704968: step: 802/464, loss: 0.05598204582929611 2023-01-24 07:00:53.390349: step: 804/464, loss: 0.014602440409362316 2023-01-24 07:00:54.252315: step: 806/464, loss: 0.18087033927440643 2023-01-24 07:00:55.006413: step: 808/464, loss: 0.012002396397292614 2023-01-24 07:00:55.837908: step: 810/464, loss: 0.0077812401577830315 2023-01-24 07:00:56.575093: step: 812/464, loss: 9.820223203860223e-05 2023-01-24 07:00:57.314709: step: 814/464, loss: 0.0015867205802351236 2023-01-24 07:00:58.045227: step: 816/464, loss: 0.006540404632687569 2023-01-24 07:00:58.801302: step: 818/464, loss: 0.00014760888007003814 2023-01-24 07:00:59.604454: step: 820/464, loss: 0.002761528827250004 2023-01-24 07:01:00.284973: step: 822/464, loss: 0.03273645043373108 2023-01-24 07:01:01.012563: step: 824/464, loss: 0.003857521340250969 2023-01-24 07:01:01.737631: step: 826/464, loss: 0.0049551865085959435 2023-01-24 07:01:02.486212: step: 828/464, loss: 0.00504017248749733 2023-01-24 07:01:03.272586: step: 830/464, loss: 0.011408940888941288 2023-01-24 07:01:04.017650: step: 832/464, loss: 0.010786442086100578 2023-01-24 07:01:04.803944: step: 834/464, loss: 0.002086550695821643 2023-01-24 07:01:05.504671: step: 836/464, loss: 0.0010599680244922638 2023-01-24 07:01:06.257579: step: 838/464, loss: 0.0013713724911212921 2023-01-24 07:01:06.948289: step: 840/464, loss: 0.0037390729412436485 2023-01-24 07:01:07.637948: step: 842/464, loss: 0.0004886464448645711 2023-01-24 07:01:08.410364: step: 844/464, loss: 0.000662407313939184 2023-01-24 07:01:09.184171: step: 846/464, loss: 0.0027486770413815975 2023-01-24 07:01:09.880750: step: 848/464, loss: 0.014897621236741543 2023-01-24 07:01:10.629541: step: 850/464, loss: 0.03529081121087074 2023-01-24 07:01:11.372927: step: 852/464, loss: 0.0020230154041200876 2023-01-24 07:01:12.179645: step: 854/464, loss: 0.0013419726165011525 2023-01-24 07:01:12.923669: step: 856/464, loss: 0.0024132877588272095 2023-01-24 07:01:13.598488: step: 858/464, loss: 7.780754094710574e-05 2023-01-24 07:01:14.300067: step: 860/464, loss: 0.02189820446074009 2023-01-24 07:01:15.067314: step: 862/464, loss: 0.018136218190193176 2023-01-24 07:01:15.832839: step: 864/464, loss: 0.008349993266165257 2023-01-24 07:01:16.547128: step: 866/464, loss: 0.01271690521389246 2023-01-24 07:01:17.339097: step: 868/464, loss: 0.0003639784117694944 2023-01-24 07:01:18.073735: step: 870/464, loss: 0.001440309570170939 2023-01-24 07:01:18.835945: step: 872/464, loss: 0.0009805704466998577 2023-01-24 07:01:19.454202: step: 874/464, loss: 0.033950306475162506 2023-01-24 07:01:20.126017: step: 876/464, loss: 0.0008513766224496067 2023-01-24 07:01:20.838330: step: 878/464, loss: 0.0002595721452962607 2023-01-24 07:01:21.559175: step: 880/464, loss: 8.58848579810001e-05 2023-01-24 07:01:22.357604: step: 882/464, loss: 0.018774444237351418 2023-01-24 07:01:23.076621: step: 884/464, loss: 0.006721880286931992 2023-01-24 07:01:23.778858: step: 886/464, loss: 0.0034052832052111626 2023-01-24 07:01:24.603314: step: 888/464, loss: 0.0030635748989880085 2023-01-24 07:01:25.310642: step: 890/464, loss: 0.014266138896346092 2023-01-24 07:01:26.050936: step: 892/464, loss: 0.00177770818118006 2023-01-24 07:01:26.764227: step: 894/464, loss: 0.008312602527439594 2023-01-24 07:01:27.545319: step: 896/464, loss: 0.012707074172794819 2023-01-24 07:01:28.311812: step: 898/464, loss: 0.0032947026193141937 2023-01-24 07:01:29.034488: step: 900/464, loss: 9.511190000921488e-05 2023-01-24 07:01:29.713713: step: 902/464, loss: 0.0001595946669112891 2023-01-24 07:01:30.420780: step: 904/464, loss: 0.0008967430330812931 2023-01-24 07:01:31.094813: step: 906/464, loss: 0.001551638706587255 2023-01-24 07:01:31.977877: step: 908/464, loss: 0.11213032901287079 2023-01-24 07:01:32.726714: step: 910/464, loss: 0.011793390847742558 2023-01-24 07:01:33.505445: step: 912/464, loss: 0.009459859691560268 2023-01-24 07:01:34.213417: step: 914/464, loss: 0.0012327745789662004 2023-01-24 07:01:34.925310: step: 916/464, loss: 0.017146775498986244 2023-01-24 07:01:35.715458: step: 918/464, loss: 0.01610678993165493 2023-01-24 07:01:36.514892: step: 920/464, loss: 5.426480493042618e-05 2023-01-24 07:01:37.289333: step: 922/464, loss: 0.0013343350728973746 2023-01-24 07:01:38.009729: step: 924/464, loss: 0.00017622199084144086 2023-01-24 07:01:38.724753: step: 926/464, loss: 0.004111295100301504 2023-01-24 07:01:39.505446: step: 928/464, loss: 0.020738869905471802 2023-01-24 07:01:40.209719: step: 930/464, loss: 0.0001751655072439462 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34881591796875, 'r': 0.33888757115749524, 'f1': 0.3437800769971126}, 'combined': 0.253311635682083, 'epoch': 38} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3212990851567031, 'r': 0.27145327846737266, 'f1': 0.2942803618735738}, 'combined': 0.18276359316358795, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3319094637855142, 'r': 0.33064984532712516, 'f1': 0.33127845720037064}, 'combined': 0.24409991583185203, 'epoch': 38} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3155887519905696, 'r': 0.26823486225457105, 'f1': 0.28999137189314883}, 'combined': 0.1800999046494293, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33985958512722525, 'r': 0.3301861624006439, 'f1': 0.3349530463621546}, 'combined': 0.24680750784579814, 'epoch': 38} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32503878183760987, 'r': 0.2728404355917671, 'f1': 0.2966609985277382}, 'combined': 0.18424209382249004, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.33958333333333335, 'r': 0.2910714285714286, 'f1': 0.31346153846153846}, 'combined': 0.20897435897435895, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3092105263157895, 'r': 0.5108695652173914, 'f1': 0.3852459016393443}, 'combined': 0.19262295081967215, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4652777777777778, 'r': 0.28879310344827586, 'f1': 0.35638297872340424}, 'combined': 0.23758865248226949, 'epoch': 38} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3319094637855142, 'r': 0.33064984532712516, 'f1': 0.33127845720037064}, 'combined': 0.24409991583185203, 'epoch': 38} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3155887519905696, 'r': 0.26823486225457105, 'f1': 0.28999137189314883}, 'combined': 0.1800999046494293, 'epoch': 38} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3092105263157895, 'r': 0.5108695652173914, 'f1': 0.3852459016393443}, 'combined': 0.19262295081967215, 'epoch': 38} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33985958512722525, 'r': 0.3301861624006439, 'f1': 0.3349530463621546}, 'combined': 0.24680750784579814, 'epoch': 38} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32503878183760987, 'r': 0.2728404355917671, 'f1': 0.2966609985277382}, 'combined': 0.18424209382249004, 'epoch': 38} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4652777777777778, 'r': 0.28879310344827586, 'f1': 0.35638297872340424}, 'combined': 0.23758865248226949, 'epoch': 38} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 07:04:42.377254: step: 2/464, loss: 0.012623732909560204 2023-01-24 07:04:43.145918: step: 4/464, loss: 0.06931336969137192 2023-01-24 07:04:43.832803: step: 6/464, loss: 3.5806838241114747e-06 2023-01-24 07:04:44.493287: step: 8/464, loss: 0.00021697515330743045 2023-01-24 07:04:45.214764: step: 10/464, loss: 0.0017598243430256844 2023-01-24 07:04:45.990684: step: 12/464, loss: 0.0003556575393304229 2023-01-24 07:04:46.728550: step: 14/464, loss: 0.01138739287853241 2023-01-24 07:04:47.472482: step: 16/464, loss: 0.006124960258603096 2023-01-24 07:04:48.180795: step: 18/464, loss: 0.033372364938259125 2023-01-24 07:04:48.806180: step: 20/464, loss: 0.01938818395137787 2023-01-24 07:04:49.456088: step: 22/464, loss: 0.0005049011087976396 2023-01-24 07:04:50.153327: step: 24/464, loss: 0.05342579632997513 2023-01-24 07:04:50.855888: step: 26/464, loss: 0.016973961144685745 2023-01-24 07:04:51.624850: step: 28/464, loss: 0.0019795908592641354 2023-01-24 07:04:52.329527: step: 30/464, loss: 9.77626841631718e-05 2023-01-24 07:04:53.059954: step: 32/464, loss: 0.0011293364223092794 2023-01-24 07:04:53.759882: step: 34/464, loss: 0.00017125466547440737 2023-01-24 07:04:54.507260: step: 36/464, loss: 0.024412035942077637 2023-01-24 07:04:55.216818: step: 38/464, loss: 0.27952486276626587 2023-01-24 07:04:56.034447: step: 40/464, loss: 0.027969282120466232 2023-01-24 07:04:56.694909: step: 42/464, loss: 0.0006921574240550399 2023-01-24 07:04:57.353138: step: 44/464, loss: 0.011047055013477802 2023-01-24 07:04:58.038482: step: 46/464, loss: 0.003937239293009043 2023-01-24 07:04:58.768463: step: 48/464, loss: 0.05621099844574928 2023-01-24 07:04:59.436825: step: 50/464, loss: 8.688573143444955e-05 2023-01-24 07:05:00.255144: step: 52/464, loss: 0.0035085054114460945 2023-01-24 07:05:00.962877: step: 54/464, loss: 0.00022192315373104066 2023-01-24 07:05:01.651776: step: 56/464, loss: 0.0003033808898180723 2023-01-24 07:05:02.387092: step: 58/464, loss: 0.044928643852472305 2023-01-24 07:05:03.188452: step: 60/464, loss: 0.044337864965200424 2023-01-24 07:05:03.977521: step: 62/464, loss: 0.0020050599705427885 2023-01-24 07:05:04.804938: step: 64/464, loss: 0.0005321039352566004 2023-01-24 07:05:05.626732: step: 66/464, loss: 0.0008718542521819472 2023-01-24 07:05:06.241395: step: 68/464, loss: 0.00042165510240010917 2023-01-24 07:05:06.937725: step: 70/464, loss: 0.009614691138267517 2023-01-24 07:05:07.761038: step: 72/464, loss: 0.0006315399659797549 2023-01-24 07:05:08.516044: step: 74/464, loss: 0.013663442805409431 2023-01-24 07:05:09.294935: step: 76/464, loss: 8.693081326782703e-05 2023-01-24 07:05:10.000817: step: 78/464, loss: 0.006749124266207218 2023-01-24 07:05:10.712983: step: 80/464, loss: 0.017502861097455025 2023-01-24 07:05:11.440845: step: 82/464, loss: 0.033279284834861755 2023-01-24 07:05:12.178481: step: 84/464, loss: 7.313829701161012e-05 2023-01-24 07:05:12.962482: step: 86/464, loss: 0.02006073109805584 2023-01-24 07:05:13.684056: step: 88/464, loss: 0.0011353358859196305 2023-01-24 07:05:14.538635: step: 90/464, loss: 0.02507760189473629 2023-01-24 07:05:15.285163: step: 92/464, loss: 0.004654605407267809 2023-01-24 07:05:15.973725: step: 94/464, loss: 0.03477931395173073 2023-01-24 07:05:16.708875: step: 96/464, loss: 0.10145996510982513 2023-01-24 07:05:17.545703: step: 98/464, loss: 0.0018320104572921991 2023-01-24 07:05:18.189351: step: 100/464, loss: 6.049231524229981e-05 2023-01-24 07:05:18.909878: step: 102/464, loss: 0.0021832017228007317 2023-01-24 07:05:19.610047: step: 104/464, loss: 0.008522151038050652 2023-01-24 07:05:20.398165: step: 106/464, loss: 0.01118018664419651 2023-01-24 07:05:21.078886: step: 108/464, loss: 0.00034222868271172047 2023-01-24 07:05:21.745511: step: 110/464, loss: 0.0002311748976353556 2023-01-24 07:05:22.514249: step: 112/464, loss: 0.010870211757719517 2023-01-24 07:05:23.311902: step: 114/464, loss: 0.0024785962887108326 2023-01-24 07:05:24.142614: step: 116/464, loss: 0.0037308991886675358 2023-01-24 07:05:24.812523: step: 118/464, loss: 0.001720698899589479 2023-01-24 07:05:25.523099: step: 120/464, loss: 0.002680752892047167 2023-01-24 07:05:26.212712: step: 122/464, loss: 0.0026074047200381756 2023-01-24 07:05:26.901037: step: 124/464, loss: 0.006068503018468618 2023-01-24 07:05:27.665781: step: 126/464, loss: 0.0024157632142305374 2023-01-24 07:05:28.357769: step: 128/464, loss: 0.11783696711063385 2023-01-24 07:05:29.101117: step: 130/464, loss: 0.006381072103977203 2023-01-24 07:05:29.854431: step: 132/464, loss: 0.004709702450782061 2023-01-24 07:05:30.644403: step: 134/464, loss: 0.006073821801692247 2023-01-24 07:05:31.414119: step: 136/464, loss: 0.016582611948251724 2023-01-24 07:05:32.209942: step: 138/464, loss: 0.000737413065508008 2023-01-24 07:05:32.935841: step: 140/464, loss: 0.017299091443419456 2023-01-24 07:05:33.673636: step: 142/464, loss: 0.0006641384679824114 2023-01-24 07:05:34.363327: step: 144/464, loss: 0.0037697700317949057 2023-01-24 07:05:35.081176: step: 146/464, loss: 0.0001279134739888832 2023-01-24 07:05:35.843966: step: 148/464, loss: 0.025734873488545418 2023-01-24 07:05:36.590634: step: 150/464, loss: 0.035919588059186935 2023-01-24 07:05:37.253645: step: 152/464, loss: 0.01290472038090229 2023-01-24 07:05:37.975440: step: 154/464, loss: 0.030983198434114456 2023-01-24 07:05:38.693365: step: 156/464, loss: 6.772899359930307e-05 2023-01-24 07:05:39.392846: step: 158/464, loss: 0.0029904814437031746 2023-01-24 07:05:40.137864: step: 160/464, loss: 7.27025544620119e-05 2023-01-24 07:05:40.851415: step: 162/464, loss: 0.0003170033742208034 2023-01-24 07:05:41.557367: step: 164/464, loss: 0.006331458222121 2023-01-24 07:05:42.350399: step: 166/464, loss: 0.0005581967998296022 2023-01-24 07:05:43.063800: step: 168/464, loss: 0.013321910053491592 2023-01-24 07:05:43.756378: step: 170/464, loss: 0.008421930484473705 2023-01-24 07:05:44.504587: step: 172/464, loss: 0.002395325107499957 2023-01-24 07:05:45.212724: step: 174/464, loss: 0.004580559674650431 2023-01-24 07:05:46.037996: step: 176/464, loss: 0.002185387536883354 2023-01-24 07:05:46.781783: step: 178/464, loss: 0.004770377185195684 2023-01-24 07:05:47.499743: step: 180/464, loss: 0.0006089820526540279 2023-01-24 07:05:48.215780: step: 182/464, loss: 0.0010552399326115847 2023-01-24 07:05:48.983063: step: 184/464, loss: 0.0032529293093830347 2023-01-24 07:05:49.692035: step: 186/464, loss: 0.0037063981872051954 2023-01-24 07:05:50.352366: step: 188/464, loss: 0.001429045107215643 2023-01-24 07:05:51.160139: step: 190/464, loss: 0.053881268948316574 2023-01-24 07:05:51.834607: step: 192/464, loss: 0.0669993981719017 2023-01-24 07:05:52.590352: step: 194/464, loss: 0.0028011894319206476 2023-01-24 07:05:53.270178: step: 196/464, loss: 0.003625689772889018 2023-01-24 07:05:54.008094: step: 198/464, loss: 0.0032770622055977583 2023-01-24 07:05:54.795206: step: 200/464, loss: 0.004626928828656673 2023-01-24 07:05:55.579692: step: 202/464, loss: 0.0008296154555864632 2023-01-24 07:05:56.184489: step: 204/464, loss: 0.061222951859235764 2023-01-24 07:05:56.995190: step: 206/464, loss: 4.607578375726007e-05 2023-01-24 07:05:57.769922: step: 208/464, loss: 0.03948868438601494 2023-01-24 07:05:58.512654: step: 210/464, loss: 0.3414325416088104 2023-01-24 07:05:59.358603: step: 212/464, loss: 0.0017582981381565332 2023-01-24 07:06:00.174060: step: 214/464, loss: 0.0009569655521772802 2023-01-24 07:06:00.943710: step: 216/464, loss: 0.017099468037486076 2023-01-24 07:06:01.692985: step: 218/464, loss: 0.19633235037326813 2023-01-24 07:06:02.358034: step: 220/464, loss: 0.0007412639679387212 2023-01-24 07:06:03.046826: step: 222/464, loss: 0.00041419549961574376 2023-01-24 07:06:03.837125: step: 224/464, loss: 0.00018719013314694166 2023-01-24 07:06:04.518621: step: 226/464, loss: 0.0016826813807711005 2023-01-24 07:06:05.313124: step: 228/464, loss: 0.0009907839121297002 2023-01-24 07:06:06.059641: step: 230/464, loss: 0.00036048784386366606 2023-01-24 07:06:06.746372: step: 232/464, loss: 0.0005840350640937686 2023-01-24 07:06:07.496845: step: 234/464, loss: 0.04000601917505264 2023-01-24 07:06:08.162568: step: 236/464, loss: 0.000522164162248373 2023-01-24 07:06:08.910994: step: 238/464, loss: 0.0007575997151434422 2023-01-24 07:06:09.640992: step: 240/464, loss: 0.029385874047875404 2023-01-24 07:06:10.330686: step: 242/464, loss: 0.0005397187196649611 2023-01-24 07:06:11.024496: step: 244/464, loss: 7.480083149857819e-05 2023-01-24 07:06:11.729997: step: 246/464, loss: 0.001386028598062694 2023-01-24 07:06:12.523044: step: 248/464, loss: 0.0008085906156338751 2023-01-24 07:06:13.220245: step: 250/464, loss: 0.0006489267107099295 2023-01-24 07:06:14.026011: step: 252/464, loss: 0.002511844737455249 2023-01-24 07:06:14.718027: step: 254/464, loss: 0.0003009107313118875 2023-01-24 07:06:15.429578: step: 256/464, loss: 9.194859012495726e-05 2023-01-24 07:06:16.234719: step: 258/464, loss: 0.0005083756987005472 2023-01-24 07:06:16.994030: step: 260/464, loss: 0.005527664441615343 2023-01-24 07:06:17.730225: step: 262/464, loss: 0.005268595647066832 2023-01-24 07:06:18.416886: step: 264/464, loss: 0.0018064638134092093 2023-01-24 07:06:19.163241: step: 266/464, loss: 0.00646596634760499 2023-01-24 07:06:19.864529: step: 268/464, loss: 0.0031213362235575914 2023-01-24 07:06:20.621428: step: 270/464, loss: 0.000955466297455132 2023-01-24 07:06:21.290581: step: 272/464, loss: 0.026391014456748962 2023-01-24 07:06:21.975367: step: 274/464, loss: 0.010174794122576714 2023-01-24 07:06:22.679444: step: 276/464, loss: 6.715762719977647e-05 2023-01-24 07:06:23.409739: step: 278/464, loss: 0.023771759122610092 2023-01-24 07:06:24.070921: step: 280/464, loss: 0.0007089504506438971 2023-01-24 07:06:24.819763: step: 282/464, loss: 0.009939974173903465 2023-01-24 07:06:25.531137: step: 284/464, loss: 3.812194790953072e-07 2023-01-24 07:06:26.195007: step: 286/464, loss: 0.013614516705274582 2023-01-24 07:06:26.959775: step: 288/464, loss: 0.012401481159031391 2023-01-24 07:06:27.819868: step: 290/464, loss: 0.002258396940305829 2023-01-24 07:06:28.592554: step: 292/464, loss: 0.007057834882289171 2023-01-24 07:06:29.251716: step: 294/464, loss: 0.0009403349831700325 2023-01-24 07:06:29.909662: step: 296/464, loss: 0.004534940700978041 2023-01-24 07:06:30.655147: step: 298/464, loss: 0.008743273094296455 2023-01-24 07:06:31.443579: step: 300/464, loss: 0.018859921023249626 2023-01-24 07:06:32.154838: step: 302/464, loss: 0.0003946318756788969 2023-01-24 07:06:32.905176: step: 304/464, loss: 0.006023469381034374 2023-01-24 07:06:33.592095: step: 306/464, loss: 0.006886173039674759 2023-01-24 07:06:34.280467: step: 308/464, loss: 0.00015002640429884195 2023-01-24 07:06:35.011296: step: 310/464, loss: 0.0035786149092018604 2023-01-24 07:06:35.776171: step: 312/464, loss: 0.0021690481808036566 2023-01-24 07:06:36.426106: step: 314/464, loss: 0.0029878343921154737 2023-01-24 07:06:37.160425: step: 316/464, loss: 1.694048523902893 2023-01-24 07:06:37.816233: step: 318/464, loss: 0.003704532515257597 2023-01-24 07:06:38.533339: step: 320/464, loss: 0.004257072228938341 2023-01-24 07:06:39.269969: step: 322/464, loss: 0.0011549916816875339 2023-01-24 07:06:39.886850: step: 324/464, loss: 0.0013482326176017523 2023-01-24 07:06:40.622958: step: 326/464, loss: 0.01761588826775551 2023-01-24 07:06:41.346180: step: 328/464, loss: 0.018198247998952866 2023-01-24 07:06:42.071967: step: 330/464, loss: 0.0001296098344027996 2023-01-24 07:06:42.977170: step: 332/464, loss: 0.009164217859506607 2023-01-24 07:06:43.772717: step: 334/464, loss: 0.026571668684482574 2023-01-24 07:06:44.451710: step: 336/464, loss: 0.1765873283147812 2023-01-24 07:06:45.250605: step: 338/464, loss: 0.012295668944716454 2023-01-24 07:06:45.872632: step: 340/464, loss: 0.0002983546582981944 2023-01-24 07:06:46.715427: step: 342/464, loss: 0.005674338433891535 2023-01-24 07:06:47.419407: step: 344/464, loss: 0.6395951509475708 2023-01-24 07:06:48.107989: step: 346/464, loss: 0.0010086627444252372 2023-01-24 07:06:48.832071: step: 348/464, loss: 0.0005183350294828415 2023-01-24 07:06:49.531594: step: 350/464, loss: 0.00011906491272384301 2023-01-24 07:06:50.290225: step: 352/464, loss: 7.418800669256598e-05 2023-01-24 07:06:51.086661: step: 354/464, loss: 0.02757696434855461 2023-01-24 07:06:51.819237: step: 356/464, loss: 0.00876846443861723 2023-01-24 07:06:52.522024: step: 358/464, loss: 0.004197806119918823 2023-01-24 07:06:53.315454: step: 360/464, loss: 0.04080234467983246 2023-01-24 07:06:54.075987: step: 362/464, loss: 0.015933020040392876 2023-01-24 07:06:54.860026: step: 364/464, loss: 0.05404546484351158 2023-01-24 07:06:55.564048: step: 366/464, loss: 0.01660231687128544 2023-01-24 07:06:56.312290: step: 368/464, loss: 0.006422176957130432 2023-01-24 07:06:57.157635: step: 370/464, loss: 0.03905533626675606 2023-01-24 07:06:57.874394: step: 372/464, loss: 0.0025848206132650375 2023-01-24 07:06:58.604823: step: 374/464, loss: 0.01470188982784748 2023-01-24 07:06:59.460491: step: 376/464, loss: 0.014380333013832569 2023-01-24 07:07:00.342443: step: 378/464, loss: 0.03706406056880951 2023-01-24 07:07:00.994492: step: 380/464, loss: 0.0020173077937215567 2023-01-24 07:07:01.690220: step: 382/464, loss: 0.0005429274751804769 2023-01-24 07:07:02.453224: step: 384/464, loss: 0.0015572941629216075 2023-01-24 07:07:03.199036: step: 386/464, loss: 0.020318368449807167 2023-01-24 07:07:03.934520: step: 388/464, loss: 0.006858357228338718 2023-01-24 07:07:04.596405: step: 390/464, loss: 0.004659565631300211 2023-01-24 07:07:05.322744: step: 392/464, loss: 0.004899164661765099 2023-01-24 07:07:06.080170: step: 394/464, loss: 0.0036024400033056736 2023-01-24 07:07:06.881893: step: 396/464, loss: 0.014771565794944763 2023-01-24 07:07:07.561592: step: 398/464, loss: 0.0004116464115213603 2023-01-24 07:07:08.346514: step: 400/464, loss: 0.004268472082912922 2023-01-24 07:07:08.995891: step: 402/464, loss: 0.013734730891883373 2023-01-24 07:07:09.702805: step: 404/464, loss: 0.0004078770871274173 2023-01-24 07:07:10.375161: step: 406/464, loss: 0.027729980647563934 2023-01-24 07:07:11.096949: step: 408/464, loss: 0.003318031784147024 2023-01-24 07:07:11.907490: step: 410/464, loss: 0.0029189365450292826 2023-01-24 07:07:12.688250: step: 412/464, loss: 0.00035886603291146457 2023-01-24 07:07:13.436063: step: 414/464, loss: 0.0016576717607676983 2023-01-24 07:07:14.162120: step: 416/464, loss: 0.014981823973357677 2023-01-24 07:07:14.965250: step: 418/464, loss: 0.003958564717322588 2023-01-24 07:07:15.711046: step: 420/464, loss: 0.007210117299109697 2023-01-24 07:07:16.478363: step: 422/464, loss: 0.0018005042802542448 2023-01-24 07:07:17.115705: step: 424/464, loss: 0.0001036086687236093 2023-01-24 07:07:17.758098: step: 426/464, loss: 0.0024507339112460613 2023-01-24 07:07:18.593445: step: 428/464, loss: 0.038161493837833405 2023-01-24 07:07:19.308836: step: 430/464, loss: 0.00398477166891098 2023-01-24 07:07:20.066232: step: 432/464, loss: 2.654248964972794e-05 2023-01-24 07:07:20.877644: step: 434/464, loss: 0.022698312997817993 2023-01-24 07:07:21.595168: step: 436/464, loss: 0.00023635398247279227 2023-01-24 07:07:22.302909: step: 438/464, loss: 0.007394007872790098 2023-01-24 07:07:23.027880: step: 440/464, loss: 0.007277240045368671 2023-01-24 07:07:23.778276: step: 442/464, loss: 0.01735176146030426 2023-01-24 07:07:24.582280: step: 444/464, loss: 0.003354936372488737 2023-01-24 07:07:25.363808: step: 446/464, loss: 0.00018706907576415688 2023-01-24 07:07:26.122592: step: 448/464, loss: 0.016221504658460617 2023-01-24 07:07:26.822495: step: 450/464, loss: 0.00032600644044578075 2023-01-24 07:07:27.517652: step: 452/464, loss: 0.6398082971572876 2023-01-24 07:07:28.290204: step: 454/464, loss: 0.00451373215764761 2023-01-24 07:07:28.989925: step: 456/464, loss: 0.0015716877533122897 2023-01-24 07:07:29.790941: step: 458/464, loss: 0.0003451913653407246 2023-01-24 07:07:30.466522: step: 460/464, loss: 0.00024207618844229728 2023-01-24 07:07:31.247730: step: 462/464, loss: 0.045718129724264145 2023-01-24 07:07:32.037429: step: 464/464, loss: 0.00918582733720541 2023-01-24 07:07:32.722954: step: 466/464, loss: 0.006854454055428505 2023-01-24 07:07:33.443826: step: 468/464, loss: 0.013512649573385715 2023-01-24 07:07:34.156071: step: 470/464, loss: 0.003024796023964882 2023-01-24 07:07:34.895119: step: 472/464, loss: 0.01615089550614357 2023-01-24 07:07:35.619254: step: 474/464, loss: 0.008264783769845963 2023-01-24 07:07:36.346219: step: 476/464, loss: 0.023860646411776543 2023-01-24 07:07:37.066870: step: 478/464, loss: 0.0016786216292530298 2023-01-24 07:07:37.814412: step: 480/464, loss: 0.05656686797738075 2023-01-24 07:07:38.533858: step: 482/464, loss: 0.008730104193091393 2023-01-24 07:07:39.255955: step: 484/464, loss: 0.0288022942841053 2023-01-24 07:07:39.936919: step: 486/464, loss: 0.04661581665277481 2023-01-24 07:07:40.711270: step: 488/464, loss: 0.004736119415611029 2023-01-24 07:07:41.449725: step: 490/464, loss: 0.002068700036033988 2023-01-24 07:07:42.221026: step: 492/464, loss: 0.004908754024654627 2023-01-24 07:07:42.945030: step: 494/464, loss: 0.004340517334640026 2023-01-24 07:07:43.630270: step: 496/464, loss: 0.0011247927322983742 2023-01-24 07:07:44.336694: step: 498/464, loss: 0.0042472220957279205 2023-01-24 07:07:45.045117: step: 500/464, loss: 0.0008127755718305707 2023-01-24 07:07:45.755379: step: 502/464, loss: 0.010160490870475769 2023-01-24 07:07:46.425786: step: 504/464, loss: 0.010667679831385612 2023-01-24 07:07:47.141515: step: 506/464, loss: 0.0014209101209416986 2023-01-24 07:07:47.898879: step: 508/464, loss: 0.0068949805572628975 2023-01-24 07:07:48.709119: step: 510/464, loss: 0.01052104588598013 2023-01-24 07:07:49.446865: step: 512/464, loss: 0.011864281259477139 2023-01-24 07:07:50.211998: step: 514/464, loss: 0.007104154676198959 2023-01-24 07:07:50.933169: step: 516/464, loss: 0.0002336573088541627 2023-01-24 07:07:51.736686: step: 518/464, loss: 0.02338859997689724 2023-01-24 07:07:52.541727: step: 520/464, loss: 0.06585898250341415 2023-01-24 07:07:53.249577: step: 522/464, loss: 0.0012727356515824795 2023-01-24 07:07:54.002460: step: 524/464, loss: 0.001677141641266644 2023-01-24 07:07:54.772715: step: 526/464, loss: 0.007514504715800285 2023-01-24 07:07:55.514873: step: 528/464, loss: 0.007119217421859503 2023-01-24 07:07:56.272818: step: 530/464, loss: 0.002552604768425226 2023-01-24 07:07:56.971274: step: 532/464, loss: 0.0002722721255850047 2023-01-24 07:07:57.690950: step: 534/464, loss: 0.0024043868761509657 2023-01-24 07:07:58.390478: step: 536/464, loss: 0.0030759493820369244 2023-01-24 07:07:59.110153: step: 538/464, loss: 0.011626550927758217 2023-01-24 07:07:59.979191: step: 540/464, loss: 0.026739951223134995 2023-01-24 07:08:00.661927: step: 542/464, loss: 0.0007168625597842038 2023-01-24 07:08:01.457311: step: 544/464, loss: 0.005891432985663414 2023-01-24 07:08:02.210804: step: 546/464, loss: 0.0010922928340733051 2023-01-24 07:08:02.936977: step: 548/464, loss: 0.013535697013139725 2023-01-24 07:08:03.713391: step: 550/464, loss: 0.0210479274392128 2023-01-24 07:08:04.468428: step: 552/464, loss: 0.009432249702513218 2023-01-24 07:08:05.272714: step: 554/464, loss: 0.001195187564007938 2023-01-24 07:08:06.073403: step: 556/464, loss: 0.007210468873381615 2023-01-24 07:08:06.881047: step: 558/464, loss: 0.014359408989548683 2023-01-24 07:08:07.557138: step: 560/464, loss: 0.00511901406571269 2023-01-24 07:08:08.351093: step: 562/464, loss: 0.020551275461912155 2023-01-24 07:08:09.067532: step: 564/464, loss: 9.198107363772579e-06 2023-01-24 07:08:09.906865: step: 566/464, loss: 0.05644404888153076 2023-01-24 07:08:10.625966: step: 568/464, loss: 0.016529276967048645 2023-01-24 07:08:11.403924: step: 570/464, loss: 0.7873603701591492 2023-01-24 07:08:12.144581: step: 572/464, loss: 0.785995602607727 2023-01-24 07:08:12.848304: step: 574/464, loss: 0.03733735904097557 2023-01-24 07:08:13.524121: step: 576/464, loss: 4.771085878019221e-05 2023-01-24 07:08:14.250259: step: 578/464, loss: 0.00013325363397598267 2023-01-24 07:08:14.968469: step: 580/464, loss: 1.1196244955062866 2023-01-24 07:08:15.739334: step: 582/464, loss: 0.001423005131073296 2023-01-24 07:08:16.421989: step: 584/464, loss: 0.004689953289926052 2023-01-24 07:08:17.138920: step: 586/464, loss: 0.004397342447191477 2023-01-24 07:08:17.970609: step: 588/464, loss: 0.046125661581754684 2023-01-24 07:08:18.679012: step: 590/464, loss: 0.026454763486981392 2023-01-24 07:08:19.489768: step: 592/464, loss: 0.006255241576582193 2023-01-24 07:08:20.258722: step: 594/464, loss: 0.037292227149009705 2023-01-24 07:08:20.971760: step: 596/464, loss: 0.0010081107029691339 2023-01-24 07:08:21.653590: step: 598/464, loss: 0.001386135583743453 2023-01-24 07:08:22.375597: step: 600/464, loss: 0.002910367678850889 2023-01-24 07:08:23.048530: step: 602/464, loss: 0.0006286511197686195 2023-01-24 07:08:23.846299: step: 604/464, loss: 0.004660574719309807 2023-01-24 07:08:24.535440: step: 606/464, loss: 0.00016427884111180902 2023-01-24 07:08:25.278932: step: 608/464, loss: 0.010264435783028603 2023-01-24 07:08:26.038186: step: 610/464, loss: 0.008379405364394188 2023-01-24 07:08:26.799536: step: 612/464, loss: 8.429507943219505e-06 2023-01-24 07:08:27.505764: step: 614/464, loss: 0.011425070464611053 2023-01-24 07:08:28.183086: step: 616/464, loss: 0.007392220664769411 2023-01-24 07:08:28.918457: step: 618/464, loss: 0.0074181947857141495 2023-01-24 07:08:29.622520: step: 620/464, loss: 0.03724908456206322 2023-01-24 07:08:30.319760: step: 622/464, loss: 0.020692970603704453 2023-01-24 07:08:30.997769: step: 624/464, loss: 0.0033561927266418934 2023-01-24 07:08:31.767987: step: 626/464, loss: 0.00014869822189211845 2023-01-24 07:08:32.530979: step: 628/464, loss: 0.015324524603784084 2023-01-24 07:08:33.231942: step: 630/464, loss: 0.012809859588742256 2023-01-24 07:08:34.010940: step: 632/464, loss: 0.07623506337404251 2023-01-24 07:08:34.699423: step: 634/464, loss: 0.0023513073101639748 2023-01-24 07:08:35.507247: step: 636/464, loss: 0.003813466290012002 2023-01-24 07:08:36.196698: step: 638/464, loss: 0.00017119161202572286 2023-01-24 07:08:36.845803: step: 640/464, loss: 0.00017306060180999339 2023-01-24 07:08:37.547792: step: 642/464, loss: 0.004058307968080044 2023-01-24 07:08:38.358077: step: 644/464, loss: 0.00024587870575487614 2023-01-24 07:08:39.004076: step: 646/464, loss: 0.02357977256178856 2023-01-24 07:08:39.724635: step: 648/464, loss: 0.0015318029327318072 2023-01-24 07:08:40.448570: step: 650/464, loss: 0.01352463848888874 2023-01-24 07:08:41.184939: step: 652/464, loss: 0.0049011362716555595 2023-01-24 07:08:41.910732: step: 654/464, loss: 0.021986398845911026 2023-01-24 07:08:42.641996: step: 656/464, loss: 0.0012051882222294807 2023-01-24 07:08:43.394866: step: 658/464, loss: 0.016538813710212708 2023-01-24 07:08:44.108786: step: 660/464, loss: 0.0037078920286148787 2023-01-24 07:08:44.825960: step: 662/464, loss: 0.0004478042246773839 2023-01-24 07:08:45.573529: step: 664/464, loss: 0.0003174376906827092 2023-01-24 07:08:46.282927: step: 666/464, loss: 0.3379771113395691 2023-01-24 07:08:47.015930: step: 668/464, loss: 0.009541943669319153 2023-01-24 07:08:47.696023: step: 670/464, loss: 0.0030752429738640785 2023-01-24 07:08:48.386879: step: 672/464, loss: 0.05253473296761513 2023-01-24 07:08:49.145262: step: 674/464, loss: 0.36408454179763794 2023-01-24 07:08:49.901556: step: 676/464, loss: 0.0048395427875220776 2023-01-24 07:08:50.603218: step: 678/464, loss: 0.00031773888622410595 2023-01-24 07:08:51.217102: step: 680/464, loss: 0.01639465056359768 2023-01-24 07:08:51.873720: step: 682/464, loss: 0.00026009962311945856 2023-01-24 07:08:52.559899: step: 684/464, loss: 0.13537754118442535 2023-01-24 07:08:53.253876: step: 686/464, loss: 0.007647455669939518 2023-01-24 07:08:53.911716: step: 688/464, loss: 0.013909382745623589 2023-01-24 07:08:54.685263: step: 690/464, loss: 0.018954748287796974 2023-01-24 07:08:55.496834: step: 692/464, loss: 0.0004389037494547665 2023-01-24 07:08:56.306380: step: 694/464, loss: 0.038470931351184845 2023-01-24 07:08:56.993378: step: 696/464, loss: 0.0010190318571403623 2023-01-24 07:08:57.691854: step: 698/464, loss: 0.006811690982431173 2023-01-24 07:08:58.493386: step: 700/464, loss: 0.013463972136378288 2023-01-24 07:08:59.222598: step: 702/464, loss: 0.017721960321068764 2023-01-24 07:08:59.946902: step: 704/464, loss: 0.0021989597007632256 2023-01-24 07:09:00.675795: step: 706/464, loss: 0.00756972236558795 2023-01-24 07:09:01.418480: step: 708/464, loss: 0.15557102859020233 2023-01-24 07:09:02.157027: step: 710/464, loss: 0.013237417675554752 2023-01-24 07:09:02.845977: step: 712/464, loss: 0.0038034068420529366 2023-01-24 07:09:03.610122: step: 714/464, loss: 0.011569908820092678 2023-01-24 07:09:04.332486: step: 716/464, loss: 0.0009132428094744682 2023-01-24 07:09:04.969116: step: 718/464, loss: 0.0012660945067182183 2023-01-24 07:09:05.696819: step: 720/464, loss: 0.001009093364700675 2023-01-24 07:09:06.430495: step: 722/464, loss: 0.3648377060890198 2023-01-24 07:09:07.212979: step: 724/464, loss: 0.00179006636608392 2023-01-24 07:09:07.987123: step: 726/464, loss: 0.004186419770121574 2023-01-24 07:09:08.790662: step: 728/464, loss: 0.0023058911319822073 2023-01-24 07:09:09.527695: step: 730/464, loss: 0.001836820738390088 2023-01-24 07:09:10.169689: step: 732/464, loss: 0.0008673551492393017 2023-01-24 07:09:10.922590: step: 734/464, loss: 0.025690661743283272 2023-01-24 07:09:11.815025: step: 736/464, loss: 0.0021721988450735807 2023-01-24 07:09:12.628605: step: 738/464, loss: 0.003007357008755207 2023-01-24 07:09:13.438096: step: 740/464, loss: 0.005906921811401844 2023-01-24 07:09:14.079756: step: 742/464, loss: 0.002932978793978691 2023-01-24 07:09:14.773724: step: 744/464, loss: 3.0419625545619056e-05 2023-01-24 07:09:15.499196: step: 746/464, loss: 0.0017899353988468647 2023-01-24 07:09:16.272117: step: 748/464, loss: 1.9426894141361117e-05 2023-01-24 07:09:16.990981: step: 750/464, loss: 0.03254895657300949 2023-01-24 07:09:17.732769: step: 752/464, loss: 0.014089247211813927 2023-01-24 07:09:18.471847: step: 754/464, loss: 7.201086555141956e-05 2023-01-24 07:09:19.259450: step: 756/464, loss: 0.007483582943677902 2023-01-24 07:09:19.907347: step: 758/464, loss: 0.002280889078974724 2023-01-24 07:09:20.681008: step: 760/464, loss: 0.041584137827157974 2023-01-24 07:09:21.385701: step: 762/464, loss: 0.02409733645617962 2023-01-24 07:09:22.079281: step: 764/464, loss: 0.03089963272213936 2023-01-24 07:09:22.843702: step: 766/464, loss: 0.003421128960326314 2023-01-24 07:09:23.656985: step: 768/464, loss: 0.011093460954725742 2023-01-24 07:09:24.367065: step: 770/464, loss: 0.010788795538246632 2023-01-24 07:09:25.152713: step: 772/464, loss: 0.00963507778942585 2023-01-24 07:09:25.876433: step: 774/464, loss: 0.0002834459883160889 2023-01-24 07:09:26.580610: step: 776/464, loss: 0.005152272526174784 2023-01-24 07:09:27.338433: step: 778/464, loss: 0.0010087155969813466 2023-01-24 07:09:28.027793: step: 780/464, loss: 0.0005180512671358883 2023-01-24 07:09:28.758644: step: 782/464, loss: 0.018876563757658005 2023-01-24 07:09:29.399303: step: 784/464, loss: 0.00018986199575010687 2023-01-24 07:09:30.093193: step: 786/464, loss: 0.0006348424940370023 2023-01-24 07:09:30.780408: step: 788/464, loss: 0.008300436660647392 2023-01-24 07:09:31.539512: step: 790/464, loss: 0.0004914195160381496 2023-01-24 07:09:32.261767: step: 792/464, loss: 0.03281170502305031 2023-01-24 07:09:32.992569: step: 794/464, loss: 0.030931316316127777 2023-01-24 07:09:33.693980: step: 796/464, loss: 0.009102153591811657 2023-01-24 07:09:34.439274: step: 798/464, loss: 0.0110086128115654 2023-01-24 07:09:35.087693: step: 800/464, loss: 0.022512733936309814 2023-01-24 07:09:35.905291: step: 802/464, loss: 0.00442664697766304 2023-01-24 07:09:36.662333: step: 804/464, loss: 0.011539860628545284 2023-01-24 07:09:37.330662: step: 806/464, loss: 0.0057589225471019745 2023-01-24 07:09:38.081880: step: 808/464, loss: 0.001552989473566413 2023-01-24 07:09:38.838137: step: 810/464, loss: 0.0032748279627412558 2023-01-24 07:09:39.593627: step: 812/464, loss: 0.018966708332300186 2023-01-24 07:09:40.273828: step: 814/464, loss: 0.0037927976809442043 2023-01-24 07:09:40.998045: step: 816/464, loss: 0.00040608947165310383 2023-01-24 07:09:41.648984: step: 818/464, loss: 0.0003042019088752568 2023-01-24 07:09:42.409333: step: 820/464, loss: 0.008616427890956402 2023-01-24 07:09:43.079594: step: 822/464, loss: 0.0003781984851229936 2023-01-24 07:09:43.818813: step: 824/464, loss: 0.03239799290895462 2023-01-24 07:09:44.494464: step: 826/464, loss: 0.004440160468220711 2023-01-24 07:09:45.171813: step: 828/464, loss: 0.00048742041690275073 2023-01-24 07:09:45.850049: step: 830/464, loss: 0.07057930529117584 2023-01-24 07:09:46.527313: step: 832/464, loss: 0.004274421371519566 2023-01-24 07:09:47.250542: step: 834/464, loss: 0.0001647328754188493 2023-01-24 07:09:47.871703: step: 836/464, loss: 0.00017869319708552212 2023-01-24 07:09:48.574414: step: 838/464, loss: 0.008176120929419994 2023-01-24 07:09:49.325967: step: 840/464, loss: 0.018365247175097466 2023-01-24 07:09:50.048790: step: 842/464, loss: 0.017919452860951424 2023-01-24 07:09:50.871573: step: 844/464, loss: 0.012984522618353367 2023-01-24 07:09:51.653327: step: 846/464, loss: 0.005812663119286299 2023-01-24 07:09:52.359489: step: 848/464, loss: 0.008565914817154408 2023-01-24 07:09:53.126530: step: 850/464, loss: 1.5529409211012535e-05 2023-01-24 07:09:53.973036: step: 852/464, loss: 0.015416462905704975 2023-01-24 07:09:54.730260: step: 854/464, loss: 0.0015312719624489546 2023-01-24 07:09:55.506019: step: 856/464, loss: 0.005828104447573423 2023-01-24 07:09:56.186730: step: 858/464, loss: 0.0007630424806848168 2023-01-24 07:09:56.878152: step: 860/464, loss: 1.020262360572815 2023-01-24 07:09:57.694870: step: 862/464, loss: 1.69687900779536e-05 2023-01-24 07:09:58.471556: step: 864/464, loss: 0.0002915971272159368 2023-01-24 07:09:59.233781: step: 866/464, loss: 0.0002090566122205928 2023-01-24 07:09:59.996719: step: 868/464, loss: 0.0440477691590786 2023-01-24 07:10:00.675693: step: 870/464, loss: 0.004202152136713266 2023-01-24 07:10:01.424923: step: 872/464, loss: 0.013471094891428947 2023-01-24 07:10:02.192720: step: 874/464, loss: 0.0003817703400272876 2023-01-24 07:10:02.874684: step: 876/464, loss: 0.024504436179995537 2023-01-24 07:10:03.598502: step: 878/464, loss: 0.006114604417234659 2023-01-24 07:10:04.388408: step: 880/464, loss: 0.0008290052646771073 2023-01-24 07:10:05.107674: step: 882/464, loss: 0.02669224515557289 2023-01-24 07:10:05.872972: step: 884/464, loss: 0.00015607234672643244 2023-01-24 07:10:06.590143: step: 886/464, loss: 11.929213523864746 2023-01-24 07:10:07.291172: step: 888/464, loss: 0.00027429041801951826 2023-01-24 07:10:07.985139: step: 890/464, loss: 0.0008549271733500063 2023-01-24 07:10:08.692503: step: 892/464, loss: 1.0526523510634433e-05 2023-01-24 07:10:09.451320: step: 894/464, loss: 0.056621868163347244 2023-01-24 07:10:10.274686: step: 896/464, loss: 0.001759719685651362 2023-01-24 07:10:10.991062: step: 898/464, loss: 0.006513232830911875 2023-01-24 07:10:11.782093: step: 900/464, loss: 0.006867598742246628 2023-01-24 07:10:12.635640: step: 902/464, loss: 0.10892080515623093 2023-01-24 07:10:13.365538: step: 904/464, loss: 0.016597241163253784 2023-01-24 07:10:14.120128: step: 906/464, loss: 0.030734620988368988 2023-01-24 07:10:14.881320: step: 908/464, loss: 1.996102582779713e-05 2023-01-24 07:10:15.610835: step: 910/464, loss: 0.2448931187391281 2023-01-24 07:10:16.317889: step: 912/464, loss: 0.0026010761503130198 2023-01-24 07:10:17.082953: step: 914/464, loss: 0.0009186618844978511 2023-01-24 07:10:17.796210: step: 916/464, loss: 0.10815151035785675 2023-01-24 07:10:18.496827: step: 918/464, loss: 0.0037876542191952467 2023-01-24 07:10:19.277314: step: 920/464, loss: 0.018428770825266838 2023-01-24 07:10:20.039124: step: 922/464, loss: 0.020010873675346375 2023-01-24 07:10:20.831297: step: 924/464, loss: 0.1322910338640213 2023-01-24 07:10:21.502360: step: 926/464, loss: 0.009188034571707249 2023-01-24 07:10:22.269533: step: 928/464, loss: 0.011618698947131634 2023-01-24 07:10:22.910981: step: 930/464, loss: 0.0005447377334348857 ================================================== Loss: 0.056 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3685229939465053, 'r': 0.34055160920673255, 'f1': 0.3539855977355978}, 'combined': 0.2608314930683352, 'epoch': 39} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134297926888097, 'r': 0.2605211110009652, 'f1': 0.2845368037131835}, 'combined': 0.17671233072713505, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3521508090486749, 'r': 0.32876318415929423, 'f1': 0.3400553445573073}, 'combined': 0.2505670959895948, 'epoch': 39} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31381138248635676, 'r': 0.26269699145945763, 'f1': 0.285988234657099}, 'combined': 0.17761374573440886, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3553045537354681, 'r': 0.32833646616541357, 'f1': 0.34128859500823067}, 'combined': 0.25147580684816995, 'epoch': 39} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32168230440711393, 'r': 0.2667454449180412, 'f1': 0.29164936395248325}, 'combined': 0.18112960498101593, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39955357142857145, 'r': 0.3196428571428571, 'f1': 0.3551587301587302}, 'combined': 0.23677248677248677, 'epoch': 39} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3416666666666667, 'r': 0.44565217391304346, 'f1': 0.3867924528301887}, 'combined': 0.19339622641509435, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6047570850202428, 'r': 0.27109800362976405, 'f1': 0.3743734335839598}, 'combined': 0.24958228905597318, 'epoch': 39} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33120143673735564, 'r': 0.3500554084681349, 'f1': 0.3403675281599762}, 'combined': 0.250797126012614, 'epoch': 13} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.3134253354441965, 'r': 0.2678981375091205, 'f1': 0.2888789719331166}, 'combined': 0.17940904572688296, 'epoch': 13} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3715277777777778, 'r': 0.3821428571428571, 'f1': 0.37676056338028174}, 'combined': 0.2511737089201878, 'epoch': 13} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3521508090486749, 'r': 0.32876318415929423, 'f1': 0.3400553445573073}, 'combined': 0.2505670959895948, 'epoch': 39} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.31381138248635676, 'r': 0.26269699145945763, 'f1': 0.285988234657099}, 'combined': 0.17761374573440886, 'epoch': 39} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3416666666666667, 'r': 0.44565217391304346, 'f1': 0.3867924528301887}, 'combined': 0.19339622641509435, 'epoch': 39} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3553045537354681, 'r': 0.32833646616541357, 'f1': 0.34128859500823067}, 'combined': 0.25147580684816995, 'epoch': 39} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.32168230440711393, 'r': 0.2667454449180412, 'f1': 0.29164936395248325}, 'combined': 0.18112960498101593, 'epoch': 39} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6047570850202428, 'r': 0.27109800362976405, 'f1': 0.3743734335839598}, 'combined': 0.24958228905597318, 'epoch': 39}